[FFmpeg-devel] [PATCH] lavc: support subtitles charset conversion.

Clément Bœsch ubitux at gmail.com
Mon Dec 31 10:22:34 CET 2012


---
 Changelog                  |  1 +
 configure                  |  2 ++
 libavcodec/avcodec.h       |  7 +++++
 libavcodec/options_table.h |  1 +
 libavcodec/utils.c         | 65 +++++++++++++++++++++++++++++++++++++++++++++-
 5 files changed, 75 insertions(+), 1 deletion(-)

diff --git a/Changelog b/Changelog
index b0a0de5..f2bc343 100644
--- a/Changelog
+++ b/Changelog
@@ -52,6 +52,7 @@ version <next>:
 - NIST Sphere demuxer
 - MPL2, VPlayer, MPlayer, AQTitle, PJS and SubViewer v1 subtitles demuxers and decoders
 - Sony Wave64 muxer
+- Subtitles charset conversion
 
 
 version 1.0:
diff --git a/configure b/configure
index faea887..0360fc9 100755
--- a/configure
+++ b/configure
@@ -1380,6 +1380,7 @@ HAVE_LIST="
     glob
     gnu_as
     ibm_asm
+    iconv
     inet_aton
     io_h
     isatty
@@ -3695,6 +3696,7 @@ check_func  getopt
 check_func  getrusage
 check_struct "sys/time.h sys/resource.h" "struct rusage" ru_maxrss
 check_func  gettimeofday
+check_func  iconv
 check_func  inet_aton $network_extralibs
 check_func  isatty
 check_func  localtime_r
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 012a31c..7a3761d 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -3188,6 +3188,13 @@ typedef struct AVCodecContext {
      * - encoding: unused
      */
     AVDictionary *metadata;
+
+    /**
+     * Charset of the input subtitles file.
+     * - decoding: set by user
+     * - encoding: unused
+     */
+    char *sub_charset;
 } AVCodecContext;
 
 AVRational av_codec_get_pkt_timebase         (const AVCodecContext *avctx);
diff --git a/libavcodec/options_table.h b/libavcodec/options_table.h
index 2a31fa6..38d493d 100644
--- a/libavcodec/options_table.h
+++ b/libavcodec/options_table.h
@@ -403,6 +403,7 @@ static const AVOption options[]={
 {"ka", "Karaoke",            0, AV_OPT_TYPE_CONST, {.i64 = AV_AUDIO_SERVICE_TYPE_KARAOKE },           INT_MIN, INT_MAX, A|E, "audio_service_type"},
 {"request_sample_fmt", "sample format audio decoders should prefer", OFFSET(request_sample_fmt), AV_OPT_TYPE_SAMPLE_FMT, {.i64=AV_SAMPLE_FMT_NONE}, -1, AV_SAMPLE_FMT_NB-1, A|D, "request_sample_fmt"},
 {"pkt_timebase", NULL, OFFSET(pkt_timebase), AV_OPT_TYPE_RATIONAL, {.dbl = 0 }, 0, INT_MAX, 0},
+{"sub_charset", "set input text subtitles charset", OFFSET(sub_charset), AV_OPT_TYPE_STRING, {.str = NULL}, CHAR_MIN, CHAR_MAX, S|D},
 {NULL},
 };
 
diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index 918954a..a9a5168 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -47,6 +47,9 @@
 #include <stdarg.h>
 #include <limits.h>
 #include <float.h>
+#if HAVE_ICONV
+# include <iconv.h>
+#endif
 
 volatile int ff_avcodec_locked;
 static int volatile entangled_thread_counter = 0;
@@ -1058,6 +1061,12 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, const AVCodec *code
             ret = AVERROR(EINVAL);
             goto free_and_end;
         }
+        if (!HAVE_ICONV && avctx->codec_type == AVMEDIA_TYPE_SUBTITLE && avctx->sub_charset) {
+            av_log(avctx, AV_LOG_ERROR, "Charset subtitles conversion needs "
+                   "a libavcodec built with iconv support.\n");
+            ret = AVERROR(EINVAL);
+            goto free_and_end;
+        }
     }
 end:
     ff_unlock_avcodec();
@@ -1816,6 +1825,56 @@ int attribute_align_arg avcodec_decode_audio4(AVCodecContext *avctx,
     return ret;
 }
 
+#if HAVE_ICONV
+static int recode_subtitle(AVCodecContext *avctx, AVSubtitle *sub)
+{
+    int i, ret = 0;
+    iconv_t cd;
+
+    if (!avctx->sub_charset || sub->format == 0)
+        return 0;
+
+    cd = iconv_open("UTF-8", avctx->sub_charset);
+    if (cd == (iconv_t)-1) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to open iconv context "
+               "with input charset \"%s\"\n", avctx->sub_charset);
+        return AVERROR_EXTERNAL;
+    }
+
+    for (i = 0; i < sub->num_rects; i++) {
+        char *inb, *outb, *inb_start, *outb_start;
+        size_t inl, outl;
+
+        inb = inb_start = sub->rects[i]->ass;
+        if (!inb)
+            break;
+        inl  = strlen(inb);
+        outl = 4*inl;
+        outb = outb_start = av_malloc(outl + 1);
+        if (!outb) {
+            ret = AVERROR(ENOMEM);
+            break;
+        }
+
+        if (iconv(cd, &inb, &inl, &outb, &outl) == (size_t)-1 ||
+            iconv(cd, NULL, NULL, &outb, &outl) == (size_t)-1) {
+            av_log(avctx, AV_LOG_ERROR, "Unable to recode subtitle event \"%s\" "
+                   "from %s to UTF-8\n", inb_start, avctx->sub_charset);
+            av_free(outb_start);
+            ret = AVERROR_EXTERNAL;
+            break;
+        }
+        *outb = 0;
+
+        av_free(inb_start);
+        sub->rects[i]->ass = outb_start;
+    }
+
+    iconv_close(cd);
+    return ret;
+}
+#endif
+
 int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
                              int *got_sub_ptr,
                              AVPacket *avpkt)
@@ -1850,8 +1909,12 @@ int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
                 ret = avpkt->size;
         }
 
-        if (*got_sub_ptr)
+        if (*got_sub_ptr) {
+#if HAVE_ICONV
+            ret = recode_subtitle(avctx, sub);
+#endif
             avctx->frame_number++;
+        }
     }
 
     return ret;
-- 
1.8.0.3



More information about the ffmpeg-devel mailing list