[FFmpeg-devel] [PATCH 1/5] lavc: move UTF-8 validation to lavu.

Nicolas George nicolas.george at normalesup.org
Thu Aug 8 17:03:31 CEST 2013


Signed-off-by: Nicolas George <nicolas.george at normalesup.org>
---
 libavcodec/utils.c   |   22 ++--------------------
 libavutil/avstring.c |   20 ++++++++++++++++++++
 libavutil/internal.h |    5 +++++
 3 files changed, 27 insertions(+), 20 deletions(-)


I believe this version is ready for review.

There are still parts that will need to be added later; they mostly concern
how users are supposed to control the encoding conversion. They are a matter
of deciding what is most convenient, and can simply be added on top of what
is already there.


diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index fd08bec..c13b4b1 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -2271,25 +2271,6 @@ end:
 #endif
 }
 
-static int utf8_check(const uint8_t *str)
-{
-    const uint8_t *byte;
-    uint32_t codepoint, min;
-
-    while (*str) {
-        byte = str;
-        GET_UTF8(codepoint, *(byte++), return 0;);
-        min = byte - str == 1 ? 0 : byte - str == 2 ? 0x80 :
-              1 << (5 * (byte - str) - 4);
-        if (codepoint < min || codepoint >= 0x110000 ||
-            codepoint == 0xFFFE /* BOM */ ||
-            codepoint >= 0xD800 && codepoint <= 0xDFFF /* surrogates */)
-            return 0;
-        str = byte;
-    }
-    return 1;
-}
-
 int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
                              int *got_sub_ptr,
                              AVPacket *avpkt)
@@ -2332,7 +2313,8 @@ int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
             }
 
             for (i = 0; i < sub->num_rects; i++) {
-                if (sub->rects[i]->ass && !utf8_check(sub->rects[i]->ass)) {
+                if (sub->rects[i]->ass &&
+                    !avpriv_utf8_check(sub->rects[i]->ass, strlen(sub->rects[i]->ass))) {
                     av_log(avctx, AV_LOG_ERROR,
                            "Invalid UTF-8 in decoded subtitles text; "
                            "maybe missing -sub_charenc option\n");
diff --git a/libavutil/avstring.c b/libavutil/avstring.c
index cf9be2a..16d85d8 100644
--- a/libavutil/avstring.c
+++ b/libavutil/avstring.c
@@ -29,6 +29,7 @@
 #include "mem.h"
 #include "avstring.h"
 #include "bprint.h"
+#include "internal.h"
 
 int av_strstart(const char *str, const char *pfx, const char **ptr)
 {
@@ -307,6 +308,25 @@ int av_isxdigit(int c)
     return av_isdigit(c) || (c >= 'a' && c <= 'f');
 }
 
+int avpriv_utf8_check(const uint8_t *str, size_t len)
+{
+    const uint8_t *byte, *end = str + len;
+    uint32_t codepoint, min;
+
+    while (str < end) {
+        byte = str;
+        GET_UTF8(codepoint, *(byte++), return 0;); /* NOTE(review): byte is not checked against end here -- confirm a truncated trailing sequence cannot read past the buffer */
+        min = byte - str == 1 ? 0 : byte - str == 2 ? 0x80 :
+              1 << (5 * (byte - str) - 4); /* smallest codepoint allowed for this sequence length; rejects overlong forms */
+        if (codepoint < min || codepoint >= 0x110000 ||
+            codepoint == 0xFFFE /* noncharacter (byte-swapped BOM), not the BOM itself */ ||
+            codepoint >= 0xD800 && codepoint <= 0xDFFF /* surrogates */)
+            return 0;
+        str = byte;
+    }
+    return 1;
+}
+
 #ifdef TEST
 
 int main(void)
diff --git a/libavutil/internal.h b/libavutil/internal.h
index 680d600..aebff6b 100644
--- a/libavutil/internal.h
+++ b/libavutil/internal.h
@@ -203,4 +203,9 @@ void avpriv_request_sample(void *avc,
  */
 int avpriv_open(const char *filename, int flags, ...);
 
+/**
+ * Check if a string looks like valid UTF-8
+ */
+int avpriv_utf8_check(const uint8_t *str, size_t len);
+
 #endif /* AVUTIL_INTERNAL_H */
-- 
1.7.10.4



More information about the ffmpeg-devel mailing list