[FFmpeg-devel] [PATCH] lavc/ass_split: improve handling of complex ASS features

Rodger Combs rodger.combs at gmail.com
Sat Sep 10 11:45:31 EEST 2016


Specifically:
- Skip writing drawings as text
- Convert \h to a Unicode non-breaking space
- Ignore comments and unknown tags

The test references are updated to reflect these changes.
---
 libavcodec/ass_split.c       | 29 +++++++++++++++++------------
 tests/ref/fate/sub-textenc   | 12 ++++++------
 tests/ref/fate/sub-webvttenc | 12 ++++++------
 3 files changed, 29 insertions(+), 24 deletions(-)

diff --git a/libavcodec/ass_split.c b/libavcodec/ass_split.c
index beaba7e..b25a0f3 100644
--- a/libavcodec/ass_split.c
+++ b/libavcodec/ass_split.c
@@ -477,30 +477,37 @@ int ff_ass_split_override_codes(const ASSCodesCallbacks *callbacks, void *priv,
     const char *text = NULL;
     char new_line[2];
     int text_len = 0;
+    int drawing = 0;
 
     while (buf && *buf) {
         if (text && callbacks->text &&
-            (sscanf(buf, "\\%1[nN]", new_line) == 1 ||
-             !strncmp(buf, "{\\", 2))) {
-            callbacks->text(priv, text, text_len);
+            (sscanf(buf, "\\%1[nNh]", new_line) == 1 ||
+             *buf == '{')) {
+            if (!drawing)
+                callbacks->text(priv, text, text_len);
             text = NULL;
         }
-        if (sscanf(buf, "\\%1[nN]", new_line) == 1) {
+        if (buf[0] == '\\' && buf[1] == 'h') {
+            callbacks->text(priv, "\u00A0", 2);
+            buf += 2;
+        } else if (sscanf(buf, "\\%1[nN]", new_line) == 1) {
             if (callbacks->new_line)
                 callbacks->new_line(priv, new_line[0] == 'N');
             buf += 2;
-        } else if (!strncmp(buf, "{\\", 2)) {
-            buf++;
+        } else if (*buf == '{' && strchr(buf, '}')) {
+            buf += strcspn(buf, "\\}"); // skip comments
             while (*buf == '\\') {
                 char style[2], c[2], sep[2], c_num[2] = "0", tmp[128] = {0};
                 unsigned int color = 0xFFFFFFFF;
-                int len, size = -1, an = -1, alpha = -1;
+                int len = 2, size = -1, an = -1, alpha = -1;
                 int x1, y1, x2, y2, t1 = -1, t2 = -1;
                 if (sscanf(buf, "\\%1[bisu]%1[01\\}]%n", style, c, &len) > 1) {
                     int close = c[0] == '0' ? 1 : c[0] == '1' ? 0 : -1;
                     len += close != -1;
                     if (callbacks->style)
                         callbacks->style(priv, style[0], close);
+                } else if (sscanf(buf, "\\p%u%1[\\}]%n", &size, sep, &len) > 1) {
+                    drawing = (size > 0);
                 } else if (sscanf(buf, "\\c%1[\\}]%n", sep, &len) > 0 ||
                            sscanf(buf, "\\c&H%X&%1[\\}]%n", &color, sep, &len) > 1 ||
                            sscanf(buf, "\\%1[1234]c%1[\\}]%n", c_num, sep, &len) > 1 ||
@@ -543,13 +550,11 @@ int ff_ass_split_override_codes(const ASSCodesCallbacks *callbacks, void *priv,
                 } else if (sscanf(buf, "\\org(%d,%d)%1[\\}]%n", &x1, &y1, sep, &len) > 2) {
                     if (callbacks->origin)
                         callbacks->origin(priv, x1, y1);
-                } else {
-                    len = strcspn(buf+1, "\\}") + 2;  /* skip unknown code */
                 }
                 buf += len - 1;
+                buf += strcspn(buf, "\\}"); // skip comments
             }
-            if (*buf++ != '}')
-                return AVERROR_INVALIDDATA;
+            buf++; // skip }
         } else {
             if (!text) {
                 text = buf;
@@ -559,7 +564,7 @@ int ff_ass_split_override_codes(const ASSCodesCallbacks *callbacks, void *priv,
             buf++;
         }
     }
-    if (text && callbacks->text)
+    if (text && callbacks->text && !drawing)
         callbacks->text(priv, text, text_len);
     if (callbacks->end)
         callbacks->end(priv);
diff --git a/tests/ref/fate/sub-textenc b/tests/ref/fate/sub-textenc
index f7d82ce..2d7236e 100644
--- a/tests/ref/fate/sub-textenc
+++ b/tests/ref/fate/sub-textenc
@@ -152,7 +152,7 @@ text 2
 00:00:52,501 --> 00:00:54,500
 Hide these tags:
 also hide these tags:
-but show this: {normal text}
+but show this: 
 
 30
 00:00:54,501 --> 00:01:00,500
@@ -160,18 +160,18 @@ but show this: {normal text}
 \ N is a forced line break
 \ h is a hard space
 Normal spaces at the start and at the end of the line are trimmed while hard spaces are not trimmed.
-The\hline\hwill\hnever\hbreak\hautomatically\hright\hbefore\hor\hafter\ha\hhard\hspace.\h:-D
+The line will never break automatically right before or after a hard space. :-D
 
 31
 00:00:54,501 --> 00:00:56,500
 
-\h\h\h\h\hA (05 hard spaces followed by a letter)
+     A (05 hard spaces followed by a letter)
 A (Normal  spaces followed by a letter)
 A (No hard spaces followed by a letter)
 
 32
 00:00:56,501 --> 00:00:58,500
-\h\h\h\h\hA (05 hard spaces followed by a letter)
+     A (05 hard spaces followed by a letter)
 A (Normal  spaces followed by a letter)
 A (No hard spaces followed by a letter)
 Show this: \TEST and this: \-)
@@ -179,10 +179,10 @@ Show this: \TEST and this: \-)
 33
 00:00:58,501 --> 00:01:00,500
 
-A letter followed by 05 hard spaces: A\h\h\h\h\h
+A letter followed by 05 hard spaces: A     
 A letter followed by normal  spaces: A
 A letter followed by no hard spaces: A
-05 hard  spaces between letters: A\h\h\h\h\hA
+05 hard  spaces between letters: A     A
 5 normal spaces between letters: A     A
 
 ^--Forced line break
diff --git a/tests/ref/fate/sub-webvttenc b/tests/ref/fate/sub-webvttenc
index 08903e3..5f4d2b6 100644
--- a/tests/ref/fate/sub-webvttenc
+++ b/tests/ref/fate/sub-webvttenc
@@ -125,33 +125,33 @@ text 2
 00:52.501 --> 00:54.500
 Hide these tags:
 also hide these tags:
-but show this: {normal text}
+but show this: 
 
 00:54.501 --> 01:00.500
 
 \ N is a forced line break
 \ h is a hard space
 Normal spaces at the start and at the end of the line are trimmed while hard spaces are not trimmed.
-The\hline\hwill\hnever\hbreak\hautomatically\hright\hbefore\hor\hafter\ha\hhard\hspace.\h:-D
+The line will never break automatically right before or after a hard space. :-D
 
 00:54.501 --> 00:56.500
 
-\h\h\h\h\hA (05 hard spaces followed by a letter)
+     A (05 hard spaces followed by a letter)
 A (Normal  spaces followed by a letter)
 A (No hard spaces followed by a letter)
 
 00:56.501 --> 00:58.500
-\h\h\h\h\hA (05 hard spaces followed by a letter)
+     A (05 hard spaces followed by a letter)
 A (Normal  spaces followed by a letter)
 A (No hard spaces followed by a letter)
 Show this: \TEST and this: \-)
 
 00:58.501 --> 01:00.500
 
-A letter followed by 05 hard spaces: A\h\h\h\h\h
+A letter followed by 05 hard spaces: A     
 A letter followed by normal  spaces: A
 A letter followed by no hard spaces: A
-05 hard  spaces between letters: A\h\h\h\h\hA
+05 hard  spaces between letters: A     A
 5 normal spaces between letters: A     A
 
 ^--Forced line break
-- 
2.10.0



More information about the ffmpeg-devel mailing list