[FFmpeg-cvslog] h264dsp: Factorize code into a new function, h264_find_start_code_candidate

Ben Avison git at videolan.org
Thu Aug 8 12:51:42 CEST 2013


ffmpeg | branch: master | Ben Avison <bavison at riscosopen.org> | Mon Aug  5 13:12:47 2013 +0100| [218d6844b37d339ffbf2044ad07d8be7767e2734] | committer: Martin Storsjö

h264dsp: Factorize code into a new function, h264_find_start_code_candidate

This performs the start code search which was previously part of
h264_find_frame_end() - the most CPU intensive part of the function.

By itself, this results in a performance regression:
              Before          After
              Mean   StdDev   Mean   StdDev  Change
Overall time  2925.6 26.2     3068.5 31.7    -4.7%

but this can more than be made up for by platform-optimised
implementations of the function.

Signed-off-by: Martin Storsjö <martin at martin.st>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=218d6844b37d339ffbf2044ad07d8be7767e2734
---

 libavcodec/h264_parser.c |   27 +++------------------------
 libavcodec/h264dsp.c     |   29 +++++++++++++++++++++++++++++
 libavcodec/h264dsp.h     |    9 +++++++++
 3 files changed, 41 insertions(+), 24 deletions(-)

diff --git a/libavcodec/h264_parser.c b/libavcodec/h264_parser.c
index da2a5f9..ef5da98 100644
--- a/libavcodec/h264_parser.c
+++ b/libavcodec/h264_parser.c
@@ -47,30 +47,9 @@ static int h264_find_frame_end(H264Context *h, const uint8_t *buf,
 
     for (i = 0; i < buf_size; i++) {
         if (state == 7) {
-#if HAVE_FAST_UNALIGNED
-            /* we check i < buf_size instead of i + 3 / 7 because it is
-             * simpler and there must be FF_INPUT_BUFFER_PADDING_SIZE
-             * bytes at the end.
-             */
-#if HAVE_FAST_64BIT
-            while (i < buf_size &&
-                   !((~*(const uint64_t *)(buf + i) &
-                      (*(const uint64_t *)(buf + i) - 0x0101010101010101ULL)) &
-                      0x8080808080808080ULL))
-                i += 8;
-#else
-            while (i < buf_size &&
-                   !((~*(const uint32_t *)(buf + i) &
-                      (*(const uint32_t *)(buf + i) - 0x01010101U)) &
-                      0x80808080U))
-                i += 4;
-#endif
-#endif
-            for (; i < buf_size; i++)
-                if (!buf[i]) {
-                    state = 2;
-                    break;
-                }
+            i += h->h264dsp.h264_find_start_code_candidate(buf + i, buf_size - i);
+            if (i < buf_size)
+                state = 2;
         } else if (state <= 2) {
             if (buf[i] == 1)
                 state ^= 5;            // 2->7, 1->4, 0->5
diff --git a/libavcodec/h264dsp.c b/libavcodec/h264dsp.c
index 3ca6abe..a901dbb 100644
--- a/libavcodec/h264dsp.c
+++ b/libavcodec/h264dsp.c
@@ -53,6 +53,34 @@
 #include "h264addpx_template.c"
 #undef BIT_DEPTH
 
+static int h264_find_start_code_candidate_c(const uint8_t *buf, int size)
+{
+    int i = 0;
+#if HAVE_FAST_UNALIGNED
+    /* we check i < size instead of i + 3 / 7 because it is
+     * simpler and there must be FF_INPUT_BUFFER_PADDING_SIZE
+     * bytes at the end.
+     */
+#if HAVE_FAST_64BIT
+    while (i < size &&
+            !((~*(const uint64_t *)(buf + i) &
+                    (*(const uint64_t *)(buf + i) - 0x0101010101010101ULL)) &
+                    0x8080808080808080ULL))
+        i += 8;
+#else
+    while (i < size &&
+            !((~*(const uint32_t *)(buf + i) &
+                    (*(const uint32_t *)(buf + i) - 0x01010101U)) &
+                    0x80808080U))
+        i += 4;
+#endif
+#endif
+    for (; i < size; i++)
+        if (!buf[i])
+            break;
+    return i;
+}
+
 av_cold void ff_h264dsp_init(H264DSPContext *c, const int bit_depth,
                              const int chroma_format_idc)
 {
@@ -133,6 +161,7 @@ av_cold void ff_h264dsp_init(H264DSPContext *c, const int bit_depth,
         H264_DSP(8);
         break;
     }
+    c->h264_find_start_code_candidate = h264_find_start_code_candidate_c;
 
     if (ARCH_ARM) ff_h264dsp_init_arm(c, bit_depth, chroma_format_idc);
     if (ARCH_PPC) ff_h264dsp_init_ppc(c, bit_depth, chroma_format_idc);
diff --git a/libavcodec/h264dsp.h b/libavcodec/h264dsp.h
index 1f9f8fe..6249ba7 100644
--- a/libavcodec/h264dsp.h
+++ b/libavcodec/h264dsp.h
@@ -105,6 +105,15 @@ typedef struct H264DSPContext {
     /* bypass-transform */
     void (*h264_add_pixels8_clear)(uint8_t *dst, int16_t *block, int stride);
     void (*h264_add_pixels4_clear)(uint8_t *dst, int16_t *block, int stride);
+
+    /**
+     * Search buf from the start for up to size bytes. Return the index
+     * of a zero byte, or >= size if not found. Ideally, use lookahead
+     * to filter out any zero bytes that are known to not be followed by
+     * one or more further zero bytes and a one byte. Better still, filter
+     * out any bytes that form the trailing_zero_8bits syntax element too.
+     */
+    int (*h264_find_start_code_candidate)(const uint8_t *buf, int size);
 } H264DSPContext;
 
 void ff_h264dsp_init(H264DSPContext *c, const int bit_depth,



More information about the ffmpeg-cvslog mailing list