[FFmpeg-devel] [PATCH 1/5] startcode: Use common macro and switch to pointer arithmetic

Andreas Rheinhardt andreas.rheinhardt at gmail.com
Sun Jun 9 14:00:49 EEST 2019


The reasons are cosmetics and preparation for future patches that will
have even more cases and whose performance improves when switching to
direct pointer arithmetic: Benchmarks have shown that using pointers
directly instead of indexing to access the array is about 5% faster
(33665 vs. 31806 for a 7.4 Mb/s H.264 file based on 10 iterations of
131072 runs each; and 244356 vs. 233373 for a 30.2 Mb/s H.264 file based
on 10 iterations of 8192 runs each).

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt at gmail.com>
---
 libavcodec/startcode.c | 37 +++++++++++++++++++------------------
 1 file changed, 19 insertions(+), 18 deletions(-)

diff --git a/libavcodec/startcode.c b/libavcodec/startcode.c
index 9efdffe8c6..a55a8fafa6 100644
--- a/libavcodec/startcode.c
+++ b/libavcodec/startcode.c
@@ -27,31 +27,32 @@
 
 #include "startcode.h"
 #include "config.h"
+#include "libavutil/intreadwrite.h"
 
 int ff_startcode_find_candidate_c(const uint8_t *buf, int size)
 {
-    int i = 0;
+    const uint8_t *start = buf, *end = buf + size;
+
 #if HAVE_FAST_UNALIGNED
-    /* we check i < size instead of i + 3 / 7 because it is
-     * simpler and there must be AV_INPUT_BUFFER_PADDING_SIZE
-     * bytes at the end.
-     */
+#define READ(bitness) AV_RN ## bitness
+#define MAIN_LOOP(bitness, mask1, mask2) do {                              \
+        /* we check buf < end instead of buf + 3 / 7 because it is
+         * simpler and there must be AV_INPUT_BUFFER_PADDING_SIZE
+         * bytes at the end. */                                            \
+        for (; buf < end; buf += bitness / 8)                              \
+            if ((~READ(bitness)(buf) & (READ(bitness)(buf) - mask1))       \
+                                     & mask2)                              \
+                break;                                                     \
+    } while (0)
+
 #if HAVE_FAST_64BIT
-    while (i < size &&
-            !((~*(const uint64_t *)(buf + i) &
-                    (*(const uint64_t *)(buf + i) - 0x0101010101010101ULL)) &
-                    0x8080808080808080ULL))
-        i += 8;
+    MAIN_LOOP(64, 0x0101010101010101ULL, 0x8080808080808080ULL);
 #else
-    while (i < size &&
-            !((~*(const uint32_t *)(buf + i) &
-                    (*(const uint32_t *)(buf + i) - 0x01010101U)) &
-                    0x80808080U))
-        i += 4;
+    MAIN_LOOP(32, 0x01010101U, 0x80808080U);
 #endif
 #endif
-    for (; i < size; i++)
-        if (!buf[i])
+    for (; buf < end; buf++)
+        if (!*buf)
             break;
-    return i;
+    return buf - start;
 }
-- 
2.21.0



More information about the ffmpeg-devel mailing list