[FFmpeg-devel] [PATCHv2] lavc/cbrt_tablegen: speed up tablegen

Ganesh Ajjanagadde gajjanagadde at gmail.com
Tue Jan 5 03:33:59 CET 2016


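This exploits an approach based on the sieve of Eratosthenes, a popular
method for generating prime numbers. Since x * cbrt(x) = x^(4/3) is
completely multiplicative, cbrt() only needs to be evaluated at the primes;
the entries for composite numbers are obtained as products of those values.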

Tables are identical to previous ones.

Tested with FATE with/without --enable-hardcoded-tables.

Sample benchmark (Haswell, GNU/Linux+gcc):
prev:
7860100 decicycles in cbrt_tableinit,       1 runs,      0 skips
7777490 decicycles in cbrt_tableinit,       2 runs,      0 skips
[...]
7582339 decicycles in cbrt_tableinit,     256 runs,      0 skips
7563556 decicycles in cbrt_tableinit,     512 runs,      0 skips

new:
2099480 decicycles in cbrt_tableinit,       1 runs,      0 skips
2044470 decicycles in cbrt_tableinit,       2 runs,      0 skips
[...]
1796544 decicycles in cbrt_tableinit,     256 runs,      0 skips
1791631 decicycles in cbrt_tableinit,     512 runs,      0 skips

Both small and large run counts are given: since this function is called
only once, the small run counts may give a better picture. The small-count
numbers are fairly consistent, and there is a steady downward trend from
small to large run counts, after which the timings stabilize at a new value.

Signed-off-by: Ganesh Ajjanagadde <gajjanagadde at gmail.com>
---
 libavcodec/aacdec_fixed.c           |  4 +--
 libavcodec/aacdec_template.c        |  2 +-
 libavcodec/cbrt_tablegen.h          | 53 ++++++++++++++++++++++++++-----------
 libavcodec/cbrt_tablegen_template.c | 12 ++++++++-
 4 files changed, 51 insertions(+), 20 deletions(-)

diff --git a/libavcodec/aacdec_fixed.c b/libavcodec/aacdec_fixed.c
index 396a874..f7b882b 100644
--- a/libavcodec/aacdec_fixed.c
+++ b/libavcodec/aacdec_fixed.c
@@ -155,9 +155,9 @@ static void vector_pow43(int *coefs, int len)
     for (i=0; i<len; i++) {
         coef = coefs[i];
         if (coef < 0)
-            coef = -(int)cbrt_tab[-coef];
+            coef = -(int)cbrt_tab[-coef].i;
         else
-            coef = (int)cbrt_tab[coef];
+            coef = (int)cbrt_tab[coef].i;
         coefs[i] = coef;
     }
 }
diff --git a/libavcodec/aacdec_template.c b/libavcodec/aacdec_template.c
index d819958..1380510 100644
--- a/libavcodec/aacdec_template.c
+++ b/libavcodec/aacdec_template.c
@@ -1791,7 +1791,7 @@ static int decode_spectrum_and_dequant(AACContext *ac, INTFLOAT coef[1024],
                                         v = -v;
                                     *icf++ = v;
 #else
-                                    *icf++ = cbrt_tab[n] | (bits & 1U<<31);
+                                    *icf++ = cbrt_tab[n].i | (bits & 1U<<31);
 #endif /* USE_FIXED */
                                     bits <<= 1;
                                 } else {
diff --git a/libavcodec/cbrt_tablegen.h b/libavcodec/cbrt_tablegen.h
index 59b5a1d..e3d6634 100644
--- a/libavcodec/cbrt_tablegen.h
+++ b/libavcodec/cbrt_tablegen.h
@@ -26,14 +26,13 @@
 #include <stdint.h>
 #include <math.h>
 #include "libavutil/attributes.h"
+#include "libavutil/intfloat.h"
 #include "libavcodec/aac_defines.h"
 
-#if USE_FIXED
-#define CBRT(x) lrint((x).f * 8192)
-#else
-#define CBRT(x) x.i
-#endif
-
+union ff_int32float64 {
+    uint32_t i;
+    double   f;
+};
 #if CONFIG_HARDCODED_TABLES
 #if USE_FIXED
 #define cbrt_tableinit_fixed()
@@ -43,20 +42,42 @@
 #include "libavcodec/cbrt_tables.h"
 #endif
 #else
-static uint32_t cbrt_tab[1 << 13];
+static union ff_int32float64 cbrt_tab[1 << 13];
 
 static av_cold void AAC_RENAME(cbrt_tableinit)(void)
 {
-    if (!cbrt_tab[(1<<13) - 1]) {
-        int i;
-        for (i = 0; i < 1<<13; i++) {
-            union {
-                float f;
-                uint32_t i;
-            } f;
-            f.f = cbrt(i) * i;
-            cbrt_tab[i] = CBRT(f);
+    int i, j, k;
+    double cbrt_val;
+
+    if (!cbrt_tab[(1<<13) - 1].i) {
+        cbrt_tab[0].f = 0;
+        for (i = 1; i < 1<<13; i++)
+            cbrt_tab[i].f = 1;
+
+        /* i < sqrt(1<<13): sieve with every power of i to cover non-squarefree numbers */
+        for (i = 2; i < 90; i++) {
+            if (cbrt_tab[i].f == 1) {
+                cbrt_val = i * cbrt(i);
+                for (k = i; k < (1<<13); k*= i)
+                    for (j = k; j < (1<<13); j+=k)
+                        cbrt_tab[j].f *= cbrt_val;
+            }
         }
+
+        for (i = 91; i <= 8191; i+=2) {
+            if (cbrt_tab[i].f == 1) {
+                cbrt_val = i * cbrt(i);
+                for (j = i; j < (1<<13); j+=i)
+                    cbrt_tab[j].f *= cbrt_val;
+            }
+        }
+#if USE_FIXED
+        for (i = 0; i < 1<<13; i++)
+            cbrt_tab[i].i = lrint(cbrt_tab[i].f * 8192);
+#else
+        for (i = 0; i < 1<<13; i++)
+            cbrt_tab[i].i = av_float2int((float)cbrt_tab[i].f);
+#endif
     }
 }
 #endif /* CONFIG_HARDCODED_TABLES */
diff --git a/libavcodec/cbrt_tablegen_template.c b/libavcodec/cbrt_tablegen_template.c
index 7dcab91..5abcaba 100644
--- a/libavcodec/cbrt_tablegen_template.c
+++ b/libavcodec/cbrt_tablegen_template.c
@@ -28,11 +28,21 @@
 
 int main(void)
 {
+    const int array_size = FF_ARRAY_ELEMS(cbrt_tab);
+    int i;
     AAC_RENAME(cbrt_tableinit)();
 
     write_fileheader();
 
-    WRITE_ARRAY("static const", uint32_t, cbrt_tab);
+    printf("static const union ff_int32float64 cbrt_tab[%d] = {\n", array_size);
+    printf("   ");
+    for (i = 0; i < array_size - 1; i++) {
+        printf(" {.i = 0x%08"PRIx32"},", cbrt_tab[i].i);
+        if ((i & 7) == 7)
+            printf("\n   ");
+    }
+    printf(" {.i = 0x%08"PRIx32"}\n", cbrt_tab[i].i);
+    printf("};\n");
 
     return 0;
 }
-- 
2.6.4



More information about the ffmpeg-devel mailing list