[FFmpeg-devel] [PATCHv2] lavc/cbrt_tablegen: speed up tablegen

Ganesh Ajjanagadde gajjanagadde at gmail.com
Thu Jan 7 19:55:53 CET 2016


On Mon, Jan 4, 2016 at 6:33 PM, Ganesh Ajjanagadde
<gajjanagadde at gmail.com> wrote:
> This exploits an approach based on the sieve of Eratosthenes, a popular
> method for generating prime numbers.
>
> Tables are identical to previous ones.
>
> Tested with FATE with/without --enable-hardcoded-tables.
>
> Sample benchmark (Haswell, GNU/Linux+gcc):
> prev:
> 7860100 decicycles in cbrt_tableinit,       1 runs,      0 skips
> 7777490 decicycles in cbrt_tableinit,       2 runs,      0 skips
> [...]
> 7582339 decicycles in cbrt_tableinit,     256 runs,      0 skips
> 7563556 decicycles in cbrt_tableinit,     512 runs,      0 skips
>
> new:
> 2099480 decicycles in cbrt_tableinit,       1 runs,      0 skips
> 2044470 decicycles in cbrt_tableinit,       2 runs,      0 skips
> [...]
> 1796544 decicycles in cbrt_tableinit,     256 runs,      0 skips
> 1791631 decicycles in cbrt_tableinit,     512 runs,      0 skips
>
> Both small and large run counts are given: this function is called only
> once, so the small run counts may give a more representative picture. The
> small-run numbers are fairly consistent, and there is a steady downward
> trend from small to large run counts, after which the value stabilizes.
>
> Signed-off-by: Ganesh Ajjanagadde <gajjanagadde at gmail.com>
> ---
>  libavcodec/aacdec_fixed.c           |  4 +--
>  libavcodec/aacdec_template.c        |  2 +-
>  libavcodec/cbrt_tablegen.h          | 53 ++++++++++++++++++++++++++-----------
>  libavcodec/cbrt_tablegen_template.c | 12 ++++++++-
>  4 files changed, 51 insertions(+), 20 deletions(-)
>
> diff --git a/libavcodec/aacdec_fixed.c b/libavcodec/aacdec_fixed.c
> index 396a874..f7b882b 100644
> --- a/libavcodec/aacdec_fixed.c
> +++ b/libavcodec/aacdec_fixed.c
> @@ -155,9 +155,9 @@ static void vector_pow43(int *coefs, int len)
>      for (i=0; i<len; i++) {
>          coef = coefs[i];
>          if (coef < 0)
> -            coef = -(int)cbrt_tab[-coef];
> +            coef = -(int)cbrt_tab[-coef].i;
>          else
> -            coef = (int)cbrt_tab[coef];
> +            coef = (int)cbrt_tab[coef].i;
>          coefs[i] = coef;
>      }
>  }
> diff --git a/libavcodec/aacdec_template.c b/libavcodec/aacdec_template.c
> index d819958..1380510 100644
> --- a/libavcodec/aacdec_template.c
> +++ b/libavcodec/aacdec_template.c
> @@ -1791,7 +1791,7 @@ static int decode_spectrum_and_dequant(AACContext *ac, INTFLOAT coef[1024],
>                                          v = -v;
>                                      *icf++ = v;
>  #else
> -                                    *icf++ = cbrt_tab[n] | (bits & 1U<<31);
> +                                    *icf++ = cbrt_tab[n].i | (bits & 1U<<31);
>  #endif /* USE_FIXED */
>                                      bits <<= 1;
>                                  } else {
> diff --git a/libavcodec/cbrt_tablegen.h b/libavcodec/cbrt_tablegen.h
> index 59b5a1d..e3d6634 100644
> --- a/libavcodec/cbrt_tablegen.h
> +++ b/libavcodec/cbrt_tablegen.h
> @@ -26,14 +26,13 @@
>  #include <stdint.h>
>  #include <math.h>
>  #include "libavutil/attributes.h"
> +#include "libavutil/intfloat.h"
>  #include "libavcodec/aac_defines.h"
>
> -#if USE_FIXED
> -#define CBRT(x) lrint((x).f * 8192)
> -#else
> -#define CBRT(x) x.i
> -#endif
> -
> +union ff_int32float64 {
> +    uint32_t i;
> +    double   f;
> +};
>  #if CONFIG_HARDCODED_TABLES
>  #if USE_FIXED
>  #define cbrt_tableinit_fixed()
> @@ -43,20 +42,42 @@
>  #include "libavcodec/cbrt_tables.h"
>  #endif
>  #else
> -static uint32_t cbrt_tab[1 << 13];
> +static union ff_int32float64 cbrt_tab[1 << 13];
>
>  static av_cold void AAC_RENAME(cbrt_tableinit)(void)
>  {
> -    if (!cbrt_tab[(1<<13) - 1]) {
> -        int i;
> -        for (i = 0; i < 1<<13; i++) {
> -            union {
> -                float f;
> -                uint32_t i;
> -            } f;
> -            f.f = cbrt(i) * i;
> -            cbrt_tab[i] = CBRT(f);
> +    int i, j, k;
> +    double cbrt_val;
> +
> +    if (!cbrt_tab[(1<<13) - 1].i) {
> +        cbrt_tab[0].f = 0;
> +        for (i = 1; i < 1<<13; i++)
> +            cbrt_tab[i].f = 1;
> +
> +        /* have to worry about non-squarefree numbers */
> +        for (i = 2; i < 90; i++) {
> +            if (cbrt_tab[i].f == 1) {
> +                cbrt_val = i * cbrt(i);
> +                for (k = i; k < (1<<13); k*= i)
> +                    for (j = k; j < (1<<13); j+=k)
> +                        cbrt_tab[j].f *= cbrt_val;
> +            }
>          }
> +
> +        for (i = 91; i <= 8191; i+=2) {
> +            if (cbrt_tab[i].f == 1) {
> +                cbrt_val = i * cbrt(i);
> +                for (j = i; j < (1<<13); j+=i)
> +                    cbrt_tab[j].f *= cbrt_val;
> +            }
> +        }
> +#if USE_FIXED
> +        for (i = 0; i < 1<<13; i++)
> +            cbrt_tab[i].i = lrint(cbrt_tab[i].f * 8192);
> +#else
> +        for (i = 0; i < 1<<13; i++)
> +            cbrt_tab[i].i = av_float2int((float)cbrt_tab[i].f);
> +#endif
>      }
>  }
>  #endif /* CONFIG_HARDCODED_TABLES */
> diff --git a/libavcodec/cbrt_tablegen_template.c b/libavcodec/cbrt_tablegen_template.c
> index 7dcab91..5abcaba 100644
> --- a/libavcodec/cbrt_tablegen_template.c
> +++ b/libavcodec/cbrt_tablegen_template.c
> @@ -28,11 +28,21 @@
>
>  int main(void)
>  {
> +    const int array_size = FF_ARRAY_ELEMS(cbrt_tab);
> +    int i;
>      AAC_RENAME(cbrt_tableinit)();
>
>      write_fileheader();
>
> -    WRITE_ARRAY("static const", uint32_t, cbrt_tab);
> +    printf("static const union ff_int32float64 cbrt_tab[%d] = {\n", array_size);
> +    printf("   ");
> +    for (i = 0; i < array_size - 1; i++) {
> +        printf(" {.i = 0x%08"PRIx32"},", cbrt_tab[i].i);
> +        if ((i & 7) == 7)
> +            printf("\n   ");
> +    }
> +    printf(" {.i = 0x%08"PRIx32"}\n", cbrt_tab[i].i);
> +    printf("};\n");
>
>      return 0;
>  }
> --
> 2.6.4
>

ping


More information about the ffmpeg-devel mailing list