[FFmpeg-cvslog] avcodec/fft: Add revtab32 for FFTs with more than 65536 samples

Michael Niedermayer git at videolan.org
Fri Mar 4 16:06:14 CET 2016


ffmpeg | branch: master | Michael Niedermayer <michael at niedermayer.cc> | Fri Mar  4 15:39:55 2016 +0100| [305344d89e21ed11c74274167cf597f151778c42] | committer: Michael Niedermayer

avcodec/fft: Add revtab32 for FFTs with more than 65536 samples

The x86 optimizations are enabled only for the FFT sizes they support (<= 65536 samples).

Signed-off-by: Michael Niedermayer <michael at niedermayer.cc>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=305344d89e21ed11c74274167cf597f151778c42
---

 libavcodec/fft.h          |    1 +
 libavcodec/fft_template.c |   31 ++++++++++++++++++++++++++-----
 libavcodec/x86/fft_init.c |    3 +++
 3 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/libavcodec/fft.h b/libavcodec/fft.h
index 60df239..c858570 100644
--- a/libavcodec/fft.h
+++ b/libavcodec/fft.h
@@ -110,6 +110,7 @@ struct FFTContext {
     void (*mdct_calcw)(struct FFTContext *s, FFTDouble *output, const FFTSample *input);
     enum fft_permutation_type fft_permutation;
     enum mdct_permutation_type mdct_permutation;
+    uint32_t *revtab32;
 };
 
 #if CONFIG_HARDCODED_TABLES
diff --git a/libavcodec/fft_template.c b/libavcodec/fft_template.c
index 2781a33..480557f 100644
--- a/libavcodec/fft_template.c
+++ b/libavcodec/fft_template.c
@@ -143,14 +143,23 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
 {
     int i, j, n;
 
+    s->revtab = NULL;
+    s->revtab32 = NULL;
+
     if (nbits < 2 || nbits > 17)
         goto fail;
     s->nbits = nbits;
     n = 1 << nbits;
 
-    s->revtab = av_malloc(n * sizeof(uint16_t));
-    if (!s->revtab)
-        goto fail;
+    if (nbits <= 16) {
+        s->revtab = av_malloc(n * sizeof(uint16_t));
+        if (!s->revtab)
+            goto fail;
+    } else {
+        s->revtab32 = av_malloc(n * sizeof(uint32_t));
+        if (!s->revtab32)
+            goto fail;
+    }
     s->tmp_buf = av_malloc(n * sizeof(FFTComplex));
     if (!s->tmp_buf)
         goto fail;
@@ -192,16 +201,22 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
         fft_perm_avx(s);
     } else {
         for(i=0; i<n; i++) {
+            int k;
             j = i;
             if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS)
                 j = (j&~3) | ((j>>1)&1) | ((j<<1)&2);
-            s->revtab[-split_radix_permutation(i, n, s->inverse) & (n-1)] = j;
+            k = -split_radix_permutation(i, n, s->inverse) & (n-1);
+            if (s->revtab)
+                s->revtab[k] = j;
+            if (s->revtab32)
+                s->revtab32[k] = j;
         }
     }
 
     return 0;
  fail:
     av_freep(&s->revtab);
+    av_freep(&s->revtab32);
     av_freep(&s->tmp_buf);
     return -1;
 }
@@ -210,15 +225,21 @@ static void fft_permute_c(FFTContext *s, FFTComplex *z)
 {
     int j, np;
     const uint16_t *revtab = s->revtab;
+    const uint32_t *revtab32 = s->revtab32;
     np = 1 << s->nbits;
     /* TODO: handle split-radix permute in a more optimal way, probably in-place */
-    for(j=0;j<np;j++) s->tmp_buf[revtab[j]] = z[j];
+    if (revtab) {
+        for(j=0;j<np;j++) s->tmp_buf[revtab[j]] = z[j];
+    } else
+        for(j=0;j<np;j++) s->tmp_buf[revtab32[j]] = z[j];
+
     memcpy(z, s->tmp_buf, np * sizeof(FFTComplex));
 }
 
 av_cold void ff_fft_end(FFTContext *s)
 {
     av_freep(&s->revtab);
+    av_freep(&s->revtab32);
     av_freep(&s->tmp_buf);
 }
 
diff --git a/libavcodec/x86/fft_init.c b/libavcodec/x86/fft_init.c
index 5085f11..337f32d 100644
--- a/libavcodec/x86/fft_init.c
+++ b/libavcodec/x86/fft_init.c
@@ -26,6 +26,9 @@ av_cold void ff_fft_init_x86(FFTContext *s)
 {
     int cpu_flags = av_get_cpu_flags();
 
+    if (s->nbits > 16)
+        return;
+
 #if ARCH_X86_32
     if (EXTERNAL_AMD3DNOW(cpu_flags)) {
         /* 3DNow! for K6-2/3 */



More information about the ffmpeg-cvslog mailing list