[FFmpeg-devel] [PATCH] avcodec/fft: out of place permutation with av_fft_permute2

Muhammad Faiz mfcc64 at gmail.com
Tue Oct 13 07:37:49 CEST 2015


-------------- next part --------------
From a403f93a8fa20ce0d7345d9a00d75bc90fe7d73f Mon Sep 17 00:00:00 2001
From: Muhammad Faiz <mfcc64 at gmail.com>
Date: Tue, 13 Oct 2015 00:31:29 +0700
Subject: [PATCH] avcodec/fft: out of place permutation with av_fft_permute2

Add av_fft_permute2(), an out-of-place permutation that is more cache friendly.
av_fft_permute() is optimized as well, since the C version now uses the same code.
Machine-specific code is (unfortunately) not touched.
Speedup (at least on my machine, bits = 14):
with av_fft_permute  ~ 1.5x - 2x
with av_fft_permute2 ~ 2.5x - 3x
---
 libavcodec/avfft.c        |  5 +++++
 libavcodec/avfft.h        |  3 +++
 libavcodec/fft.h          |  2 ++
 libavcodec/fft_template.c | 34 +++++++++++++++++++++++++++++-----
 libavcodec/version.h      |  2 +-
 5 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/libavcodec/avfft.c b/libavcodec/avfft.c
index 675d2b9..6b33ab5 100644
--- a/libavcodec/avfft.c
+++ b/libavcodec/avfft.c
@@ -40,6 +40,11 @@ void av_fft_permute(FFTContext *s, FFTComplex *z)
     s->fft_permute(s, z);
 }
 
+void av_fft_permute2(FFTContext *s, FFTComplex *dst, const FFTComplex *src)
+{
+    s->fft_permute2(s, dst, src);
+}
+
 void av_fft_calc(FFTContext *s, FFTComplex *z)
 {
     s->fft_calc(s, z);
diff --git a/libavcodec/avfft.h b/libavcodec/avfft.h
index 0c0f9b8..31d5420 100644
--- a/libavcodec/avfft.h
+++ b/libavcodec/avfft.h
@@ -52,6 +52,9 @@ FFTContext *av_fft_init(int nbits, int inverse);
  */
 void av_fft_permute(FFTContext *s, FFTComplex *z);
 
+/** Out-of-place variant of av_fft_permute(): permutes src into dst. */
+void av_fft_permute2(FFTContext *s, FFTComplex *dst, const FFTComplex *src);
+
 /**
  * Do a complex FFT with the parameters defined in av_fft_init(). The
  * input data must be permuted before. No 1.0/sqrt(n) normalization is done.
diff --git a/libavcodec/fft.h b/libavcodec/fft.h
index 64f0f63..c7f2bdb 100644
--- a/libavcodec/fft.h
+++ b/libavcodec/fft.h
@@ -110,6 +110,8 @@ struct FFTContext {
     void (*mdct_calcw)(struct FFTContext *s, FFTDouble *output, const FFTSample *input);
     enum fft_permutation_type fft_permutation;
     enum mdct_permutation_type mdct_permutation;
+    /* out of place permutation */
+    void (*fft_permute2)(struct FFTContext *s, FFTComplex *dst, const FFTComplex* src);
 };
 
 #if CONFIG_HARDCODED_TABLES
diff --git a/libavcodec/fft_template.c b/libavcodec/fft_template.c
index 23ea453..00e652b 100644
--- a/libavcodec/fft_template.c
+++ b/libavcodec/fft_template.c
@@ -72,6 +72,8 @@ COSTABLE_CONST FFTSample * const FFT_NAME(ff_cos_tabs)[] = {
 #endif /* FFT_FIXED_32 */
 
 static void fft_permute_c(FFTContext *s, FFTComplex *z);
+static void fft_permute2_c(FFTContext *s, FFTComplex *dst, const FFTComplex *src);
+static void fft_permute2_wrapper_c(FFTContext *s, FFTComplex *dst, const FFTComplex *src);
 static void fft_calc_c(FFTContext *s, FFTComplex *z);
 
 static int split_radix_permutation(int i, int n, int inverse)
@@ -156,6 +158,7 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
     s->fft_permutation = FF_FFT_PERM_DEFAULT;
 
     s->fft_permute = fft_permute_c;
+    s->fft_permute2= fft_permute2_c;
     s->fft_calc    = fft_calc_c;
 #if CONFIG_MDCT
     s->imdct_calc  = ff_imdct_calc_c;
@@ -197,6 +200,9 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
         }
     }
 
+    if (s->fft_permute != fft_permute_c && s->fft_permute2 == fft_permute2_c)
+        s->fft_permute2 = fft_permute2_wrapper_c;
+
     return 0;
  fail:
     av_freep(&s->revtab);
@@ -206,12 +212,30 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
 
 static void fft_permute_c(FFTContext *s, FFTComplex *z)
 {
-    int j, np;
+    int n = 1 << s->nbits;
+    fft_permute2_c(s, s->tmp_buf, z);
+    memcpy(z, s->tmp_buf, n * sizeof(FFTComplex));
+}
+
+static void fft_permute2_c(FFTContext *s, FFTComplex *dst, const FFTComplex *src)
+{
+    int j, n, q;
     const uint16_t *revtab = s->revtab;
-    np = 1 << s->nbits;
-    /* TODO: handle split-radix permute in a more optimal way, probably in-place */
-    for(j=0;j<np;j++) s->tmp_buf[revtab[j]] = z[j];
-    memcpy(z, s->tmp_buf, np * sizeof(FFTComplex));
+    n = 1 << s->nbits;
+    q = n >> 2;
+    for (j = 0; j < q; j++) {
+        dst[revtab[j    ]] = src[j    ];
+        dst[revtab[j+  q]] = src[j+  q];
+        dst[revtab[j+2*q]] = src[j+2*q];
+        dst[revtab[j+3*q]] = src[j+3*q];
+    }
+}
+
+/* fallback when s->fft_permute is not fft_permute_c: copy src to dst, then permute dst in place */
+static void fft_permute2_wrapper_c(FFTContext *s, FFTComplex *dst, const FFTComplex *src)
+{
+    memcpy(dst, src, (1 << s->nbits) * sizeof(FFTComplex));
+    s->fft_permute(s, dst);
 }
 
 av_cold void ff_fft_end(FFTContext *s)
diff --git a/libavcodec/version.h b/libavcodec/version.h
index c7fc1f1..953ff9f 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -29,7 +29,7 @@
 #include "libavutil/version.h"
 
 #define LIBAVCODEC_VERSION_MAJOR  57
-#define LIBAVCODEC_VERSION_MINOR   5
+#define LIBAVCODEC_VERSION_MINOR   6
 #define LIBAVCODEC_VERSION_MICRO 100
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
-- 
1.8.3.1



More information about the ffmpeg-devel mailing list