[FFmpeg-devel] [PATCH] libavcodec/fft_template: improve performance of the ff_fft_init in fft_template
Steven Liu
lq at chinaffmpeg.org
Mon Dec 17 11:13:05 EET 2018
After patch:
init nbits = 17, get 10000 samples, duration: 15221
Before patch:
init nbits = 17, get 10000 samples, duration: 16105
Test script (runs `fft -n 17` 10000 times and prints the average of the reported durations):
DURATION=0
for((i=0;i<10000;i++)) do
./libavcodec/tests/fft -n 17 &>output
T_DURATION=`grep "duration" output | awk -F"=" '{ print $2}'`
DURATION=`expr $DURATION + $T_DURATION`
done
TOTAL=`expr $DURATION / 10000`
echo $TOTAL
Signed-off-by: Steven Liu <lq at chinaffmpeg.org>
---
libavcodec/fft_template.c | 27 +++++++++++++++++----------
1 file changed, 17 insertions(+), 10 deletions(-)
diff --git a/libavcodec/fft_template.c b/libavcodec/fft_template.c
index 762c014bc8..5f6f52275d 100644
--- a/libavcodec/fft_template.c
+++ b/libavcodec/fft_template.c
@@ -257,21 +257,28 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
}
#endif /* FFT_FIXED_32 */
+/* For each i in [0, n): store i (bits 0 and 1 swapped for FF_FFT_PERM_SWAP_LSBS)
+ * in s->revtab##num at index -split_radix_permutation(i, n, s->inverse) & (n-1). */
+#define SPLIT_RADIX_PERMUTATION(num) \
+    for(i=0; i<n; i++) { \
+        int k; \
+        j = i; \
+        if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS) \
+            j = (j&~3) | ((j>>1)&1) | ((j<<1)&2); \
+        k = -split_radix_permutation(i, n, s->inverse) & (n-1); \
+        s->revtab##num[k] = j; \
+    }
if (s->fft_permutation == FF_FFT_PERM_AVX) {
fft_perm_avx(s);
} else {
- for(i=0; i<n; i++) {
- int k;
- j = i;
- if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS)
- j = (j&~3) | ((j>>1)&1) | ((j<<1)&2);
- k = -split_radix_permutation(i, n, s->inverse) & (n-1);
- if (s->revtab)
- s->revtab[k] = j;
- if (s->revtab32)
- s->revtab32[k] = j;
+ if (s->revtab) {
+ SPLIT_RADIX_PERMUTATION()
+ }
+ if (s->revtab32) {
+ SPLIT_RADIX_PERMUTATION(32)
}
+#undef SPLIT_RADIX_PERMUTATION
}
return 0;
--
2.15.2 (Apple Git-101.1)
More information about the ffmpeg-devel
mailing list