[FFmpeg-devel] Fixpoint FFT optimization, with MDCT and IMDCT wrappers for audio optimization
Michael Niedermayer
michaelni
Sat Jul 28 03:08:41 CEST 2007
Hi
On Fri, Jul 27, 2007 at 05:40:14PM -0400, mmh wrote:
[...]
> +/*
> + This is a fixedpoint inplace 32bit FFT which accepts 3 arguments:
> +
> + @param X - input signal in format 1.31
> + @param W - phase factors in 1.31 format
> + @param lgN - log_2(N) where N is the size of the input data set.
> +
> +*/
> +void ff_fft32 (FFTContext *c, FFTComplex32 *X)
> +{
> + FFTComplex16 *W = c->exptab16;
> + int lgN = c->nbits;
> + int N = 1<<lgN;
> + int s, j, k, m, w, ws;
> + int64_t wwr,wwi;
> +
> + for (s=1; s<=lgN; s++) {
> + m = 1<<s;
> + ws = 1<<(lgN-s);
> + w=0;
> + for (j=0; j<m/2; j++) {
> + wwr=W[w].re;
> + wwi=W[w].im;
> + w+=ws;
> + for (k=j; k<N; k+=m) {
> + long tr,ti;
> + int k2 = k+m/2;
signed and /2 is slow either changed to unsiged or use >>1
> +
> + tr = (X[k2].re*wwr - X[k2].im*wwi + 0x4000)>>15;
> + ti = (X[k2].re*wwi + X[k2].im*wwr + 0x4000)>>15;
> +
> + X[k2].re = (X[k].re - tr);
> + X[k2].im = (X[k].im - ti);
> +
> + X[k].re = (X[k].re + tr);
> + X[k].im = (X[k].im + ti);
> + }
> + }
> + }
> +}
what about implementing a split radix fft?
(http://en.wikipedia.org/wiki/Split-radix_FFT)
also several iterations have trivial (1/-1/0) w* these should not be done
in this generic way
> +
> +/*
> + use same number of bits of precision as floating point.
> +*/
> +#define PRECISION 23
> +#define HALF (1<<PRECISION)
> +void ff_fft32x32 (FFTContext *c, FFTComplex32 *X)
> +{
> + FFTComplex32 *W = c->exptab32;
> + int lgN = c->nbits;
> + int N = 1<<lgN;
> + int s, j, k, m, w, ws;
> + int64_t wwr,wwi;
> +
> +
> + for (s=1; s<=lgN; s++) {
> + m = 1<<s;
> + ws = 1<<(lgN-s);
> + w=0;
> + for (j=0; j<m/2; j++) {
> + wwr=W[w].re;
> + wwi=W[w].im;
> + w+=ws;
> + for (k=j; k<N; k+=m) {
> + long tr,ti;
> + int k2 = k+m/2;
> +
> + tr = (X[k2].re*wwr - X[k2].im*wwi + HALF)>>PRECISION;
> + ti = (X[k2].re*wwi + X[k2].im*wwr + HALF)>>PRECISION;
what about a
re+=re;
im+=im;
tr= ((re*wwr)>>32) - ((im*wwi)>>32);
ti= ((re*wwi)>>32) + ((im*wwr)>>32);
this should be much faster if a 32*32->high 32 instruction is available
> +
> + X[k2].re = (X[k].re - tr);
> + X[k2].im = (X[k].im - ti);
> +
> + X[k].re = (X[k].re + tr);
> + X[k].im = (X[k].im + ti);
> + }
> + }
> + }
> +}
> +
> +
> +
> +
trailing whitespace
> +/*
> + Generate 16b coefficient table from a predefined floating point table.
> +*/
not a proper doxygen comment
> +static FFTComplex16 *stwids (FFTComplex *w, int n) {
> + int i;
> + FFTComplex16 *v = av_malloc (sizeof (short)*n);
> + for (i = 0; i < n/2; i++) {
> + v[i].re = w[i].re*32767;
> + v[i].im = w[i].im*32767;
> + }
this should be 32768 with proper cliping
[...]
> + s->fft_calc = (void *)ff_fft32;
unneeded cast or your code is buggy
> +}
inconsistant indention
[...]
> +/* complex multiplication: p = a * b */
> +#define CMUL(pre, pim, are, aim, bre, bim) \
> +{\
> + int32_t _are = (are);\
> + int32_t _aim = (aim);\
> + int64_t _bre = (bre);\
> + int64_t _bim = (bim);\
> + (pre) = (_are * _bre - _aim * _bim + 0x4000)>>15;\
> + (pim) = (_are * _bim + _aim * _bre + 0x4000)>>15;\
stuff begining with _ is reserved
[...]
> -
> +#undef random
> float frandom(void)
doesnt belong in this patch
[...]
> Index: libavcodec/fft.c
> ===================================================================
> --- libavcodec/fft.c (revision 9807)
> +++ libavcodec/fft.c (working copy)
> @@ -109,7 +109,7 @@
> }
> nblocks = nblocks >> 1;
> } while (nblocks != 0);
> - av_freep(&s->exptab);
> +// av_freep(&s->exptab);
> }
hmm
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
Democracy is the form of government in which you can choose your dictator
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: not available
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20070728/ace41a30/attachment.pgp>
More information about the ffmpeg-devel
mailing list