[MN-dev] [mndiff]: r112 - trunk/noe/gfft.c

michael subversion at mplayerhq.hu
Fri Oct 24 16:28:04 CEST 2008


Author: michael
Date: Fri Oct 24 16:28:04 2008
New Revision: 112

Log:
Split radix gfft.


Modified:
   trunk/noe/gfft.c

Modified: trunk/noe/gfft.c
==============================================================================
--- trunk/noe/gfft.c	(original)
+++ trunk/noe/gfft.c	Fri Oct 24 16:28:04 2008
@@ -120,60 +120,97 @@ static inline void fft8(GFF4Element *p){
 static void fft16(GFF4Element *p){
     int n;
 
-    for(n=0; n<8; n++){
-        const unsigned int w= EXT(exp)[ n<<(SHIFT-4) ];
-        const unsigned int a= p[n    ];
-        const unsigned int b= p[n + 8];
+    for(n=0; n<4; n++){
+        const unsigned int w2= EXT(exp)[(  n+4)<<(SHIFT-4) ];
+        const unsigned int wx= EXT(exp)[(3*n+4)<<(SHIFT-4) ];
+              unsigned int a= p[n      ];
+              unsigned int b= p[n +   4];
+        const unsigned int c= p[n + 2*4];
+        const unsigned int d= p[n + 3*4];
 
-        p[n    ]= sum(a, b);
-        p[n + 8]= reduce(sum(a, SIZE - b)*w);
+        p[n      ]= sum (a, c);
+        a         = SIZE + (SIZE<<(SHIFT/2)) + ((a - c)<<(SHIFT/2));
+        p[n +   4]= sum (b, d);
+        b         = b - d;
+
+        p[n + 2*4]= reduce(reduce(a + b)*w2);
+        p[n + 3*4]= reduce(reduce(a - b)*wx);
     }
 
     fft8(p);
-    fft8(p+8);
+    fft4(p+8);
+    fft4(p+12);
 }
 
 static void fftn(GFF4Element *p, int logSize){
     int n;
-    const int size= 1<<(logSize-1);
+    const int size= 1<<(logSize-2);
 
     for(n=0; n<size; n++){
-        const unsigned int w= EXT(exp)[ n<<(SHIFT-logSize) ];
-        const unsigned int a= p[n       ];
-        const unsigned int b= p[n + size];
+        const unsigned int w2= EXT(exp)[(  n+size)<<(SHIFT-logSize ) ];
+        const unsigned int wx= EXT(exp)[(3*n+size)<<(SHIFT-logSize ) ];
+              unsigned int a= p[n         ];
+              unsigned int b= p[n +   size];
+        const unsigned int c= p[n + 2*size];
+        const unsigned int d= p[n + 3*size];
 
-        p[n       ]= sum(a, b);
-        p[n + size]= reduce(sum(a, SIZE - b)*w);
+        p[n         ]= sum (a, c);
+        a            = SIZE + (SIZE<<(SHIFT/2)) + ((a - c)<<(SHIFT/2));
+        p[n +   size]= sum (b, d);
+        b            = b - d;
+
+        p[n + 2*size]= reduce(reduce(a + b)*w2);
+        p[n + 3*size]= reduce(reduce(a - b)*wx);
     }
-    
-    if(logSize==5){
+
+    if(logSize==6){
+        fftn(p, logSize-1);
+        fft16(p+32);
+        fft16(p+48);
+    }else if(logSize==5){
         fft16(p);
-        fft16(p+16);
+        fft8 (p+16);
+        fft8 (p+24);
     }else{
-        fftn(p, logSize-1);
-        fftn(p+size, logSize-1);
+        fftn(p       , logSize-1);
+        fftn(p+2*size, logSize-2);
+        fftn(p+3*size, logSize-2);
     }
 }
 
 static void fftn2(GFF4Element *p, GFF4Element *src, int logSize){
     int n;
-    const int size= 1<<(logSize-1);
+    const int size= 1<<(logSize-2);
 
     for(n=0; n<size; n++){
-        const unsigned int w= EXT(exp)[ n<<(SHIFT-logSize) ];
-        const unsigned int a= src[n       ];
-        const unsigned int b= src[n + size];
+        const unsigned int w2= EXT(exp)[(  n+size)<<(SHIFT-logSize ) ];
+        const unsigned int wx= EXT(exp)[(3*n+size)<<(SHIFT-logSize ) ];
+              unsigned int a= src[n         ];
+              unsigned int b= src[n +   size];
+        const unsigned int c= src[n + 2*size];
+        const unsigned int d= src[n + 3*size];
 
-        p[n       ]= sum(a, b);
-        p[n + size]= reduce(sum(a, SIZE - b)*w);
+        p[n         ]= sum (a, c);
+        a            = SIZE + (SIZE<<(SHIFT/2)) + ((a - c)<<(SHIFT/2));
+        p[n +   size]= sum (b, d);
+        b            = b - d;
+
+        p[n + 2*size]= reduce(reduce(a + b)*w2);
+        p[n + 3*size]= reduce(reduce(a - b)*wx);
     }
-    
-    if(logSize==5){
+
+    if(logSize==6){
+        fftn(p, logSize-1);
+        fft16(p+32);
+        fft16(p+48);
+    }else if(logSize==5){
         fft16(p);
-        fft16(p+16);
+        fft8 (p+16);
+        fft8 (p+24);
     }else{
-        fftn(p, logSize-1);
-        fftn(p+size, logSize-1);
+        fftn(p       , logSize-1);
+        fftn(p+2*size, logSize-2);
+        fftn(p+3*size, logSize-2);
     }
 }
 
@@ -185,25 +222,83 @@ static inline void ifft2(GFF4Element *p)
     p[1]= sum(a, SIZE - b);
 }
 
+static inline void ifft4(GFF4Element *p){
+    unsigned int a,b,c,d;
+
+    a= p[0] + p[1];
+    b= p[0] - p[1] + SIZE + (SIZE<<(SHIFT/2));
+    c= p[2] + p[3];
+    d=(p[2] - p[3])<<(SHIFT/2);
+
+    p[0]= reduce(a+c);
+    p[1]= reduce(b+d);
+    p[2]= reduce(a-c + 2*SIZE);
+    p[3]= reduce(b-d);
+}
+
+static void ifft8(GFF4Element *p){
+    unsigned a,b,c,d,t;
+
+    ifft4(p);
+    ifft2(p+4);
+    ifft2(p+6);
+
+    a= p[0];
+    b= p[2] + SIZE + (SIZE<<(SHIFT/2));
+    c= p[4];
+    t= p[6];
+    d= (c - t)<<(SHIFT/2);
+    c= sum (c, t);
+
+    p[0]= sum(a, c);
+    p[4]= sum(a, SIZE - c);
+    p[2]= reduce(b + d);
+    p[6]= reduce(b - d);
+
+    a=        p[1] + (SIZE<<(SHIFT-1));
+    b=        p[3] + (SIZE<<(SHIFT-1));
+    c=        p[5]<<(SHIFT/4);
+    t= reduce(p[7]<<(SHIFT*3/4));
+    d= (t - c)<<(SHIFT/2);
+    c+= t;
+    p[1]= reduce(a - c);
+    p[5]= reduce(a + c);
+    p[3]= reduce(b + d);
+    p[7]= reduce(b - d);
+}
+
 static void ifftn(GFF4Element *p, int logSize){
     int n;
-    const int size= 1<<(logSize-1);
+    const int size= 1<<(logSize-2);
 
-    if(logSize==2){
-        ifft2(p);
-        ifft2(p+2);
+    if(logSize==4){
+        ifft8(p);
+        ifft4(p+8);
+        ifft4(p+12);
+    } else if(logSize==5){
+        ifftn(p       , logSize-1);
+        ifft8(p+16);
+        ifft8(p+24);
     }else{
-        ifftn(p, logSize-1);
-        ifftn(p+size, logSize-1);
+        ifftn(p       , logSize-1);
+        ifftn(p+2*size, logSize-2);
+        ifftn(p+3*size, logSize-2);
     }
     
     for(n=0; n<size; n++){
-        const unsigned int w= EXT(exp)[ MINUS1 - (n<<(SHIFT-logSize)) ];
-        const unsigned int a= p[n       ];
-        const unsigned int b= prod(p[n + size], w);
+        const unsigned int w1= EXT(exp)[ MINUS1 - ( n       <<(SHIFT-logSize)) ];
+        const unsigned int wx= EXT(exp)[ MINUS1 - ((3*n    )<<(SHIFT-logSize)) ];
+        const unsigned int a=        p[n         ];
+        const unsigned int b=        p[n +   size] + SIZE + (SIZE<<(SHIFT/2));
+              unsigned int c= reduce(p[n + 2*size]*w1);
+        const unsigned int t= reduce(p[n + 3*size]*wx);
+        unsigned int d= (c - t)<<(SHIFT/2);
+        c= sum (c, t);
 
-        p[n       ]= sum(a, b);
-        p[n + size]= sum(a, SIZE - b);
+        p[n         ]= sum(a, c);
+        p[n + 2*size]= sum(a, SIZE - c);
+        p[n + 1*size]= reduce(b + d);
+        p[n + 3*size]= reduce(b - d);
     }
     
 }



More information about the Mndiff-dev mailing list