[MN-dev] [mndiff]: r211 - trunk/noe/gfft.c

michael subversion at mplayerhq.hu
Fri Jul 23 00:20:21 CEST 2010


Author: michael
Date: Fri Jul 23 00:20:21 2010
New Revision: 211

Log:
port optimizations from fft8 to ifft8

Modified:
   trunk/noe/gfft.c

Modified: trunk/noe/gfft.c
==============================================================================
--- trunk/noe/gfft.c	Fri Jul 23 00:20:20 2010	(r210)
+++ trunk/noe/gfft.c	Fri Jul 23 00:20:21 2010	(r211)
@@ -222,51 +222,47 @@ static inline void ifft2(GFF4Element *p)
     p[1]= sum(a, SIZE - b);
 }
 
-static inline void ifft4(GFF4Element *p){
+static inline void ifft4(GFF4Element *p, int step){
     unsigned int a,b,c,d;
 
     d= p[0] + SIZE*(SIZE/2ULL);
 
-    a= d + p[1];
-    b= d - p[1];
-    c= p[2] + p[3];
-    d=(p[2] - p[3])<<(SHIFT/2);
+    a= d   +p[step];
+    c= d   -p[step];
+    b=  p[2*step] + p[3*step];
+    d= (p[2*step] - p[3*step])<<(SHIFT/2) /*(p[1] - p[3])*noe_exp[16384]*/;
 
-    p[0]= reduce(a+c);
-    p[2]= reduce(a-c);
-    p[1]= reduce(b+d);
-    p[3]= reduce(b-d);
+    p[0     ]= reduce(a+b);
+    p[2*step]= reduce(a-b);
+    p[1*step]= reduce(c+d);
+    p[3*step]= reduce(c-d);
 }
 
-static void ifft8(GFF4Element *p){
-    unsigned a,b,c,d,t;
-
-    ifft4(p);
-    ifft2(p+4);
-    ifft2(p+6);
+static inline void ifft8(GFF4Element *p){
+    unsigned int a,b;
 
     a= p[0];
-    b= p[2] + SIZE + (SIZE<<(SHIFT/2));
-    c= p[4];
-    t= p[6];
-    d= (c - t)<<(SHIFT/2);
-    c= sum (c, t);
+    b= p[1];
+    p[0]= a+b;
+    p[1]= a-b;
 
-    p[0]= sum(a, c);
-    p[4]= sum(a, SIZE - c);
-    p[2]= reduce(b + d);
-    p[6]= reduce(b - d);
+    a= p[6];
+    b= p[7];
+    p[6]= a+b;
+    p[7]= hsreduce(((b-a)<<(SHIFT*3/4)))/*(a-b)*noe_exp[8192]*/;
 
-    a=        p[1] + (SIZE<<(SHIFT-1));
-    b=        p[3] + (SIZE<<(SHIFT-1));
-    c=        p[5]<<(SHIFT/4);
-    t= reduce(p[7]<<(SHIFT*3/4));
-    d= (t - c)<<(SHIFT/2);
-    c+= t;
-    p[1]= reduce(a - c);
-    p[5]= reduce(a + c);
-    p[3]= reduce(b + d);
-    p[7]= reduce(b - d);
+    a= p[2];
+    b= p[3];
+    p[2]= a+b;
+    p[3]= (a-b)<<(SHIFT/2) /*(a-b)*noe_exp[16384]*/;
+
+    a= p[4];
+    b= p[5];
+    p[4]= a+b;
+    p[5]= (b-a)<<(SHIFT/4)/*(a-b)*noe_exp[3*8192]*/;
+
+    ifft4(p  ,2);
+    ifft4(p+1,2);
 }
 
 static void ifftn(GFF4Element *p, int logSize){
@@ -275,8 +271,8 @@ static void ifftn(GFF4Element *p, int lo
 
     if(logSize==4){
         ifft8(p);
-        ifft4(p+8);
-        ifft4(p+12);
+        ifft4(p+8 ,1);
+        ifft4(p+12,1);
     } else if(logSize==5){
         ifftn(p       , logSize-1);
         ifft8(p+16);


More information about the Mndiff-dev mailing list