[MN-dev] [mndiff]: r213 - in trunk/noe: galois.c gfft.c gfft.h rs.c

michael subversion at mplayerhq.hu
Fri Jul 23 03:25:01 CEST 2010


Author: michael
Date: Fri Jul 23 03:25:01 2010
New Revision: 213

Log:
add gfft_0padded()
a bit faster

Modified:
   trunk/noe/galois.c
   trunk/noe/gfft.c
   trunk/noe/gfft.h
   trunk/noe/rs.c

Modified: trunk/noe/galois.c
==============================================================================
--- trunk/noe/galois.c	Fri Jul 23 00:20:22 2010	(r212)
+++ trunk/noe/galois.c	Fri Jul 23 03:25:01 2010	(r213)
@@ -103,14 +103,8 @@ void EXT(prodPoly)(GFF4Element *dst, GFF
         GFF4Element temp[2][SIZE]; //[size]
         const GFF4Element scale= inv(size);
 
-        //FIXME avoid mem* (but note the memcpy/set only takes 2% of the prodPoly time)
-        memcpy(temp[0], src1, sizeof(GFF4Element)*(order1+1));
-        memset(temp[0] + order1 + 1, 0,  sizeof(GFF4Element)*(size - order1 - 1));
-        memcpy(temp[1], src2, sizeof(GFF4Element)*(order2+1));
-        memset(temp[1] + order2 + 1, 0,  sizeof(GFF4Element)*(size - order2 - 1));
-
-        EXT(gfft)(temp[0], temp[0], logSize);
-        EXT(gfft)(temp[1], temp[1], logSize);
+        EXT(gfft_0padded)(temp[0], src1, logSize, order1+1);
+        EXT(gfft_0padded)(temp[1], src2, logSize, order2+1);
         
         for(i=0; i<size; i++){
             temp[0][i]= prod(prod(temp[0][i], temp[1][i]), scale);
@@ -228,14 +222,8 @@ void EXT(partialProdPoly)(GFF4Element *d
         GFF4Element temp[SIZE]; //[size]
         const GFF4Element scale= inv(size);
 
-        //FIXME avoid mem* (but note the memcpy/set only takes 2% of the prodPoly time)
-        memcpy(temp, src2, sizeof(GFF4Element)*(order2+1));
-        memset(temp + order2 + 1, 0,  sizeof(GFF4Element)*(size - order2 - 1));
-        memcpy(dst, src1, sizeof(GFF4Element)*(order1+1));
-        memset(dst + order1 + 1, 0,  sizeof(GFF4Element)*(size - order1 - 1));
-
-        EXT(gfft)(dst, dst, logSize);
-        EXT(gfft)(temp, temp, logSize);
+        EXT(gfft_0padded)(temp, src2, logSize, order2+1);
+        EXT(gfft_0padded)(dst , src1, logSize, order1+1);
         
         for(i=0; i<size; i++){
             dst[i]= prod(prod(dst[i], temp[i]), scale);

Modified: trunk/noe/gfft.c
==============================================================================
--- trunk/noe/gfft.c	Fri Jul 23 00:20:22 2010	(r212)
+++ trunk/noe/gfft.c	Fri Jul 23 03:25:01 2010	(r213)
@@ -330,6 +330,15 @@ void EXT(gfft)(GFF4Element *dst, GFF4Ele
 //  int i, j, pass;
 //printf("%X %X\n", noe_exp[4096], noe_exp[3*4096]);
 
+    if(logSize<5){
+        memcpy(dst, src, sizeof(*dst)<<logSize);
+             if(logSize==1) fft2(dst);
+        else if(logSize==2) fft4(dst);
+        else if(logSize==3) fft8(dst);
+        else if(logSize==4) fft16(dst);
+        return;
+    }
+
     assert(logSize>=5);
 
     if(src==dst)
@@ -370,6 +379,25 @@ STOP_TIMER}
 #endif
 }
 
+void EXT(gfft_0padded)(GFF4Element *dst, GFF4Element *src, int logSize, int nzCoeffs){ // gfft of src[0..nzCoeffs), intended to match gfft() of src zero padded to 1<<logSize elements; dst may equal src
+    int lognz= noe_log2(nzCoeffs-1)+1; // log2 of the sub-block size; presumably the smallest power of 2 >= nzCoeffs (TODO confirm against noe_log2)
+    int i, j;
+    // blocks 1 and up: src scaled by a per-block factor from EXT(exp), zero filled to 1<<lognz elements, then transformed in place
+    for(i=1; i<1<<(logSize-lognz); i++){
+        int idx= i<<lognz; // offset of output block i inside dst
+        int m= bitReverse(i, SHIFT)>>lognz; // per-block step used to index EXT(exp)
+        for(j=0; j<nzCoeffs; j++){
+            dst[idx+j]= prod(src[j], EXT(exp)[ j*m ]);
+        }
+        memset(dst+nzCoeffs+idx, 0, sizeof(*dst)*((1<<lognz)-nzCoeffs));
+        EXT(gfft)(dst+idx, dst+idx, lognz);
+    }
+    // block 0 is done last so src[0..nzCoeffs) is still intact for in-place callers; memmove because dst==src would make memcpy undefined
+    memmove(dst, src, sizeof(*dst)*nzCoeffs);
+    memset(dst+nzCoeffs, 0, sizeof(*dst)*((1<<lognz)-nzCoeffs));
+    EXT(gfft)(dst, dst, lognz);
+}
+
 void EXT(igfft)(GFF4Element *dst, GFF4Element *src, int logSize){
     assert(logSize>=5);
 

Modified: trunk/noe/gfft.h
==============================================================================
--- trunk/noe/gfft.h	Fri Jul 23 00:20:22 2010	(r212)
+++ trunk/noe/gfft.h	Fri Jul 23 03:25:01 2010	(r213)
@@ -18,6 +18,7 @@
 
 void noe_gfft_init();
 void EXT(gfft)(GFF4Element *dst, GFF4Element *src, int size);
+void EXT(gfft_0padded)(GFF4Element *dst, GFF4Element *src, int logSize, int nzCoeffs);
 void EXT(igfft)(GFF4Element *dst, GFF4Element *src, int size);
 void EXT(permute)(GFF4Element *dst, GFF4Element *src, int outCount, int codeBits);
 

Modified: trunk/noe/rs.c
==============================================================================
--- trunk/noe/rs.c	Fri Jul 23 00:20:22 2010	(r212)
+++ trunk/noe/rs.c	Fri Jul 23 03:25:01 2010	(r213)
@@ -434,9 +434,8 @@ for(i=0; i<erasureCount; i++){
     }
 
     if(elfft){
-        memset(errorLocator + errorCount+2, 0, (codeCount - errorCount - 1)*sizeof(GFF4Element));
         errorLocator++;
-        EXT(gfft)(errorLocator, errorLocator, codeBits);
+        EXT(gfft_0padded)(errorLocator, errorLocator, codeBits, errorLocator[-1]+1);
 
         for(j=0,i=0; j<errorCount; i++){
             if(i >= codeCount)
@@ -466,13 +465,10 @@ for(i=0; i<erasureCount; i++){
     }
 
     if(gfftEval>1){
-        //FIXME
-        memset(omega + omega[0] + 2, 0, (codeCount - omega[0] - 1)*sizeof(GFF4Element));
-        memset(psi   + psi  [0] + 2, 0, (codeCount - psi  [0] - 1)*sizeof(GFF4Element));
         omega++;
         psi++;
-        EXT(gfft)(omega, omega, codeBits);
-        EXT(gfft)(psi  , psi  , codeBits);
+        EXT(gfft_0padded)(omega, omega, codeBits, omega[-1]+1);
+        EXT(gfft_0padded)(psi  , psi  , codeBits, psi  [-1]+1);
     }
 
     for(i=0; i<erasureCount + errorCount; i++){


More information about the Mndiff-dev mailing list