[MN-dev] [mndiff]: r213 - in trunk/noe: galois.c gfft.c gfft.h rs.c
michael
subversion at mplayerhq.hu
Fri Jul 23 03:25:01 CEST 2010
Author: michael
Date: Fri Jul 23 03:25:01 2010
New Revision: 213
Log:
add gfft_0padded()
a bit faster
Modified:
trunk/noe/galois.c
trunk/noe/gfft.c
trunk/noe/gfft.h
trunk/noe/rs.c
Modified: trunk/noe/galois.c
==============================================================================
--- trunk/noe/galois.c Fri Jul 23 00:20:22 2010 (r212)
+++ trunk/noe/galois.c Fri Jul 23 03:25:01 2010 (r213)
@@ -103,14 +103,8 @@ void EXT(prodPoly)(GFF4Element *dst, GFF
GFF4Element temp[2][SIZE]; //[size]
const GFF4Element scale= inv(size);
- //FIXME avoid mem* (but note the memcpy/set only takes 2% of the prodPoly time)
- memcpy(temp[0], src1, sizeof(GFF4Element)*(order1+1));
- memset(temp[0] + order1 + 1, 0, sizeof(GFF4Element)*(size - order1 - 1));
- memcpy(temp[1], src2, sizeof(GFF4Element)*(order2+1));
- memset(temp[1] + order2 + 1, 0, sizeof(GFF4Element)*(size - order2 - 1));
-
- EXT(gfft)(temp[0], temp[0], logSize);
- EXT(gfft)(temp[1], temp[1], logSize);
+ EXT(gfft_0padded)(temp[0], src1, logSize, order1+1);
+ EXT(gfft_0padded)(temp[1], src2, logSize, order2+1);
for(i=0; i<size; i++){
temp[0][i]= prod(prod(temp[0][i], temp[1][i]), scale);
@@ -228,14 +222,8 @@ void EXT(partialProdPoly)(GFF4Element *d
GFF4Element temp[SIZE]; //[size]
const GFF4Element scale= inv(size);
- //FIXME avoid mem* (but note the memcpy/set only takes 2% of the prodPoly time)
- memcpy(temp, src2, sizeof(GFF4Element)*(order2+1));
- memset(temp + order2 + 1, 0, sizeof(GFF4Element)*(size - order2 - 1));
- memcpy(dst, src1, sizeof(GFF4Element)*(order1+1));
- memset(dst + order1 + 1, 0, sizeof(GFF4Element)*(size - order1 - 1));
-
- EXT(gfft)(dst, dst, logSize);
- EXT(gfft)(temp, temp, logSize);
+ EXT(gfft_0padded)(temp, src2, logSize, order2+1);
+ EXT(gfft_0padded)(dst , src1, logSize, order1+1);
for(i=0; i<size; i++){
dst[i]= prod(prod(dst[i], temp[i]), scale);
Modified: trunk/noe/gfft.c
==============================================================================
--- trunk/noe/gfft.c Fri Jul 23 00:20:22 2010 (r212)
+++ trunk/noe/gfft.c Fri Jul 23 03:25:01 2010 (r213)
@@ -330,6 +330,15 @@ void EXT(gfft)(GFF4Element *dst, GFF4Ele
// int i, j, pass;
//printf("%X %X\n", noe_exp[4096], noe_exp[3*4096]);
+ if(logSize<5){
+ memcpy(dst, src, sizeof(*dst)<<logSize);
+ if(logSize==1) fft2(dst);
+ else if(logSize==2) fft4(dst);
+ else if(logSize==3) fft8(dst);
+ else if(logSize==4) fft16(dst);
+ return;
+ }
+
assert(logSize>=5);
if(src==dst)
@@ -370,6 +379,25 @@ STOP_TIMER}
#endif
}
+void EXT(gfft_0padded)(GFF4Element *dst, GFF4Element *src, int logSize, int nzCoeffs){ // fills dst[0 .. 1<<logSize) from the first nzCoeffs entries of src; used in this commit in place of "zero-pad src to 1<<logSize entries, then EXT(gfft)"
+ int lognz= noe_log2(nzCoeffs-1)+1; // log2 of the block size used below; presumably the smallest power of two >= nzCoeffs (depends on noe_log2, not shown here -- confirm, incl. nzCoeffs==1)
+ int i, j;
+
+ for(i=1; i<1<<(logSize-lognz); i++){ // every block of 1<<lognz entries except block 0, which is handled after the loop
+ int idx= i<<lognz; // offset of block i inside dst
+ int m= bitReverse(i, SHIFT)>>lognz; // per-block step into the EXT(exp) table
+ for(j=0; j<nzCoeffs; j++){
+ dst[idx+j]= prod(src[j], EXT(exp)[ j*m ]); // coefficient j scaled by EXT(exp)[j*m]
+ }
+ memset(dst+nzCoeffs+idx, 0, sizeof(*dst)*((1<<lognz)-nzCoeffs)); // zero the remainder of this block
+ EXT(gfft)(dst+idx, dst+idx, lognz); // in-place transform of this block only
+ }
+
+ memcpy(dst, src, sizeof(*dst)*nzCoeffs); // block 0 takes the coefficients unscaled; it is done last, which appears to be what keeps src[0..nzCoeffs) readable by the loop above when dst==src. NOTE(review): with dst==src this memcpy is an exact self-overlap -- confirm that is acceptable
+ memset(dst+nzCoeffs, 0, sizeof(*dst)*((1<<lognz)-nzCoeffs)); // zero the remainder of block 0
+ EXT(gfft)(dst, dst, lognz); // in-place transform of block 0
+}
+
void EXT(igfft)(GFF4Element *dst, GFF4Element *src, int logSize){
assert(logSize>=5);
Modified: trunk/noe/gfft.h
==============================================================================
--- trunk/noe/gfft.h Fri Jul 23 00:20:22 2010 (r212)
+++ trunk/noe/gfft.h Fri Jul 23 03:25:01 2010 (r213)
@@ -18,6 +18,7 @@
void noe_gfft_init();
void EXT(gfft)(GFF4Element *dst, GFF4Element *src, int size);
+void EXT(gfft_0padded)(GFF4Element *dst, GFF4Element *src, int logSize, int nzCoeffs);
void EXT(igfft)(GFF4Element *dst, GFF4Element *src, int size);
void EXT(permute)(GFF4Element *dst, GFF4Element *src, int outCount, int codeBits);
Modified: trunk/noe/rs.c
==============================================================================
--- trunk/noe/rs.c Fri Jul 23 00:20:22 2010 (r212)
+++ trunk/noe/rs.c Fri Jul 23 03:25:01 2010 (r213)
@@ -434,9 +434,8 @@ for(i=0; i<erasureCount; i++){
}
if(elfft){
- memset(errorLocator + errorCount+2, 0, (codeCount - errorCount - 1)*sizeof(GFF4Element));
errorLocator++;
- EXT(gfft)(errorLocator, errorLocator, codeBits);
+ EXT(gfft_0padded)(errorLocator, errorLocator, codeBits, errorLocator[-1]+1);
for(j=0,i=0; j<errorCount; i++){
if(i >= codeCount)
@@ -466,13 +465,10 @@ for(i=0; i<erasureCount; i++){
}
if(gfftEval>1){
- //FIXME
- memset(omega + omega[0] + 2, 0, (codeCount - omega[0] - 1)*sizeof(GFF4Element));
- memset(psi + psi [0] + 2, 0, (codeCount - psi [0] - 1)*sizeof(GFF4Element));
omega++;
psi++;
- EXT(gfft)(omega, omega, codeBits);
- EXT(gfft)(psi , psi , codeBits);
+ EXT(gfft_0padded)(omega, omega, codeBits, omega[-1]+1);
+ EXT(gfft_0padded)(psi , psi , codeBits, psi [-1]+1);
}
for(i=0; i<erasureCount + errorCount; i++){
More information about the Mndiff-dev
mailing list