[MN-dev] [mndiff]: r81 - in trunk/noe: Makefile galois.c galois.h gfft.c gfft.h noe.c noe_internal.h rs.c rs.h rw.h test.c

michael subversion at mplayerhq.hu
Tue Jul 10 00:48:15 CEST 2007


Author: michael
Date: Tue Jul 10 00:48:14 2007
New Revision: 81

Log:
version from unknown date but definitly before 2006-06-20 13:05


Modified:
   trunk/noe/Makefile
   trunk/noe/galois.c
   trunk/noe/galois.h
   trunk/noe/gfft.c
   trunk/noe/gfft.h
   trunk/noe/noe.c
   trunk/noe/noe_internal.h
   trunk/noe/rs.c
   trunk/noe/rs.h
   trunk/noe/rw.h
   trunk/noe/test.c

Modified: trunk/noe/Makefile
==============================================================================
--- trunk/noe/Makefile	(original)
+++ trunk/noe/Makefile	Tue Jul 10 00:48:14 2007
@@ -1,15 +1,35 @@
-OBJS = test.o galois.o rs.o gfft.o rw.o filerw.o
-SRCS = $(OBJS:.o=.c) $(ASM_OBJS:.o=.s)
+LIBOBJS = galois.o rs.o
+OBJS = rw.o filerw.o noe.o
+LIBSRCS = $(LIBOBJS:.o=.c) $(ASM_LIBOBJS:.o=.s)
+SRCS = $(OBJS:.o=.c)
 
 CFLAGS  = -g -Wall -O4 $(OPTFLAGS) -I. $(EXTRA_INC)
 
 %.o: %.c
 	$(CC) $(CFLAGS) -c -o $@ $<
 
-all: test
+%_100.o: %.c
+	$(CC) $(CFLAGS) -c -DSIZE=0x100 -o $@ $<
 
-test: $(OBJS)
-	$(CC) $(LDFLAGS) -o $@ $(OBJS)
+%_101.o: %.c
+	$(CC) $(CFLAGS) -c -DSIZE=0x101 -o $@ $<
+
+%_10001.o: %.c
+	$(CC) $(CFLAGS) -c -DSIZE=0x10001 -o $@ $<
+
+all: test_100 test_101 test_10001 libnoe_101.a libnoe_10001.a
+
+libnoe_100.a: $(LIBOBJS:.o=_100.o)
+	$(AR) rc $@ $^
+
+libnoe_1%1.a: $(LIBOBJS:.o=_1%1.o) gfft_1%1.o
+	$(AR) rc $@ $^
+
+test: all
+	./test_100 ; ./test_101 ; ./test_10001
+
+test_%: test_%.o libnoe_%.a
+	$(CC) $(LDFLAGS) -o $@ $^
 
 clean:
 	rm -f *.o *.a *~
@@ -17,4 +37,4 @@ clean:
 dep:	depend
 
 depend:
-	$(CC) -MM $(CFLAGS) $(SRCS) 1>.depend
+	$(CC) -MM $(CFLAGS) $(LIBSRCS) $(SRCS) 1>.depend

Modified: trunk/noe/galois.c
==============================================================================
--- trunk/noe/galois.c	(original)
+++ trunk/noe/galois.c	Tue Jul 10 00:48:14 2007
@@ -27,43 +27,71 @@
 #include "galois.h"
 #include "gfft.h"
 
-GFF4Element noe_exp[SIZE];
-GFF4Element noe_log[SIZE];
+GFF4Element EXT(exp)[SIZE];
+GFF4Element EXT(log)[SIZE];
 
-void noe_init(){
+void EXT(init)(){
     GFF4Element ge= 1;
     int i;
     
     for(i=0; i<SIZE; i++){
-        noe_exp[i]= ge;
-        noe_log[ge]= i;
+        EXT(exp)[i]= ge;
+        EXT(log)[ge]= i%(SIZE-1);
+#if M!=0
+        assert(PRIMITIVE_ELEMENT==2);
+        ge+=ge;
+        if(ge&SIZE) ge ^= M;
+#else
         ge= prod(ge, PRIMITIVE_ELEMENT);
+#endif
     }
-    
-    noe_log[0]= 0xFFFF;
-    noe_log[1]= 0;
-    
+#if M==0
     noe_gfft_init();
+#endif
 }
+
+int noe_log2(unsigned int v) // from ffmpeg
+{
+    int n;
+
+    n = 0;
+    if (v & 0xffff0000) {
+        v >>= 16;
+        n += 16;
+    }
+    if (v & 0xff00) {
+        v >>= 8;
+        n += 8;
+    }
+    if (v & 0xf0) {
+        v >>= 4;
+        n += 4;
+    }
+    if (v & 0xc) {
+        v >>= 2;
+        n += 2;
+    }
+    if (v & 0x2) {
+        n++;
+    }
+    return n;
+}
+
 /**
  * ...
  */
-void noe_prodPoly(GFF4Element *dst, GFF4Element *src1, GFF4Element *src2){
+void EXT(prodPoly)(GFF4Element *dst, GFF4Element *src1, GFF4Element *src2){
     const int order1= *src1++;
     const int order2= *src2++;
     int order= order1 + order2;
     int i;
 
-    if((order1==0 && src1[0]==0) || 
-       (order2==0 && src2[0]==0)){
-        SET_POLY0(dst, 0);
-        return;
-    }
-
     *dst++= order;
 
-    if(order1*order2 <= (order1+order2)*64){
-        for(i=order; i>=0; i--){
+    if(order1*order2 <= (order1+order2)*64 || M){
+        dst[order]= prod(src1[order1], src2[order2]);
+        if(!dst[order]) dst[-1]=0; //special case for zero polynoms
+        for(i=order-1; i>=0; i--){
             unsigned int acc=0;
             int j, end;
 
@@ -71,7 +99,8 @@ void noe_prodPoly(GFF4Element *dst, GFF4
             end= NOE_MIN(i, order1);
 
             for(; j<=end; j++){
-                acc+= prod(src1[j], src2[i-j]);
+                if(M) acc^= prod(src1[j], src2[i-j]);
+                else  acc+= prod(src1[j], src2[i-j]);
             }
             dst[i]= reduce(acc);
         }
@@ -87,23 +116,91 @@ void noe_prodPoly(GFF4Element *dst, GFF4
         memcpy(temp[1], src2, sizeof(GFF4Element)*(order2+1));
         memset(temp[1] + order2 + 1, 0,  sizeof(GFF4Element)*(size - order2 - 1));
 
-        noe_gfft(temp[0], temp[0], logSize);
-        noe_gfft(temp[1], temp[1], logSize);
+        EXT(gfft)(temp[0], temp[0], logSize);
+        EXT(gfft)(temp[1], temp[1], logSize);
         
         for(i=0; i<size; i++){
             temp[0][i]= prod(prod(temp[0][i], temp[1][i]), scale);
         }
 
-        noe_igfft(temp[0], temp[0], logSize);
+        EXT(igfft)(temp[0], temp[0], logSize);
         memcpy(dst, temp[0], sizeof(GFF4Element)*(order+1));
     }
 }
 
+void EXT(prodMatrix)(GFF4Element *d1[2], GFF4Element *d2[2], GFF4Element *s1[2], GFF4Element *s2[2]){
+//FIXME improve threshold decission
+    if((s1[0][0] + s1[1][0] + s2[0][0] + s2[1][0]) < 64 || M/*|| 
+        d1[0][0]==0 || d1[1][0]==0 || d2[0][0]==0 || d2[1][0]==0 ||
+        s1[0][0]==0 || s1[1][0]==0 || s2[0][0]==0 || s2[1][0]==0*/){
+        GFF4Element t1[SIZE]; //FIXME size
+        GFF4Element t2[SIZE]; //FIXME size
+
+        EXT(prodPoly)(t1   , d1[0], s2[0]);
+        EXT(prodPoly)(d1[0], d1[0], s1[0]);
+        EXT(prodPoly)(t2   , d2[0], s1[1]);
+        EXT(prodPoly)(d2[0], d2[0], s2[1]);
+        EXT(sumPoly)(d1[0], d1[0], t2);
+        EXT(sumPoly)(d2[0], d2[0], t1);
+
+        EXT(prodPoly)(t1   , d1[1], s2[0]);
+        EXT(prodPoly)(d1[1], d1[1], s1[0]);
+        EXT(prodPoly)(t2   , d2[1], s1[1]);
+        EXT(prodPoly)(d2[1], d2[1], s2[1]);
+        EXT(sumPoly)(d1[1], d1[1], t2);
+        EXT(sumPoly)(d2[1], d2[1], t1);
+    }else{
+        int i;
+        int max_order= NOE_MAX( NOE_MAX( NOE_MAX(d1[0][0] + s2[0][0], d1[0][0] + s1[0][0]),
+                                         NOE_MAX(d2[0][0] + s2[1][0], d2[0][0] + s1[1][0]) ),
+                                NOE_MAX( NOE_MAX(d1[1][0] + s2[0][0], d1[1][0] + s1[0][0]),
+                                         NOE_MAX(d2[1][0] + s2[1][0], d2[1][0] + s1[1][0]) ));
+
+        const int logSize= noe_log2(max_order)+1;
+        const int size= 1<<logSize;
+        const GFF4Element scale= inv(size);
+
+        s1[0]++; s1[1]++; s2[0]++; s2[1]++;
+        d1[0]++; d1[1]++; d2[0]++; d2[1]++;
+
+        for(i=0; i<2; i++){
+            memset(s1[i] + s1[i][-1] + 1, 0,  sizeof(GFF4Element)*(size - s1[i][-1] - 1));
+            memset(s2[i] + s2[i][-1] + 1, 0,  sizeof(GFF4Element)*(size - s2[i][-1] - 1));
+            memset(d1[i] + d1[i][-1] + 1, 0,  sizeof(GFF4Element)*(size - d1[i][-1] - 1));
+            memset(d2[i] + d2[i][-1] + 1, 0,  sizeof(GFF4Element)*(size - d2[i][-1] - 1));
+            EXT(gfft)(s1[i], s1[i], logSize);
+            EXT(gfft)(s2[i], s2[i], logSize);
+            EXT(gfft)(d1[i], d1[i], logSize);
+            EXT(gfft)(d2[i], d2[i], logSize);
+        }
+
+        for(i=0; i<size; i++){
+            int d1t= d1[0][i];
+            d1[0][i]= prod(sum(prod(d1t, s1[0][i]), prod(d2[0][i], s1[1][i])), scale);
+            d2[0][i]= prod(sum(prod(d1t, s2[0][i]), prod(d2[0][i], s2[1][i])), scale);
+        }
+        for(i=0; i<size; i++){
+            int d1t= d1[1][i];
+            d1[1][i]= prod(sum(prod(d1t, s1[0][i]), prod(d2[1][i], s1[1][i])), scale);
+            d2[1][i]= prod(sum(prod(d1t, s2[0][i]), prod(d2[1][i], s2[1][i])), scale);
+        }
+
+        for(i=0; i<2; i++){
+            EXT(igfft)(d1[i], d1[i], logSize);
+            EXT(igfft)(d2[i], d2[i], logSize);
+            *(--d1[i])= max_order;
+            *(--d2[i])= max_order;
+            EXT(getOrder)(d1[i]);
+            EXT(getOrder)(d2[i]);
+        }
+    }
+}
+
 /**
  * returns the first order2 coefficients of the product of the 2 polynoms.
  * Note dst must be at least 2^log2(order1+order2) big FIXME benchmark if its worth it
  */
-void noe_partialProdPoly(GFF4Element *dst, GFF4Element *src1, GFF4Element *src2, int order){
+void EXT(partialProdPoly)(GFF4Element *dst, GFF4Element *src1, GFF4Element *src2, int order){
     int order1= *src1++;
     int order2= *src2++;
     int i;
@@ -113,7 +210,7 @@ void noe_partialProdPoly(GFF4Element *ds
     if(order < order1) order1= order;
     if(order < order2) order2= order;
 
-    if(order1*order2 <= (order1+order2)*64){
+    if(order1*order2 <= (order1+order2)*64 || M){
         for(i=order; i>=0; i--){
             unsigned int acc=0;
             int j, end;
@@ -122,12 +219,13 @@ void noe_partialProdPoly(GFF4Element *ds
             end= NOE_MIN(i, order1);
 
             for(; j<=end; j++){
-                acc+= prod(src1[j], src2[i-j]);
+                if(M) acc^= prod(src1[j], src2[i-j]);
+                else  acc+= prod(src1[j], src2[i-j]);
             }
             dst[i]= reduce(acc);
         }
     }else{
-        const int logSize= noe_log2(order1+order2)+1;
+        const int logSize= noe_log2(order2 + order1)+1;
         const int size= 1<<logSize;
         GFF4Element temp[size]; //FIXME is this fast?
         const GFF4Element scale= inv(size);
@@ -138,20 +236,20 @@ void noe_partialProdPoly(GFF4Element *ds
         memcpy(dst, src1, sizeof(GFF4Element)*(order1+1));
         memset(dst + order1 + 1, 0,  sizeof(GFF4Element)*(size - order1 - 1));
 
-        noe_gfft(dst, dst, logSize);
-        noe_gfft(temp, temp, logSize);
+        EXT(gfft)(dst, dst, logSize);
+        EXT(gfft)(temp, temp, logSize);
         
         for(i=0; i<size; i++){
             dst[i]= prod(prod(dst[i], temp[i]), scale);
         }
 
-        noe_igfft(dst, dst, logSize);
+        EXT(igfft)(dst, dst, logSize);
     }
 
-    noe_getOrder(dst-1);
+    EXT(getOrder)(dst-1);
 }
 
-void noe_printPoly(GFF4Element *src){
+void EXT(printPoly)(GFF4Element *src){
     int i;
     
     for(i=src[0]+1; i>0; i--){
@@ -163,30 +261,42 @@ void noe_printPoly(GFF4Element *src){
 /**
  * Evaluates the src polynom at x.
  */
-GFF4Element noe_evalPoly(GFF4Element *src, GFF4Element x){
+GFF4Element EXT(evalPoly)(GFF4Element *src, GFF4Element x){
     unsigned int acc=0;
     int j;
 
     for(j=src[0]+1; j>0; j--){
+//FIXME skip reduce
         acc = sum(prod(acc, x), src[j]);
     }
 
     return acc;
 }
 
-void noe_getDerivative(GFF4Element *dst, GFF4Element *src){
+void EXT(getDerivative)(GFF4Element *dst, GFF4Element *src){
     int i;
 
+#if M!=0
+    dst[1]= src[2];
+    for(i=3; i<=src[0]; i+=2){
+        dst[i-1]= 0;
+        dst[i  ]= src[i+1];
+    }
+    dst[0]= i-3;
+    assert(dst[0] >= 0);
+    assert(dst[ dst[0]+1 ]); //FIXME this isnt guranteed but seems to be always true
+#else
     for(i=1; i<=src[0]; i++){
         dst[i  ]= prod(src[i+1], i);
     }
     dst[0]= src[0]-1;
+#endif
 }
 
 /**
  * ...
  */
-void noe_synthPoly(GFF4Element *dst, GFF4Element *src, int count){
+void EXT(synthPoly)(GFF4Element *dst, GFF4Element *src, int count){
     if(count<20){
         int i;
 
@@ -194,7 +304,7 @@ void noe_synthPoly(GFF4Element *dst, GFF
 
         for(i=0; i<count; i++){
             assert(src[3*i] == 1);
-            noe_prodPoly(dst, dst, src+3*i);
+            EXT(prodPoly)(dst, dst, src+3*i);
         }
     }else{
         int pass, i, countLeft;
@@ -205,11 +315,11 @@ void noe_synthPoly(GFF4Element *dst, GFF
 
         countLeft= count;
 
-    //  noe_printPoly(src, 1);
-    //  noe_printPoly(src+2, 1);
+    //  EXT(printPoly)(src, 1);
+    //  EXT(printPoly)(src+2, 1);
 
         for(i=0; i<(count>>1); i++){
-            noe_prodPoly(temp[0]+4*i, src+6*i, src+6*i+3);
+            EXT(prodPoly)(temp[0]+4*i, src+6*i, src+6*i+3);
         }
         if(count&1){
             memcpy(temp[0]+4*i, src+6*i, 3*sizeof(GFF4Element));
@@ -217,7 +327,7 @@ void noe_synthPoly(GFF4Element *dst, GFF
         }
         countLeft>>=1;
 
-    //  noe_printPoly(temp[0], 2);
+    //  EXT(printPoly)(temp[0], 2);
 
         for(pass=1; countLeft>1; pass++){
             const int step= 2<<pass;
@@ -225,7 +335,7 @@ void noe_synthPoly(GFF4Element *dst, GFF
             GFF4Element *temp2= temp[(pass&1)^1];
 
             for(i=0; i<(countLeft>>1); i++){
-                noe_prodPoly(temp1+2*step*i, temp2+2*step*i, temp2+2*step*i+step);
+                EXT(prodPoly)(temp1+2*step*i, temp2+2*step*i, temp2+2*step*i+step);
             }
 
             if(countLeft&1){
@@ -237,14 +347,14 @@ void noe_synthPoly(GFF4Element *dst, GFF
 
         assert(pass==passCount);
 
-    //  noe_printPoly(temp[0], count);
+    //  EXT(printPoly)(temp[0], count);
 
         memcpy(dst, temp[(pass&1)^1], (count+2)*sizeof(GFF4Element));
-    //  noe_printPoly(dst, count);
+    //  EXT(printPoly)(dst, count);
     }
 }
 
-void noe_getOrder(GFF4Element *src){
+void EXT(getOrder)(GFF4Element *src){
     while(src[0] && src[ src[0] + 1 ]==0) 
         src[0]--;
 }
@@ -253,7 +363,7 @@ void noe_getOrder(GFF4Element *src){
  * ...
  * Note: rem can be identical to nom
  */
-void noe_divPoly(GFF4Element *quot, GFF4Element *rem, GFF4Element *nom, GFF4Element *denom){
+void EXT(divPoly)(GFF4Element *quot, GFF4Element *rem, GFF4Element *nom, GFF4Element *denom){
     const int nomOrder= *nom++;
     const int denomOrder= *denom++;
     int i;
@@ -279,16 +389,16 @@ void noe_divPoly(GFF4Element *quot, GFF4
 
         for(i=0; i<=denomOrder; i++){ //FIXME optimize
             const int remIndex= i + remOrder - denomOrder;
-            rem[remIndex]= sum(rem[remIndex], SIZE - prod(scale, denom[i]));
+            rem[remIndex]= diff(rem[remIndex], prod(scale, denom[i])); //FIXME skip reduce
         }
 
         quot[remOrder - denomOrder]= scale;
     }
 
-    noe_getOrder(rem-1);
+    EXT(getOrder)(rem-1);
 }
 
-void noe_diffPoly(GFF4Element *dst, GFF4Element *src1, GFF4Element *src2){
+void EXT(diffPoly)(GFF4Element *dst, GFF4Element *src1, GFF4Element *src2){
     const int order1= *src1++;
     const int order2= *src2++;
     int i;
@@ -297,24 +407,24 @@ void noe_diffPoly(GFF4Element *dst, GFF4
     *dst++= order1;
 
     for(i=0; i<=minOrder; i++){
-        dst[i]= sum(src1[i], SIZE - src2[i]);
+        dst[i]= diff(src1[i], src2[i]);
     }
     
     if(order1==order2){
-        noe_getOrder(dst-1);
+        EXT(getOrder)(dst-1);
     }else{
         if(dst!=src1)
             memcpy(dst+i, src1+i, (i-1-order1)*sizeof(GFF4Element));
 
         for(; i<=order2; i++){
-            dst[i]= SIZE - src2[i];
+            dst[i]= neg(src2[i]);
         }
 
         dst[-1]= NOE_MAX(order1, order2);
     }
 }
 
-void noe_sumPoly(GFF4Element *dst, GFF4Element *src1, GFF4Element *src2){
+void EXT(sumPoly)(GFF4Element *dst, GFF4Element *src1, GFF4Element *src2){
     const int order1= *src1++;
     const int order2= *src2++;
     int i;
@@ -327,7 +437,7 @@ void noe_sumPoly(GFF4Element *dst, GFF4E
     }
     
     if(order1==order2){
-        noe_getOrder(dst-1);
+        EXT(getOrder)(dst-1);
     }else if(order1 < order2){
         dst[-1]= order2;
         if(dst!=src2)
@@ -338,7 +448,7 @@ void noe_sumPoly(GFF4Element *dst, GFF4E
     }
 }
 
-void noe_scaledSumPoly(GFF4Element *dst, GFF4Element *src1, GFF4Element *src2, GFF4Element f, int s){
+void EXT(scaledSumPoly)(GFF4Element *dst, GFF4Element *src1, GFF4Element *src2, GFF4Element f, int s){
     const int order1= *src1++;
     const int order2= *src2++; //FIXME +s
     int i;
@@ -352,7 +462,7 @@ void noe_scaledSumPoly(GFF4Element *dst,
         dst[i]= sum(src1[i], prod(src2[i-s], f));
 
     if(order1==order2 + s){
-        noe_getOrder(dst-1);
+        EXT(getOrder)(dst-1);
     }else{
         if(dst!=src1)
             memcpy(dst+i, src1+i, (order1+1-i)*sizeof(GFF4Element));
@@ -362,6 +472,6 @@ void noe_scaledSumPoly(GFF4Element *dst,
         }
 
         dst[-1]= NOE_MAX(order1, order2+s);
-        noe_getOrder(dst-1); //FIXME we need this due to src2= 0
+        EXT(getOrder)(dst-1); //FIXME we need this due to src2= 0
     }
 }

Modified: trunk/noe/galois.h
==============================================================================
--- trunk/noe/galois.h	(original)
+++ trunk/noe/galois.h	Tue Jul 10 00:48:14 2007
@@ -16,48 +16,103 @@
  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
+#if SIZE == 0x100
+#define PRIMITIVE_ELEMENT 2
+#define SHIFT 8
+#define M 0x11D
+#define EXT(name) noe_ ## name ## _100
+#elif SIZE == 0x10001
 #define PRIMITIVE_ELEMENT 3
-#define SIZE 0x10001
+#define SHIFT 16
+#define M 0
+#define EXT(name) name ## _10001
+#elif SIZE == 0x101
+#define PRIMITIVE_ELEMENT 3
+#define SHIFT 8
+#define M 0
+#define EXT(name) noe_ ## name ## _101
+#else
+#error wrong SIZE
+#endif
+
+#define MINUS1 (SIZE-1)
+#define MASK ((1<<SHIFT)-1)
 
 typedef unsigned int GFF4Element;
 
-extern GFF4Element noe_exp[SIZE];
-extern GFF4Element noe_log[SIZE];
+extern GFF4Element EXT(exp)[SIZE];
+extern GFF4Element EXT(log)[SIZE];
 
-int noe_prodPoly(GFF4Element *dst, GFF4Element *src1, GFF4Element *src2, int order1, int order2);
-int noe_partialProdPoly(GFF4Element *dst, GFF4Element *src1, GFF4Element *src2, int order1, int order2);
-void noe_init();
-void noe_printPoly(GFF4Element *src, int order);
-int noe_getDerivative(GFF4Element *dst, GFF4Element *src, int order);
-GFF4Element noe_evalPoly(GFF4Element *src, int order, GFF4Element x);
-void noe_synthPoly(GFF4Element *dst, GFF4Element *src, int count);
-int noe_divPoly(GFF4Element *quot, GFF4Element *rem, GFF4Element *nom, GFF4Element *denom, int nomOrder, int denomOrder);
-int noe_diffPoly(GFF4Element *dst, GFF4Element *src1, GFF4Element *src2, int order1, int order2);
-int noe_getOrder(GFF4Element *src, int maxOrder);
+void EXT(prodPoly)(GFF4Element *dst, GFF4Element *src1, GFF4Element *src2);
+void EXT(partialProdPoly)(GFF4Element *dst, GFF4Element *src1, GFF4Element *src2, int order);
+void EXT(init)();
+void EXT(printPoly)(GFF4Element *src);
+void EXT(getDerivative)(GFF4Element *dst, GFF4Element *src);
+GFF4Element EXT(evalPoly)(GFF4Element *src, GFF4Element x);
+void EXT(synthPoly)(GFF4Element *dst, GFF4Element *src, int count);
+void EXT(divPoly)(GFF4Element *quot, GFF4Element *rem, GFF4Element *nom, GFF4Element *denom);
+void EXT(diffPoly)(GFF4Element *dst, GFF4Element *src1, GFF4Element *src2);
+void EXT(sumPoly)(GFF4Element *dst, GFF4Element *src1, GFF4Element *src2);
+void EXT(getOrder)(GFF4Element *src);
+void EXT(scaledSumPoly)(GFF4Element *dst, GFF4Element *src1, GFF4Element *src2, GFF4Element f, int s);
+void EXT(prodMatrix)(GFF4Element *d1[2], GFF4Element *d2[2], GFF4Element *s1[2], GFF4Element *s2[2]);
+int noe_log2(unsigned int v);
 
 static inline GFF4Element reduce(GFF4Element a){
-    a = (a&0xFFFF) - (a>>16);
-    a += (((int)a)>>31)&0x10001;
-    
+#if M==0
+    a = (a&MASK) - (a>>SHIFT);
+    a += (((int)a)>>31)&SIZE;
+#endif
     return a;
 }
 
 static inline GFF4Element prod(GFF4Element a, GFF4Element b){
+#if M!=0
+    if(a==0 || b==0) return 0;
+    a= EXT(log)[a] + EXT(log)[b];
+    if(a>=SIZE) a-= SIZE-1;
+    return EXT(exp)[a];
+#else
 //printf("%d %d\n", a,b);
-    if((a&b)==0x10000)  return 1;
-    else            return reduce(a*b);
+#if 0
+    uint64_t x= ((uint64_t)a)*b;
+    return reduce(x + (x>>32));
+#else
+    if(a==MINUS1)  return -b + ((((int)-b)>>31)&SIZE);
+    else           return reduce(a*b);
+#endif
+#endif
 }
 
 static inline GFF4Element sum(GFF4Element a, GFF4Element b){
-    
-    a += b - 0x10001;
-    a += (((int)a)>>31)&0x10001;
-    
-    return a;
+#if M!=0
+    return a^b;
+#else
+    a += b - SIZE;
+    return a + ((((int)a)>>31)&SIZE);
+#endif
 }
 
+static inline GFF4Element diff(GFF4Element a, GFF4Element b){
+#if M!=0
+    return a^b;
+#else
+    a -= b;
+    return a + ((((int)a)>>31)&SIZE);
+#endif
+}
+
+#if M!=0
+#define neg(a) (a)
+#else
+#define neg(a) (SIZE - (a))
+#endif
+
 static inline GFF4Element inv(GFF4Element a){
     assert(a!=0);
 
-    return noe_exp[SIZE - 1 - noe_log[a]];
+    return EXT(exp)[MINUS1 - EXT(log)[a]];
 }
+
+#define SET_POLY0(dst, coeff0) (dst)[0]=0; (dst)[1]=coeff0;
+#define SET_POLY1(dst, coeff0, coeff1) (dst)[0]=1; (dst)[1]=coeff0; (dst)[2]=coeff1;

Modified: trunk/noe/gfft.c
==============================================================================
--- trunk/noe/gfft.c	(original)
+++ trunk/noe/gfft.c	Tue Jul 10 00:48:14 2007
@@ -40,39 +40,12 @@ void noe_gfft_init(){
     }
 }
 
-int noe_log2(unsigned int v) // from ffmpeg
-{
-    int n;
-
-    n = 0;
-    if (v & 0xffff0000) {
-        v >>= 16;
-        n += 16;
-    }
-    if (v & 0xff00) {
-        v >>= 8;
-        n += 8;
-    }
-    if (v & 0xf0) {
-        v >>= 4;
-        n += 4;
-    }
-    if (v & 0xc) {
-        v >>= 2;
-        n += 2;
-    }
-    if (v & 0x2) {
-        n++;
-    }
-    return n;
-}
-
-void noe_permute(GFF4Element *dst, GFF4Element *src, int outCount){
+void EXT(permute)(GFF4Element *dst, GFF4Element *src, int outCount){
     int i;
 
     if(dst==src){
         for(i=1; i<outCount; i++){
-            const int r= noe_bitReverse(i);
+            const int r= bitReverse(i);
             unsigned int t;
             if(r<=i) continue;
             t= dst[i];
@@ -81,7 +54,7 @@ void noe_permute(GFF4Element *dst, GFF4E
         }
     }else{
         for(i=0; i<outCount; i++){
-            dst[i]= src[noe_bitReverse(i)];
+            dst[i]= src[bitReverse(i)];
         }
     }
 }
@@ -97,12 +70,12 @@ static inline void fft2(GFF4Element *p){
 static inline void fft4(GFF4Element *p){
     unsigned int a,b,c,d;
 
-    d= p[0] + SIZE*0x8000ULL;
+    d= p[0] + SIZE*(SIZE/2ULL);
     
     a= d   +p[2];
     b= p[1]+p[3];
     c= d   -p[2];
-    d= (p[3] - p[1])<<8 /*(p[1] - p[3])*noe_exp[16384]*/;
+    d= (p[3] - p[1])<<(SHIFT/2) /*(p[1] - p[3])*noe_exp[16384]*/;
 
     p[0]= reduce(a+b);
     p[1]= reduce(a-b);
@@ -128,17 +101,17 @@ static inline void fft8(GFF4Element *p){
     a= p[1];
     b= p[5];
     p[1]= a+b;
-    p[5]= reduce(SIZE*0x8000ULL + ((a-b)<<12))/*(a-b)*noe_exp[8192]*/;
+    p[5]= reduce(SIZE*(SIZE/2ULL) + ((a-b)<<(SHIFT*3/4)))/*(a-b)*noe_exp[8192]*/;
 
     a= p[2];
     b= p[6];
     p[2]= a+b;
-    p[6]= (b-a)<<8 /*(a-b)*noe_exp[16384]*/;
+    p[6]= (b-a)<<(SHIFT/2) /*(a-b)*noe_exp[16384]*/;
 
     a= p[3];
     b= p[7];
     p[3]= a+b;
-    p[7]= (a-b)<<4/*(a-b)*noe_exp[3*8192]*/;
+    p[7]= (a-b)<<(SHIFT/4)/*(a-b)*noe_exp[3*8192]*/;
 
     fft4(p);
     fft4(p+4);
@@ -148,7 +121,7 @@ static void fft16(GFF4Element *p){
     int n;
 
     for(n=0; n<8; n++){
-        const unsigned int w= noe_exp[ n<<12 ];
+        const unsigned int w= EXT(exp)[ n<<(SHIFT-4) ];
         const unsigned int a= p[n    ];
         const unsigned int b= p[n + 8];
 
@@ -165,7 +138,7 @@ static void fftn(GFF4Element *p, int log
     const int size= 1<<(logSize-1);
 
     for(n=0; n<size; n++){
-        const unsigned int w= noe_exp[ n<<(16-logSize) ];
+        const unsigned int w= EXT(exp)[ n<<(SHIFT-logSize) ];
         const unsigned int a= p[n       ];
         const unsigned int b= p[n + size];
 
@@ -187,7 +160,7 @@ static void fftn2(GFF4Element *p, GFF4El
     const int size= 1<<(logSize-1);
 
     for(n=0; n<size; n++){
-        const unsigned int w= noe_exp[ n<<(16-logSize) ];
+        const unsigned int w= EXT(exp)[ n<<(SHIFT-logSize) ];
         const unsigned int a= src[n       ];
         const unsigned int b= src[n + size];
 
@@ -225,7 +198,7 @@ static void ifftn(GFF4Element *p, int lo
     }
     
     for(n=0; n<size; n++){
-        const unsigned int w= noe_exp[ (1<<16) - (n<<(16-logSize)) ];
+        const unsigned int w= EXT(exp)[ MINUS1 - (n<<(SHIFT-logSize)) ];
         const unsigned int a= p[n       ];
         const unsigned int b= prod(p[n + size], w);
 
@@ -235,7 +208,7 @@ static void ifftn(GFF4Element *p, int lo
     
 }
 
-void noe_gfft(GFF4Element *dst, GFF4Element *src, int logSize){
+void EXT(gfft)(GFF4Element *dst, GFF4Element *src, int logSize){
 //  int i, j, pass;
 //printf("%X %X\n", noe_exp[4096], noe_exp[3*4096]);
 
@@ -258,7 +231,7 @@ void noe_gfft(GFF4Element *dst, GFF4Elem
             const int bot= top + np;
             
             for(n=0; n<np; n++){
-                const unsigned int w= noe_exp[ n<<pass ];
+                const unsigned int w= EXT(exp)[ n<<pass ];
                 const unsigned int a= dst[top + n];
                 const unsigned int b= dst[bot + n];
                 
@@ -279,7 +252,7 @@ STOP_TIMER}
 #endif
 }
 
-void noe_igfft(GFF4Element *dst, GFF4Element *src, int logSize){
+void EXT(igfft)(GFF4Element *dst, GFF4Element *src, int logSize){
     assert(logSize>=5);
 
     if(src!=dst)

Modified: trunk/noe/gfft.h
==============================================================================
--- trunk/noe/gfft.h	(original)
+++ trunk/noe/gfft.h	Tue Jul 10 00:48:14 2007
@@ -17,14 +17,17 @@
  */
 
 void noe_gfft_init();
-void noe_gfft(GFF4Element *dst, GFF4Element *src, int size);
-void noe_igfft(GFF4Element *dst, GFF4Element *src, int size);
-void noe_permute(GFF4Element *dst, GFF4Element *src, int outCount);
-int noe_log2(unsigned int v);
+void EXT(gfft)(GFF4Element *dst, GFF4Element *src, int size);
+void EXT(igfft)(GFF4Element *dst, GFF4Element *src, int size);
+void EXT(permute)(GFF4Element *dst, GFF4Element *src, int outCount);
 
 extern uint8_t noe_revTable[256];
 
-static inline unsigned int noe_bitReverse(unsigned int x){
+static inline unsigned int bitReverse(unsigned int x){
+#if SHIFT==16
     return (noe_revTable[x&255]<<8) | (noe_revTable[x>>8]);
+#else
+    return noe_revTable[x];
+#endif
 }
 

Modified: trunk/noe/noe.c
==============================================================================
--- trunk/noe/noe.c	(original)
+++ trunk/noe/noe.c	Tue Jul 10 00:48:14 2007
@@ -146,7 +146,7 @@ static int decode_header(NoeContext *n, 
         code[k+i]= get_c(&bs, 2);
     }
 
-    if(noe_rsDecode(code, erased, erasedCount, HEADER_PARITY_LENGTH) < 0)
+    if(EXT(rsDecode)(code, erased, erasedCount, HEADER_PARITY_LENGTH) < 0)
         return -1;
 
     for(i=HEADER_DATA_LENGTH; i<k; i++){
@@ -158,8 +158,8 @@ static int decode_header(NoeContext *n, 
             return -2;
     }
 
-    //      noe_printPoly(gen, i);
-    //      noe_printPoly(syn, i-1);
+    //      EXT(printPoly)(gen, i);
+    //      EXT(printPoly)(syn, i-1);
 
     for(i=0; 2*i<HEADER_DATA_LENGTH; i++){
         buffer[2*i+0]= code[  i] >> 8;
@@ -388,7 +388,7 @@ static int correct(NoeContext *n, noe_Rw
                 code[cw_id - first_cw][cw_pos]= 256*buffer[pos] + buffer[pos+1];
             }
         }
-        if(noe_rsDecode(code, NULL/*erased*/, erasedCount, n->parityCount) < 0)
+        if(EXT(rsDecode)(code, NULL/*erased*/, erasedCount, n->parityCount) < 0)
             uncorrectable++;
 
 //        write data

Modified: trunk/noe/noe_internal.h
==============================================================================
--- trunk/noe/noe_internal.h	(original)
+++ trunk/noe/noe_internal.h	Tue Jul 10 00:48:14 2007
@@ -39,12 +39,12 @@ uint64_t tstart= rdtsc();\
 
 #define STOP_TIMER \
 tend= rdtsc();\
-if(tcount<2 || tend - tstart < 4*tsum/tcount){\
+if(tcount<2 || tend - tstart < 16*tsum/tcount){\
     tsum+= tend - tstart;\
     tcount++;\
 }else\
     tskip_count++;\
-if(256*256*256*64%tcount==0){\
+if(256*256*256*64%(tskip_count+tcount)==0){\
     fprintf(stderr, "%Ld dezicycles, %d runs, %d skips\n", tsum*10/tcount, tcount, tskip_count);\
     fflush(stderr);\
 }

Modified: trunk/noe/rs.c
==============================================================================
--- trunk/noe/rs.c	(original)
+++ trunk/noe/rs.c	Tue Jul 10 00:48:14 2007
@@ -34,31 +34,37 @@
 /**
  * 
  */
-void noe_getRsGenerator(GFF4Element *dst, int first, int order){
+void EXT(getRsGenerator)(GFF4Element *dst, int first, int order){
     int i;
     GFF4Element factor[3*order];
 
     for(i=0; i<order; i++){
-        SET_POLY1(factor+3*i, SIZE - noe_exp[ first + i ], 1);
+        SET_POLY1(factor+3*i, neg(EXT(exp)[ first + i ]), 1);
     }
     
-    noe_synthPoly(dst, factor, order);
+    EXT(synthPoly)(dst, factor, order);
 }
 
-void noe_getSyndrom(GFF4Element *syn, GFF4Element *src, int order){
+void EXT(getSyndrom)(GFF4Element *syn, GFF4Element *src, int order){
     *syn++= order;
-    noe_gfft(syn, src, 16);
-    noe_permute(syn, syn, order+1);
-
-    syn[0]= 1;
-#if 0
+#if M==0
+    EXT(gfft)(syn, src, SHIFT);
+    EXT(permute)(syn, syn, order+1);
+#else
+{
+    int i;
+    int bak= src[-1];
+    src[-1]= SIZE-2;
     for(i=0; i<order; i++){
-        syn[i+1]= noe_evalPoly(src, SIZE-2, noe_exp[1 + i]);
+        syn[i+1]= EXT(evalPoly)(src-1, EXT(exp)[1 + i]);
     }
+    src[-1]= bak;
+}
 #endif
+    syn[0]= 1;
 }
 #if 1
-noe_RsEncoder *noe_getRsEncoder(int parityCount){
+noe_RsEncoder *EXT(getRsEncoder)(int parityCount){
     const int dataCount= SIZE - parityCount - 1;
     int i;  
     GFF4Element *locator, *factor;
@@ -72,36 +78,36 @@ noe_RsEncoder *noe_getRsEncoder(int pari
     encoder->parityCount= parityCount;
     
     for(i=0; i<parityCount; i++){
-        SET_POLY1(locationFactor+3*i, 1, SIZE - noe_exp[dataCount + i]);
+        SET_POLY1(locationFactor+3*i, 1, neg(EXT(exp)[dataCount + i]));
     }
-    noe_synthPoly(locator, locationFactor, parityCount);
+    EXT(synthPoly)(locator, locationFactor, parityCount);
 #if 0
 for(i=0; i<parityCount; i++){
-    if(noe_evalPoly(locator, parityCount, inv(noe_exp[dataCount + i]))){
+    if(EXT(evalPoly)(locator, inv(EXT(exp)[dataCount + i]))){
         printf("Internal Error 1\n");
     }
 }
-if(!noe_evalPoly(locator, parityCount, inv(noe_exp[dataCount - 1])))
+if(!EXT(evalPoly)(locator, inv(EXT(exp)[dataCount - 1])))
     printf("Internal Error 2\n");
 #endif
-    noe_getDerivative(locatorDerivative, locator);
+    EXT(getDerivative)(locatorDerivative, locator);
     assert(locatorDerivative[0] == parityCount-1);
 
     for(i=0; i<parityCount; i++){
-        GFF4Element X= noe_exp[dataCount + i];
-        GFF4Element invX= noe_exp[SIZE - dataCount - i - 1];
+        GFF4Element X= EXT(exp)[dataCount + i];
+        GFF4Element invX= EXT(exp)[MINUS1 - (dataCount + i)];
         GFF4Element denom;
         
         assert(X == inv(invX));
 
-        denom= noe_evalPoly(locatorDerivative, invX); //FIXME do in freq domain if parityCount>1000
-        factor[i]= prod(SIZE - X, inv(denom));
+        denom= EXT(evalPoly)(locatorDerivative, invX); //FIXME do in freq domain if parityCount>1000
+        factor[i]= prod(X, inv(denom));
     }
     
     return encoder;
 }
 
-void noe_freeRsEncoder(noe_RsEncoder *encoder){
+void EXT(freeRsEncoder)(noe_RsEncoder *encoder){
     if(!encoder) return;
     
     if(encoder->parityLocator) free(encoder->parityLocator);
@@ -116,7 +122,7 @@ void noe_freeRsEncoder(noe_RsEncoder *en
 /**
  *
  */
-void noe_rsEncode(noe_RsEncoder *encoder, GFF4Element *data){
+void EXT(rsEncode)(noe_RsEncoder *encoder, GFF4Element *data){
     int i;
     const int parityCount= encoder->parityCount;
     const int dataCount= SIZE - parityCount - 1;
@@ -127,47 +133,35 @@ void noe_rsEncode(noe_RsEncoder *encoder
 
     memset(data + dataCount, 0, parityCount*sizeof(GFF4Element));
     
-    noe_getSyndrom(syn, data, parityCount);
+    EXT(getSyndrom)(syn, data, parityCount);
 
-    noe_partialProdPoly(omega, locator, syn, syn[0]);
-    
-    if(parityCount < 1000){
+    EXT(partialProdPoly)(omega, locator, syn, syn[0]);
+
+    if(parityCount < 1000 || M){
         for(i=0; i<parityCount; i++){
-            GFF4Element invX= noe_exp[SIZE - dataCount - i - 1];
+            GFF4Element invX= EXT(exp)[MINUS1 - (dataCount + i)];
             
-            GFF4Element parity= prod(noe_evalPoly(omega, invX), factor[i]);
+            GFF4Element parity= prod(EXT(evalPoly)(omega, invX), factor[i]);
             
-            data[dataCount + i]= SIZE - parity;
+            data[dataCount + i]= parity;
         }
     }else{
-//START_TIMER
-#if 0
-        for(i=0; i<parityCount; i++){
-            GFF4Element invX= noe_exp[SIZE - dataCount - i - 1];
-            
-            GFF4Element parity= prod(noe_evalPoly(omega, parityCount, invX), factor[i]);
-            
-            data[dataCount + i]= SIZE - parity;
-        }
-#else
         memset(omega + parityCount + 1 + 1, 0, (SIZE - 2 - parityCount)*sizeof(GFF4Element));
-        noe_gfft(omega+1, omega+1, 16);
+        EXT(gfft)(omega+1, omega+1, SHIFT);
         for(i=0; i<parityCount; i++){
             int index= SIZE - dataCount - i - 1;
             
-            GFF4Element parity= prod(omega[noe_bitReverse(index)+1], factor[i]);
+            GFF4Element parity= prod(omega[bitReverse(index)+1], factor[i]);
             
-            data[dataCount + i]= SIZE - parity;
+            data[dataCount + i]= parity;
         }
-#endif
-//STOP_TIMER
     }
 }
 #endif
 
 #define XCHG(a,b) {void *tmp= a; a= b; b= tmp;}
 
-static void lehmer(GFF4Element *ma[2], GFF4Element *mb[2], GFF4Element *a, GFF4Element *b, int len){
+static void hgcd(GFF4Element *ma[2], GFF4Element *mb[2], GFF4Element *a, GFF4Element *b, int len){
     int j;
     int end= a[0] + b[0] - len;
     assert(a[0] >= b[0]);
@@ -189,19 +183,20 @@ static void lehmer(GFF4Element *ma[2], G
         max= a[0] - a_tmp[0];
         memcpy(a_tmp+1, a+1+a[0]-a_tmp[0], (a_tmp[0]+1)*sizeof(GFF4Element));
         memcpy(b_tmp+1, b+1+b[0]-b_tmp[0], (b_tmp[0]+1)*sizeof(GFF4Element));
-        lehmer(ma, mb, a_tmp, b_tmp, len2);
+        hgcd(ma, mb, a_tmp, b_tmp, len2);
 
         max+= NOE_MAX(a_tmp[0], b_tmp[0]);
 
-        noe_partialProdPoly(a_tmp, b, ma[1], max);
-        noe_partialProdPoly(b_tmp, a, mb[0], max);
-        noe_partialProdPoly(a    , a, ma[0], max);
-        noe_partialProdPoly(b    , b, mb[1], max);
-        noe_sumPoly(a, a, a_tmp);
-        noe_sumPoly(b, b, b_tmp);
+        EXT(partialProdPoly)(a_tmp, b, ma[1], max);
+        EXT(partialProdPoly)(b_tmp, a, mb[0], max);
+        EXT(partialProdPoly)(a    , a, ma[0], max);
+        EXT(partialProdPoly)(b    , b, mb[1], max);
+        EXT(sumPoly)(a, a, a_tmp);
+        EXT(sumPoly)(b, b, b_tmp);
         len2= a[0] + b[0] - end;
 //printf("Mid %d %d %d len2=%d\n", level, a[0], b[0], len2);
-        if(len2 == 0)
+
+        if(len2 == 0 || a[ a[0] + 1 ]==0)
             return;
             
         assert(len2>0);
@@ -215,23 +210,10 @@ static void lehmer(GFF4Element *ma[2], G
         j= b[0] + 1 - len2;
         a[j]= a[0] - j;
         b[j]= b[0] - j;
-        lehmer(ma2, mb2, a + j, b + j, len2);
+        hgcd(ma2, mb2, a + j, b + j, len2);
 
 //printf("End %d %d %d\n", level, a[0], b[0]);
-
-        noe_prodPoly(a_tmp, ma[0], mb2[0]);
-        noe_prodPoly(ma[0], ma[0], ma2[0]);
-        noe_prodPoly(b_tmp, mb[0], ma2[1]);
-        noe_prodPoly(mb[0], mb[0], mb2[1]);
-        noe_sumPoly(ma[0], ma[0], b_tmp);
-        noe_sumPoly(mb[0], mb[0], a_tmp);
-
-        noe_prodPoly(a_tmp, ma[1], mb2[0]);
-        noe_prodPoly(ma[1], ma[1], ma2[0]);
-        noe_prodPoly(b_tmp, mb[1], ma2[1]);
-        noe_prodPoly(mb[1], mb[1], mb2[1]);
-        noe_sumPoly(ma[1], ma[1], b_tmp);
-        noe_sumPoly(mb[1], mb[1], a_tmp);
+        EXT(prodMatrix)(ma, mb, ma2, mb2);
     }else{
         SET_POLY0(ma[0], 1);
         SET_POLY0(ma[1], 0);
@@ -251,13 +233,12 @@ static void lehmer(GFF4Element *ma[2], G
             }
             a[0]--;
 
-            noe_scaledSumPoly(ma[0], ma[0], mb[0], q, s);
-            noe_scaledSumPoly(ma[1], ma[1], mb[1], q, s);
+            EXT(scaledSumPoly)(ma[0], ma[0], mb[0], q, s);
+            EXT(scaledSumPoly)(ma[1], ma[1], mb[1], q, s);
 
-            noe_getOrder(a);
-            if(a[0] + b[0] <= end)
+            EXT(getOrder)(a);
+            if(a[0] + b[0] <= end || a[a[0] + 1]==0)
                 return;
-            assert(a[0]!=0 || a[1]!=0);
 
             if(a[0] < b[0]){
                 XCHG(a,b)
@@ -267,7 +248,7 @@ static void lehmer(GFF4Element *ma[2], G
     }
 }
 
-int noe_rsEuclid2(GFF4Element *locator[2], GFF4Element *omega[2], int parityCount, int erasureCount){
+static int rsSchoenhage(GFF4Element *locator[2], GFF4Element *omega[2], int parityCount, int erasureCount){
     int i;
     GFF4Element quot[parityCount+2]; //FIXME size ok?
 
@@ -292,7 +273,7 @@ int noe_rsEuclid2(GFF4Element *locator[2
         if(2*omega[si][0] <= parityCount + erasureCount)
             return si;
 
-        if(len > omega[si][0]+1) len=omega[si][0]+1;
+        assert(len <= omega[si][0]+1);
         if(len>128 && omega[si][0]/2 < len) len= omega[si][0]/2;
 
         assert(omega[si][ omega[si][0]+1 ]);
@@ -303,22 +284,22 @@ int noe_rsEuclid2(GFF4Element *locator[2
         memcpy(b_tmp+1, omega[si]+1+omega[si][0]-b_tmp[0], (b_tmp[0]+1)*sizeof(GFF4Element));
         assert(b_tmp[ b_tmp[0]+1 ]);
 //printf("In 0 %d %d\n", omega[di][0], omega[si][0]);
-        lehmer(ma, mb, a_tmp, b_tmp, len);
+        hgcd(ma, mb, a_tmp, b_tmp, len);
         max= omega[si][0] - (len-1) + NOE_MAX(a_tmp[0], b_tmp[0]);
 
-        noe_partialProdPoly(a_tmp, omega[si], ma[1], max);
-        noe_partialProdPoly(b_tmp, omega[di], mb[0], max);
-        noe_partialProdPoly(omega[di], omega[di], ma[0], max);
-        noe_partialProdPoly(omega[si], omega[si], mb[1], max);
-        noe_sumPoly(omega[di], omega[di], a_tmp);
-        noe_sumPoly(omega[si], omega[si], b_tmp);
+        EXT(partialProdPoly)(a_tmp, omega[si], ma[1], max);
+        EXT(partialProdPoly)(b_tmp, omega[di], mb[0], max);
+        EXT(partialProdPoly)(omega[di], omega[di], ma[0], max);
+        EXT(partialProdPoly)(omega[si], omega[si], mb[1], max);
+        EXT(sumPoly)(omega[di], omega[di], a_tmp);
+        EXT(sumPoly)(omega[si], omega[si], b_tmp);
 
-        noe_prodPoly(a_tmp, locator[si], ma[1]);
-        noe_prodPoly(b_tmp, locator[di], mb[0]);
-        noe_prodPoly(locator[di], locator[di], ma[0]);
-        noe_prodPoly(locator[si], locator[si], mb[1]);
-        noe_sumPoly(locator[di], locator[di], a_tmp);
-        noe_sumPoly(locator[si], locator[si], b_tmp);
+        EXT(prodPoly)(a_tmp, locator[si], ma[1]);
+        EXT(prodPoly)(b_tmp, locator[di], mb[0]);
+        EXT(prodPoly)(locator[di], locator[di], ma[0]);
+        EXT(prodPoly)(locator[si], locator[si], mb[1]);
+        EXT(sumPoly)(locator[di], locator[di], a_tmp);
+        EXT(sumPoly)(locator[si], locator[si], b_tmp);
 
         if(omega[si][0] < omega[di][0])
             i++;
@@ -329,7 +310,7 @@ int noe_rsEuclid2(GFF4Element *locator[2
  * ...
  * @param omega must be parityCount+2 big
  */
-int noe_rsEuclid(GFF4Element *locator[2], GFF4Element *omega[2], int parityCount, int erasureCount){
+static int rsEuclid(GFF4Element *locator[2], GFF4Element *omega[2], int parityCount, int erasureCount){
     int i;
     GFF4Element quot[parityCount+2]; //FIXME size ok?
 
@@ -346,10 +327,10 @@ int noe_rsEuclid(GFF4Element *locator[2]
         if(2*omega[si][0] <= parityCount + erasureCount)
             return si;
 
-        noe_divPoly(quot, omega[di], omega[di], omega[si]);
+        EXT(divPoly)(quot, omega[di], omega[di], omega[si]);
 
-        noe_prodPoly(quot, quot, locator[si]);
-        noe_diffPoly(locator[di], locator[di], quot);
+        EXT(prodPoly)(quot, quot, locator[si]);
+        EXT(diffPoly)(locator[di], locator[di], quot);
     }
 }
 
@@ -357,10 +338,10 @@ int noe_rsEuclid(GFF4Element *locator[2]
  * ...
  * @param omega must be parityCount+2 big
  */
-int noe_rsBerlekampMassey(GFF4Element *locator[2], GFF4Element *omega[2], int parityCount, int erasureCount){
+static int rsBerlekampMassey(GFF4Element *locator[2], GFF4Element *omega[2], int parityCount, int erasureCount){
     GFF4Element *syn= omega[1]+1; //yeah maybe we should clean this ...
     int k, j;
-    GFF4Element oldDelta=0x10000;
+    GFF4Element oldDelta=MINUS1;
     GFF4Element *Tix= locator[0]+1;
     GFF4Element *Lix= locator[1]+1;
     GFF4Element *temp;
@@ -375,7 +356,11 @@ int noe_rsBerlekampMassey(GFF4Element *l
         GFF4Element delta= syn[k];
 
         for(j=1; j<=Lix[-1]; j++)
+#if M!=0
+            delta^= prod(Lix[j], syn[k-j]);
+#else
             delta+= prod(Lix[j], syn[k-j]);
+#endif
         delta= reduce(delta);
 
         if(delta){
@@ -401,7 +386,7 @@ int noe_rsBerlekampMassey(GFF4Element *l
                     Tix[j]= Lix[j];
                 Tshift=0;
                 temp= Tix; Tix= Lix; Lix= temp;
-                oldDelta= SIZE - inv(delta);
+                oldDelta= neg(inv(delta));
                 Lix[-1]= k2 - Tix[-1];
             }
         }
@@ -412,19 +397,19 @@ int noe_rsBerlekampMassey(GFF4Element *l
     assert(Lix[0]==1);
 
     k= Tix-1 == locator[0];
-    noe_partialProdPoly(omega[k], Lix-1, syn-1, syn[-1]);
+    EXT(partialProdPoly)(omega[k], Lix-1, syn-1, syn[-1]);
 
     return k;
 }
 
-int noe_rsDecode(GFF4Element *data, int *erasure, int erasureCount, int parityCount){
+int EXT(rsDecode)(GFF4Element *data, int *erasure, int erasureCount, int parityCount){
     int i;
     int errorCount, gfftEval, phantomErrorCount;
     const int dataCount= SIZE - 1 - parityCount;
     GFF4Element erasureLocator[erasureCount + 1];
     GFF4Element erasureSynthSrc[erasureCount*3 + 1];
-    GFF4Element locator0[parityCount + 1 + 1];
-    GFF4Element locator1[parityCount + 1 + 1];
+    GFF4Element locator0[SIZE]; //[parityCount + 1 + 1];
+    GFF4Element locator1[SIZE]; //[parityCount + 1 + 1];
     GFF4Element *locators[2]={locator0, locator1};
     GFF4Element omega0[SIZE - 1 + 1];
     GFF4Element omega1[SIZE - 1 + 1];
@@ -439,7 +424,7 @@ int noe_rsDecode(GFF4Element *data, int 
         data[ erasure[i] ]=0;
     }
 
-    noe_getSyndrom(syn, data, parityCount);
+    EXT(getSyndrom)(syn, data, parityCount);
     for(i=1; i<=parityCount; i++){
         if(syn[i+1]) break;
     }
@@ -450,20 +435,21 @@ int noe_rsDecode(GFF4Element *data, int 
     //FIXME check truncated symbols syndrom
 
     for(i=0; i<erasureCount; i++){
-        SET_POLY1(erasureSynthSrc + 3*i, 1, SIZE - noe_exp[erasure[i]]);
+        SET_POLY1(erasureSynthSrc + 3*i, 1, neg(EXT(exp)[erasure[i]]));
     }
-    noe_synthPoly(erasureLocator, erasureSynthSrc, erasureCount);
-//  noe_printPoly(erasureLocator, erasureCount);
-#if 0
+    EXT(synthPoly)(erasureLocator, erasureSynthSrc, erasureCount);
+//  EXT(printPoly)(erasureLocator, erasureCount);
+#if 1
 for(i=0; i<erasureCount; i++){
-    int eval= noe_evalPoly(erasureLocator, erasureCount, inv(noe_exp[erasure[i]]));
+    int eval= EXT(evalPoly)(erasureLocator, inv(EXT(exp)[erasure[i]]));
     assert( eval==0);
 }
 #endif
-    noe_partialProdPoly(syn, erasureLocator, syn, syn[0]);
+    EXT(partialProdPoly)(syn, erasureLocator, syn, syn[0]);
 START_TIMER
-//    i= noe_rsBerlekampMassey(locators, omegas, parityCount, erasureCount);
-    i= noe_rsEuclid2(locators, omegas, parityCount, erasureCount);
+    i= rsBerlekampMassey(locators, omegas, parityCount, erasureCount);
+//    i= rsEuclid(locators, omegas, parityCount, erasureCount);
+//    i= rsSchoenhage(locators, omegas, parityCount, erasureCount);
 STOP_TIMER
 //printf("errorCount %d \n", errorCount);
 
@@ -475,42 +461,57 @@ STOP_TIMER
     fErrorLocator= omegas[1-i]; //reuse some unused space
     psi= locators[1-i]; //reuse some unused space
 
-    gfftEval= errorCount>20;
-    if(gfftEval && errorCount){
+    gfftEval= (errorCount>20) + (errorCount>1024); //FIXME finetune thresholds
+    if(M) gfftEval=0;
+    if(gfftEval){
         memcpy(fErrorLocator, errorLocator+1, (errorCount+1)*sizeof(GFF4Element));
         memset(fErrorLocator + errorCount+1, 0, (SIZE - errorCount-2)*sizeof(GFF4Element));
-        noe_gfft(fErrorLocator, fErrorLocator, 16);
+        EXT(gfft)(fErrorLocator, fErrorLocator, SHIFT);
     }
 
-    noe_prodPoly(psi, errorLocator, erasureLocator);
-    noe_getDerivative(psi, psi);
+    EXT(prodPoly)(psi, errorLocator, erasureLocator);
+    EXT(getDerivative)(psi, psi);
+
+    if(gfftEval>1){
+        memset(omega + omega[0] + 2, 0, (SIZE - omega[0] - 2)*sizeof(GFF4Element));
+        memset(psi   + psi  [0] + 2, 0, (SIZE - psi  [0] - 2)*sizeof(GFF4Element));
+        omega++;
+        psi++;
+        EXT(gfft)(omega, omega, SHIFT);
+        EXT(gfft)(psi  , psi  , SHIFT);
+    }
 
     if(errorCount){
         for(i=0; i<SIZE-1; i++){
             int r, e;
-            GFF4Element X, invX, nom, denom;
+            GFF4Element X, nom, denom;
 
             if(gfftEval){ //FIXME optimize
                 if(fErrorLocator[i])
                     continue;
                     
-                r= (- noe_bitReverse(i))&0xFFFF;
+                r= i ? MINUS1 -  bitReverse(i) : 0; //FIXME i=0 <->MINUS1 ?
             }else{
-                if(noe_evalPoly(errorLocator, noe_exp[i]))
+                if(EXT(evalPoly)(errorLocator, EXT(exp)[i]))
                     continue;
                     
-                r= (-i)&0xFFFF;
+                r= i ? MINUS1 - i : 0;
             }
 //printf("E%6d %6d\n", i, r);
 
-            X   = noe_exp[r];
-            invX= noe_exp[SIZE - r - 1];
-            nom= prod(X, noe_evalPoly(omega, invX));
-            denom= noe_evalPoly(psi, invX);
+            X= EXT(exp)[r];
+            if(gfftEval>1){
+                nom= prod(X, omega[i]);
+                denom= psi[i];
+            }else{
+                GFF4Element invX= EXT(exp)[MINUS1 - r];
+                nom= prod(X, EXT(evalPoly)(omega, invX));
+                denom= EXT(evalPoly)(psi, invX);
+            }
 
             assert(r>=0 && r<=SIZE-2);
             e= prod(nom, inv(denom));
-            if(data[r]==0 && e==0x10000){
+            if(data[r]==0 && e==MINUS1){
                 if(r < dataCount)
                     return -1;
                 else
@@ -522,15 +523,20 @@ STOP_TIMER
 
     for(i=0; i<erasureCount; i++){
         const int r= erasure[i];
-        const GFF4Element X   = noe_exp[r];
-        const GFF4Element invX= noe_exp[SIZE - r - 1];
+        const GFF4Element X   = EXT(exp)[r];
         GFF4Element nom, denom;
 
-        assert(r>=0 && r<0x10000);
-        assert(prod(X, invX)==1);
+        assert(r>=0 && r<MINUS1);
         
-        nom= prod(X, noe_evalPoly(omega, invX));
-        denom= noe_evalPoly(psi, invX);
+        if(gfftEval>1){
+            int i2= bitReverse(MINUS1 - r);
+            nom= prod(X, omega[i2]);
+            denom= psi[i2];
+        }else{
+            const GFF4Element invX= EXT(exp)[MINUS1 - r];
+            nom= prod(X, EXT(evalPoly)(omega, invX));
+            denom= EXT(evalPoly)(psi, invX);
+        }
 
         assert(data[r]==0);
         data[r]= prod(nom, inv(denom));

Modified: trunk/noe/rs.h
==============================================================================
--- trunk/noe/rs.h	(original)
+++ trunk/noe/rs.h	Tue Jul 10 00:48:14 2007
@@ -22,20 +22,20 @@ typedef struct noe_RsEncoder{
     GFF4Element *parityFactor;
 }noe_RsEncoder;
 
-void noe_getRsGenerator(GFF4Element *dst, int first, int order);
+void EXT(getRsGenerator)(GFF4Element *dst, int first, int order);
 
 /**
  * gets the syndroms.
  * @param src a 2^16 entry long polynom
  * @param syn the syndrom polynom will be stored here
  */
-void noe_getSyndrom(GFF4Element *syn, GFF4Element *src, int order);
-noe_RsEncoder *noe_getRsEncoder(int parityOrder);
-void noe_freeRsEncoder(noe_RsEncoder *encoder);
+void EXT(getSyndrom)(GFF4Element *syn, GFF4Element *src, int order);
+noe_RsEncoder *EXT(getRsEncoder)(int parityOrder);
+void EXT(freeRsEncoder)(noe_RsEncoder *encoder);
 
 /**
  *
  */
-void noe_rsEncode(noe_RsEncoder *encoder, GFF4Element *data);
+void EXT(rsEncode)(noe_RsEncoder *encoder, GFF4Element *data);
 
-int noe_rsDecode(GFF4Element *data, int *erased, int erasedCount, int parityCount);
+int EXT(rsDecode)(GFF4Element *data, int *erased, int erasedCount, int parityCount);

Modified: trunk/noe/rw.h
==============================================================================
--- trunk/noe/rw.h	(original)
+++ trunk/noe/rw.h	Tue Jul 10 00:48:14 2007
@@ -19,9 +19,6 @@
 #define RW_SIZE (1<<18)
 
 typedef struct noe_RwContext{
-    uint8_t buffer[RW_SIZE];
-    uint64_t pos;
-    int flush;
     uint64_t size;
 
     /**
@@ -40,5 +37,3 @@ typedef struct noe_Map{
     uint64_t len;
 }noe_Map;
 
-uint64_t noe_read(noe_RwContext *c, uint64_t pos, int len);
-void noe_write(noe_RwContext *c, uint64_t pos, int len, uint64_t v);

Modified: trunk/noe/test.c
==============================================================================
--- trunk/noe/test.c	(original)
+++ trunk/noe/test.c	Tue Jul 10 00:48:14 2007
@@ -55,11 +55,11 @@ int main(){
     int i, j;
     GFF4Element bp=1;
 
-    noe_init();
+    EXT(init)();
 
-    printf("Testing GF(65537)\n");
+    printf("Testing GF(%d)\n", SIZE);
     for(i=0; i<SIZE-1; i++){
-        if((bp==1 || bp==0) && i>0){
+        if(bp==0 || (bp==1 && i>0) || bp >= SIZE){
             printf("FAIL %d %d\n", i, bp);
             break;
         }
@@ -86,7 +86,7 @@ int main(){
     }
 #endif
     srand (31415);
-#if 0
+  if(!M){
     printf("Testing GFFT");
     for(i=1; i<20; i++){
         int n= SIZE - 1;
@@ -95,32 +95,33 @@ int main(){
         int acc=0;
         
         for(j=0; j<n; j++)
-            code[j]= rand() % 0x10001;
+            code[j]= rand() % SIZE;
 
-        noe_gfft(syn, code, 16);
+        EXT(gfft)(syn, code, SHIFT);
         
         for(j=0; j<n; j++){
             acc+= syn[j];
             acc*=2*j+1;
         }
-        if(gfftChecksums[i-1] != acc) 
+        if(gfftChecksums[i-1] != acc && SHIFT == 16) 
             printf("FAIL: 0x%08X != 0x%08X\n", gfftChecksums[i-1], acc);
         
-        noe_igfft(syn, syn, 16);
+        EXT(igfft)(syn, syn, SHIFT);
         for(j=0; j<n; j++){
-            syn[j]= prod(syn[j], inv(1<<16));
+            syn[j]= prod(syn[j], inv(MINUS1));
         }
         
         for(j=0; j<n; j++){
             if(syn[j] != code[j]) printf("IGFFT missmatch at %d %X!=%X\n", j, syn[j], code[j]);
         }
         
-//      noe_printPoly(gen, i);
-//      noe_printPoly(syn, i-1);
+//      EXT(printPoly)(gen, i);
+//      EXT(printPoly)(syn, i-1);
         
         printf("."); fflush(stdout);
     }
-    
+  }
+#if 1
     srand (31415);
 
     printf("\nTesting generator polynoms");
@@ -134,16 +135,16 @@ int main(){
 
         data[0]= k-1;
         for(j=0; j<k; j++)
-            data[j+1]= rand() & 0xFFFF;
+            data[j+1]= rand() & MASK;
         
-        noe_getRsGenerator(gen, 1, i);
-        noe_prodPoly(code, gen, data);
+        EXT(getRsGenerator)(gen, 1, i);
+        EXT(prodPoly)(code, gen, data);
         assert(code[0]==n-1);
         
-        noe_getSyndrom(syn, code+1, i);
+        EXT(getSyndrom)(syn, code+1, i);
         
-//      noe_printPoly(gen, i);
-//      noe_printPoly(syn, i-1);
+//      EXT(printPoly)(gen, i);
+//      EXT(printPoly)(syn, i-1);
         
         for(j=0; j<i; j++){
             if(syn[j+1+1]) printf("FAIL generator:%d coefficient:%d\n", i, j);
@@ -152,7 +153,7 @@ int main(){
     }
 
     printf("\nTesting encoder");
-    for(i=1; i<1000; i+=200){
+    for(i=1; i<1000 && i<SIZE/2; i+=i+5){
         int n= SIZE - 1;
         int k= SIZE - 1 - i;
         GFF4Element syn[n+1];
@@ -162,32 +163,32 @@ int main(){
         noe_RsEncoder *encoder;
         int pass=5;
 
-        encoder= noe_getRsEncoder(i);
+        encoder= EXT(getRsEncoder)(i);
 
         for(pass=5; pass; pass--){
             for(j=0; j<k; j++)
-                data[j]= rand() & 0xFFFF;
+                data[j]= rand() & MASK;
 
             memcpy(code, data, n*sizeof(GFF4Element));
 
-            noe_rsEncode(encoder, code);
+            EXT(rsEncode)(encoder, code);
 
-            noe_getSyndrom(syn, code, i);
+            EXT(getSyndrom)(syn, code, i);
             //FIXME check that code contains data, but its not touched so ...
 
-    //      noe_printPoly(gen, i);
-    //      noe_printPoly(syn, i-1);
+    //      EXT(printPoly)(gen, i);
+    //      EXT(printPoly)(syn, i-1);
 
             for(j=0; j<i; j++){
                 if(syn[j+1+1]) printf("FAIL generator:%d coefficient:%d is %X\n", i, j, syn[j+1]);
             }
             printf("."); fflush(stdout);
         }
-        noe_freeRsEncoder(encoder);
+        EXT(freeRsEncoder)(encoder);
     }
 #endif
     printf("\nTesting decoder");
-    for(i=1; i<5000; i+=100){
+    for(i=1; i<2000 && i<SIZE/2; i+=(SIZE == 0x10001 ? 100 : 10)){
         int n= SIZE - 1;
         int k= SIZE - 1 - i;
 //      GFF4Element syn[n];
@@ -197,16 +198,16 @@ int main(){
         noe_RsEncoder *encoder;
         int pass=5;
 
-        encoder= noe_getRsEncoder(i);
+        encoder= EXT(getRsEncoder)(i);
 
         for(pass=5; pass; pass--){
             int erasedCount, errorCount;
             for(j=0; j<k; j++)
-                data[j]= rand() & 0xFFFF;
+                data[j]= rand() & MASK;
 
             memcpy(code, data, n*sizeof(GFF4Element));
 
-            noe_rsEncode(encoder, code);
+            EXT(rsEncode)(encoder, code);
             
             erasedCount=0;
             if(rand()&0x40){
@@ -214,15 +215,15 @@ int main(){
                 for(j=0; j<erasedCount; j++){
                     int e=-1;
                     
-                    while(e==-1 || code[e]==0x10000) e=rand()%n;
+                    while(e==-1 || code[e]==MINUS1) e=rand()%n;
 
                     erased[j]= e;
-                    code[ e ]= 0x10000;
+                    code[ e ]= MINUS1;
                 }
                 for(j=0; j<erasedCount; j++){
                     int e= erased[j];
 
-                    code[ e ]= rand()%0xFFFF;
+                    code[ e ]= rand()&MASK;
                 }
             }
             errorCount=0;
@@ -232,16 +233,16 @@ int main(){
                     errorCount= (rand() % max)+1;
                     for(j=0; j<errorCount; j++){
                         int pos= rand()%n;
-                        code[pos] = rand()%0xFFFF;
+                        code[pos] = rand()&MASK;
 //                      printf("P%6d\n", pos);
                     }
                 }
             }
 //printf("erased:%d errors:%d parity:%d\n", erasedCount, errorCount, i);
-            noe_rsDecode(code, erased, erasedCount, i);
+            EXT(rsDecode)(code, erased, erasedCount, i);
 
-    //      noe_printPoly(gen, i);
-    //      noe_printPoly(syn, i-1);
+    //      EXT(printPoly)(gen, i);
+    //      EXT(printPoly)(syn, i-1);
 
             for(j=0; j<k; j++){
                 if(data[j]!=code[j]){
@@ -251,7 +252,7 @@ int main(){
             }
             printf("."); fflush(stdout);
         }
-        noe_freeRsEncoder(encoder);
+        EXT(freeRsEncoder)(encoder);
     }
 #if 0
     {
@@ -294,10 +295,10 @@ return 0;
 
         data[0]= k-1;
         for(j=0; j<k; j++)
-            data[j+1]= rand() & 0xFFFF;
+            data[j+1]= rand() & MASK;
         
-        noe_getRsGenerator(gen, 1, i);
-        noe_prodPoly(code, gen, data);
+        EXT(getRsGenerator)(gen, 1, i);
+        EXT(prodPoly)(code, gen, data);
         assert(code[0]==n);
         
         locator[0] = 0;
@@ -308,20 +309,20 @@ return 0;
             
             if((rand()&7)==0 && j) continue;
             
-            index= rand()%0xFFFF;
-            SET_POLY1(locationFactor, 1, inv(noe_exp[index]));
-            noe_prodPoly(locator, locator, locationFactor);
+            index= rand()%MASK;
+            SET_POLY1(locationFactor, 1, inv(EXT(exp)[index]));
+            EXT(prodPoly)(locator, locator, locationFactor);
             
             code[index] = 0;
         }
         
-        noe_getSyndrom(syn, code+1, i);
-        noe_prodPoly(omega, syn, locator);
-//      noe_getDeriative(locator, locator, locatorOrder);
+        EXT(getSyndrom)(syn, code+1, i);
+        EXT(prodPoly)(omega, syn, locator);
+//      EXT(getDeriative)(locator, locator, locatorOrder);
         
 
-//      noe_printPoly(gen, i);
-//      noe_printPoly(syn, i-1);
+//      EXT(printPoly)(gen, i);
+//      EXT(printPoly)(syn, i-1);
         
         for(j=0; j<i; j++){
             if(syn[j+1]) printf("FAIL generator:%d coefficient:%d\n", i, j);



More information about the Mndiff-dev mailing list