[Mndiff-dev] [mndiff]: r40 - trunk/mnzip/mnzip.c

michael subversion at mplayerhq.hu
Thu Jun 21 18:09:58 CEST 2007


Author: michael
Date: Thu Jun 21 18:09:58 2007
New Revision: 40

Log:
move sorted sizes into unused parts of the existing arrays
~25% less memory needed for compression
sadly ~3% slower and its not clear why


Modified:
   trunk/mnzip/mnzip.c

Modified: trunk/mnzip/mnzip.c
==============================================================================
--- trunk/mnzip/mnzip.c	(original)
+++ trunk/mnzip/mnzip.c	Thu Jun 21 18:09:58 2007
@@ -320,11 +320,11 @@ static unsigned int bwt(uint8_t *out, ui
     uint8_t **ptr = malloc(len*sizeof(uint8_t*));
     uint8_t **ptr2= malloc(len*sizeof(uint8_t*));
     int histogram[257];
-    int last= 0;
+    int last= len-1;
+    int last2= len;
     int sorted;
     int *idx= ptr2;
-    int *siz1, *siz2, *idx2;
-    int siz1i=0;
+    int *idx2;
 
     if(!ptr || !ptr2 || len >= UINT_MAX / sizeof(uint8_t*)) //FIXME memleak
         return -1;
@@ -362,57 +362,88 @@ static unsigned int bwt(uint8_t *out, ui
     }
 fprintf(stderr, "radix sort done\n");
 #if 1
-    siz1= malloc((len+3)/2*sizeof(int));
-    siz2= malloc((len+3)/2*sizeof(int));
     idx2= malloc(len*sizeof(int));
-assert(last==0);
-    for(i=0; i<len; i++){
+    for(i=len-1; (int)i>=0; i--){
         if(ptr[last][1] != ptr[i][1] || ptr[last][2] != ptr[i][2] ||
            ptr[last][3] != ptr[i][3] || ptr[last][4] != ptr[i][4]){
-            if(i-last>1)
-                siz1[siz1i++]= i;
+
+            if(last - i > 1)
+                last2= i+1;
+            else{
+                ptr[last]= in-last2;
+                assert(in - ptr[last] > last);
+                assert(last2 <= len);
+            }
+
             last= i;
         }
         idx[ptr[i]-in]= last;
     }
-    if(i-last>1)
-        siz1[siz1i++]= i;
-    siz1[siz1i++]= 0;
+
+    if(last - i == 1){
+        ptr[last]= in-last2;
+        assert(in - ptr[last] > last);
+    }
+
 fprintf(stderr, "idx init done\n");
     for(sorted=RADIX_PASSES; sorted<len; sorted<<=1){
-        int last=0;
-        int siz1i=0;
-        int siz2i=0;
+//fprintf(stderr, "pass %d\n", sorted);
+
+        i=0;
         for(;;){
-            int i= siz1[siz1i++];
-            if(!i)
+            int right;
+//             assert(i < len+1);
+//             assert(i>=0);
+
+            if(i >= len)
                 break;
-            last= idx[ptr[i-1]-in];
+
+            if(ptr[i] - in < 0){
+                int openlink= i;
+                do{
+                    i = in - ptr[i];
+                }while(i < len && ptr[i] - in < 0);
+                ptr[openlink]= in - i;
+                if(i >= len)
+                    break;
+            }
+
+            right= idx[ptr[i]-in];
 #define limit(i) ((i)>=len ? (i) - len : (i))
-            for(j=last; j<i; j++)
+            assert(right-(int)i+1 > 1);
+            for(j=i; j<=right; j++){
                 idx2[j]= idx[limit(ptr[j] - in + sorted)];
-            qsort2(ptr + last, idx2 + last, i-last);
-            for(j=last+1; j<i; j++){
-                if(idx2[j-1] < idx2[j]){
-                    if(j-last>1)
-                        siz2[siz2i++]= j;
-                    last= j;
-                }
+//                assert(idx[ptr[j] - in] == idx[ptr[i] - in]);
+            }
+            qsort2(ptr + i, idx2 + i, right-i+1);
 
-                assert(idx[ptr[j]-in] <= last);
+            last= right;
+            assert(idx[ptr[right]-in] == right);
+            last2= right+1;
+            for(j=right-1; (int)j>=(int)i; j--){
+                if(idx2[j] < idx2[j+1]){
+                    assert(last - j >= 1);
+                    if(last - j > 1){
+                        last2= j+1;
+                    }else{
+                        ptr[last]= in - last2;
+                    }
+                    last=j;
+                }
+//                assert(ptr[j]-in >= 0);
+                assert(idx[ptr[j]-in] >= last);
                 idx[ptr[j]-in]= last;
             }
-            if(j-idx[ptr[j-1]-in]>1)
-                siz2[siz2i++]= j;
+            if(last - j == 1)
+                ptr[last]= in - last2;
+
+            i= right + 1;
         }
-        if(!siz2i)
-            break;
-        siz2[siz2i++]= 0;
-        FFSWAP(int*, siz1, siz2);
-        assert(idx[ptr[0]-in] == 0);
     }
-    free(siz1);
-    free(siz2);
+    for(i=0; i<len; i++){
+        out[idx[i]]= in[i];
+    }
+    ret= idx[0];
 #else
     for(i=1; i<len; i++){
         if(memcmp(ptr[last]+1, ptr[i]+1, RADIX_PASSES)){
@@ -424,12 +455,6 @@ fprintf(stderr, "idx init done\n");
     if(i-last>1)
         bssort(ptr + last, i-last, RADIX_PASSES+1, in, in+len);
 #endif
-    ret=-1;
-    for(i=0; i<len; i++){
-        out[i]= *ptr[i];
-        if(ptr[i] == in)
-            ret= i;
-    }
 
     free(ptr);
     free(ptr2);



More information about the Mndiff-dev mailing list