[Mndiff-dev] [mndiff]: r11 - trunk/mnzip/mnzip.c

michael subversion at mplayerhq.hu
Sun Apr 29 20:47:45 CEST 2007


Author: michael
Date: Sun Apr 29 20:47:45 2007
New Revision: 11

Log:
replace current qsort-memcmp based bwt by karp-miller-rosenberg and bentley-sedgewick based
sorting algorithms
also kill some global which where needed due to retarded c standard design of qsort()


Modified:
   trunk/mnzip/mnzip.c

Modified: trunk/mnzip/mnzip.c
==============================================================================
--- trunk/mnzip/mnzip.c	(original)
+++ trunk/mnzip/mnzip.c	Sun Apr 29 20:47:45 2007
@@ -37,31 +37,102 @@
 #define MAX(a,b) ((a) > (b) ? (a) : (b))
 #define MIN(a,b) ((a) > (b) ? (b) : (a))
 
-static uint8_t *global_end, *global_start; //FIXME remove ugly globals
+static void qsort2(uint8_t **ptr, int *idx, int len){
+    int pivot;
+    int i=0;
+    int j=0;
+    int k=len-1;
 
-static int cmp(const uint8_t **pa, const uint8_t **pb){
-    const uint8_t *a= *pa+1+RADIX_PASSES;
-    const uint8_t *b= *pb+1+RADIX_PASSES;
+    assert(len>1);
 
-    for(;;){
-        int len= global_end - MAX(a, b);
-        if(len>0){
-            int i= memcmp(a, b, len);
-            if(i) return i;
-            a+=len;
-            b+=len;
+    pivot= idx[len>>1];
+    i=0;
+    while(j<=k){
+        int v= idx[j];
+        if(v == pivot)
+            j++;
+        else if(v<pivot){
+            FFSWAP(int, idx[j], idx[i]);
+            FFSWAP(uint8_t*, ptr[j], ptr[i]);
+            j++;
+            i++;
+        }else{
+            FFSWAP(int, idx[j], idx[k]);
+            FFSWAP(uint8_t*, ptr[j], ptr[k]);
+            do{
+                k--;
+            }while(idx[k] > pivot);
+        }
+    }
+    assert(i>=0);
+    assert(i<=j);
+    assert(j<=k+1);
+    assert(k<=len-1);
+    if(i>1)
+        qsort2(ptr  ,idx  , i      );
+    if(len-k>2)
+        qsort2(ptr+j,idx+j, len-1-k);
+}
+
+#define read(p) ((p)>=end ? (p)[in - end] : *(p))
+
+static void bssort(const uint8_t **p, int len, int skip, uint8_t *in, uint8_t *end){
+    int pivot;
+    int i=0;
+    int j=0;
+    int k=len-1;
+
+    assert(len>1);
+
+retry:
+    pivot= read(p[len>>1] + skip);
+    for(i=0; i<len; i++)
+        if(read(p[i]+skip) != pivot)
+            break;
+    if(i==len){
+        skip++;
+        goto retry;
+    }
+
+    i=0;
+    while(j<=k){
+        uint8_t *pj= p[j];
+        int v= read(pj + skip);
+        if(v == pivot)
+            j++;
+        else if(v<pivot){
+            p[j]= p[i]; p[i]= pj;
+            j++;
+            i++;
+        }else{
+            p[j]= p[k]; p[k]= pj;
+            do{
+                k--;
+            }while(read(p[k] + skip) > pivot);
         }
-        if(a>=global_end) a-= global_end - global_start;
-        if(b>=global_end) b-= global_end - global_start;
     }
+    assert(i>=0);
+    assert(i<=j);
+    assert(j<=k+1);
+    assert(k<=len-1);
+    if(i>1)
+        bssort(p  , i      , skip  , in, end);
+    if(j-i>1)
+        bssort(p+i, j-i    , skip+1, in, end);
+    if(len-k>2)
+        bssort(p+j, len-1-k, skip  , in, end);
 }
 
 static unsigned int bwt(uint8_t *out, uint8_t *in, unsigned int len){
-    unsigned int i, ret;
+    unsigned int i, j, ret;
     uint8_t **ptr = malloc(len*sizeof(uint8_t*));
     uint8_t **ptr2= malloc(len*sizeof(uint8_t*));
     int histogram[257];
     int last= 0;
+    int sorted;
+    int *idx= ptr2;
+    int *siz1, *siz2, *idx2;
+    int siz1i=0;
 
     if(!ptr || !ptr2 || len >= UINT_MAX / sizeof(uint8_t*)) //FIXME memleak
         return -1;
@@ -97,20 +168,70 @@ static unsigned int bwt(uint8_t *out, ui
     for(i=0; i<len; i++){
         ptr[ histogram[ptr2[i][1]]++ ]= ptr2[i];
     }
-    free(ptr2);
-
-    global_start= in; //FIXME
-    global_end  = in+len;
+fprintf(stderr, "radix sort done\n");
+#if 1
+    siz1= malloc((len+3)/2*sizeof(int));
+    siz2= malloc((len+3)/2*sizeof(int));
+    idx2= malloc(len*sizeof(int));
+assert(last==0);
+    for(i=0; i<len; i++){
+        if(ptr[last][1] != ptr[i][1] || ptr[last][2] != ptr[i][2] ||
+           ptr[last][3] != ptr[i][3] || ptr[last][4] != ptr[i][4]){
+            if(i-last>1)
+                siz1[siz1i++]= i;
+            last= i;
+        }
+        idx[ptr[i]-in]= last;
+    }
+    if(i-last>1)
+        siz1[siz1i++]= i;
+    siz1[siz1i++]= 0;
+fprintf(stderr, "idx init done\n");
+    for(sorted=RADIX_PASSES; sorted<len; sorted<<=1){
+        int last=0;
+        int siz1i=0;
+        int siz2i=0;
+        for(;;){
+            int i= siz1[siz1i++];
+            if(!i)
+                break;
+            last= idx[ptr[i-1]-in];
+#define limit(i) ((i)>=len ? (i) - len : (i))
+            for(j=last; j<i; j++)
+                idx2[j]= idx[limit(ptr[j] - in + sorted)];
+            qsort2(ptr + last, idx2 + last, i-last);
+            for(j=last+1; j<i; j++){
+                if(idx2[j-1] < idx2[j]){
+                    if(j-last>1)
+                        siz2[siz2i++]= j;
+                    last= j;
+                }
 
+                assert(idx[ptr[j]-in] <= last);
+                idx[ptr[j]-in]= last;
+            }
+            if(j-idx[ptr[j-1]-in]>1)
+                siz2[siz2i++]= j;
+        }
+        if(!siz2i)
+            break;
+        siz2[siz2i++]= 0;
+        FFSWAP(int*, siz1, siz2);
+        assert(idx[ptr[0]-in] == 0);
+    }
+    free(siz1);
+    free(siz2);
+#else
     for(i=1; i<len; i++){
         if(memcmp(ptr[last]+1, ptr[i]+1, RADIX_PASSES)){
             if(i-last>1)
-                qsort(ptr + last, i-last, sizeof(uint8_t*), cmp);
+                bssort(ptr + last, i-last, RADIX_PASSES+1, in, in+len);
             last= i;
         }
     }
-    qsort(ptr + last, i-last, sizeof(uint8_t*), cmp);
-
+    if(i-last>1)
+        bssort(ptr + last, i-last, RADIX_PASSES+1, in, in+len);
+#endif
     ret=-1;
     for(i=0; i<len; i++){
         out[i]= *ptr[i];
@@ -119,6 +240,7 @@ static unsigned int bwt(uint8_t *out, ui
     }
 
     free(ptr);
+    free(ptr2);
     return ret;
 }
 



More information about the Mndiff-dev mailing list