[Mndiff-dev] [mndiff]: r11 - trunk/mnzip/mnzip.c
michael
subversion at mplayerhq.hu
Sun Apr 29 20:47:45 CEST 2007
Author: michael
Date: Sun Apr 29 20:47:45 2007
New Revision: 11
Log:
replace current qsort-memcmp based bwt by karp-miller-rosenberg and bentley-sedgewick based
sorting algorithms
also kill some global which where needed due to retarded c standard design of qsort()
Modified:
trunk/mnzip/mnzip.c
Modified: trunk/mnzip/mnzip.c
==============================================================================
--- trunk/mnzip/mnzip.c (original)
+++ trunk/mnzip/mnzip.c Sun Apr 29 20:47:45 2007
@@ -37,31 +37,102 @@
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#define MIN(a,b) ((a) > (b) ? (b) : (a))
-static uint8_t *global_end, *global_start; //FIXME remove ugly globals
+static void qsort2(uint8_t **ptr, int *idx, int len){
+ int pivot;
+ int i=0;
+ int j=0;
+ int k=len-1;
-static int cmp(const uint8_t **pa, const uint8_t **pb){
- const uint8_t *a= *pa+1+RADIX_PASSES;
- const uint8_t *b= *pb+1+RADIX_PASSES;
+ assert(len>1);
- for(;;){
- int len= global_end - MAX(a, b);
- if(len>0){
- int i= memcmp(a, b, len);
- if(i) return i;
- a+=len;
- b+=len;
+ pivot= idx[len>>1];
+ i=0;
+ while(j<=k){
+ int v= idx[j];
+ if(v == pivot)
+ j++;
+ else if(v<pivot){
+ FFSWAP(int, idx[j], idx[i]);
+ FFSWAP(uint8_t*, ptr[j], ptr[i]);
+ j++;
+ i++;
+ }else{
+ FFSWAP(int, idx[j], idx[k]);
+ FFSWAP(uint8_t*, ptr[j], ptr[k]);
+ do{
+ k--;
+ }while(idx[k] > pivot);
+ }
+ }
+ assert(i>=0);
+ assert(i<=j);
+ assert(j<=k+1);
+ assert(k<=len-1);
+ if(i>1)
+ qsort2(ptr ,idx , i );
+ if(len-k>2)
+ qsort2(ptr+j,idx+j, len-1-k);
+}
+
+#define read(p) ((p)>=end ? (p)[in - end] : *(p))
+
+static void bssort(const uint8_t **p, int len, int skip, uint8_t *in, uint8_t *end){
+ int pivot;
+ int i=0;
+ int j=0;
+ int k=len-1;
+
+ assert(len>1);
+
+retry:
+ pivot= read(p[len>>1] + skip);
+ for(i=0; i<len; i++)
+ if(read(p[i]+skip) != pivot)
+ break;
+ if(i==len){
+ skip++;
+ goto retry;
+ }
+
+ i=0;
+ while(j<=k){
+ uint8_t *pj= p[j];
+ int v= read(pj + skip);
+ if(v == pivot)
+ j++;
+ else if(v<pivot){
+ p[j]= p[i]; p[i]= pj;
+ j++;
+ i++;
+ }else{
+ p[j]= p[k]; p[k]= pj;
+ do{
+ k--;
+ }while(read(p[k] + skip) > pivot);
}
- if(a>=global_end) a-= global_end - global_start;
- if(b>=global_end) b-= global_end - global_start;
}
+ assert(i>=0);
+ assert(i<=j);
+ assert(j<=k+1);
+ assert(k<=len-1);
+ if(i>1)
+ bssort(p , i , skip , in, end);
+ if(j-i>1)
+ bssort(p+i, j-i , skip+1, in, end);
+ if(len-k>2)
+ bssort(p+j, len-1-k, skip , in, end);
}
static unsigned int bwt(uint8_t *out, uint8_t *in, unsigned int len){
- unsigned int i, ret;
+ unsigned int i, j, ret;
uint8_t **ptr = malloc(len*sizeof(uint8_t*));
uint8_t **ptr2= malloc(len*sizeof(uint8_t*));
int histogram[257];
int last= 0;
+ int sorted;
+ int *idx= ptr2;
+ int *siz1, *siz2, *idx2;
+ int siz1i=0;
if(!ptr || !ptr2 || len >= UINT_MAX / sizeof(uint8_t*)) //FIXME memleak
return -1;
@@ -97,20 +168,70 @@ static unsigned int bwt(uint8_t *out, ui
for(i=0; i<len; i++){
ptr[ histogram[ptr2[i][1]]++ ]= ptr2[i];
}
- free(ptr2);
-
- global_start= in; //FIXME
- global_end = in+len;
+fprintf(stderr, "radix sort done\n");
+#if 1
+ siz1= malloc((len+3)/2*sizeof(int));
+ siz2= malloc((len+3)/2*sizeof(int));
+ idx2= malloc(len*sizeof(int));
+assert(last==0);
+ for(i=0; i<len; i++){
+ if(ptr[last][1] != ptr[i][1] || ptr[last][2] != ptr[i][2] ||
+ ptr[last][3] != ptr[i][3] || ptr[last][4] != ptr[i][4]){
+ if(i-last>1)
+ siz1[siz1i++]= i;
+ last= i;
+ }
+ idx[ptr[i]-in]= last;
+ }
+ if(i-last>1)
+ siz1[siz1i++]= i;
+ siz1[siz1i++]= 0;
+fprintf(stderr, "idx init done\n");
+ for(sorted=RADIX_PASSES; sorted<len; sorted<<=1){
+ int last=0;
+ int siz1i=0;
+ int siz2i=0;
+ for(;;){
+ int i= siz1[siz1i++];
+ if(!i)
+ break;
+ last= idx[ptr[i-1]-in];
+#define limit(i) ((i)>=len ? (i) - len : (i))
+ for(j=last; j<i; j++)
+ idx2[j]= idx[limit(ptr[j] - in + sorted)];
+ qsort2(ptr + last, idx2 + last, i-last);
+ for(j=last+1; j<i; j++){
+ if(idx2[j-1] < idx2[j]){
+ if(j-last>1)
+ siz2[siz2i++]= j;
+ last= j;
+ }
+ assert(idx[ptr[j]-in] <= last);
+ idx[ptr[j]-in]= last;
+ }
+ if(j-idx[ptr[j-1]-in]>1)
+ siz2[siz2i++]= j;
+ }
+ if(!siz2i)
+ break;
+ siz2[siz2i++]= 0;
+ FFSWAP(int*, siz1, siz2);
+ assert(idx[ptr[0]-in] == 0);
+ }
+ free(siz1);
+ free(siz2);
+#else
for(i=1; i<len; i++){
if(memcmp(ptr[last]+1, ptr[i]+1, RADIX_PASSES)){
if(i-last>1)
- qsort(ptr + last, i-last, sizeof(uint8_t*), cmp);
+ bssort(ptr + last, i-last, RADIX_PASSES+1, in, in+len);
last= i;
}
}
- qsort(ptr + last, i-last, sizeof(uint8_t*), cmp);
-
+ if(i-last>1)
+ bssort(ptr + last, i-last, RADIX_PASSES+1, in, in+len);
+#endif
ret=-1;
for(i=0; i<len; i++){
out[i]= *ptr[i];
@@ -119,6 +240,7 @@ static unsigned int bwt(uint8_t *out, ui
}
free(ptr);
+ free(ptr2);
return ret;
}
More information about the Mndiff-dev
mailing list