[Mndiff-dev] [mndiff]: r40 - trunk/mnzip/mnzip.c
michael
subversion at mplayerhq.hu
Thu Jun 21 18:09:58 CEST 2007
Author: michael
Date: Thu Jun 21 18:09:58 2007
New Revision: 40
Log:
move sorted sizes into unused parts of the existing arrays
~25% less memory needed for compression
sadly ~3% slower and its not clear why
Modified:
trunk/mnzip/mnzip.c
Modified: trunk/mnzip/mnzip.c
==============================================================================
--- trunk/mnzip/mnzip.c (original)
+++ trunk/mnzip/mnzip.c Thu Jun 21 18:09:58 2007
@@ -320,11 +320,11 @@ static unsigned int bwt(uint8_t *out, ui
uint8_t **ptr = malloc(len*sizeof(uint8_t*));
uint8_t **ptr2= malloc(len*sizeof(uint8_t*));
int histogram[257];
- int last= 0;
+ int last= len-1;
+ int last2= len;
int sorted;
int *idx= ptr2;
- int *siz1, *siz2, *idx2;
- int siz1i=0;
+ int *idx2;
if(!ptr || !ptr2 || len >= UINT_MAX / sizeof(uint8_t*)) //FIXME memleak
return -1;
@@ -362,57 +362,88 @@ static unsigned int bwt(uint8_t *out, ui
}
fprintf(stderr, "radix sort done\n");
#if 1
- siz1= malloc((len+3)/2*sizeof(int));
- siz2= malloc((len+3)/2*sizeof(int));
idx2= malloc(len*sizeof(int));
-assert(last==0);
- for(i=0; i<len; i++){
+ for(i=len-1; (int)i>=0; i--){
if(ptr[last][1] != ptr[i][1] || ptr[last][2] != ptr[i][2] ||
ptr[last][3] != ptr[i][3] || ptr[last][4] != ptr[i][4]){
- if(i-last>1)
- siz1[siz1i++]= i;
+
+ if(last - i > 1)
+ last2= i+1;
+ else{
+ ptr[last]= in-last2;
+ assert(in - ptr[last] > last);
+ assert(last2 <= len);
+ }
+
last= i;
}
idx[ptr[i]-in]= last;
}
- if(i-last>1)
- siz1[siz1i++]= i;
- siz1[siz1i++]= 0;
+
+ if(last - i == 1){
+ ptr[last]= in-last2;
+ assert(in - ptr[last] > last);
+ }
+
fprintf(stderr, "idx init done\n");
for(sorted=RADIX_PASSES; sorted<len; sorted<<=1){
- int last=0;
- int siz1i=0;
- int siz2i=0;
+//fprintf(stderr, "pass %d\n", sorted);
+
+ i=0;
for(;;){
- int i= siz1[siz1i++];
- if(!i)
+ int right;
+// assert(i < len+1);
+// assert(i>=0);
+
+ if(i >= len)
break;
- last= idx[ptr[i-1]-in];
+
+ if(ptr[i] - in < 0){
+ int openlink= i;
+ do{
+ i = in - ptr[i];
+ }while(i < len && ptr[i] - in < 0);
+ ptr[openlink]= in - i;
+ if(i >= len)
+ break;
+ }
+
+ right= idx[ptr[i]-in];
#define limit(i) ((i)>=len ? (i) - len : (i))
- for(j=last; j<i; j++)
+ assert(right-(int)i+1 > 1);
+ for(j=i; j<=right; j++){
idx2[j]= idx[limit(ptr[j] - in + sorted)];
- qsort2(ptr + last, idx2 + last, i-last);
- for(j=last+1; j<i; j++){
- if(idx2[j-1] < idx2[j]){
- if(j-last>1)
- siz2[siz2i++]= j;
- last= j;
- }
+// assert(idx[ptr[j] - in] == idx[ptr[i] - in]);
+ }
+ qsort2(ptr + i, idx2 + i, right-i+1);
- assert(idx[ptr[j]-in] <= last);
+ last= right;
+ assert(idx[ptr[right]-in] == right);
+ last2= right+1;
+ for(j=right-1; (int)j>=(int)i; j--){
+ if(idx2[j] < idx2[j+1]){
+ assert(last - j >= 1);
+ if(last - j > 1){
+ last2= j+1;
+ }else{
+ ptr[last]= in - last2;
+ }
+ last=j;
+ }
+// assert(ptr[j]-in >= 0);
+ assert(idx[ptr[j]-in] >= last);
idx[ptr[j]-in]= last;
}
- if(j-idx[ptr[j-1]-in]>1)
- siz2[siz2i++]= j;
+ if(last - j == 1)
+ ptr[last]= in - last2;
+
+ i= right + 1;
}
- if(!siz2i)
- break;
- siz2[siz2i++]= 0;
- FFSWAP(int*, siz1, siz2);
- assert(idx[ptr[0]-in] == 0);
}
- free(siz1);
- free(siz2);
+ for(i=0; i<len; i++){
+ out[idx[i]]= in[i];
+ }
+ ret= idx[0];
#else
for(i=1; i<len; i++){
if(memcmp(ptr[last]+1, ptr[i]+1, RADIX_PASSES)){
@@ -424,12 +455,6 @@ fprintf(stderr, "idx init done\n");
if(i-last>1)
bssort(ptr + last, i-last, RADIX_PASSES+1, in, in+len);
#endif
- ret=-1;
- for(i=0; i<len; i++){
- out[i]= *ptr[i];
- if(ptr[i] == in)
- ret= i;
- }
free(ptr);
free(ptr2);
More information about the Mndiff-dev
mailing list