[Mndiff-dev] [mndiff]: r29 - trunk/mnzip/mnzip.c

michael subversion at mplayerhq.hu
Sat Jun 16 03:05:55 CEST 2007


Author: michael
Date: Sat Jun 16 03:05:55 2007
New Revision: 29

Log:
pretransform
significantly improves compression of some files


Modified:
   trunk/mnzip/mnzip.c

Modified: trunk/mnzip/mnzip.c
==============================================================================
--- trunk/mnzip/mnzip.c	(original)
+++ trunk/mnzip/mnzip.c	Sat Jun 16 03:05:55 2007
@@ -24,6 +24,7 @@
 #include <limits.h>
 #include <string.h>
 #include <assert.h>
+#include <math.h>
 
 
 #undef NDEBUG
@@ -34,6 +35,7 @@
 
 #define MAX(a,b) ((a) > (b) ? (a) : (b))
 #define MIN(a,b) ((a) > (b) ? (b) : (a))
+#define ABS(a) ((a) >= 0 ? (a) : -(a))
 #define FFSWAP(type,a,b) do{type SWAP_tmp= b; b= a; a= SWAP_tmp;}while(0)
 
 //---------------- Range coder taken from libavcodec copied here to avoid a nasty dependancy
@@ -152,6 +154,50 @@ int ff_rac_terminate(RangeCoder *c){
 //--------------------------------------------------
 
 
+static int pretransform(uint8_t *in, int len){
+    int step, i, beststep;
+    double bestsum= INT_MAX;
+
+    for(step=0; step<=16; step++){
+        int hist[256]={0};
+        double sum=0;
+        for(i=0; i<len; i++){
+            uint8_t v= in[i];
+            if(step && i>=step)
+                v-= in[i-step];
+
+            hist[v]++;
+        }
+        for(i=0; i<256; i++){
+            if(hist[i])
+                sum += hist[i] * log2(len / (double)hist[i]);
+        }
+//        fprintf(stderr, "step:%d sum:%f\n", step, sum);
+
+        if(sum < bestsum){
+            bestsum= sum;
+            beststep= step;
+        }
+    }
+
+    if(beststep)
+        for(i=len-1; i>=beststep; i--)
+            in[i] -= in[i-beststep];
+
+    return beststep;
+}
+
+static void inv_pretransform(uint8_t *in, uint8_t *tmp, int len, int step){
+    int i;
+
+    for(i=0; i<step; i++)
+        in[i] += tmp[i];
+    for(i=step; i<len; i++)
+        in[i] += in[i-step];
+    for(i=0; i<step; i++)
+        tmp[i] = in[len-step+i];
+}
+
 static void qsort2(uint8_t **ptr, int *idx, int len){
     int pivot;
     int i=0;
@@ -381,7 +427,7 @@ fprintf(stderr, "idx init done\n");
     6,5 ( 3) -> 689
 */
 
-static int ibwt(uint8_t *in, unsigned int len, unsigned int start, unsigned int histogram[256], int low_mem){
+static int ibwt(uint8_t *in, unsigned int len, unsigned int start, unsigned int histogram[256], int low_mem, int pretype){
     int shift= low_mem*3;
     int shift2= shift + 8;
     int mask= (1<<shift)-1;
@@ -393,6 +439,7 @@ static int ibwt(uint8_t *in, unsigned in
     uint32_t *idx= malloc(idx_len*sizeof(uint32_t));
     uint8_t *idx2=malloc(idx2_len*256*sizeof(uint8_t));
     int packed= idx_len <= 256*256*256 && !shift;
+    uint8_t pret_buf[16]={0};
 
     if(!idx || idx_len >= UINT32_MAX / sizeof(uint32_t)) //FIXME memleak
         return -1;
@@ -458,6 +505,9 @@ static int ibwt(uint8_t *in, unsigned in
                 }
             }
         }
+        if(pretype)
+            inv_pretransform(buffer, pret_buf, len2, pretype);
+
         fwrite(buffer, len2, 1, stdout);
     }
 
@@ -621,17 +671,21 @@ static inline void wb32(uint8_t *p, int 
 
 static unsigned int compress(uint8_t *out, uint8_t *in, unsigned int len){
     uint8_t *tmp= malloc(len);
-    unsigned int start,j, out_size;
+    unsigned int start,j, out_size, pretype;
     int i;
     RangeCoder c;
     uint8_t state[16*512*32+1];
     int mtf[256];
+
+fprintf(stderr, "pretransform\n");
+    pretype= pretransform(in, len);
+
 fprintf(stderr, "bwt\n");
     start= bwt(tmp, in, len);
 fprintf(stderr," done\n");
 
 fprintf(stderr,"range coding (%d %d)\n", len, start);
-    wb32(out, 0    ); out+=4;
+    wb32(out, pretype); out+=4;
     wb32(out, len  ); out+=4;
     wb32(out, start); out+=4;
     wb32(out, 0    ); out+=4;
@@ -684,7 +738,7 @@ fprintf(stderr," done (%d %d %d)\n", len
 static int decompress(FILE *fi, int low_mem){
     uint8_t *in;
     unsigned int in_len;
-    unsigned int out_len,start,i, j;
+    unsigned int out_len,start,i, j, pretype;
     int ret;
     uint8_t *tmp; //FIXME reduce memory needs
     RangeCoder c;
@@ -699,6 +753,7 @@ static int decompress(FILE *fi, int low_
     else if(ret != 16)
         return -1;
 
+    pretype= rb32(tmpX  );
     out_len= rb32(tmpX+4);
     start  = rb32(tmpX+8);
     in_len = rb32(tmpX+12);
@@ -748,7 +803,7 @@ fprintf(stderr," block (%d %d %d)\n", ou
         fprintf(stderr, "error (%d)\n", c.bytestream - c.bytestream_end);
         return -1;
     }
-    ibwt(tmp, out_len, start, histogram, low_mem);
+    ibwt(tmp, out_len, start, histogram, low_mem, pretype);
 
     return out_len;
 }



More information about the Mndiff-dev mailing list