[FFmpeg-devel] [PATCH] FFV1 speed tweaks

Jason Garrett-Glaser darkshikari
Fri Aug 8 23:29:51 CEST 2008


This patch makes FFV1 encoding and decoding roughly 7-10 clocks faster
per pixel, out of a total of 120-160 clocks per pixel.

Dark Shikari

Index: libavcodec/ffv1.c
===================================================================
--- libavcodec/ffv1.c	(revision 14661)
+++ libavcodec/ffv1.c	(working copy)
@@ -354,10 +354,10 @@
 static inline int encode_line(FFV1Context *s, int w, int_fast16_t *sample[2], int plane_index, int bits){
     PlaneContext * const p= &s->plane[plane_index];
     RangeCoder * const c= &s->c;
-    int x;
     int run_index= s->run_index;
     int run_count=0;
     int run_mode=0;
+    int_fast16_t *cur_sample[3] = {sample[0],sample[1],sample[2]};

     if(s->ac){
         if(c->bytestream_end - c->bytestream < w*20){
@@ -370,18 +370,17 @@
             return -1;
         }
     }
+    for(; w>0; w--){
+        int diff, context, sign;

-    for(x=0; x<w; x++){
-        int diff, context;
+        context= get_context(s, cur_sample[0], cur_sample[1], cur_sample[2]);
+        diff= cur_sample[0][0] - predict(cur_sample[0], cur_sample[1]);
+
+        /* Negate context and diff if context is negative */
+        sign = context >> 31;
+        context = (sign ^ context) - sign;
+        diff = (sign ^ diff) - sign;

-        context= get_context(s, sample[0]+x, sample[1]+x, sample[2]+x);
-        diff= sample[0][x] - predict(sample[0]+x, sample[1]+x);
-
-        if(context < 0){
-            context = -context;
-            diff= -diff;
-        }
-
         diff= fold(diff, bits);

         if(s->ac){
@@ -413,6 +412,9 @@
             if(run_mode == 0)
                 put_vlc_symbol(&s->pb, &p->vlc_state[context], diff, bits);
         }
+        cur_sample[0]++;
+        cur_sample[1]++;
+        cur_sample[2]++;
     }
     if(run_mode){
         while(run_count >= 1<<ff_log2_run[run_index]){
@@ -707,22 +709,17 @@
 static inline void decode_line(FFV1Context *s, int w, int_fast16_t *sample[2], int plane_index, int bits){
     PlaneContext * const p= &s->plane[plane_index];
     RangeCoder * const c= &s->c;
-    int x;
     int run_count=0;
     int run_mode=0;
     int run_index= s->run_index;
-
-    for(x=0; x<w; x++){
+    int_fast16_t *cur_sample[2] = {sample[0],sample[1]};
+//1334
+    for(; w>0; w--){START_TIMER;
         int diff, context, sign;
+        context= get_context(s, cur_sample[1], cur_sample[0], cur_sample[1]);
+        sign = context >> 31;
+        context = (sign ^ context) - sign;

-        context= get_context(s, sample[1] + x, sample[0] + x, sample[1] + x);
-        if(context < 0){
-            context= -context;
-            sign=1;
-        }else
-            sign=0;
-
-
         if(s->ac){
             diff= get_symbol(c, p->state[context], 1);
         }else{
@@ -732,31 +729,34 @@
                 if(run_count==0 && run_mode==1){
                     if(get_bits1(&s->gb)){
                         run_count = 1<<ff_log2_run[run_index];
-                        if(x + run_count <= w) run_index++;
+                        if(run_count <= w) run_index++;
                     }else{
-                        if(ff_log2_run[run_index]) run_count = get_bits(&s->gb, ff_log2_run[run_index]);
+                        /* equivalent to if(ff_log2_run[run_index]) */
+                        if(run_index&0x1C) run_count = get_bits(&s->gb, ff_log2_run[run_index]);
                         else run_count=0;
                         if(run_index) run_index--;
                         run_mode=2;
                     }
                 }
-                run_count--;
-                if(run_count < 0){
+                if(run_count == 0){
                     run_mode=0;
-                    run_count=0;
                     diff= get_vlc_symbol(&s->gb, &p->vlc_state[context], bits);
                     if(diff>=0) diff++;
-                }else
+                }else{
                     diff=0;
+                    run_count--;
+                }
             }else
                 diff= get_vlc_symbol(&s->gb, &p->vlc_state[context], bits);

 //            printf("count:%d index:%d, mode:%d, x:%d y:%d pos:%d\n", run_count, run_index, run_mode, x, y, get_bits_count(&s->gb));
         }

-        if(sign) diff= -diff;
+        diff = (sign ^ diff) - sign;

-        sample[1][x]= (predict(sample[1] + x, sample[0] + x) + diff) & ((1<<bits)-1);
+        cur_sample[1][0]= (predict(cur_sample[1], cur_sample[0]) + diff) & ((1<<bits)-1);
+        cur_sample[0]++;
+        cur_sample[1]++;STOP_TIMER("decode line");
     }
     s->run_index= run_index;
 }




More information about the ffmpeg-devel mailing list