[FFmpeg-cvslog] r24468 - in trunk/libavcodec: vp56.h vp8.c vp8data.h

conrad subversion
Fri Jul 23 23:46:17 CEST 2010


Author: conrad
Date: Fri Jul 23 23:46:17 2010
New Revision: 24468

Log:
Decode DCT tokens by branching to a different code path for each branch
on the huffman tree, instead of traversing the tree in a while loop.

Based on the similar optimization in libvpx's detokenize.c

10% faster at normal bitrates, and 30% faster for high-bitrate intra-only content

Modified:
   trunk/libavcodec/vp56.h
   trunk/libavcodec/vp8.c
   trunk/libavcodec/vp8data.h

Modified: trunk/libavcodec/vp56.h
==============================================================================
--- trunk/libavcodec/vp56.h	Fri Jul 23 23:46:14 2010	(r24467)
+++ trunk/libavcodec/vp56.h	Fri Jul 23 23:46:17 2010	(r24468)
@@ -226,6 +226,24 @@ static inline int vp56_rac_get_prob(VP56
     return bit;
 }
 
+// branchy variant, to be used where there's a branch based on the bit decoded
+static av_always_inline int vp56_rac_get_prob_branchy(VP56RangeCoder *c, int prob) // returns the decoded bit (0 or 1); updates c->high and c->code_word
+{
+    unsigned long code_word = vp56_rac_renorm(c); // local working copy; NOTE(review): presumably vp56_rac_renorm() renormalizes c first - confirm
+    unsigned low = 1 + (((c->high - 1) * prob) >> 8); // split point: 1 + ((high - 1) * prob / 256)
+    unsigned low_shift = low << 8; // split point shifted left by 8 so it can be compared against code_word
+
+    if (code_word >= low_shift) { // code word at or above the split: the bit is 1
+        c->high     -= low; // reduce high by the split point
+        c->code_word = code_word - low_shift; // subtract the shifted split from the code word
+        return 1;
+    }
+
+    c->high = low; // bit is 0: the split point becomes the new high
+    c->code_word = code_word; // write back the value obtained from vp56_rac_renorm() unchanged
+    return 0;
+}
+
 static inline int vp56_rac_get(VP56RangeCoder *c)
 {
     unsigned int code_word = vp56_rac_renorm(c);

Modified: trunk/libavcodec/vp8.c
==============================================================================
--- trunk/libavcodec/vp8.c	Fri Jul 23 23:46:14 2010	(r24467)
+++ trunk/libavcodec/vp8.c	Fri Jul 23 23:46:17 2010	(r24468)
@@ -800,36 +800,61 @@ static int decode_block_coeffs(VP56Range
                                uint8_t probs[8][3][NUM_DCT_TOKENS-1],
                                int i, int zero_nhood, int16_t qmul[2])
 {
-    int token, nonzero = 0;
-    int offset = 0;
+    uint8_t *token_prob;
+    int nonzero = 0;
+    int coeff;
 
-    for (; i < 16; i++) {
-        token = vp8_rac_get_tree_with_offset(c, vp8_coeff_tree, probs[vp8_coeff_band[i]][zero_nhood], offset);
+    do {
+        token_prob = probs[vp8_coeff_band[i]][zero_nhood];
 
-        if (token == DCT_EOB)
-            break;
-        else if (token >= DCT_CAT1) {
-            int cat = token-DCT_CAT1;
-            token = vp8_rac_get_coeff(c, vp8_dct_cat_prob[cat]);
-            token += 3 + (2<<cat);
-        }
+        if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
+            return nonzero;
 
-        // after the first token, the non-zero prediction context becomes
-        // based on the last decoded coeff
-        if (!token) {
+skip_eob:
+        if (!vp56_rac_get_prob_branchy(c, token_prob[1])) { // DCT_0
             zero_nhood = 0;
-            offset = 1;
-            continue;
-        } else if (token == 1)
+            token_prob = probs[vp8_coeff_band[++i]][0];
+            if (i < 16)
+                goto skip_eob;
+            return nonzero; // invalid input; blocks should end with EOB
+        }
+
+        if (!vp56_rac_get_prob_branchy(c, token_prob[2])) { // DCT_1
+            coeff = 1;
             zero_nhood = 1;
-        else
+        } else {
             zero_nhood = 2;
 
+            if (!vp56_rac_get_prob_branchy(c, token_prob[3])) { // DCT 2,3,4
+                coeff = vp56_rac_get_prob(c, token_prob[4]);
+                if (coeff)
+                    coeff += vp56_rac_get_prob(c, token_prob[5]);
+                coeff += 2;
+            } else {
+                // DCT_CAT*
+                if (!vp56_rac_get_prob_branchy(c, token_prob[6])) {
+                    if (!vp56_rac_get_prob_branchy(c, token_prob[7])) { // DCT_CAT1
+                        coeff  = 5 + vp56_rac_get_prob(c, vp8_dct_cat1_prob[0]);
+                    } else {                                    // DCT_CAT2
+                        coeff  = 7;
+                        coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[0]) << 1;
+                        coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[1]);
+                    }
+                } else {    // DCT_CAT3 and up
+                    int a = vp56_rac_get_prob(c, token_prob[8]);
+                    int b = vp56_rac_get_prob(c, token_prob[9+a]);
+                    int cat = (a<<1) + b;
+                    coeff  = 3 + (8<<cat);
+                    coeff += vp8_rac_get_coeff(c, vp8_dct_cat_prob[cat]);
+                }
+            }
+        }
+
         // todo: full [16] qmat? load into register?
-        block[zigzag_scan[i]] = (vp8_rac_get(c) ? -token : token) * qmul[!!i];
-        nonzero = i+1;
-        offset = 0;
-    }
+        block[zigzag_scan[i]] = (vp8_rac_get(c) ? -coeff : coeff) * qmul[!!i];
+        nonzero = ++i;
+    } while (i < 16);
+
     return nonzero;
 }
 

Modified: trunk/libavcodec/vp8data.h
==============================================================================
--- trunk/libavcodec/vp8data.h	Fri Jul 23 23:46:14 2010	(r24467)
+++ trunk/libavcodec/vp8data.h	Fri Jul 23 23:46:17 2010	(r24468)
@@ -329,21 +329,6 @@ static const uint8_t vp8_coeff_band[16] 
     0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7
 };
 
-static const int8_t vp8_coeff_tree[NUM_DCT_TOKENS-1][2] =
-{
-    { -DCT_EOB, 1 },                // '0'
-     { -DCT_0, 2 },                 // '10'
-      { -DCT_1, 3 },                // '110'
-       { 4, 6 },
-        { -DCT_2, 5 },              // '11100'
-         { -DCT_3, -DCT_4 },        // '111010', '111011'
-        { 7, 8 },
-         { -DCT_CAT1, -DCT_CAT2 },  // '111100', '111101'
-         { 9, 10 },
-          { -DCT_CAT3, -DCT_CAT4 }, // '1111100', '1111101'
-          { -DCT_CAT5, -DCT_CAT6 }, // '1111110', '1111111'
-};
-
 static const uint8_t vp8_dct_cat1_prob[] = { 159, 0 };
 static const uint8_t vp8_dct_cat2_prob[] = { 165, 145, 0 };
 static const uint8_t vp8_dct_cat3_prob[] = { 173, 148, 140, 0 };
@@ -351,10 +336,9 @@ static const uint8_t vp8_dct_cat4_prob[]
 static const uint8_t vp8_dct_cat5_prob[] = { 180, 157, 141, 134, 130, 0 };
 static const uint8_t vp8_dct_cat6_prob[] = { 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0 };
 
-static const uint8_t * const vp8_dct_cat_prob[6] =
+// only used for cat3 and above; cat 1 and 2 are referenced directly
+static const uint8_t * const vp8_dct_cat_prob[] =
 {
-    vp8_dct_cat1_prob,
-    vp8_dct_cat2_prob,
     vp8_dct_cat3_prob,
     vp8_dct_cat4_prob,
     vp8_dct_cat5_prob,



More information about the ffmpeg-cvslog mailing list