[FFmpeg-cvslog] vc2enc: halve allocated table size, refactor and optimize quantization

Fri Feb 26 15:51:52 CET 2016

ffmpeg | branch: master | Rostislav Pehlivanov <atomnuker at gmail.com> | Fri Feb 26 14:47:06 2016 +0000| [2b811e4605ebd570c647f8a4ed0e021b64fc4208] | committer: Rostislav Pehlivanov

vc2enc: halve allocated table size, refactor and optimize quantization

Since coefficients differ only in the last bit when writing to the
bitstream it was possible to remove the sign from the tables, thus
halving them. Also now all quantization is done in the unsigned domain
as the sign is completely separate, which gets rid of the need to do
quantization on 32 bit signed integers.

Overall, this slightly speeds up the encoder depending on the machine.
The commit still generates bit-identical files as before the commit.

Signed-off-by: Rostislav Pehlivanov <atomnuker at gmail.com>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=2b811e4605ebd570c647f8a4ed0e021b64fc4208
---

 libavcodec/vc2enc.c     |  108 ++++++++++++++++++++++-------------------------
 libavcodec/vc2enc_dwt.h |    1 -
 2 files changed, 50 insertions(+), 59 deletions(-)

diff --git a/libavcodec/vc2enc.c b/libavcodec/vc2enc.c
index 81de82c..53e2f14 100644
--- a/libavcodec/vc2enc.c
+++ b/libavcodec/vc2enc.c
@@ -32,6 +32,8 @@
 /* Quantizations above this usually zero coefficients and lower the quality */
 #define MAX_QUANT_INDEX 50
 
+/* Total range is -COEF_LUT_TAB to +COEFF_LUT_TAB, but total tab size is half
+ * (COEF_LUT_TAB*MAX_QUANT_INDEX) since the sign is appended during encoding */
 #define COEF_LUT_TAB 2048
 
 enum VC2_QM {
@@ -104,7 +106,7 @@ typedef struct VC2EncContext {
     uint8_t quant[MAX_DWT_LEVELS][4];
 
     /* Coefficient LUT */
-    uint32_t *coef_lut_val;
+    uint16_t *coef_lut_val;
     uint8_t  *coef_lut_len;
 
     int num_x; /* #slices horizontally */
@@ -147,7 +149,7 @@ static av_always_inline void put_padding(PutBitContext *pb, int bytes)
         put_bits(pb, bits, 0);
 }
 
-static av_always_inline void put_vc2_ue_uint(PutBitContext *pb, uint32_t val)
+static av_always_inline void put_vc2_ue_uint(PutBitContext *pb, uint16_t val)
 {
     int i;
     int pbits = 0, bits = 0, topbit = 1, maxval = 1;
@@ -192,7 +194,7 @@ static av_always_inline int count_vc2_ue_uint(uint16_t val)
 }
 
 static av_always_inline void get_vc2_ue_uint(uint16_t val, uint8_t *nbits,
-                                               uint32_t *eval)
+                                             uint16_t *eval)
 {
     int i;
     int pbits = 0, bits = 0, topbit = 1, maxval = 1;
@@ -539,29 +541,7 @@ static void encode_picture_start(VC2EncContext *s)
     encode_wavelet_transform(s);
 }
 
-#define QUANT(c)  \
-    c <<= 2;      \
-    c /= qfactor; \
-
-static av_always_inline void coeff_quantize_get(qcoef coeff, int qfactor,
-                                                uint8_t *len, uint32_t *eval)
-{
-    QUANT(coeff)
-    get_vc2_ue_uint(FFABS(coeff), len, eval);
-    if (coeff) {
-        *eval = (*eval << 1) | (coeff < 0);
-        *len += 1;
-    }
-}
-
-static av_always_inline void coeff_quantize_encode(PutBitContext *pb, qcoef coeff,
-                                                   int qfactor)
-{
-    QUANT(coeff)
-    put_vc2_ue_uint(pb, FFABS(coeff));
-    if (coeff)
-        put_bits(pb, 1, coeff < 0);
-}
+#define QUANT(c, qf) (((c) << 2)/(qf))
 
 /* VC-2 13.5.5.2 - slice_band() */
 static void encode_subband(VC2EncContext *s, PutBitContext *pb, int sx, int sy,
@@ -569,23 +549,33 @@ static void encode_subband(VC2EncContext *s, PutBitContext *pb, int sx, int sy,
 {
     int x, y;
 
-    int left   = b->width  * (sx+0) / s->num_x;
-    int right  = b->width  * (sx+1) / s->num_x;
-    int top    = b->height * (sy+0) / s->num_y;
-    int bottom = b->height * (sy+1) / s->num_y;
+    const int left   = b->width  * (sx+0) / s->num_x;
+    const int right  = b->width  * (sx+1) / s->num_x;
+    const int top    = b->height * (sy+0) / s->num_y;
+    const int bottom = b->height * (sy+1) / s->num_y;
 
-    int qfactor = ff_dirac_qscale_tab[quant];
-    uint8_t  *len_lut = &s->coef_lut_len[2*quant*COEF_LUT_TAB + COEF_LUT_TAB];
-    uint32_t *val_lut = &s->coef_lut_val[2*quant*COEF_LUT_TAB + COEF_LUT_TAB];
+    const int qfactor = ff_dirac_qscale_tab[quant];
+    const uint8_t  *len_lut = &s->coef_lut_len[quant*COEF_LUT_TAB];
+    const uint16_t *val_lut = &s->coef_lut_val[quant*COEF_LUT_TAB];
 
     dwtcoef *coeff = b->buf + top * b->stride;
 
     for (y = top; y < bottom; y++) {
         for (x = left; x < right; x++) {
-            if (coeff[x] >= -COEF_LUT_TAB && coeff[x] < COEF_LUT_TAB)
-                put_bits(pb, len_lut[coeff[x]], val_lut[coeff[x]]);
-            else
-                coeff_quantize_encode(pb, coeff[x], qfactor);
+            const int neg = coeff[x] < 0;
+            uint16_t c_abs = FFABS(coeff[x]);
+            if (c_abs < COEF_LUT_TAB) {
+                const uint8_t len  = len_lut[c_abs];
+                if (len == 1)
+                    put_bits(pb, 1, 1);
+                else
+                    put_bits(pb, len + 1, (val_lut[c_abs] << 1) | neg);
+            } else {
+                c_abs = QUANT(c_abs, qfactor);
+                put_vc2_ue_uint(pb, c_abs);
+                if (c_abs)
+                    put_bits(pb, 1, neg);
+            }
         }
         coeff += b->stride;
     }
@@ -594,7 +584,7 @@ static void encode_subband(VC2EncContext *s, PutBitContext *pb, int sx, int sy,
 static int count_hq_slice(VC2EncContext *s, int slice_x,
                           int slice_y, int quant_idx)
 {
-    int x, y, left, right, top, bottom, qfactor;
+    int x, y;
     uint8_t quants[MAX_DWT_LEVELS][4];
     int bits = 0, p, level, orientation;
 
@@ -611,28 +601,29 @@ static int count_hq_slice(VC2EncContext *s, int slice_x,
         bits += 8;
         for (level = 0; level < s->wavelet_depth; level++) {
             for (orientation = !!level; orientation < 4; orientation++) {
-                dwtcoef *buf;
                 SubBand *b = &s->plane[p].band[level][orientation];
 
-                quant_idx = quants[level][orientation];
-                qfactor = ff_dirac_qscale_tab[quant_idx];
+                const int q_idx = quants[level][orientation];
+                const uint8_t *len_lut = &s->coef_lut_len[q_idx*COEF_LUT_TAB];
+                const int qfactor = ff_dirac_qscale_tab[q_idx];
 
-                left   = b->width  * slice_x    / s->num_x;
-                right  = b->width  *(slice_x+1) / s->num_x;
-                top    = b->height * slice_y    / s->num_y;
-                bottom = b->height *(slice_y+1) / s->num_y;
+                const int left   = b->width  * slice_x    / s->num_x;
+                const int right  = b->width  *(slice_x+1) / s->num_x;
+                const int top    = b->height * slice_y    / s->num_y;
+                const int bottom = b->height *(slice_y+1) / s->num_y;
 
-                buf = b->buf + top * b->stride;
+                dwtcoef *buf = b->buf + top * b->stride;
 
                 for (y = top; y < bottom; y++) {
                     for (x = left; x < right; x++) {
-                        qcoef coeff = (qcoef)buf[x];
-                        if (coeff >= -COEF_LUT_TAB && coeff < COEF_LUT_TAB) {
-                            bits += s->coef_lut_len[2*quant_idx*COEF_LUT_TAB + coeff + COEF_LUT_TAB];
+                        uint16_t c_abs = FFABS(buf[x]);
+                        if (c_abs < COEF_LUT_TAB) {
+                            const int len = len_lut[c_abs];
+                            bits += len + (len != 1);
                         } else {
-                            QUANT(coeff)
-                            bits += count_vc2_ue_uint(FFABS(coeff));
-                            bits += !!coeff;
+                            c_abs = QUANT(c_abs, qfactor);
+                            bits += count_vc2_ue_uint(c_abs);
+                            bits += !!c_abs;
                         }
                     }
                     buf += b->stride;
@@ -1122,19 +1113,20 @@ static av_cold int vc2_encode_init(AVCodecContext *avctx)
         goto alloc_fail;
 
     /* Lookup tables */
-    s->coef_lut_len = av_malloc(2*COEF_LUT_TAB*s->q_ceil*sizeof(*s->coef_lut_len));
+    s->coef_lut_len = av_malloc(COEF_LUT_TAB*s->q_ceil*sizeof(*s->coef_lut_len));
     if (!s->coef_lut_len)
         goto alloc_fail;
 
-    s->coef_lut_val = av_malloc(2*COEF_LUT_TAB*s->q_ceil*sizeof(*s->coef_lut_val));
+    s->coef_lut_val = av_malloc(COEF_LUT_TAB*s->q_ceil*sizeof(*s->coef_lut_val));
     if (!s->coef_lut_val)
         goto alloc_fail;
 
     for (i = 0; i < s->q_ceil; i++) {
-        for (j = -COEF_LUT_TAB; j < COEF_LUT_TAB; j++) {
-            uint8_t  *len_lut = &s->coef_lut_len[2*i*COEF_LUT_TAB + COEF_LUT_TAB];
-            uint32_t *val_lut = &s->coef_lut_val[2*i*COEF_LUT_TAB + COEF_LUT_TAB];
-            coeff_quantize_get(j, ff_dirac_qscale_tab[i], &len_lut[j], &val_lut[j]);
+        for (j = 0; j < COEF_LUT_TAB; j++) {
+            uint8_t  *len_lut = &s->coef_lut_len[i*COEF_LUT_TAB];
+            uint16_t *val_lut = &s->coef_lut_val[i*COEF_LUT_TAB];
+            get_vc2_ue_uint(QUANT(j, ff_dirac_qscale_tab[i]),
+                            &len_lut[j], &val_lut[j]);
         }
     }
 
diff --git a/libavcodec/vc2enc_dwt.h b/libavcodec/vc2enc_dwt.h
index 8e1b614..a39a1cd 100644
--- a/libavcodec/vc2enc_dwt.h
+++ b/libavcodec/vc2enc_dwt.h
@@ -26,7 +26,6 @@
 #include <stdint.h>
 
 typedef int16_t dwtcoef;
-typedef int32_t qcoef;   /* Quantization needs more precision */
 
 /* Only Deslauriers-Dubuc (9,7) and LeGall (5,3) supported! */