[Ffmpeg-devel] Trellis Quantization Applied To (A)DPCM

Sun Jun 4 06:45:36 CEST 2006

On Sat, 3 Jun 2006, Loren Merritt wrote:

> On Sat, 3 Jun 2006, Mike Melanson wrote:
>
>> Hi,
>> 	I see that Loren took Michael's suggestion seriously and applied 
>> Trellis quantization to the ADPCM encoder. This reminds me of discussions 
>> on MPlayer-devel about optimizing ADPCM decoding with SIMD instructions. :)
>>
>> 	So, can this be applied to, e.g., Creative YUV? I had wanted to write 
>> an encoder for that at one point but I sort of got stuck wondering about an 
>> optimal (quality-wise) method for creating the delta tables.
>>
>>  http://wiki.multimedia.cx/index.php?title=CYUV
>
> Trellis can tell you the optimal sequence of deltas in each line, but can't 
> determine the optimal table to choose them from.
>
> The obvious heuristic would be: count up how many times each delta appears in 
> the frame, then run K-Means.

Hmm... my initial implementation doesn't see noticeable gains from K-Means 
over just using a constant exponential table.

--Loren Merritt
-------------- next part --------------
Index: allcodecs.c
===================================================================

--- allcodecs.c	(revision 5451)
+++ allcodecs.c	(working copy)
@@ -172,6 +172,9 @@
 #ifdef CONFIG_SNOW_ENCODER
     register_avcodec(&snow_encoder);
 #endif //CONFIG_SNOW_ENCODER
+#ifdef CONFIG_CYUV_ENCODER
+    register_avcodec(&cyuv_encoder);
+#endif //CONFIG_CYUV_ENCODER
 #ifdef CONFIG_ZLIB_ENCODER
     register_avcodec(&zlib_encoder);
 #endif //CONFIG_ZLIB_ENCODER
Index: avcodec.h
===================================================================
--- avcodec.h	(revision 5451)
+++ avcodec.h	(working copy)
@@ -2097,6 +2097,7 @@
 extern AVCodec vcr1_encoder;
 extern AVCodec ffv1_encoder;
 extern AVCodec snow_encoder;
+extern AVCodec cyuv_encoder;
 extern AVCodec mdec_encoder;
 extern AVCodec zlib_encoder;
 extern AVCodec sonic_encoder;
Index: cyuv.c
===================================================================
--- cyuv.c	(revision 5451)
+++ cyuv.c	(working copy)
@@ -39,15 +39,15 @@
 #include "mpegvideo.h"
 
 
-typedef struct CyuvDecodeContext {
+typedef struct CyuvContext {
     AVCodecContext *avctx;
     int width, height;
     AVFrame frame;
-} CyuvDecodeContext;
+} CyuvContext;
 
 static int cyuv_decode_init(AVCodecContext *avctx)
 {
-    CyuvDecodeContext *s = avctx->priv_data;
+    CyuvContext *s = avctx->priv_data;
 
     s->avctx = avctx;
     s->width = avctx->width;
@@ -65,7 +65,7 @@
                              void *data, int *data_size,
                              uint8_t *buf, int buf_size)
 {
-    CyuvDecodeContext *s=avctx->priv_data;
+    CyuvContext *s=avctx->priv_data;
 
     unsigned char *y_plane;
     unsigned char *u_plane;
@@ -168,16 +168,225 @@
 
 static int cyuv_decode_end(AVCodecContext *avctx)
 {
-/*    CyuvDecodeContext *s = avctx->priv_data;*/
+/*    CyuvContext *s = avctx->priv_data;*/
 
     return 0;
 }
 
+/* Set up the encoder: needs YUV411P input and a positive width that is a
+ * multiple of 4 (the bitstream is written in groups of 4 luma samples). */
+static int cyuv_encode_init(AVCodecContext *avctx)
+{
+    CyuvContext *s = avctx->priv_data;
+
+    s->avctx = avctx;
+    s->height = avctx->height;
+    s->width = avctx->width;
+    /* every row unconditionally codes one 4-pixel group, so width < 4 is invalid */
+    if (s->width < 4 || (s->width & 0x3))
+        return -1;
+    if (avctx->pix_fmt != PIX_FMT_YUV411P)
+        return -1;
+    avctx->bits_per_sample= 12;
+    avctx->coded_frame= &s->frame;
+    return 0;
+}
+
+static const int8_t exp_deltas[16] = { /* default deltas, ascending: 0 and +/- powers of two; also the k-means starting table */
+    -128, -64, -32, -16, -8, -4, -2, -1, 0, 1, 2, 4, 8, 16, 32, 64
+};
+
+#define EXCHANGE(a,b) {typeof(a) t = a; a = b; b = t;} /* swap two lvalues; relies on the GNU typeof extension */
+#undef rand /* NOTE(review): presumably lifts a project-wide macro that blocks rand() -- confirm */
+
+/* Up to 20 k-means passes over the delta histogram counts[] (index = delta & 0xff),
+ * then sort table[] ascending. Returns the count-weighted squared error. */
+static uint64_t k_means(int8_t *table, int counts[256], int k)
+{
+    int avg[k], den[k];
+    int pass, done, i, j, v;
+    uint64_t residual = 0;
+    for(pass=0; pass<20; pass++){
+        done = 1;
+        residual = 0;
+        memset(avg, 0, sizeof(avg));
+        memset(den, 0, sizeof(den));
+        for(j=0; j<256; j++){
+            int bi=0, bscore=INT_MAX;
+            for(i=0; i<k; i++){
+                int score = abs((int8_t)j - table[i]);
+                if(score < bscore){
+                    bscore = score;
+                    bi = i;
+                }
+            }
+            avg[bi] += (int8_t)j * counts[j];
+            den[bi] += counts[j];
+            residual += (uint64_t)(bscore*bscore) * counts[j]; /* squared delta error, not index distance */
+        }
+        for(i=0; i<k; i++){
+            if(den[i]) {
+                v = ROUNDED_DIV(avg[i], den[i]);
+                done &= (v == table[i]);
+            }else{
+                v = rand()&0xff; /* empty cluster: reseed it at random */
+                done &= (v == table[i]) || (pass > 5);
+            }
+            table[i] = v;
+        }
+        if(done)
+            break;
+    }
+    for(i=k-1; i>0; i--) /* bubble sort; j+1 must never reach k */
+        for(j=0; j<i; j++)
+            if(table[j] > table[j+1])
+                EXCHANGE(table[j], table[j+1]);
+    av_log(NULL, AV_LOG_INFO, "table(%2d,%9llx):", pass, (unsigned long long)residual);
+    for(i=0; i<k; i++)
+        av_log(NULL, AV_LOG_INFO, " %3d", table[i]);
+    av_log(NULL, AV_LOG_INFO, "\n");
+    return residual;
+}
+
+/* Fill table[16] for one plane: histogram the horizontal deltas of s->frame,
+ * run k_means() from exp_deltas, and (context_model > 1) try 30 random seeds. */
+static void build_table(CyuvContext *s, int8_t *table, int plane)
+{
+    const int linesize = s->frame.linesize[plane];
+    const int cols = plane ? s->width >> 2 : s->width; /* planes 1 and 2 are quarter width */
+    const uint8_t *row = s->frame.data[plane];
+    int hist[256] = {0};
+    uint64_t best, cur;
+    int8_t trial[16];
+    int col, line, attempt, n;
+
+    for(line=0; line<s->height; line++, row+=linesize)
+        for(col=1; col<cols; col++)
+            hist[(row[col]-row[col-1])&0xff]++;
+
+    memcpy(table, exp_deltas, 16);
+    best = k_means(table, hist, 16);
+    if(s->avctx->context_model <= 1)
+        return;
+    for(attempt=0; attempt<30; attempt++){
+        for(n=0; n<16; n++)
+            trial[n] = rand();
+        cur = k_means(trial, hist, 16);
+        if(cur >= best)
+            continue; /* keep the table only when it lowers the residual */
+        memcpy(table, trial, 16);
+        best = cur;
+    }
+    av_log(NULL, AV_LOG_INFO, "\n");
+}
+
+/* itable[d & 0xff] = index of the table[] entry nearest to delta d.
+ * table[] must be sorted ascending; equal neighbours are allowed. */
+static void build_inverse_table(uint8_t *itable, const int8_t *table)
+{
+    int d, i;
+
+    for(i=1; i<16; i++) /* random reseeding in k_means() can leave duplicates, so not '>' */
+        assert(table[i] >= table[i-1]);
+    for(d=-128; d<128; d++){
+        int bscore = INT_MAX;
+        /* distances shrink and then grow along a sorted table: stop at the turn */
+        for(i=0; i<16 && abs(d - table[i]) <= bscore; i++)
+            bscore = abs(d - table[i]);
+        itable[d&0xff] = i-1;
+    }
+}
+
+/* Pick the table index for sample; *pred is advanced by that delta (wrapped to 8 bits). */
+static int quantize_sample(int sample, int *pred, const int8_t *table, const uint8_t *itable)
+{
+    const int base = *pred;
+    int cand, best_err = INT_MAX, idx = itable[(sample - base) & 0xff];
+    *pred = base + table[idx];
+    if(!(*pred & ~0xff))
+        return idx; /* fast path: result already within 0..255 */
+    for(cand=0; cand<16; cand++){ /* otherwise try every entry with 8-bit wraparound */
+        const int err = abs(sample - ((base + table[cand]) & 0xff));
+        if(err >= best_err)
+            continue;
+        best_err = err;
+        idx = cand;
+    }
+    *pred = (base + table[idx]) & 0xff;
+    return idx;
+}
+
+#define put_byte(a, b) (*buf++ = (a) + ((b)<<4)) /* emit a byte: a = low nibble, b = high nibble */
+
+/* Encode one frame into buf: three 16-byte delta tables (Y, U, V), then 3 bytes
+ * per 4 pixels. Returns the coded size, or -1 when buf_size is too small. */
+static int cyuv_encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data)
+{
+    CyuvContext *s = avctx->priv_data;
+    AVFrame *pict = data;
+    int8_t *y_table = (int8_t*)buf +  0; /* tables are built in place in the output */
+    int8_t *u_table = (int8_t*)buf + 16;
+    int8_t *v_table = (int8_t*)buf + 32;
+    uint8_t y_itable[256], u_itable[256], v_itable[256];
+    int x, y;
+    const int coded_size = 48 + s->height * s->width * 3/4;
+    if(buf_size < coded_size) /* real runtime check: an assert vanishes under NDEBUG */
+        return -1;
+    s->frame = *pict;
+    if(avctx->context_model){
+        build_table(s, y_table, 0);
+        build_table(s, u_table, 1);
+        build_table(s, v_table, 2);
+    }else{
+        memcpy(y_table, exp_deltas, 16);
+        memcpy(u_table, exp_deltas, 16);
+        memcpy(v_table, exp_deltas, 16);
+    }
+    build_inverse_table(y_itable, y_table);
+    build_inverse_table(u_itable, u_table);
+    build_inverse_table(v_itable, v_table);
+    buf += 48;
+    for(y=0; y<s->height; y++){
+        uint8_t *y_ptr = pict->data[0] + y*pict->linesize[0];
+        uint8_t *u_ptr = pict->data[1] + y*pict->linesize[1];
+        uint8_t *v_ptr = pict->data[2] + y*pict->linesize[2];
+        int y_pred = clip_uint8(y_ptr[0]+8) & -16; /* row starts are stored as 4 bits: round to a multiple of 16 */
+        int u_pred = clip_uint8(u_ptr[0]+8) & -16;
+        int v_pred = clip_uint8(v_ptr[0]+8) & -16;
+        int y0, y1;
+        put_byte(y_pred>>4, u_pred>>4);
+        y0 = quantize_sample(y_ptr[1], &y_pred, y_table, y_itable);
+        put_byte(y0, v_pred>>4);
+        y0 = quantize_sample(y_ptr[2], &y_pred, y_table, y_itable);
+        y1 = quantize_sample(y_ptr[3], &y_pred, y_table, y_itable);
+        put_byte(y0, y1);
+        for(x=4; x<s->width; x+=4){
+            y_ptr += 4; u_ptr++; v_ptr++;
+            y0 = quantize_sample(y_ptr[0], &y_pred, y_table, y_itable);
+            y1 = quantize_sample(u_ptr[0], &u_pred, u_table, u_itable);
+            put_byte(y0, y1);
+            y0 = quantize_sample(y_ptr[1], &y_pred, y_table, y_itable);
+            y1 = quantize_sample(v_ptr[0], &v_pred, v_table, v_itable);
+            put_byte(y0, y1);
+            y0 = quantize_sample(y_ptr[2], &y_pred, y_table, y_itable);
+            y1 = quantize_sample(y_ptr[3], &y_pred, y_table, y_itable);
+            put_byte(y0, y1);
+        }
+    }
+    return coded_size;
+}
+
+static int cyuv_encode_end(AVCodecContext *avctx)
+{
+    return 0; /* no teardown work is performed here; always reports success */
+}
+
+
 AVCodec cyuv_decoder = {
     "cyuv",
     CODEC_TYPE_VIDEO,
     CODEC_ID_CYUV,
-    sizeof(CyuvDecodeContext),
+    sizeof(CyuvContext),
     cyuv_decode_init,
     NULL,
     cyuv_decode_end,
@@ -186,3 +395,13 @@
     NULL
 };
 
+AVCodec cyuv_encoder = { /* encoder entry; allcodecs.c registers it under CONFIG_CYUV_ENCODER */
+    "cyuv",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_CYUV,
+    sizeof(CyuvContext), /* same private context type as cyuv_decoder */
+    cyuv_encode_init,
+    cyuv_encode_frame,
+    cyuv_encode_end,
+};
+