[FFmpeg-devel] [PATCH] Mimic encoder

Ramiro Polla ramiro
Wed May 21 01:33:35 CEST 2008


Hello,

>> static VlcSymbol vlc_alphabet_gen[16][8] = {
>> { {  3, 0x00000001,  0, 0x00000000, }, {  4, 0x00000007,  0, 0x00000000, },
>>   {  6, 0x00000027,  0, 0x00000000, }, {  8, 0x000000cf,  0, 0x00000000, },
>>   { 10, 0x0000035f,  0, 0x00000000, }, { 12, 0x00000eff,  0, 0x00000000, },
>>   { 17, 0x0001fd7f,  0, 0x00000000, }, { 17, 0x0001fd01,  0, 0x00000000, }, },
>> { {  5, 0x00000017,  0, 0x00000000, }, {  8, 0x000000e7,  0, 0x00000000, },
>>   {  9, 0x000001d7,  0, 0x00000000, }, { 12, 0x00000f8f,  0, 0x00000000, },
>>   { 15, 0x00007f1f,  0, 0x00000000, }, { 16, 0x0000fe7f,  0, 0x00000000, },
>>   { 27, 0x07fffff9,  7, 0x0000007f, }, { 27, 0x07fffff9,  7, 0x00000001, }, },
>> { {  6, 0x00000037,  0, 0x00000000, }, {  9, 0x000001ef,  0, 0x00000000, },
>>   { 12, 0x00000fd7,  0, 0x00000000, }, { 13, 0x00001fbf,  0, 0x00000000, },
>>   { 25, 0x01ffff7f,  0, 0x00000000, }, { 30, 0x3ffffe3f,  0, 0x00000000, },
>>   { 27, 0x07fffffa,  7, 0x0000007f, }, { 27, 0x07fffffa,  7, 0x00000001, }, },
>> { {  7, 0x00000071,  0, 0x00000000, }, { 10, 0x000003ef,  0, 0x00000000, },
>>   { 17, 0x0001ffdf,  0, 0x00000000, }, { 21, 0x001fffbf,  0, 0x00000000, },
>>   { 26, 0x03ffff1f,  0, 0x00000000, }, { 30, 0x3ffffe7f,  0, 0x00000000, },
>>   { 27, 0x07fffffb,  7, 0x0000007f, }, { 27, 0x07fffffb,  7, 0x00000001, }, },
>> { {  8, 0x000000f1,  0, 0x00000000, }, { 11, 0x000007e3,  0, 0x00000000, },
>>   { 18, 0x0003ffc7,  0, 0x00000000, }, { 22, 0x003fff8f,  0, 0x00000000, },
>>   { 26, 0x03ffff3f,  0, 0x00000000, }, { 30, 0x3ffffebf,  0, 0x00000000, },
>>   { 28, 0x0ffffff8,  7, 0x0000007f, }, {  0, 0x00000000,  0, 0x00000000, }, },
>> { {  8, 0x000000f3,  0, 0x00000000, }, { 11, 0x000007e7,  0, 0x00000000, },
>>   { 18, 0x0003ffcf,  0, 0x00000000, }, { 22, 0x003fff9f,  0, 0x00000000, },
>>   { 26, 0x03ffff5f,  0, 0x00000000, }, { 30, 0x3ffffeff,  0, 0x00000000, },
>>   { 28, 0x0ffffff9,  7, 0x0000007f, }, {  0, 0x00000000,  0, 0x00000000, }, },
>> { {  8, 0x000000f5,  0, 0x00000000, }, { 14, 0x00003feb,  0, 0x00000000, },
>>   { 18, 0x0003ffd7,  0, 0x00000000, }, { 22, 0x003fffaf,  0, 0x00000000, },
>>   { 26, 0x03ffff7f,  0, 0x00000000, }, { 31, 0x7ffffe3f,  0, 0x00000000, },
>>   { 28, 0x0ffffffa,  7, 0x0000007f, }, {  0, 0x00000000,  0, 0x00000000, }, },
>> { {  9, 0x000001f3,  0, 0x00000000, }, { 14, 0x00003fef,  0, 0x00000000, },
>>   { 18, 0x0003ffdf,  0, 0x00000000, }, { 22, 0x003fffbf,  0, 0x00000000, },
>>   { 27, 0x07ffff1f,  0, 0x00000000, }, { 31, 0x7ffffe7f,  0, 0x00000000, },
>>   { 28, 0x0ffffffb,  7, 0x0000007f, }, {  0, 0x00000000,  0, 0x00000000, }, },
>> { {  9, 0x000001f5,  0, 0x00000000, }, { 15, 0x00007fe3,  0, 0x00000000, },
>>   { 19, 0x0007ffc7,  0, 0x00000000, }, { 23, 0x007fff8f,  0, 0x00000000, },
>>   { 27, 0x07ffff3f,  0, 0x00000000, }, { 31, 0x7ffffebf,  0, 0x00000000, },
>>   { 29, 0x1ffffff8,  7, 0x0000007f, }, {  0, 0x00000000,  0, 0x00000000, }, },
>> { { 11, 0x000007f7,  0, 0x00000000, }, { 15, 0x00007fe7,  0, 0x00000000, },
>>   { 19, 0x0007ffcf,  0, 0x00000000, }, { 23, 0x007fff9f,  0, 0x00000000, },
>>   { 27, 0x07ffff5f,  0, 0x00000000, }, { 31, 0x7ffffeff,  0, 0x00000000, },
>>   { 29, 0x1ffffff9,  7, 0x0000007f, }, {  0, 0x00000000,  0, 0x00000000, }, },
>> { { 12, 0x00000ff1,  0, 0x00000000, }, { 15, 0x00007feb,  0, 0x00000000, },
>>   { 19, 0x0007ffd7,  0, 0x00000000, }, { 23, 0x007fffaf,  0, 0x00000000, },
>>   { 27, 0x07ffff7f,  0, 0x00000000, }, { 32, 0xfffffe3f,  0, 0x00000000, },
>>   { 29, 0x1ffffffa,  7, 0x0000007f, }, {  0, 0x00000000,  0, 0x00000000, }, },
>> { { 12, 0x00000ff3,  0, 0x00000000, }, { 15, 0x00007fef,  0, 0x00000000, },
>>   { 19, 0x0007ffdf,  0, 0x00000000, }, { 23, 0x007fffbf,  0, 0x00000000, },
>>   { 28, 0x0fffff1f,  0, 0x00000000, }, { 32, 0xfffffe7f,  0, 0x00000000, },
>>   { 29, 0x1ffffffb,  7, 0x0000007f, }, {  0, 0x00000000,  0, 0x00000000, }, },
>> { { 12, 0x00000ff5,  0, 0x00000000, }, { 16, 0x0000ffe3,  0, 0x00000000, },
>>   { 20, 0x000fffc7,  0, 0x00000000, }, { 24, 0x00ffff8f,  0, 0x00000000, },
>>   { 28, 0x0fffff3f,  0, 0x00000000, }, { 32, 0xfffffebf,  0, 0x00000000, },
>>   { 30, 0x3ffffff8,  7, 0x0000007f, }, {  0, 0x00000000,  0, 0x00000000, }, },
>> { { 12, 0x00000ff7,  0, 0x00000000, }, { 16, 0x0000ffe7,  0, 0x00000000, },
>>   { 20, 0x000fffcf,  0, 0x00000000, }, { 24, 0x00ffff9f,  0, 0x00000000, },
>>   { 28, 0x0fffff5f,  0, 0x00000000, }, { 32, 0xfffffeff,  0, 0x00000000, },
>>   { 30, 0x3ffffff9,  7, 0x0000007f, }, {  0, 0x00000000,  0, 0x00000000, }, },
>> { { 13, 0x00001ff1,  0, 0x00000000, }, { 16, 0x0000ffeb,  0, 0x00000000, },
>>   { 20, 0x000fffd7,  0, 0x00000000, }, { 24, 0x00ffffaf,  0, 0x00000000, },
>>   { 28, 0x0fffff7f,  0, 0x00000000, }, { 27, 0x07fffff8,  6, 0x0000003f, },
>>   { 30, 0x3ffffffa,  7, 0x0000007f, }, {  0, 0x00000000,  0, 0x00000000, }, },
>> { { 13, 0x00001ff3,  0, 0x00000000, }, {  2, 0x00000003,  0, 0x00000000, },
>>   {  3, 0x00000007,  0, 0x00000000, }, { 31, 0x7ffffffb,  4, 0x0000000f, },
>>   {  5, 0x0000001f,  0, 0x00000000, }, {  6, 0x0000003f,  0, 0x00000000, },
>>   {  7, 0x0000007f,  0, 0x00000000, }, {  0, 0x00000000,  0, 0x00000000, }, },
>> };
> 
> This looks duplicated from mimic.c

I couldn't find a way to split it further or reuse values from mimic.c. 
I don't think they can be reused.

> [...]
>> static int mimic_encode_init(AVCodecContext *avctx)
>> {
>>     MimicContext *ctx = avctx->priv_data;
>>     int i;
>>
>>     if(!(avctx->width == 160 && avctx->height == 120) &&
>>        !(avctx->width == 320 && avctx->height == 240)) {
>>         av_log(avctx, AV_LOG_ERROR, "size must be 320x240 or 160x120\n");
>>         return -1;
>>     }
>>
>>     ctx->avctx = avctx;
>>
>>     for (i = 0 ; i < 3 ; i++) {
>>         ctx->num_vblocks[i] = -((-avctx->height) >> (3 + !!i));
>>         ctx->num_hblocks[i] =     avctx->width   >> (3 + !!i) ;
>>     }
>>
>>     ctx->cur_index = 15;
>>     ctx->num_coeffs = 28;
>>
>>     for (i = 0; i < 16; i++) {
>>         if(avctx->get_buffer(avctx, &ctx->buf_ptrs[i])) {
>>             av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
>>             return -1;
>>         }
>>         ff_mimic_prepare_avpic(ctx, &ctx->flipped_ptrs[i],
>>               (AVPicture*) &ctx->buf_ptrs    [i]);
>>     }
>>
>>     /* TODO Add a way to get quality per frame from the context. */
>>     ctx->quality = ENCODER_QUALITY_DEFAULT;
>>
> 
>>     avcodec_get_frame_defaults((AVFrame*)&ctx->picture);
>>     avctx->coded_frame = (AVFrame*)&ctx->picture;
> 
> senseless casts

Removed.

>> static void vlc_encode_block(MimicContext *ctx,
>>                               DCTELEM *idct_block, const DCTELEM *dct_block,
>>                               int num_coeffs, int qscale)
>> {
>>     const int qscale_dec = qscale << 2;
>>     int num_zeroes = 0;
>>     int value;
>>     int i;
>>
>>     memset(idct_block, 0, sizeof(DCTELEM)*64);
>>
>>     value = shift_rnd(dct_block[0], 6);
>>     idct_block[0] = value << 3;
>>
>>     /* The DC value is written out as is. */
>>     put_bits(&ctx->pb, 8, value);
>>
>>     for (i = 1; i < num_coeffs && num_zeroes <= 14; i++) {
>                                                ^^ ^^
> If you cannot encode >14 zeros but there are >14 zeros then you should
> check if it's better to encode the element most different from zero as
> non zero or to encode the rest of the block as non zero.

It becomes harder to do that with dct_quantize, since I don't have 
access to fdct's unquantized output. Do you think this is so important 
that it'd be worth re-fdct'ing and quantizing again in those cases?

>>         int coeff;
>>         value = dct_block[ff_zigzag_transposed[i]];
>>
>>         if(i < 3) {
>>             value = av_clip(shift_rnd(value, 7), -120, 120);
>>             coeff = value << 4;
>>         } else {
> 
>>             value = av_rescale(shift_rnd(value,5),1001,qscale);
> 
> no, double rounding/quantization is not acceptable
> 
> Also we have existing dct_quantize() code which also exists in MMX&co, 
> that should be used. And if needed be cleaned up so it is cleanly useable.

Done.

>>             value = av_clip(value, -120, 120);
>>             coeff = (value * qscale_dec) / 1001;
>>         }
>>
>>         idct_block[ctx->scantable.permutated[i]] = coeff;
>>
>>         if(value) {
>>             VlcSymbol sym = vlc_alphabet[num_zeroes][FFABS(value) - 1];
>>
> 
>>             if(sym.length1 <= 0)
>>                 break;
> 
> When can this be true?

Never.

>> static uint8_t *encode_diff(MimicContext *ctx, int plane, int is_chroma,
>>                             uint8_t *cur, uint8_t *prev, int offset,
>>                             int stride, int rows)
>> {
>>     uint8_t *ret = NULL;
>>     int match;
>>     int i;
>>
>>     match = ctx->dsp.sse[1](NULL, prev, cur, stride, rows);
> 
> This should use the function selected with mb_cmp

Done, but I couldn't find a clean way to adapt the results into a 
normalized range, so I experimentally determined thresholds that gave 
about the same results.

>>     if(match < threshold[is_chroma]) {
>>         put_bits(&ctx->pb, 1, !is_chroma);
>>         ret = prev;
>>     } else if(!is_chroma) {
>>         int num_backrefs = av_clip(ctx->avctx->frame_number - 1, 0, 15);
>>         int best_match = threshold[is_chroma];
>>         int best_index = 0;
>>
>>         put_bits(&ctx->pb, 1, 0);
>>
>>         for (i = 1; i <= num_backrefs; i++) {
>>             int backref = (ctx->cur_index + i) & 15;
>>             uint8_t *backbuf = ctx->flipped_ptrs[backref].data[plane] + offset;
>>
>>             match = ctx->dsp.sse[1](NULL, backbuf, cur, stride, rows);
>>
>>             if(match < best_match) {
>>                 best_index = i;
>>                 best_match = match;
>>             }
>>         }
>>
>>         if(best_index) {
>>             int backref = (ctx->cur_index + best_index) & 15;
>>
>>             ret = ctx->flipped_ptrs[backref].data[plane] + offset;
>>             put_bits(&ctx->pb, 1, 1);
>>             put_bits(&ctx->pb, 4, best_index);
>>         }
>>     }
> 
> You should encode the block in all possible choices and select the one
> which minimizes SSE + lambda2*bitrate. Where lambda2 is a constant set
> based on quality. At least when mb_decision == FF_MB_DECISION_RD
> if mb_decision is something else then you can use such a heuristic
> as above ...
> 
> 
> lambda= AVFrame.quality;
> lambda2= (lambda*lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
> 
> and then minimize (SSE<<FF_LAMBDA_SHIFT) + lambda2*bitrate
> 
> Why is this better than a simple threshold?
> Well, if you have 2 independent blocks and you choose for each the encoding
> which minimizes SSE[block_i] + C*bitrate[block_i] then it's obvious that
> both together will be encoded so as SSE + C*bitrate of both are at their
> global minimum.
> 
> Why is this minimum a good choice?
> Because each such minimum for each specific constant corresponds to an
> encoding which maximizes the quality (minimizes sse) for a given bitrate.

Should I test only in backreferences, or with the encoding too? It's 
always 1 bit to copy from previous, 6 bits to copy from backreferences, 
and a bunch of bits (normally at least 30 and up to hundreds) to 
encode a new block.

In SSE + C*bitrate, I didn't really understand what value C should be.

>>     return ret;
>> }
>>
> 
>> static void encode_plane(MimicContext *ctx, int plane, int is_pframe)
>> {
>>     const int is_chroma = !!plane;
>>     const int stride = ctx->flipped_ptrs[ctx->cur_index ].linesize[plane];
>>     uint8_t *cur     = ctx->flipped_ptrs[ctx->cur_index ].data    [plane];
>>     uint8_t *prev    = ctx->flipped_ptrs[ctx->prev_index].data    [plane];
>>     const int qscale = av_clip(10000-ctx->quality,is_chroma?1000:2000,10000);
>>     int rows_shift = 0;
>>     int offset = 0;
>>     int x, y;
>>
> 
>>     /* Bleed bottom line for 160x120 videos */
>>     if(plane && ctx->avctx->height & 15) {
>>         ctx->dsp.draw_edges(cur, stride,
>>                             ctx->avctx->width>>1, ctx->avctx->height>>1, 4);
>>         rows_shift = 8;
>>     }
> 
> Reflection is better than duplication for "invisible pixels" quality wise
> IIRC.

Reflected.

I also removed the IDCT. With it, the output files were 30% to 400% 
bigger than without it, and the quality wasn't so great. I don't think I 
really need it since each block is either copied or encoded. There's no 
motion estimation or prediction of any kind.

I tested with a bunch of inputs, ranging from cif to good webcams and 
noisy webcams. Using IDCT made the encoder decide to not copy quite 
frequently, leading to lots of updates on static parts of the image, 
which is kind of annoying. If I raised the threshold, the quality on 
moving parts would be crap.

Ramiro Polla
-------------- next part --------------
A non-text attachment was scrubbed...
Name: mimicenc.c
Type: text/x-csrc
Size: 17765 bytes
Desc: not available
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20080521/b0196745/attachment.c>



More information about the ffmpeg-devel mailing list