[FFmpeg-devel] [PATCH] RoQ encoder

Fri May 4 23:57:45 CEST 2007

Hi

On Fri, May 04, 2007 at 06:56:31PM +0200, Vitor wrote:
> Hi,
> 
> As suggested in Mike Melanson blog, I modified Eric Lasota's RoQ 
> encoding patch for inclusion in FFmpeg. The main problem with the 
> original patch was that while RoQ is an YUV based codec, the patch 
> expected RGB data from FFmpeg.
> 
> I modified it to work with YUV without doing any conversion to RGB. 
> There were a point where the algorithm calculated the squared difference 
> of the RGB values of two pixel blocks. I changed it to calculate the 
> squared difference of the YUV values. It looks like it didn't worsen the 
> video quality.
> 
> I also renamed the variables and structures not to have the sb3_ prefix 
> anymore, merged a lot of files and changed the indentation.
> 
> This is my first patch, so any suggestion is welcome.

[...]
> +#ifdef CONFIG_ENCODERS

unneeded the file wont be compiled if this isnt defined

> +
> +//#define ROQ_FIRST_FRAME_SIZE     11025
> +//#define ROQ_FRAME_SIZE           735

useless

[...]
> +static void roq_dpcm_table_init()
> +{
> +    unsigned short i;
> +
> +    short diff;
> +    short diff2;
> +    unsigned short baseline;
> +    unsigned short projection;
> +
> +    unsigned short projectionIndex;
> +
> +    projectionIndex = 0;
> +    baseline = 0;
> +    projection = 1;
> +
> +    // Create a table of quick DPCM values
> +    for(i=0;i<MAX_DPCM;i++)
> +    {
> +        // Check the difference of the last projection
> +        // and (possibly) the next projection
> +        diff = i - baseline;
> +        diff2 = i - projection;
> +
> +        if(diff < 0)
> +            diff = -diff;
> +        if(diff2 < 0)
> +            diff2 = -diff2;
> +
> +        // Move the DPCM index up a notch if it's closer
> +        if(diff2 < diff)
> +        {
> +            projectionIndex++;
> +            baseline = projection;
> +            projection = (projectionIndex+1)*(projectionIndex+1);
> +        }
> +        dpcmValues[i] = (unsigned char)projectionIndex;
> +    }
> +}

the following is (significantly) simpler

for(i=0; i<MAX_DPCM; i++){
    int s= ff_sqrt(i);
    int mid= s*s + s;
    dpcmValues[i]= s + (i>mid);
}

[...]
> +    diff = (int)current - (int)(*previous);

useless cast, and there are many more ...

> +
> +    negative = 0;
> +    if(diff < 0)
> +    {
> +        negative = 1;
> +        diff = -diff;
> +    }

negative= diff<0;
diff= FFABS(diff)

[...]
> +    diff = result*result;
> +    if(negative)
> +        diff = -diff;
> +    predicted = (int)(*previous) + diff;
> +
> +    // If it overflows, back off a step
> +    if(predicted > 32767 || predicted < -32768)
> +    {
> +        result--;
> +        diff = result*result;
> +        if(negative)
> +            diff = -diff;
> +        predicted = (int)(*previous) + diff;
> +    }

retry:
    diff = result*result;
    if(negative)
        diff = -diff;
    predicted = *previous + diff;

    if(predicted > 32767 || predicted < -32768){
        result--;
        goto retry;
    }

> +
> +    // Add the sign bit
> +    if(negative)
> +        result |= 128;

unpredictable branches are slow ...

result |= negative<<7;

[...]
> +    if(stereo)
> +    {
> +        // Stereo packet header
> +        out[0] = 0x21;
> +        out[1] = 0x10;
> +
> +        out[2] = (unsigned char)(avctx->frame_size*2);
> +        out[3] = (unsigned char)((avctx->frame_size*2)>>8);
> +        out[4] = (unsigned char)((avctx->frame_size*2)>>16);
> +        out[5] = (unsigned char)((avctx->frame_size*2)>>24);
> +
> +        out[6] = (unsigned char)((context->lastSample[1])>>8);
> +        out[7] = (unsigned char)((context->lastSample[0])>>8);
> +    }
> +    else
> +    {
> +        // Mono packet header
> +        out[0] = 0x20;
> +        out[1] = 0x10;
> +
> +        out[2] = (unsigned char)(avctx->frame_size);
> +        out[3] = (unsigned char)((avctx->frame_size)>>8);
> +        out[4] = (unsigned char)((avctx->frame_size)>>16);
> +        out[5] = (unsigned char)((avctx->frame_size)>>24);
> +
> +        out[6] = (unsigned char)(context->lastSample[0]);
> +        out[7] = (unsigned char)((context->lastSample[0])>>8);
> +    }

code duplication
also see bytestream_put_*

[...]
> +        *out = dpcm_predict(&context->lastSample[0], *in);
> +        in++; out++;
> +        if(stereo)
> +        {
> +            *out = dpcm_predict(&context->lastSample[1], *in);
> +            in++; out++;
> +        }

for(ch=0; ch<avctx->channels; ch++)
    *out++ = dpcm_predict(&context->lastSample[ch], *in++);

[...]

> Index: libavcodec/roqvideoenc.c
> ===================================================================
> --- libavcodec/roqvideoenc.c	(revision 0)
> +++ libavcodec/roqvideoenc.c	(revision 0)

please split video, audio and demuxer in seperate patches
ill review the video part later (maybe after the audio and demuxer part
has passed review and is in svn maybe before ...)

[...]

> Index: libavcodec/allcodecs.c
> ===================================================================
> --- libavcodec/allcodecs.c	(revision 8856)
> +++ libavcodec/allcodecs.c	(working copy)
> @@ -121,7 +121,7 @@
>      REGISTER_DECODER(QPEG, qpeg);
>      REGISTER_DECODER(QTRLE, qtrle);
>      REGISTER_ENCDEC (RAWVIDEO, rawvideo);
> -    REGISTER_DECODER(ROQ, roq);
> +    REGISTER_ENCDEC(ROQ, roq);
>      REGISTER_DECODER(RPZA, rpza);

please align the (

[...]

> Index: libavformat/idroqenc.c
> ===================================================================
> --- libavformat/idroqenc.c	(revision 0)
> +++ libavformat/idroqenc.c	(revision 0)
[...]
> +#include "avformat.h"
> +
> +// RoQ is very sensitive about audio timing.  Too little causes no audio, too
> +// much causes crackling.  To compensate, I'm using a custom packet scheduler.
> +// This will also allow audio packets to be encoded first, compliant with
> +// the standard encoder

the proper way to enforce a custom packet order is to use 
AVOutputFormat.interleave_packet()

[...]

> +static unsigned short get_packet_type(void *b)
> +{
> +    return AV_RL16(b);
> +}
> +
> +static unsigned short get_packet_size(void *b)
> +{
> +    unsigned char *c;
> +
> +    c = b;
> +    c+=2;
> +
> +    return AV_RL32(c);
> +}

senseless wraper functions

> +
> +static int roqenc_write_header(struct AVFormatContext *s)
> +{
> +    roq_stream_context *ctx = s->priv_data;
> +
> +    memset(ctx, 0, sizeof(*ctx));
> +
> +    return 0;
> +}
> +
> +static int roqenc_write_packet(struct AVFormatContext *s, AVPacket *pkt)
> +{
> +    unsigned char header[] = {0x84, 0x10, 0xff, 0xff, 0xff, 0xff, 0x1e, 0x00};
> +    roq_stream_context *ctx = s->priv_data;
> +
> +    if(!ctx->wroteheader)
> +    {
> +        put_buffer(&s->pb, header, 8);
> +        put_flush_packet(&s->pb);
> +        ctx->wroteheader = 1;
> +    }

the header should be written in roqenc_write_header()

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Those who are too smart to engage in politics are punished by being
governed by those who are dumber. -- Plato 
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: not available
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20070504/0953e5d4/attachment.pgp>