[FFmpeg-devel] [PATCH] RoQ video encoder

Thu May 10 11:33:38 CEST 2007

Hi,

my 2 cents ;)

Vitor wrote:
> Hi,
>
> Now that the muxer and the audio encoder are in svn, I'm sending the
> video encoder. Suggestions (and criticism) expected and welcome!
>
> -Vitor
> ------------------------------------------------------------------------
>
> Index: libavcodec/roqvideoenc.c
> ===================================================================
> --- libavcodec/roqvideoenc.c	(revision 0)
> +++ libavcodec/roqvideoenc.c	(revision 0)
> +/*
> + * The functions prefixed by nq_ are a modified version of NeuQuant designed to
> + * operate on an arbitrary number of color elements, turning it into a codebook
> + * generator.  Also made it  thread-safe. The following copyright
> + * notice applies to these functions.
> + *
> + * Modifications (c)2004 Eric Lasota/Orbiter Productions
> + *               (c)2007 Vitor
> +*/
>   

nit: lacks a space

> +/**
> + * Returns MSE between two YUV blocks
> + */
> +static inline int squared_diff_yuv(const roq_pixel_t *a, int awidth, const roq_pixel_t *b, int bwidth, int checkwidth, int checkheight)
> +{
> +    int diff=0;
> +    int subDiffs[3];
> +    int x;
> +
> +    while(checkheight) {
>   

while(checkheight--) ?

> +        x = checkwidth;
> +        while(x) {
>   

while(x--) ?

> +#define ENLARGE_ELEMENT(x,y)    \
> +    src = &image4[(y*4)+x];\
> +    memcpy(&image8[(y*16)+(x*2)], src, sizeof(roq_pixel_t));\
> +    memcpy(&image8[(y*16)+(x*2)+1], src, sizeof(roq_pixel_t));\
> +    memcpy(&image8[(y*16)+(x*2)+8], src, sizeof(roq_pixel_t));\
> +    memcpy(&image8[(y*16)+(x*2)+9], src, sizeof(roq_pixel_t))
> +
>   

I would add parentheses around x and y in the macro body

> +static void free_temp_data(roq_tempdata_t *tempData)
> +{
> +    if (tempData->cel_evals)
> +        av_free(tempData->cel_evals);
> +    if (tempData->plists)
> +        av_free(tempData->plists);
> +    if (tempData->yuvClusters)
> +        av_free(tempData->yuvClusters);
> +    if (tempData->sortOptions)
> +        av_free(tempData->sortOptions);
> +    if (tempData->sortOptionsSorted)
> +        av_free(tempData->sortOptionsSorted);
> +    if (tempData->reconstruct)
> +        av_free(tempData->reconstruct);
> +    if (tempData->outbuffer)
> +        av_free(tempData->outbuffer);
> +}
>   

the ifs aren't needed: av_free(NULL) is a no-op

> +
> +/**
> + * Initializes cel evaluators and sets their source coordinates
> + */
> +static int create_cel_evals(roq_encoder_t *enc, roq_tempdata_t *tempData)
> +{
> +    int width, height;
> +    int n,x,y;
> +
> +    width = enc->width;
> +    height= enc->height;
> +
>   

why not use directly enc->width/height ?

> +static int initialize_single_possibility_list(roq_possibility_list_t *plist, int fsk)
> +{
> +    int i,j,k,l;
> +    int n;
> +    int firstAllowed;
> +
> +    plist->p[0].evalType = ROQ_EVALTYPE_MOTION;
> +    plist->p[0].allowed = (fsk >= 1);
> +
> +    plist->p[1].evalType = ROQ_EVALTYPE_SKIP;
> +    plist->p[1].allowed = (fsk >= 2);
> +
> +    plist->p[2].evalType = ROQ_EVALTYPE_CODEBOOK;
> +    plist->p[2].allowed = 1;
> +
> +    n = 3;
> +
>   

why not initialize n when defining it?

> +    if (fsk >= 2)
> +        firstAllowed = 0;
> +    else if (fsk >= 1)
> +        firstAllowed = 1;
> +    else
> +        firstAllowed = 2;
> +
> +    for (i=firstAllowed; i<4; i++)
> +        for (j=firstAllowed; j<4; j++)
> +            for (k=firstAllowed; k<4; k++)
> +               for (l=firstAllowed; l<4; l++) {
> +                   plist->p[n].evalType = ROQ_EVALTYPE_SUBDIVIDE;
> +                   plist->p[n].allowed = 1;
> +                   plist->p[n].subEvalTypes[0] = i;
> +                   plist->p[n].subEvalTypes[1] = j;
> +                   plist->p[n].subEvalTypes[2] = k;
> +                   plist->p[n].subEvalTypes[3] = l;
> +                   n++;
> +               }
> +
> +    while(n < ROQ_MAX_POSSIBILITIES) {
> +        plist->p[n].allowed = 0;
> +        n++;
> +    }
>   

plist->p[n++].allowed

> +/**
> + * Initializes all possibility lists
> + */
> +static int create_possibility_lists(roq_encoder_t *enc, roq_tempdata_t *tempData)
> +{
> +    roq_possibility_list_t *plists;
> +    int max, fsk;
> +
> +    max = enc->width*enc->height/64;
> +
>   

/64 == >>6

> +    tempData->plists = plists = av_malloc(sizeof(roq_possibility_list_t) * max);
> +    if (!plists)
> +        return 0;
> +
>   

maybe it would be better to return an out-of-memory error code and handle it in the caller, no?

> +    fsk = enc->framesSinceKeyframe;
> +    while (max) {
> +        initialize_single_possibility_list(plists, fsk);
>   

while(max--)
    initialize_single_possibility_list(plists++, fsk);

> +/**
> + * Creates YUV clusters for the entire image
> + */
> +static void create_clusters(const roq_pixel_t *image, int w, int h, roq_yuvcluster4_t *yuvClusters)
> +{
> +  /* Although it's a 4x4 block, it's treated like an 8x2 block during this */
>   

indentation

> +    roq_pixel_t blocks2[16];
> +    int x, y, i, i2;
> +
> +    int c_average;
> +
> +    for (y=0; y<h; y+=4) {
> +        for (x=0; x<w; x+=4) {
> +            /* Copy data */
> +            blit(image + (y*w)+x, w, &blocks2[0], 8, 2, 2);
> +            blit(image + (y*w)+x+2, w, &blocks2[2], 8, 2, 2);
> +            blit(image + ((y+2)*w)+x, w, &blocks2[4], 8, 2, 2);
> +            blit(image + ((y+2)*w)+x+2, w, &blocks2[6], 8, 2, 2);
> +
> +            /* Convert to mini-images */
> +            i2 = 0;
> +            for (i=0; i<4; i++) {
>   

is there really a need for i2 ?

> +/**
> + * Converts a 2x2 codebook list into cached mini-images
> + */
> +void convert_cb2_to_mini_images(roq_cb2_entry_t *cb2, int numCB2)
> +{
> +    int i;
> +
> +    while(numCB2) {
> +
>   

while(numCB2--)

> +/**
> + * Converts a 4x4 codebook list into cached mini-images
> + */
> +static void convert_cb4_to_mini_images(roq_cb4_entry_t *cb4, roq_cb2_entry_t *cb2, int numCB4)
> +{
> +    while(numCB4) {
>   

while(numCB4--)

> +/**
> + * Template code to find the codebook with the lowest median squared error from an image
> + */
> +#define GET_LOWEST_CB_MSE(FUNCT, CBTYPE, COMMAND1, COMMAND2) \
>   

can't you find a more accurate name for COMMAND1/2 ?

> +static int generate_new_codebooks(roq_encoder_t *enc, roq_tempdata_t *tempData, const roq_pixel_t *image)
> +{
> +    int w, h;
> +    int numCB2, numCB4, max;
> +    roq_yuvcluster4_t *results4;
> +    roq_yuvcluster2_t *results2;
> +    roq_codebooks_t *codebooks;
> +    int i,n;
> +
> +    w = enc->width;
> +    h = enc->height;
>   

why not use directly enc->width/height, tempData->codebooks ?
(there are more like that afterwards)

> +    max = w*h/16;
> +
> +    tempData->yuvClusters = av_malloc(sizeof(roq_yuvcluster4_t)*max);
> +    if (!tempData->yuvClusters)
> +        return 0;
> +
>   

error code management ?

> +/**
> + * Performs motion searching on an image at an offset, sets outDX and outDY to motion offset
> + */
> +static int motion_search(roq_encoder_t *enc, const roq_pixel_t *image, int x, int y, int8_t *outDX, int8_t *outDY, uint32_t d)
> +{
> +    roq_motionsearch_vector_t offsets[9] = {
> +        {0,0},
> +        {0,-1},
> +        {-1,-1},
> +        {-1,0},
> +        {-1,1},
> +        {0,1},
> +        {1,1},
> +        {1,0},
> +        {1,-1},
> +    };
> +
> +    int diffs[9];
> +    int diffPick, lowestDiff;
> +
> +    int w,h,i;
> +
> +    uint32_t rx,ry;
> +
> +    int finalDX, finalDY;
> +
> +    int step;
> +
> +    w = enc->width;
> +    h = enc->height;
> +
> +    finalDX = 0;
> +    finalDY = 0;
> +
> +    /* Simple three-step search */
> +
> +    /* The first cel is centered and recycled, so it has to be calculated for the first one now */
> +    diffs[0] = squared_diff_yuv(image + (y*w)+x, w, enc->frameHistory1 + (y*w)+x, w, d, d);
> +
> +    step = 8;
> +    while(step != 1) {
> +        step >>= 1;
> +
>   

you can also define step = 4 and shift it at the end of the while loop

> +/**
> + * Loads possibility lists with actual data for one block,
> + * assigning all possibilities a cached MSE and bit consumption
> + */
> +static int gather_possibility_data_for_block(roq_possibility_list_t *plist, roq_cel_evaluation_t *celEval)
> +{
> +    int i,j;
> +    roq_possibility_t *p;
> +
> +    for (i=0; i<ROQ_MAX_POSSIBILITIES; i++) {
> +        p = plist->p+i;
> +        if (!p->allowed)
> +            continue;
> +
> +        if (p->evalType == ROQ_EVALTYPE_SKIP) {
> +            p->codeConsumption = 1;
> +            p->byteConsumption = 0;
> +            p->mse = celEval->eval_mse[ROQ_EVALTYPE_SKIP];
> +        } else if (p->evalType == ROQ_EVALTYPE_MOTION || p->evalType == ROQ_EVALTYPE_CODEBOOK) {
> +            p->codeConsumption = 1;
> +            /* 3.11 - Was = 0, oops */
> +            p->byteConsumption = 1;
> +            p->mse = celEval->eval_mse[p->evalType];
> +        } else { //if (p->evalType == ROQ_EVALTYPE_SUBDIVIDE)
> +            p->codeConsumption = 5;        // 1 for main code, 4 for the subcodes
> +            p->byteConsumption = 0;
> +            p->mse = 0;
> +
> +            for (j=0; j<4; j++) {
> +                p->mse += celEval->subCels[j].eval_mse[p->subEvalTypes[j]];
> +                //if (p->subEvalTypes[j] == ROQ_EVALTYPE_SKIP)
> +                //{
> +                //}
> +                //else
>   

to be removed ?

> +                if (p->subEvalTypes[j] == ROQ_EVALTYPE_MOTION || p->subEvalTypes[j] == ROQ_EVALTYPE_CODEBOOK)
> +                    p->byteConsumption++;
> +                else if (p->subEvalTypes[j] == ROQ_EVALTYPE_SUBDIVIDE)
> +                    p->byteConsumption += 4;
> +            }
> +        }
> +
> +        p->combinedBitConsumption = (p->codeConsumption * 1) + (p->byteConsumption * 4);
>   

is the * 1 really necessary ?

> +/**
> + * Template code for both add and subtract size calc modifiers
> + */
> +#define SIZE_CALC_BASE_CODE        \
> +    uint32_t cb4Changes[4];\
> +    uint32_t cb2Changes[16];\
> +    int numCB4Changes=0;\
> +    int numCB2Changes=0;\
> +    int argumentsChange=0;\
> +    int codeChange=0;\
> +    int i;\
> +\
> +    codeChange = 1;\
>   

why initialize codeChange to 0 if you set it to 1 right after?

> +#define SPOOL_MOTION(dx, dy)    \
> +do {\
> +    uint8_t arg, ax, ay;\
> +    ax = 8 - (uint8_t)dx;\
> +    ay = 8 - (uint8_t)dy;\
> +    arg = (uint8_t)(((ax&15)<<4) | (ay&15));\
> +    SPOOL_ARGUMENT(arg);\
> +} while(0)
> +
>   

here too, i would add () around dx and dy

> +#define SPOOL_TYPECODE(type)        \
> +do {\
> +    typeSpool |= (type & 3) << (14 - typeSpoolLength);\
>   

and here around type

> +int roq_encode_video(roq_encoder_t *enc, const roq_pixel_t *rgbData, writebuffer_t *wb)
> +{
> +    int width, height;
> +    roq_tempdata_t tempData;
> +    roq_pixel_t *img;
> +
> +    memset(&tempData, 0, sizeof(tempData));
> +
> +    width = enc->width;
> +    height = enc->height;
> +
> +    if (!create_cel_evals(enc, &tempData)) {
> +        free_temp_data(&tempData);
>   

IIRC, the only error case is that memory could not be allocated, no?

> +        return 0;
> +    }
> +
> +    if (!create_possibility_lists(enc, &tempData)) {
> +        free_temp_data(&tempData);
>   
here too
(and maybe in some more places afterwards)
maybe you can add a cleanup label and goto it in case of error, to avoid
duplicating all of these:
free();
return 0;

> +/**
> + * Initialise network in range (0,0,0) to (255,255,255) and set parameters
> + */
> +static inline void nq_init_net(neuquant_instance_t *nqi, unsigned char *thepic, int len, int sample, int num_colors, int num_bias)
> +{
> +    register int i,j;
> +    register int *p;
> +
>   

does it change anything to declare them as register ?

> +    nqi->network  = (int **) av_malloc (nq_netsize * sizeof(int *));
> +    for (i=0; i < nq_netsize; i++)
> +        nqi->network[i] = (int *) av_malloc ((num_colors+1)*sizeof(int));
> +
>   

unneeded cast

> +/**
> + * Output colour map
> + */
> +static inline void nq_dump_colormap(neuquant_instance_t *nqi, unsigned char *c)
> +{
> +    int i,j;
> +
> +    for (i=0; i<nq_netsize; i++) {
> +        for (j=0; j<nqi->num_colors; j++) {
> +            *c = nqi->network[i][j];
> +            c++;
>   

*c++ = ...

> +/**
> + * Move neuron i towards biased (b,g,r) by factor alpha
> + */
> +static inline void nq_alter_single(neuquant_instance_t *nqi, int alpha, int i, int *colors)
> +{
> +    register int *n;
> +    int j;
> +
> +    n = nqi->network[i];                /* alter hit neuron */
> +
> +    for (j=0; j<nqi->num_colors; j++) {
> +        *n -= (alpha*(*n - colors[j])) / nq_initalpha;
> +        n++;
>   

*n++ -= ...
(there are others)

> +/**
> + * Move adjacent neurons by precomputed alpha*(1-((i-j)^2/[r]^2)) in radpower[|i-j|]
> + */
> +static inline void nq_alter_neigh(neuquant_instance_t *nqi, int rad, int i, int *colors)
> +{
> +    register int j,k,lo,hi,a;
> +    register int *p, *q;
> +    int c;
> +
> +    lo = i-rad;   if (lo<-1) lo=-1;
> +    hi = i+rad;   if (hi>nq_netsize) hi=nq_netsize;
> +
> +    j = i+1;
> +    k = i-1;
> +    q = nqi->radpower;
> +    while ((j<hi) || (k>lo)) {
> +        a = (*(++q));
>   

superfluous (())

> +/**
> + * Main Learning Loop
> + */
> +static inline void nq_learn(neuquant_instance_t *nqi)
> +{
> +    register int i,j;
> +    int radius,rad,alpha,step,delta,samplepixels;
> +    register unsigned char *p;
> +    unsigned char *lim;
> +    int c;
> +    int *colors = (int *) av_malloc(nqi->num_colors*sizeof(int));
> +
>   

unneeded cast

> +    nqi->alphadec = 30 + ((nqi->samplefac-1)/3);
> +    p = nqi->thepicture;
> +    lim = nqi->thepicture + nqi->lengthcount;
> +    samplepixels = nqi->lengthcount/(nqi->num_colors*nqi->samplefac);
> +    delta = samplepixels/nq_ncycles;
> +    alpha = nq_initalpha;
> +    radius = nq_initradius;
> +
> +    rad = radius >> nq_radiusbiasshift;
> +    if (rad <= 1)
> +        rad = 0;
> +
> +    for (i=0; i<rad; i++)
> +        nqi->radpower[i] = alpha*(((rad*rad - i*i)*nq_radbias)/(rad*rad));
> +
> +    //fprintf(stderr,"beginning 1D learning: initial radius=%d\n", rad);
> +
> +    if ((nqi->lengthcount%nq_prime1) != 0)
> +        step = nqi->num_colors*nq_prime1;
> +    else {
> +        if ((nqi->lengthcount%nq_prime2) !=0)
> +            step = nqi->num_colors*nq_prime2;
> +        else {
> +            if ((nqi->lengthcount%nq_prime3) !=0)
> +                step = nqi->num_colors*nq_prime3;
> +            else
> +                step = nqi->num_colors*nq_prime4;
> +        }
> +    }
> +
> +    i = 0;
> +    while (i < samplepixels) {
> +        for (c=0; c<nqi->num_colors; c++)
> +            colors[c] = p[c] << nq_netbiasshift;
> +        for (c=0; c<nqi->num_colors; c+=6) {
> +            colors[c+4] *= nqi->num_bias;
> +            colors[c+5] *= nqi->num_bias;
> +        }
> +        j = nq_contest(nqi, colors);
> +
> +        nq_alter_single(nqi, alpha,j,colors);
> +
> +        if (rad)
> +            nq_alter_neigh(nqi, rad,j,colors);   /* alter neighbours */
> +
> +        p += step;
> +        if (p >= lim)
> +            p -= nqi->lengthcount;
> +
> +        i++;
> +        if (i%delta == 0) {
> +            alpha -= alpha / nqi->alphadec;
> +            radius -= radius / nq_radiusdec;
> +            rad = radius >> nq_radiusbiasshift;
> +            if (rad <= 1)
> +                rad = 0;
> +            for (j=0; j<rad; j++)
> +                nqi->radpower[j] = alpha*(((rad*rad - j*j)*nq_radbias)/(rad*rad));
> +        }
> +    }
> +    //fprintf(stderr,"finished 1D learning: final alpha=%f !\n",((float)alpha)/nq_initalpha);
>   

to be removed ? or use an av_log, instead

> +/**
> + * Generate 2x2 codebooks using the NeuQuant-based vector quantizer
> + */
> +int generate_codebooks2(roq_encoder_t *handle, roq_yuvcluster2_t *input, uint32_t inputCount, uint32_t goalCells, uint32_t *resultCount, roq_yuvcluster2_t **resultElements)
> +{
> +    unsigned char *prep;
> +    roq_yuvcluster2_t *cluster;
> +    roq_yuvcluster2_t *result;
> +    unsigned char resultCodebook[256 * 6];
> +    unsigned char *c;
> +    int i;
> +    int numResults;
> +
> +    neuquant_instance_t nqi;
> +
> +    prep = av_malloc(6 * inputCount);
> +    if (!prep)
> +        return 0;
> +
> +    c = prep;
> +    cluster = input;
> +    for (i=0; i<inputCount; i++) {
> +        c[0] = cluster->y[0];
> +        c[1] = cluster->y[1];
> +        c[2] = cluster->y[2];
> +        c[3] = cluster->y[3];
> +        c[4] = cluster->u;
> +        c[5] = cluster->v;
> +        c += 6;
> +        cluster++;
> +    }
> +
> +    numResults = 256;
> +
>   

could be moved to the else statement below

> +    i = basic_quant(prep, inputCount, 6, resultCodebook);
> +    if (i)
> +        numResults = i;
> +    else {
> +        nq_init_net(&nqi, prep, inputCount*6, 1, 6, 1);
> +        nq_learn(&nqi);
> +        nq_unbias_net(&nqi);
> +        nq_dump_colormap(&nqi, resultCodebook);
> +        for (i=0; i < nq_netsize; i++)
> +            av_free(nqi.network[i]);
> +        av_free(nqi.network);
> +    }
> +
> +    av_free(prep);
> +
> +    result = av_malloc(sizeof(roq_yuvcluster2_t) * 256);
> +    if (!result)
> +        return 0;
> +
> +    c = resultCodebook;
> +    cluster = result;
> +    for (i=0; i<numResults; i++) {
> +        cluster->y[0] = c[0];
> +        cluster->y[1] = c[1];
> +        cluster->y[2] = c[2];
> +        cluster->y[3] = c[3];
> +        cluster->u = c[4];
> +        cluster->v = c[5];
> +        c += 6;
>   
cluster->y[0] = *c++;
cluster->y[1] = *c++;
cluster->y[2] = *c++;
cluster->y[3] = *c++;
cluster->u = *c++;
cluster->v = *c++;

> +/**
> + * Generate 4x4 codebooks using the NeuQuant-based vector quantizer
> + */
> +int generate_codebooks4(roq_encoder_t *handle, roq_yuvcluster4_t *input, uint32_t inputCount, uint32_t goalCells, uint32_t *resultCount, roq_yuvcluster4_t **resultElements)
> +{
> +    unsigned char *prep;
> +    roq_yuvcluster4_t *cluster;
> +    roq_yuvcluster4_t *result;
> +    unsigned char resultCodebook[256 * 24];
> +    unsigned char *c;
> +    int i,j;
> +    int numResults;
> +
> +    neuquant_instance_t nqi;
> +
> +    prep = av_malloc(24 * inputCount);
> +    if (!prep)
> +        return 0;
> +
> +    c = prep;
> +    cluster = input;
> +    for (i=0; i<inputCount; i++) {
> +        for (j=0; j<4; j++) {
> +            c[0] = cluster->block[j].y[0];
> +            c[1] = cluster->block[j].y[1];
> +            c[2] = cluster->block[j].y[2];
> +            c[3] = cluster->block[j].y[3];
> +            c[4] = cluster->block[j].u;
> +            c[5] = cluster->block[j].v;
>   

ditto, but *c++ = ...

> +            c += 6;
> +        }
> +        cluster++;
> +    }
> +
> +    numResults = 256;
> +
>   

in the else below ?
btw, it seems there is some code that could be shared between the two
functions, no ?

> +    i = basic_quant(prep, inputCount, 24, resultCodebook);
> +    if (i)
> +        numResults = i;
> +    else {
> +        nq_init_net(&nqi, prep, inputCount*24, 1, 24, 1);
> +        nq_learn(&nqi);
> +        nq_unbias_net(&nqi);
> +        nq_dump_colormap(&nqi, resultCodebook);
> +        for (i=0; i < nq_netsize; i++)
> +            av_free(nqi.network[i]);
> +        av_free(nqi.network);
> +    }
>   

[snip]

> +static int roq_encode_end(AVCodecContext *avctx)
> +{
> +    roq_encoder_t *enc = avctx->priv_data;
> +
> +    av_freep(&enc->pixels);
> +
> +    if (enc->frameHistory1)
> +        av_free(enc->frameHistory1);
> +    if (enc->frameHistory2)
> +        av_free(enc->frameHistory2);
> +
>   

the ifs are unneeded: av_free() accepts NULL

> Index: libavcodec/roqvideoenc.h
> ===================================================================
> --- libavcodec/roqvideoenc.h	(revision 0)
> +++ libavcodec/roqvideoenc.h	(revision 0)
> +/* NeuQuant Neural-Net Quantization Algorithm Interface definitions */
> +
> +#define nq_netsize        256            /**< number of colours used in the NeuQuant neural-net */
> +
>   

usually, constant defines are uppercase, no ?

well, you now have my 2 cents ;)

Ben
-- 
Purple Labs S.A.
www.purplelabs.com