[FFmpeg-devel] [PATCH] RoQ video encoder
Benoit Fouet
benoit.fouet
Thu May 10 11:33:38 CEST 2007
Hi,
my 2 cents ;)
Vitor wrote:
> Hi,
>
> Now that the muxer and the audio encoder are in svn, I'm sending the
> video encoder. Suggestions (and criticism) expected and welcome!
>
> -Vitor
> ------------------------------------------------------------------------
>
> Index: libavcodec/roqvideoenc.c
> ===================================================================
> --- libavcodec/roqvideoenc.c (revision 0)
> +++ libavcodec/roqvideoenc.c (revision 0)
> +/*
> + * The functions prefixed by nq_ are a modified version of NeuQuant designed to
> + * operate on an arbitrary number of color elements, turning it into a codebook
> + * generator. Also made it thread-safe. The following copyright
> + * notice applies to these functions.
> + *
> + * Modifications (c)2004 Eric Lasota/Orbiter Productions
> + * (c)2007 Vitor
> +*/
>
nit: lacks a space
> +/**
> + * Returns MSE between two YUV blocks
> + */
> +static inline int squared_diff_yuv(const roq_pixel_t *a, int awidth, const roq_pixel_t *b, int bwidth, int checkwidth, int checkheight)
> +{
> + int diff=0;
> + int subDiffs[3];
> + int x;
> +
> + while(checkheight) {
>
while(checkheight--) ?
> + x = checkwidth;
> + while(x) {
>
while(x--) ?
> +#define ENLARGE_ELEMENT(x,y) \
> + src = &image4[(y*4)+x];\
> + memcpy(&image8[(y*16)+(x*2)], src, sizeof(roq_pixel_t));\
> + memcpy(&image8[(y*16)+(x*2)+1], src, sizeof(roq_pixel_t));\
> + memcpy(&image8[(y*16)+(x*2)+8], src, sizeof(roq_pixel_t));\
> + memcpy(&image8[(y*16)+(x*2)+9], src, sizeof(roq_pixel_t))
> +
>
I would add some parentheses around x and y
> +static void free_temp_data(roq_tempdata_t *tempData)
> +{
> + if (tempData->cel_evals)
> + av_free(tempData->cel_evals);
> + if (tempData->plists)
> + av_free(tempData->plists);
> + if (tempData->yuvClusters)
> + av_free(tempData->yuvClusters);
> + if (tempData->sortOptions)
> + av_free(tempData->sortOptions);
> + if (tempData->sortOptionsSorted)
> + av_free(tempData->sortOptionsSorted);
> + if (tempData->reconstruct)
> + av_free(tempData->reconstruct);
> + if (tempData->outbuffer)
> + av_free(tempData->outbuffer);
> +}
>
the if's aren't needed (av_free(NULL) is a no-op)
> +
> +/**
> + * Initializes cel evaluators and sets their source coordinates
> + */
> +static int create_cel_evals(roq_encoder_t *enc, roq_tempdata_t *tempData)
> +{
> + int width, height;
> + int n,x,y;
> +
> + width = enc->width;
> + height= enc->height;
> +
>
why not use enc->width/height directly?
> +static int initialize_single_possibility_list(roq_possibility_list_t *plist, int fsk)
> +{
> + int i,j,k,l;
> + int n;
> + int firstAllowed;
> +
> + plist->p[0].evalType = ROQ_EVALTYPE_MOTION;
> + plist->p[0].allowed = (fsk >= 1);
> +
> + plist->p[1].evalType = ROQ_EVALTYPE_SKIP;
> + plist->p[1].allowed = (fsk >= 2);
> +
> + plist->p[2].evalType = ROQ_EVALTYPE_CODEBOOK;
> + plist->p[2].allowed = 1;
> +
> + n = 3;
> +
>
why not initialize it when declaring it?
> + if (fsk >= 2)
> + firstAllowed = 0;
> + else if (fsk >= 1)
> + firstAllowed = 1;
> + else
> + firstAllowed = 2;
> +
> + for (i=firstAllowed; i<4; i++)
> + for (j=firstAllowed; j<4; j++)
> + for (k=firstAllowed; k<4; k++)
> + for (l=firstAllowed; l<4; l++) {
> + plist->p[n].evalType = ROQ_EVALTYPE_SUBDIVIDE;
> + plist->p[n].allowed = 1;
> + plist->p[n].subEvalTypes[0] = i;
> + plist->p[n].subEvalTypes[1] = j;
> + plist->p[n].subEvalTypes[2] = k;
> + plist->p[n].subEvalTypes[3] = l;
> + n++;
> + }
> +
> + while(n < ROQ_MAX_POSSIBILITIES) {
> + plist->p[n].allowed = 0;
> + n++;
> + }
>
plist->p[n++].allowed
> +/**
> + * Initializes all possibility lists
> + */
> +static int create_possibility_lists(roq_encoder_t *enc, roq_tempdata_t *tempData)
> +{
> + roq_possibility_list_t *plists;
> + int max, fsk;
> +
> + max = enc->width*enc->height/64;
> +
>
/64 == >>6
> + tempData->plists = plists = av_malloc(sizeof(roq_possibility_list_t) * max);
> + if (!plists)
> + return 0;
> +
>
maybe it would be better to return an out-of-memory error and handle it, no?
> + fsk = enc->framesSinceKeyframe;
> + while (max) {
> + initialize_single_possibility_list(plists, fsk);
>
while(max--)
initialize_single_possibility_list(plists++, fsk);
> +/**
> + * Creates YUV clusters for the entire image
> + */
> +static void create_clusters(const roq_pixel_t *image, int w, int h, roq_yuvcluster4_t *yuvClusters)
> +{
> + /* Although it's a 4x4 block, it's treated like an 8x2 block during this */
>
indentation
> + roq_pixel_t blocks2[16];
> + int x, y, i, i2;
> +
> + int c_average;
> +
> + for (y=0; y<h; y+=4) {
> + for (x=0; x<w; x+=4) {
> + /* Copy data */
> + blit(image + (y*w)+x, w, &blocks2[0], 8, 2, 2);
> + blit(image + (y*w)+x+2, w, &blocks2[2], 8, 2, 2);
> + blit(image + ((y+2)*w)+x, w, &blocks2[4], 8, 2, 2);
> + blit(image + ((y+2)*w)+x+2, w, &blocks2[6], 8, 2, 2);
> +
> + /* Convert to mini-images */
> + i2 = 0;
> + for (i=0; i<4; i++) {
>
is there really a need for i2 ?
> +/**
> + * Converts a 2x2 codebook list into cached mini-images
> + */
> +void convert_cb2_to_mini_images(roq_cb2_entry_t *cb2, int numCB2)
> +{
> + int i;
> +
> + while(numCB2) {
> +
>
while(numCB2--)
> +/**
> + * Converts a 4x4 codebook list into cached mini-images
> + */
> +static void convert_cb4_to_mini_images(roq_cb4_entry_t *cb4, roq_cb2_entry_t *cb2, int numCB4)
> +{
> + while(numCB4) {
>
while(numCB4--)
> +/**
> + * Template code to find the codebook with the lowest median squared error from an image
> + */
> +#define GET_LOWEST_CB_MSE(FUNCT, CBTYPE, COMMAND1, COMMAND2) \
>
can't you find a more accurate name for COMMAND1/2 ?
> +static int generate_new_codebooks(roq_encoder_t *enc, roq_tempdata_t *tempData, const roq_pixel_t *image)
> +{
> + int w, h;
> + int numCB2, numCB4, max;
> + roq_yuvcluster4_t *results4;
> + roq_yuvcluster2_t *results2;
> + roq_codebooks_t *codebooks;
> + int i,n;
> +
> + w = enc->width;
> + h = enc->height;
>
why not use enc->width/height and tempData->codebooks directly?
(there are more cases like that afterwards)
> + max = w*h/16;
> +
> + tempData->yuvClusters = av_malloc(sizeof(roq_yuvcluster4_t)*max);
> + if (!tempData->yuvClusters)
> + return 0;
> +
>
error code management ?
> +/**
> + * Performs motion searching on an image at an offset, sets outDX and outDY to motion offset
> + */
> +static int motion_search(roq_encoder_t *enc, const roq_pixel_t *image, int x, int y, int8_t *outDX, int8_t *outDY, uint32_t d)
> +{
> + roq_motionsearch_vector_t offsets[9] = {
> + {0,0},
> + {0,-1},
> + {-1,-1},
> + {-1,0},
> + {-1,1},
> + {0,1},
> + {1,1},
> + {1,0},
> + {1,-1},
> + };
> +
> + int diffs[9];
> + int diffPick, lowestDiff;
> +
> + int w,h,i;
> +
> + uint32_t rx,ry;
> +
> + int finalDX, finalDY;
> +
> + int step;
> +
> + w = enc->width;
> + h = enc->height;
> +
> + finalDX = 0;
> + finalDY = 0;
> +
> + /* Simple three-step search */
> +
> + /* The first cel is centered and recycled, so it has to be calculated for the first one now */
> + diffs[0] = squared_diff_yuv(image + (y*w)+x, w, enc->frameHistory1 + (y*w)+x, w, d, d);
> +
> + step = 8;
> + while(step != 1) {
> + step >>= 1;
> +
>
you can also define step = 4 and shift it at the end of the while loop
> +/**
> + * Loads possibility lists with actual data for one block,
> + * assigning all possibilities a cached MSE and bit consumption
> + */
> +static int gather_possibility_data_for_block(roq_possibility_list_t *plist, roq_cel_evaluation_t *celEval)
> +{
> + int i,j;
> + roq_possibility_t *p;
> +
> + for (i=0; i<ROQ_MAX_POSSIBILITIES; i++) {
> + p = plist->p+i;
> + if (!p->allowed)
> + continue;
> +
> + if (p->evalType == ROQ_EVALTYPE_SKIP) {
> + p->codeConsumption = 1;
> + p->byteConsumption = 0;
> + p->mse = celEval->eval_mse[ROQ_EVALTYPE_SKIP];
> + } else if (p->evalType == ROQ_EVALTYPE_MOTION || p->evalType == ROQ_EVALTYPE_CODEBOOK) {
> + p->codeConsumption = 1;
> + /* 3.11 - Was = 0, oops */
> + p->byteConsumption = 1;
> + p->mse = celEval->eval_mse[p->evalType];
> + } else { //if (p->evalType == ROQ_EVALTYPE_SUBDIVIDE)
> + p->codeConsumption = 5; // 1 for main code, 4 for the subcodes
> + p->byteConsumption = 0;
> + p->mse = 0;
> +
> + for (j=0; j<4; j++) {
> + p->mse += celEval->subCels[j].eval_mse[p->subEvalTypes[j]];
> + //if (p->subEvalTypes[j] == ROQ_EVALTYPE_SKIP)
> + //{
> + //}
> + //else
>
to be removed ?
> + if (p->subEvalTypes[j] == ROQ_EVALTYPE_MOTION || p->subEvalTypes[j] == ROQ_EVALTYPE_CODEBOOK)
> + p->byteConsumption++;
> + else if (p->subEvalTypes[j] == ROQ_EVALTYPE_SUBDIVIDE)
> + p->byteConsumption += 4;
> + }
> + }
> +
> + p->combinedBitConsumption = (p->codeConsumption * 1) + (p->byteConsumption * 4);
>
is the * 1 really necessary ?
> +/**
> + * Template code for both add and subtract size calc modifiers
> + */
> +#define SIZE_CALC_BASE_CODE \
> + uint32_t cb4Changes[4];\
> + uint32_t cb2Changes[16];\
> + int numCB4Changes=0;\
> + int numCB2Changes=0;\
> + int argumentsChange=0;\
> + int codeChange=0;\
> + int i;\
> +\
> + codeChange = 1;\
>
why initialize it to 0 if you set it to 1 right after?
> +#define SPOOL_MOTION(dx, dy) \
> +do {\
> + uint8_t arg, ax, ay;\
> + ax = 8 - (uint8_t)dx;\
> + ay = 8 - (uint8_t)dy;\
> + arg = (uint8_t)(((ax&15)<<4) | (ay&15));\
> + SPOOL_ARGUMENT(arg);\
> +} while(0)
> +
>
here too, I would add parentheses around dx and dy
> +#define SPOOL_TYPECODE(type) \
> +do {\
> + typeSpool |= (type & 3) << (14 - typeSpoolLength);\
>
and here around type
> +int roq_encode_video(roq_encoder_t *enc, const roq_pixel_t *rgbData, writebuffer_t *wb)
> +{
> + int width, height;
> + roq_tempdata_t tempData;
> + roq_pixel_t *img;
> +
> + memset(&tempData, 0, sizeof(tempData));
> +
> + width = enc->width;
> + height = enc->height;
> +
> + if (!create_cel_evals(enc, &tempData)) {
> + free_temp_data(&tempData);
>
IIRC, the only error case is that memory couldn't be allocated, no?
> + return 0;
> + }
> +
> + if (!create_possibility_lists(enc, &tempData)) {
> + free_temp_data(&tempData);
>
here too
(maybe some more afterwards too)
maybe you can add a label and goto it in case of error, to avoid
duplicating all those:
free();
return 0;
> +/**
> + * Initialise network in range (0,0,0) to (255,255,255) and set parameters
> + */
> +static inline void nq_init_net(neuquant_instance_t *nqi, unsigned char *thepic, int len, int sample, int num_colors, int num_bias)
> +{
> + register int i,j;
> + register int *p;
> +
>
does it change anything to declare them as register ?
> + nqi->network = (int **) av_malloc (nq_netsize * sizeof(int *));
> + for (i=0; i < nq_netsize; i++)
> + nqi->network[i] = (int *) av_malloc ((num_colors+1)*sizeof(int));
> +
>
unneeded cast
> +/**
> + * Output colour map
> + */
> +static inline void nq_dump_colormap(neuquant_instance_t *nqi, unsigned char *c)
> +{
> + int i,j;
> +
> + for (i=0; i<nq_netsize; i++) {
> + for (j=0; j<nqi->num_colors; j++) {
> + *c = nqi->network[i][j];
> + c++;
>
*c++ = ...
> +/**
> + * Move neuron i towards biased (b,g,r) by factor alpha
> + */
> +static inline void nq_alter_single(neuquant_instance_t *nqi, int alpha, int i, int *colors)
> +{
> + register int *n;
> + int j;
> +
> + n = nqi->network[i]; /* alter hit neuron */
> +
> + for (j=0; j<nqi->num_colors; j++) {
> + *n -= (alpha*(*n - colors[j])) / nq_initalpha;
> + n++;
>
*n++ -= ...
(there are others)
> +/**
> + * Move adjacent neurons by precomputed alpha*(1-((i-j)^2/[r]^2)) in radpower[|i-j|]
> + */
> +static inline void nq_alter_neigh(neuquant_instance_t *nqi, int rad, int i, int *colors)
> +{
> + register int j,k,lo,hi,a;
> + register int *p, *q;
> + int c;
> +
> + lo = i-rad; if (lo<-1) lo=-1;
> + hi = i+rad; if (hi>nq_netsize) hi=nq_netsize;
> +
> + j = i+1;
> + k = i-1;
> + q = nqi->radpower;
> + while ((j<hi) || (k>lo)) {
> + a = (*(++q));
>
superfluous (())
> +/**
> + * Main Learning Loop
> + */
> +static inline void nq_learn(neuquant_instance_t *nqi)
> +{
> + register int i,j;
> + int radius,rad,alpha,step,delta,samplepixels;
> + register unsigned char *p;
> + unsigned char *lim;
> + int c;
> + int *colors = (int *) av_malloc(nqi->num_colors*sizeof(int));
> +
>
unneeded cast
> + nqi->alphadec = 30 + ((nqi->samplefac-1)/3);
> + p = nqi->thepicture;
> + lim = nqi->thepicture + nqi->lengthcount;
> + samplepixels = nqi->lengthcount/(nqi->num_colors*nqi->samplefac);
> + delta = samplepixels/nq_ncycles;
> + alpha = nq_initalpha;
> + radius = nq_initradius;
> +
> + rad = radius >> nq_radiusbiasshift;
> + if (rad <= 1)
> + rad = 0;
> +
> + for (i=0; i<rad; i++)
> + nqi->radpower[i] = alpha*(((rad*rad - i*i)*nq_radbias)/(rad*rad));
> +
> + //fprintf(stderr,"beginning 1D learning: initial radius=%d\n", rad);
> +
> + if ((nqi->lengthcount%nq_prime1) != 0)
> + step = nqi->num_colors*nq_prime1;
> + else {
> + if ((nqi->lengthcount%nq_prime2) !=0)
> + step = nqi->num_colors*nq_prime2;
> + else {
> + if ((nqi->lengthcount%nq_prime3) !=0)
> + step = nqi->num_colors*nq_prime3;
> + else
> + step = nqi->num_colors*nq_prime4;
> + }
> + }
> +
> + i = 0;
> + while (i < samplepixels) {
> + for (c=0; c<nqi->num_colors; c++)
> + colors[c] = p[c] << nq_netbiasshift;
> + for (c=0; c<nqi->num_colors; c+=6) {
> + colors[c+4] *= nqi->num_bias;
> + colors[c+5] *= nqi->num_bias;
> + }
> + j = nq_contest(nqi, colors);
> +
> + nq_alter_single(nqi, alpha,j,colors);
> +
> + if (rad)
> + nq_alter_neigh(nqi, rad,j,colors); /* alter neighbours */
> +
> + p += step;
> + if (p >= lim)
> + p -= nqi->lengthcount;
> +
> + i++;
> + if (i%delta == 0) {
> + alpha -= alpha / nqi->alphadec;
> + radius -= radius / nq_radiusdec;
> + rad = radius >> nq_radiusbiasshift;
> + if (rad <= 1)
> + rad = 0;
> + for (j=0; j<rad; j++)
> + nqi->radpower[j] = alpha*(((rad*rad - j*j)*nq_radbias)/(rad*rad));
> + }
> + }
> + //fprintf(stderr,"finished 1D learning: final alpha=%f !\n",((float)alpha)/nq_initalpha);
>
to be removed ? or use an av_log, instead
> +/**
> + * Generate 2x2 codebooks using the NeuQuant-based vector quantizer
> + */
> +int generate_codebooks2(roq_encoder_t *handle, roq_yuvcluster2_t *input, uint32_t inputCount, uint32_t goalCells, uint32_t *resultCount, roq_yuvcluster2_t **resultElements)
> +{
> + unsigned char *prep;
> + roq_yuvcluster2_t *cluster;
> + roq_yuvcluster2_t *result;
> + unsigned char resultCodebook[256 * 6];
> + unsigned char *c;
> + int i;
> + int numResults;
> +
> + neuquant_instance_t nqi;
> +
> + prep = av_malloc(6 * inputCount);
> + if (!prep)
> + return 0;
> +
> + c = prep;
> + cluster = input;
> + for (i=0; i<inputCount; i++) {
> + c[0] = cluster->y[0];
> + c[1] = cluster->y[1];
> + c[2] = cluster->y[2];
> + c[3] = cluster->y[3];
> + c[4] = cluster->u;
> + c[5] = cluster->v;
> + c += 6;
> + cluster++;
> + }
> +
> + numResults = 256;
> +
>
could be moved to the else statement below
> + i = basic_quant(prep, inputCount, 6, resultCodebook);
> + if (i)
> + numResults = i;
> + else {
> + nq_init_net(&nqi, prep, inputCount*6, 1, 6, 1);
> + nq_learn(&nqi);
> + nq_unbias_net(&nqi);
> + nq_dump_colormap(&nqi, resultCodebook);
> + for (i=0; i < nq_netsize; i++)
> + av_free(nqi.network[i]);
> + av_free(nqi.network);
> + }
> +
> + av_free(prep);
> +
> + result = av_malloc(sizeof(roq_yuvcluster2_t) * 256);
> + if (!result)
> + return 0;
> +
> + c = resultCodebook;
> + cluster = result;
> + for (i=0; i<numResults; i++) {
> + cluster->y[0] = c[0];
> + cluster->y[1] = c[1];
> + cluster->y[2] = c[2];
> + cluster->y[3] = c[3];
> + cluster->u = c[4];
> + cluster->v = c[5];
> + c += 6;
>
cluster->y[0] = *c++;
cluster->y[1] = *c++;
cluster->y[2] = *c++;
cluster->y[3] = *c++;
cluster->u = *c++;
cluster->v = *c++;
> +/**
> + * Generate 4x4 codebooks using the NeuQuant-based vector quantizer
> + */
> +int generate_codebooks4(roq_encoder_t *handle, roq_yuvcluster4_t *input, uint32_t inputCount, uint32_t goalCells, uint32_t *resultCount, roq_yuvcluster4_t **resultElements)
> +{
> + unsigned char *prep;
> + roq_yuvcluster4_t *cluster;
> + roq_yuvcluster4_t *result;
> + unsigned char resultCodebook[256 * 24];
> + unsigned char *c;
> + int i,j;
> + int numResults;
> +
> + neuquant_instance_t nqi;
> +
> + prep = av_malloc(24 * inputCount);
> + if (!prep)
> + return 0;
> +
> + c = prep;
> + cluster = input;
> + for (i=0; i<inputCount; i++) {
> + for (j=0; j<4; j++) {
> + c[0] = cluster->block[j].y[0];
> + c[1] = cluster->block[j].y[1];
> + c[2] = cluster->block[j].y[2];
> + c[3] = cluster->block[j].y[3];
> + c[4] = cluster->block[j].u;
> + c[5] = cluster->block[j].v;
>
ditto, but *c++ = ...
> + c += 6;
> + }
> + cluster++;
> + }
> +
> + numResults = 256;
> +
>
in the else below ?
btw, it seems there is some code that could be shared between the two
functions, no ?
> + i = basic_quant(prep, inputCount, 24, resultCodebook);
> + if (i)
> + numResults = i;
> + else {
> + nq_init_net(&nqi, prep, inputCount*24, 1, 24, 1);
> + nq_learn(&nqi);
> + nq_unbias_net(&nqi);
> + nq_dump_colormap(&nqi, resultCodebook);
> + for (i=0; i < nq_netsize; i++)
> + av_free(nqi.network[i]);
> + av_free(nqi.network);
> + }
>
[snip]
> +static int roq_encode_end(AVCodecContext *avctx)
> +{
> + roq_encoder_t *enc = avctx->priv_data;
> +
> + av_freep(&enc->pixels);
> +
> + if (enc->frameHistory1)
> + av_free(enc->frameHistory1);
> + if (enc->frameHistory2)
> + av_free(enc->frameHistory2);
> +
>
the if's are unneeded (av_free(NULL) is a no-op)
> Index: libavcodec/roqvideoenc.h
> ===================================================================
> --- libavcodec/roqvideoenc.h (revision 0)
> +++ libavcodec/roqvideoenc.h (revision 0)
> +/* NeuQuant Neural-Net Quantization Algorithm Interface definitions */
> +
> +#define nq_netsize 256 /**< number of colours used in the NeuQuant neural-net */
> +
>
usually, constant defines are uppercase, no ?
well, you now have my 2 cents ;)
Ben
--
Purple Labs S.A.
www.purplelabs.com
More information about the ffmpeg-devel
mailing list