[FFmpeg-soc] Review of dirac code

Michael Niedermayer michaelni at gmx.at
Thu Aug 16 04:03:25 CEST 2007


Hi

the matroska muxer already found its way to ffmpeg-dev and was reviewed
once
marco asked for a full review somewhere, so here's my first dirac review

dirac is a mess, not Marco's implementation, that is quite nice considering
the complexity of dirac and Marco's lack of experience with video coding
but the design of dirac itself is well ummm ...
i would have expected higher quality from the BBC
for example you don't need 10-tap filters to do halfpel motion compensation
mpeg4 used 8tap filters with coefficients -1,3,-6,20,20,-6,3,-1 the
coefficients are all small which allows them to be calculated by simple
add and shift which is nice both for hardware and software still mpeg4
qpel is slow in practice
h.264 as well as snow use 6tap filters with coefficients 1,-5,20,20,-5,1 which are
faster than what mpeg4 uses while having nearly the same quality
also a-5b+20c can be rewritten as a+5(4c-b) which is really quite fast,
simple and can nicely be implemented by add and shift
now dirac uses the following
3 -11 25 -56 167 167 -56 25 -11 3

does that have any quality gain? i doubt it, but it's pretty slow, that is
certain

now the actual review ...
dirac_arith.c:
[...]
> static unsigned int arith_lookup[256] = {
>     0,    2,    5,    8,    11,   15,   20,   24,
>     29,   35,   41,   47,   53,   60,   67,   74,
>     82,   89,   97,   106,  114,  123,  132,  141,
>     150,  160,  170,  180,  190,  201,  211,  222,
>     233,  244,  256,  267,  279,  291,  303,  315,
>     327,  340,  353,  366,  379,  392,  405,  419,
>     433,  447,  461,  475,  489,  504,  518,  533,
>     548,  563,  578,  593,  609,  624,  640,  656,
>     672,  688,  705,  721,  738,  754,  771,  788,
>     805,  822,  840,  857,  875,  892,  910,  928,
>     946,  964,  983,  1001, 1020, 1038, 1057, 1076,
>     1095, 1114, 1133, 1153, 1172, 1192, 1211, 1231,
>     1251, 1271, 1291, 1311, 1332, 1352, 1373, 1393,
>     1414, 1435, 1456, 1477, 1498, 1520, 1541, 1562,
>     1584, 1606, 1628, 1649, 1671, 1694, 1716, 1738,
>     1760, 1783, 1806, 1828, 1851, 1874, 1897, 1920,
>     1935, 1942, 1949, 1955, 1961, 1968, 1974, 1980,
>     1985, 1991, 1996, 2001, 2006, 2011, 2016, 2021,
>     2025, 2029, 2033, 2037, 2040, 2044, 2047, 2050,
>     2053, 2056, 2058, 2061, 2063, 2065, 2066, 2068,
>     2069, 2070, 2071, 2072, 2072, 2072, 2072, 2072,
>     2072, 2071, 2070, 2069, 2068, 2066, 2065, 2063,
>     2060, 2058, 2055, 2052, 2049, 2045, 2042, 2038,
>     2033, 2029, 2024, 2019, 2013, 2008, 2002, 1996,
>     1989, 1982, 1975, 1968, 1960, 1952, 1943, 1934,
>     1925, 1916, 1906, 1896, 1885, 1874, 1863, 1851,
>     1839, 1827, 1814, 1800, 1786, 1772, 1757, 1742,
>     1727, 1710, 1694, 1676, 1659, 1640, 1622, 1602,
>     1582, 1561, 1540, 1518, 1495, 1471, 1447, 1422,
>     1396, 1369, 1341, 1312, 1282, 1251, 1219, 1186,
>     1151, 1114, 1077, 1037, 995,  952,  906,  857,
>     805, 750,   690,  625,  553,  471,  376,  255
> };

this fits in uint16_t / unsigned short which would save 512 bytes of space


[...]

>     arith->code      = get_bits_long(gb, 16);

get_bits() is enough for 16 bit


[...]
> /**
>  * Read a single bit using the arithmetic decoder
>  *
>  * @param arith state of arithmetic decoder
>  * @param context the context of the bit to read
>  * @return the bit read
>  */
> int dirac_arith_get_bit (dirac_arith_state_t arith, int context) {
>     GetBitContext *gb = arith->gb;
>     unsigned int prob_zero = arith->contexts[context];
>     unsigned int count;
>     unsigned int range_times_prob;
>     unsigned int ret;
> 
>     assert(!arith->pb);
> 
>     count             = arith->code - arith->low;
>     range_times_prob  = (arith->range * prob_zero) >> 16;
>     if (count >= range_times_prob) {
>         ret = 1;
>         arith->low   += range_times_prob;
>         arith->range -= range_times_prob;
>     } else {
>         ret = 0;
>         arith->range  = range_times_prob;
>     }
> 
>     /* Update contexts. */
>     if (ret)
>         arith->contexts[context] -= arith_lookup[arith->contexts[context] >> 8];
>     else
>         arith->contexts[context] += arith_lookup[255 - (arith->contexts[context] >> 8)];

this if/else can be merged with the previous


> 
>     while (arith->range <= 0x4000) {
>         if (((arith->low + arith->range - 1)^arith->low) >= 0x8000) {
>             arith->code ^= 0x4000;
>             arith->low  ^= 0x4000;
>         }
>         arith->low   <<= 1;
>         arith->range <<= 1;
>         arith->low    &= 0xFFFF;
>         arith->code  <<= 1;
>         if (arith->bits_left > 0) {
>             arith->code |= get_bits (gb, 1);
>             arith->bits_left--;
>         }
>         else {

the } placement differs from the previous if/else


[...]
> /**
>  * Write a signed int using the arithmetic coder
>  *
>  * @param arith        state of arithmetic coder
>  * @param context_set  the collection of contexts used to write the signed int
>  * @param i            value to write
>  */
> void dirac_arith_write_int(dirac_arith_state_t arith,
>                            struct dirac_arith_context_set *context_set,
>                            int i) {
>     dirac_arith_write_uint(arith, context_set, FFABS(i));

>     if (i > 0)
>         dirac_arith_put_bit(arith, context_set->sign, 0);
>     else if (i < 0)
>         dirac_arith_put_bit(arith, context_set->sign, 1);

if(i)
    dirac_arith_put_bit(arith, context_set->sign, i<0);

[...]

dirac_arith.h:
[...]
> typedef struct dirac_arith_state {
>     /* Arithmetic decoding.  */
>     unsigned int low;
>     unsigned int range;
>     unsigned int code;
>     unsigned int bits_left;
>     int carry;
>     unsigned int contexts[ARITH_CONTEXT_COUNT];
> 
>     GetBitContext *gb;
>     PutBitContext *pb;

GetBitContext gb;
PutBitContext pb;
would avoid a pointer dereference if these 2 are used in the innermost
loops, though maybe their use can be avoided by reading/writing 8bit
at a time similar to how our cabac reader and range coder works


> } *dirac_arith_state_t;
> 
> struct dirac_arith_context_set {
>     unsigned int follow[6];     ///< the first follow contexts
>     unsigned int follow_length; ///< the amount of follow contexts in follow
>     unsigned int data;          ///< context to read data
>     unsigned int sign;          ///< context to read the sign
> };

the follow_length can be removed, replaced by 6 and the new entries in the
follow array filled with the last value

[...]

dirac.c:
[...]

> /* Defaults for sequence parameters.  */
> static const struct sequence_parameters sequence_parameters_defaults[13] =
> {
>     /* Width   Height   Chroma format   Depth  */
>     {  640,    480,     2,              8  },
>     {  176,    120,     2,              8  },
>     {  176,    144,     2,              8  },
>     {  352,    240,     2,              8  },
>     {  352,    288,     2,              8  },
>     {  704,    480,     2,              8  },
>     {  704,    576,     2,              8  },
> 
>     {  720,    480,     2,              8  },
>     {  720,    576,     2,              8  },
>     {  1280,   720,     2,              8  },
>     {  1920,   1080,    2,              8  },
>     {  2048,   1556,    0,              16 },
>     {  4096,   3112,    0,              16 },
> };
> 
> /* Defaults for source parameters.  */
> static const struct source_parameters source_parameters_defaults[13] =
> {
>     { 0, 1, 0, {30, 1},        {1, 1},   640,  480,  0, 0, 0,  255,   128,   254,   0, 0, 0.2126, 0.0722, TRANSFER_FUNC_TV },
>     { 0, 1, 0, {15000, 1001},  {10, 11}, 176,  120,  0, 0, 0,  255,   128,   254,   1, 0, 0.299,  0.144,  TRANSFER_FUNC_TV },
>     { 0, 1, 0, {25, 2},        {12, 11}, 176,  144,  0, 0, 0,  255,   128,   254,   2, 0, 0.299,  0.144,  TRANSFER_FUNC_TV },
>     { 0, 1, 0, {15000, 1001},  {10, 11}, 352,  240,  0, 0, 0,  255,   128,   254,   1, 0, 0.299,  0.144,  TRANSFER_FUNC_TV },
>     { 0, 1, 0, {25, 2},        {12, 11}, 352,  288,  0, 0, 0,  255,   128,   254,   2, 0, 0.299,  0.144,  TRANSFER_FUNC_TV },
>     { 0, 1, 0, {15000, 1001},  {10, 11}, 704,  480,  0, 0, 0,  255,   128,   254,   1, 0, 0.299,  0.144,  TRANSFER_FUNC_TV },
>     { 0, 1, 0, {25, 2},        {12, 11}, 704,  576,  0, 0, 0,  255,   128,   254,   2, 0, 0.299,  0.144,  TRANSFER_FUNC_TV },
> 
>     { 0, 1, 0, {24000, 1001},  {10, 11}, 720,  480,  0, 0, 16, 235,   128,   224,   1, 0, 0.299,  0.144,  TRANSFER_FUNC_TV },
>     { 0, 1, 0, {35, 1},        {12, 11}, 720,  576,  0, 0, 16, 235,   128,   224,   2, 0, 0.299,  0.144,  TRANSFER_FUNC_TV },
>     { 0, 1, 0, {24, 1},        {1, 1},   1280, 720,  0, 0, 16, 235,   128,   224,   0, 0, 0.2126, 0.0722, TRANSFER_FUNC_TV },
>     { 0, 1, 0, {24, 1},        {1, 1},   1920, 1080, 0, 0, 16, 235,   128,   224,   0, 0, 0.2126, 0.0722, TRANSFER_FUNC_TV },
>     { 0, 1, 0, {24, 1},        {1, 1},   2048, 1536, 0, 0, 0,  65535, 32768, 65534, 3, 0, 0.25,   0.25,   TRANSFER_FUNC_LINEAR },
>     { 0, 1, 0, {24, 1},        {1, 1},   4096, 3072, 0, 0, 0,  65535, 32768, 65534, 3, 0, 0.25,   0.25,   TRANSFER_FUNC_LINEAR },
> };
> 
> /* Defaults for decoding parameters.  */
> static const struct decoding_parameters decoding_parameters_defaults[13] =
> {
>     { 4, 0, 1, 8,  12, 8,  12, 2, 1, 1, 1, 1, 1, 4, 3, 1, 1, 8, 6, 12, 8, 32, 32, 512  },
>     { 4, 0, 1, 4,   8, 4,   8, 2, 1, 1, 1, 1, 1, 4, 3, 1, 1, 8, 6, 12, 8, 16, 16, 512  },
>     { 4, 0, 1, 4,   8, 4,   8, 2, 1, 1, 1, 1, 1, 4, 3, 1, 1, 8, 6, 12, 8, 16, 16, 512  },
>     { 4, 0, 1, 8,  12, 8,  12, 2, 1, 1, 1, 1, 1, 4, 3, 1, 1, 8, 6, 12, 8, 32, 32, 512  },
>     { 4, 0, 1, 8,  12, 8,  12, 2, 1, 1, 1, 1, 1, 4, 3, 1, 1, 8, 6, 12, 8, 32, 32, 512  },
>     { 4, 0, 1, 8,  12, 8,  12, 2, 1, 1, 1, 1, 1, 4, 3, 1, 1, 8, 6, 12, 8, 32, 32, 512  },
>     { 4, 0, 1, 8,  12, 8,  12, 2, 1, 1, 1, 1, 1, 4, 3, 1, 1, 8, 6, 12, 8, 32, 32, 512  },
> 
>     { 4, 0, 1, 8,  12, 8,  12, 2, 1, 1, 1, 1, 1, 4, 3, 1, 1, 8, 6, 12, 8, 32, 32, 512  },
>     { 4, 0, 1, 8,  12, 8,  12, 2, 1, 1, 1, 1, 1, 4, 3, 1, 1, 8, 6, 12, 8, 32, 32, 512  },
>     { 4, 0, 1, 12, 16, 12, 16, 2, 1, 1, 1, 1, 1, 4, 3, 1, 1, 8, 6, 12, 8, 48, 48, 768  },
>     { 4, 0, 1, 16, 24, 16, 24, 2, 1, 1, 1, 1, 1, 4, 3, 1, 1, 8, 6, 12, 8, 48, 48, 1024 },
>     { 4, 6, 1, 16, 24, 16, 24, 2, 1, 1, 1, 1, 1, 4, 3, 1, 1, 8, 6, 12, 8, 48, 48, 1024 },
>     { 4, 6, 0, 16, 24, 16, 24, 2, 1, 1, 1, 1, 1, 4, 3, 1, 1, 8, 6, 12, 8, 48, 48, 1024 }
> };

maybe char/short or (u)intXY_t could be used in the structs for these tables
so as to reduce the size of the tables (except for things accessed in the
innermost loops, there int might be faster on some architectures ...)


> 
> static const AVRational preset_frame_rates[8] =
> {
>     {24000, 1001}, {24, 1}, {25, 1}, {30000, 1001},
>     {30, 1}, {50, 1}, {60000, 1001}, {60, 1}
> };

this duplicates ff_frame_rate_tab


> 
> static const AVRational preset_aspect_ratios[3] =
> {
>     {1, 1}, {10, 11}, {12, 11}
> };
> 
> static const int preset_luma_offset[3] = { 0, 16, 64 };
> static const int preset_luma_excursion[3] = { 255, 235, 876 };
> static const int preset_chroma_offset[3] = { 128, 128, 512 };
> static const int preset_chroma_excursion[3] = { 255, 224, 896 };
> 
> static const int preset_primaries[4] = { 0, 1, 2, 3 };
> static const int preset_matrix[4] = {0, 1, 1, 2 };

int isn't needed for these tables; a tiny amount of space could be
saved by using some smaller data type


[...]

> typedef int vect_t[2];
> 
> struct dirac_blockmotion {
>     int use_ref[2];
>     int use_global;
>     vect_t vect[2];
>     int dc[3];
> };

40byte per block ...
accesses to an array of structs with 40byte elements are not particularly
fast (32 would be faster), also using smaller datatypes would reduce the
amount of data cache this "needs"

it also might be worth to try several array of simple data types instead
of one array of structs, though i do not know if that will be faster or
slower


[...]
> typedef struct DiracContext {
>     int access_unit;
>     unsigned int profile;
>     unsigned int level;
> 
>     AVCodecContext *avctx;
>     GetBitContext *gb;

GetBitContext gb;
would avoid a pointer dereference


[...]
>     uint32_t ref1;            ///< first reference picture
>     uint32_t ref2;            ///< second reference picture
> 
>     uint8_t *ref1data;
>     int ref1width;
>     int ref1height;
>     uint8_t *ref2data;
>     int ref2width;
>     int ref2height;

refdata[2]
refwidth[2]
...
might allow some simplifications in the code


[...]
>     if (get_bits(gb, 1)) {

you could replace get_bits(gb, 1) by get_bits1()


>         s->sequence.luma_width = svq3_get_ue_golomb(gb);
>         s->sequence.luma_height = svq3_get_ue_golomb(gb);

these could be aligned like
s->sequence.luma_width  = svq3_get_ue_golomb(gb);
s->sequence.luma_height = svq3_get_ue_golomb(gb);

and all width/height values should be checked with avcodec_check_dimensions()
or your own code to avoid things like

av_malloc(width*height) where width*height overflows the int range
and thus a smaller array is allocated than the mathematical width*height


>     }
> 
>     /* Override the chroma format.  */
>     if (get_bits(gb, 1))
>         s->sequence.chroma_format = svq3_get_ue_golomb(gb);
> 

>     /* Override the chroma dimensions.  */
>     switch (s->sequence.chroma_format) {
>     case 0:
>         /* 4:4:4 */
>         s->sequence.chroma_width = s->sequence.luma_width;
>         s->sequence.chroma_height = s->sequence.luma_height;
>         s->chroma_hratio = 1;
>         s->chroma_vratio = 1;
>         break;
> 
>     case 1:
>         /* 4:2:2 */
>         s->sequence.chroma_width = s->sequence.luma_width >> 1;
>         s->sequence.chroma_height = s->sequence.luma_height;
>         s->chroma_hratio = 1;
>         s->chroma_vratio = 2;
>         break;
> 
>     case 2:
>         /* 4:2:0 */
>         s->sequence.chroma_width = s->sequence.luma_width >> 1;
>         s->sequence.chroma_height = s->sequence.luma_height >> 1;
>         s->chroma_hratio = 2;
>         s->chroma_vratio = 2;
>         break;
>     }


s->chroma_hratio = 1 + (s->sequence.chroma_format>1);
s->chroma_vratio = 1 + (s->sequence.chroma_format>0);
s->sequence.chroma_width  = s->sequence.luma_width  / s->chroma_hratio;
s->sequence.chroma_height = s->sequence.luma_height / s->chroma_vratio;

or
static const int chroma_ratio[3][2]={...
s->chroma_hratio = chroma_ratio[s->sequence.chroma_format][0];
...


[...]
>     /* Framerate.  */
>     if (get_bits(gb, 1)) {
>         int idx = svq3_get_ue_golomb(gb);
>         if (! idx) {
>             s->source.frame_rate.num = svq3_get_ue_golomb(gb);
>             s->source.frame_rate.den = svq3_get_ue_golomb(gb);
>         } else {
>             /* Use a pre-set framerate.  */
>             s->source.frame_rate = preset_frame_rates[idx - 1];
>         }

idx should be checked against the table size


[...]
>     video_format = svq3_get_ue_golomb(gb);
>     dprintf(s->avctx, "Video format: %d\n", video_format);
> 
>     /* Fill in defaults for the sequence parameters.  */
>     memcpy(&s->sequence, &sequence_parameters_defaults[video_format],
>            sizeof(s->sequence));

if(video_format > table size)
    return -1;
s->sequence= sequence_parameters_defaults[video_format];


[...]
> static int inline coeff_quant_factor(int idx) {
>     uint64_t base;
>     idx = FFMAX(idx, 0);

is <0 allowed? if not, this should be checked for somewhere and lead to failure
to decode the frame


[...]
> 
> /**
>  * Dequantize a coefficient
>  *
>  * @param coeff coefficient to dequantize
>  * @param qoffset quantizer offset
>  * @param qfactor quantizer factor
>  * @return dequantized coefficient
>  */
> static int inline coeff_dequant(int coeff,
>                                 int qoffset, int qfactor) {
>     int64_t magnitude = abs(coeff) * qfactor;

FFABS
int*int is int so you either need a cast to int64_t here or you don't need
int64_t at all


> 
>     if (! magnitude)
>         return 0;

assuming qfactor is not 0 this check could be done before the multiply


> 
>     magnitude += qoffset;
>     magnitude >>= 2;
> 
>     /* Reintroduce the sign.  */
>     if (coeff < 0)
>         magnitude = -magnitude;

also as the coeffs seem to be stored as absolute value, sign
it would be a lot more efficient if you would not combine them in
dirac_arith_read_int() and then split them again here just to
finally combine them again ...
something like
val= read uint
if(val){
    val=dequant(val)
    if(read_bit)
        val=-val;
}

should be faster


>     return magnitude;
> }
> 
> /**
>  * Calculate the horizontal position of a coefficient given a level,
>  * orientation and horizontal position within the subband.
>  *
>  * @param level subband level
>  * @param orientation orientation of the subband within the level
>  * @param x position within the subband
>  * @return horizontal position within the coefficient array
>  */
> static int inline coeff_posx(DiracContext *s, int level,
>                              subband_t orientation, int x) {
>     if (orientation == subband_hl || orientation == subband_hh)
>         return subband_width(s, level) + x;
> 
>     return x;
> }
> 
> /**
>  * Calculate the vertical position of a coefficient given a level,
>  * orientation and vertical position within the subband.
>  *
>  * @param level subband level
>  * @param orientation orientation of the subband within the level
>  * @param y position within the subband
>  * @return vertical position within the coefficient array
>  */
> static int inline coeff_posy(DiracContext *s, int level,
>                              subband_t orientation, int y) {
>     if (orientation == subband_lh || orientation == subband_hh)
>         return subband_height(s, level) + y;
> 
>     return y;
> }
> 
> /**
>  * Returns if the pixel has a zero neighbourhood (the coefficient at
>  * the left, top and left top of this coefficient are all zero)
>  *
>  * @param data coefficients
>  * @param level subband level
>  * @param orientation the orientation of the current subband
>  * @param v vertical position of the coefficient
>  * @param h horizontal position of the coefficient
>  * @return 1 if zero neighbourhood, otherwise 0
>  */
> static int zero_neighbourhood(DiracContext *s, int16_t *data, int level,
>                               subband_t orientation, int v, int h) {
>     int x = coeff_posx(s, level, orientation, h);
>     int y = coeff_posy(s, level, orientation, v);

everything that coeff_pos* does can be calculated
outside of this function, which seems to be not unimportant speed wise
basically I'd suggest making data point to the proper place
though maybe there are better solutions


> 
>     /* Check if there is a zero to the left and top left of this
>        coefficient.  */
>     if (v > 0 && ((data[x + (y - 1) * s->padded_width])
>                   || ( h > 0 && data[x + (y - 1) * s->padded_width - 1])))
>         return 0;
>     else if (h > 0 && data[x + y * s->padded_width - 1])
>         return 0;
> 
>     return 1;

if you make the data array slightly bigger and give it a 0 border 
(this might be too hard)
or if you have separate functions for the border and middle(h>0,v>0)
then this would be just a
return !(data[-s->padded_width]|data[-1]);
assuming data has already been corrected to point to x+y*padded_width


[...]
>     sign_pred = sign_predict(s, data, level, orientation, v, h);
> 
>     /* Calculate an index into context_sets_waveletcoeff.  */
>     idx = parent * 6 + (!nhood) * 3;
>     if (sign_pred == -1)
>         idx += 1;
>     else if (sign_pred == 1)
>         idx += 2;

the return value of sign_predict could already be what needs to be
added to idx that would avoid the double remapping of values


[...]
>     if (!blockcnt_one) {
>         /* Determine if this codeblock is a zero block.  */
>         zero = dirac_arith_get_bit(&s->arith, ARITH_CONTEXT_ZERO_BLOCK);
>     }
> 
>     if (zero)
>         return; /* All coefficients remain 0.  */

if (!blockcnt_one) {
    if(dirac_arith_get_bit(&s->arith, ARITH_CONTEXT_ZERO_BLOCK))
        return;
}


[...]
> /**
>  * Decode a subband
>  *
>  * @param data coefficients
>  * @param level subband level
>  * @param orientation orientation of the subband
>  */
> static int subband(DiracContext *s, int16_t *data, int level,
>                    subband_t orientation) {
>     GetBitContext *gb = s->gb;
>     int length;
>     int quant, qoffset, qfactor;
>     int x, y;
> 
>     length = svq3_get_ue_golomb(gb);
>     if (! length) {
>         align_get_bits(gb);
>     } else {
>         quant = svq3_get_ue_golomb(gb);
>         qfactor = coeff_quant_factor(quant);
>         qoffset = coeff_quant_offset(s, quant) + 2;
> 
>         dirac_arith_init(&s->arith, gb, length);
> 
>         for (y = 0; y < s->codeblocksv[level]; y++)
>             for (x = 0; x < s->codeblocksh[level]; x++)
>                 codeblock(s, data, level, orientation, x, y,
>                           qoffset, qfactor);
>         dirac_arith_flush(&s->arith);
>     }
> 
>     if (level == 0 && s->refs == 0)
>         intra_dc_prediction(s, data);

level can never be 0 here unless this function is merged with subband_dc()
which might be a good idea as they are quite similar


[...]
>     /* Setup the blen and bsep parameters for the chroma
>        component.  */
>     s->frame_decoding.chroma_xblen = s->frame_decoding.luma_xblen / s->chroma_hratio;
>     s->frame_decoding.chroma_yblen = s->frame_decoding.luma_yblen / s->chroma_vratio;
>     s->frame_decoding.chroma_xbsep = s->frame_decoding.luma_xbsep / s->chroma_hratio;
>     s->frame_decoding.chroma_ybsep = s->frame_decoding.luma_ybsep / s->chroma_vratio;

maybe s->chroma_?ratio; should be changed so that
a simple >> could be used instead of slow division


[...]
>             /* Pan/til parameters.  */
>             if (get_bits(gb, 1)) {
>                 s->globalmc.b[0] = dirac_get_se_golomb(gb);
>                 s->globalmc.b[1] = dirac_get_se_golomb(gb);
>             } else {
>                 s->globalmc.b[0] = 0;
>                 s->globalmc.b[1] = 0;
>             }
> 
>             /* Rotation/shear parameters.  */
>             if (get_bits(gb, 1)) {
>                 s->globalmc.zrs_exp = svq3_get_ue_golomb(gb);
>                 s->globalmc.A[0][0] = dirac_get_se_golomb(gb);
>                 s->globalmc.A[0][1] = dirac_get_se_golomb(gb);
>                 s->globalmc.A[1][0] = dirac_get_se_golomb(gb);
>                 s->globalmc.A[1][1] = dirac_get_se_golomb(gb);
>             } else {
>                 s->globalmc.zrs_exp = 0;
>                 s->globalmc.A[0][0] = 0;
>                 s->globalmc.A[0][1] = 0;
>                 s->globalmc.A[1][0] = 0;
>                 s->globalmc.A[1][1] = 0;
>             }
> 
>             /* Perspective parameters.  */
>             if (get_bits(gb, 1)) {
>                 s->globalmc.perspective_exp = svq3_get_ue_golomb(gb);
>                 s->globalmc.c[0]            = dirac_get_se_golomb(gb);
>                 s->globalmc.c[1]            = dirac_get_se_golomb(gb);
>             } else {
>                 s->globalmc.perspective_exp = 0;
>                 s->globalmc.c[0]            = 0;
>                 s->globalmc.c[1]            = 0;
>             }

these could be simplified with a memset(0) at the top


[...]
> static inline int split_prediction(DiracContext *s, int x, int y) {
>     if (x == 0 && y == 0)
>         return 0;
>     else if (y == 0)
>         return s->sbsplit[ y      * s->sbwidth + x - 1];
>     else if (x == 0)
>         return s->sbsplit[(y - 1) * s->sbwidth + x    ];
> 
>     /* XXX: Is not precisely the same as mean() in the spec.  */
>     return (  s->sbsplit[(y - 1) * s->sbwidth + x    ]
>             + s->sbsplit[ y      * s->sbwidth + x - 1]
>             + s->sbsplit[(y - 1) * s->sbwidth + x - 1] + 1) / 3;

considering that sbsplit is 0..2 the sum is just 0..6 so a LUT could be
used to avoid the slow division (and +1)


> }
> 
> /**
>  * Mode prediction
>  *
>  * @param x    horizontal position of the MC block
>  * @param y    vertical position of the MC block
>  * @param ref reference frame
>  */
> static inline int mode_prediction(DiracContext *s,
>                                   int x, int y, int ref) {
>     int cnt;
> 
>     if (x == 0 && y == 0)
>         return 0;
>     else if (y == 0)
>         return s->blmotion[ y      * s->blwidth + x - 1].use_ref[ref];
>     else if (x == 0)
>         return s->blmotion[(y - 1) * s->blwidth + x    ].use_ref[ref];
> 

>     /* Return the majority.  */
>     cnt = s->blmotion[ y      * s->blwidth + x - 1].use_ref[ref]
>         + s->blmotion[(y - 1) * s->blwidth + x    ].use_ref[ref]
>         + s->blmotion[(y - 1) * s->blwidth + x - 1].use_ref[ref];
> 
>     if (cnt >= 2)
>         return 1;
>     else
>         return 0;

return cnt>>1;


> }
> 
> /**
>  * Global mode prediction
>  *
>  * @param x    horizontal position of the MC block
>  * @param y    vertical position of the MC block
>  */
> static inline int global_mode_prediction(DiracContext *s,
>                                          int x, int y) {
>     int cnt;
> 
>     if (x == 0 && y == 0)
>         return 0;
>     else if (y == 0)
>         return s->blmotion[ y      * s->blwidth + x - 1].use_global;
>     else if (x == 0)
>         return s->blmotion[(y - 1) * s->blwidth + x    ].use_global;
> 
>     /* Return the majority.  */
>     cnt = s->blmotion[ y      * s->blwidth + x - 1].use_global
>         + s->blmotion[(y - 1) * s->blwidth + x    ].use_global
>         + s->blmotion[(y - 1) * s->blwidth + x - 1].use_global;
>     if (cnt >= 2)
>         return 1;
>     else
>         return 0;
> }

this is nearly duplicated from mode_prediction(), maybe that could be avoided


[...]
>     if (s->blmotion[y * s->blwidth + x].use_ref[0] == 0
>         || s->blmotion[y * s->blwidth + x].use_ref[0] == 0) {

could be vertically aligned; also, the second condition tests use_ref[0]
twice, it looks like it was meant to test use_ref[1]


[...]
> /**
>  * Predict the motion vector
>  *
>  * @param x    horizontal position of the MC block
>  * @param y    vertical position of the MC block
>  * @param ref reference frame
>  * @param dir direction horizontal=0, vertical=1
>  */
> static int motion_vector_prediction(DiracContext *s, int x, int y,
>                                     int ref, int dir) {
>     int cnt = 0;
>     int left = 0, top = 0, lefttop = 0;
> 
>     if (x > 0) {
>         /* Test if the block to the left has a motion vector for this
>            reference frame.  */
>         if (!s->blmotion[y * s->blwidth + x - 1].use_global
>             && s->blmotion[y * s->blwidth + x - 1].use_ref[ref]) {
>             left = s->blmotion[y * s->blwidth + x - 1].vect[ref][dir];
>             cnt++;
>         }
> 
>         /* This is the only reference, return it.  */
>         if (y == 0)
>             return left;
>     }
> 
>     if (y > 0) {
>         /* Test if the block above the current one has a motion vector
>            for this reference frame.  */
>         if (!s->blmotion[(y - 1) * s->blwidth + x].use_global
>             && s->blmotion[(y - 1) * s->blwidth + x].use_ref[ref]) {
>             top = s->blmotion[(y - 1) * s->blwidth + x].vect[ref][dir];
>             cnt++;
>             }
> 
>         /* This is the only reference, return it.  */
>         if (x == 0)
>             return top;
>     }
> 
>     if (x > 0 && y > 0) {
>         /* Test if the block above the current one has a motion vector
>            for this reference frame.  */
>         if (!s->blmotion[(y - 1) * s->blwidth + x - 1].use_global
>             && s->blmotion[(y - 1) * s->blwidth + x - 1].use_ref[ref]) {
>             lefttop = s->blmotion[(y - 1) * s->blwidth + x - 1].vect[ref][dir];
>             cnt++;
>         }
>     }
> 
>     /* No references for the prediction.  */
>     if (cnt == 0)
>         return 0;
> 
>     if (cnt == 1)
>         return left + top + lefttop;
> 
>     /* Return the median of two motion vectors.  */
>     if (cnt == 2)
>         return (left + top + lefttop + 1) >> 1;
> 
>     /* Return the median of three motion vectors.  */
>     return mid_pred(left, top, lefttop);
> }

all the checks above are done twice, once with dir=0 then dir=1, maybe that
can be avoided though it seems dirac's design doesn't make that easy :(


> 
> static int block_dc_prediction(DiracContext *s,
>                                int x, int y, int comp) {
>     int total = 0;
>     int cnt = 0;
> 
>     if (x > 0) {
>         if (   !s->blmotion[y * s->blwidth + x - 1].use_ref[0]
>             && !s->blmotion[y * s->blwidth + x - 1].use_ref[1]) {

maybe storing the 2 use_ref and use_global as flags in a single byte would
simplify some of the code, it also would reduce the space/cache needed for
blmotion


>             total += s->blmotion[y * s->blwidth + x - 1].dc[comp];
>             cnt++;
>         }
>     }
> 
>     if (y > 0) {
>         if (   !s->blmotion[(y - 1) * s->blwidth + x].use_ref[0]
>             && !s->blmotion[(y - 1) * s->blwidth + x].use_ref[1]) {
>             total += s->blmotion[(y - 1) * s->blwidth + x].dc[comp];
>             cnt++;
>         }
>     }
> 
>     if (x > 0 && y > 0) {
>         if (   !s->blmotion[(y - 1) * s->blwidth + x - 1].use_ref[0]
>             && !s->blmotion[(y - 1) * s->blwidth + x - 1].use_ref[1]) {
>             total += s->blmotion[(y - 1) * s->blwidth + x - 1].dc[comp];
>             cnt++;
>         }
>     }

if(x>0){
}
if(y>0){
    ...
    if(x>0){
    }
}

would avoid the && y>0

also &s->blmotion[y * s->blwidth + x] could be calculated at the top to
simplify the access


[...]
> static void unpack_block_dc(DiracContext *s, int x, int y, int comp) {
>     int res;
> 
>     s->blmotion[y * s->blwidth + x].dc[comp] = 0; /* XXX */
>     if (   s->blmotion[y * s->blwidth + x].use_ref[0]
>         || s->blmotion[y * s->blwidth + x].use_ref[1])
>         return;

the = 0 can be moved into the if(){}


[...]
>         for (x = 0; x < s->sbwidth; x++) {
>                         int q, p;
>             int blkcnt = 1 << s->sbsplit[y * s->sbwidth + x];

indentation



[...]
>     /* Unpack the motion vectors.  */
>     dirac_unpack_motion_vectors(s, 0, 0);
>     dirac_unpack_motion_vectors(s, 0, 1);
>     if (s->refs == 2) {
>         dirac_unpack_motion_vectors(s, 1, 0);
>         dirac_unpack_motion_vectors(s, 1, 1);
>     }

for(i=0; i<s->refs; i++){
    dirac_unpack_motion_vectors(s, i, 0);
    dirac_unpack_motion_vectors(s, i, 1);
}


[...]
>    /* Align for coefficient bitstream.  */
>     align_get_bits(gb);
> 
>      /* Unpack LL, level 0.  */
>     subband_dc(s, coeffs);

the comments are strangely indented


[...]
>     int16_t *synth_line;
>     int16_t *line_ll;
>     int16_t *line_lh;
>     int16_t *line_hl;
>     int16_t *line_hh;
> 
>     line_ll    = data;
>     line_hl    = data + width;
>     line_lh    = data + height * s->padded_width;
>     line_hh    = data + height * s->padded_width + width;
>     synth_line = synth;

declaration and initialization can be merged


[...]
> /**
>  * IDWT transform (5,3) for a specific subband
>  *
>  * @param data coefficients to transform
>  * @param level level of the current transform
>  * @return 0 when successful, otherwise -1 is returned
>  */
> static int dirac_subband_idwt_53(DiracContext *s,
>                                  int16_t *data, int level) {

the whole (inverse) wavelet transform related code could be split into its
own file


>     int16_t *synth, *synthline;
>     int x, y;
>     int width = subband_width(s, level);
>     int height = subband_height(s, level);
>     int synth_width = width  << 1;
>     int synth_height = height << 1;
> 
> START_TIMER
> 
>     synth = av_malloc(synth_width * synth_height * sizeof(int16_t));
>     if (!synth) {
>         av_log(s->avctx, AV_LOG_ERROR, "av_malloc() failed\n");
>         return -1;
>     }

there's no need to do a malloc() during idwt


> 
>     dirac_subband_idwt_reorder(s, data, synth, level);
> 
>     /* LeGall(5,3)
>        First lifting step)
>        Even, predict, s=5, t_{-1}=-1, t_0=9, t_1=9, t_2=-1:
>          A[2*n]   -= (-A[2*n-1] + A[2*n+1] + 2) >> 2
> 
>        Second lifting step)
>        Odd, update, s=1, t_0=1, t_1=1:
>          A[2*n+1] += (A[2*n] + A[2*n+2] + 1) >> 1
>     */
> 
>     /* Vertical synthesis: Lifting stage 1.  */
>     synthline = synth;
>     for (x = 0; x < synth_width; x++) {
>         synthline[x] -= (synthline[synth_width + x]
>                        + synthline[synth_width + x]
>                        + 2) >> 2;
>     }
>     synthline = synth + (synth_width << 1);
>     for (y = 1; y < height - 1; y++) {
>         for (x = 0; x < synth_width; x++) {
>             synthline[x] -= (synthline[x - synth_width]
>                            + synthline[x + synth_width]
>                            + 2) >> 2;
>         }
>         synthline += (synth_width << 1);
>     }
>     synthline = synth + (synth_height - 2) * synth_width;
>     for (x = 0; x < synth_width; x++) {
>         synthline[x] -= (synthline[x - synth_width]
>                        + synthline[x + synth_width]
>                        + 2) >> 2;
>     }
> 
>     /* Vertical synthesis: Lifting stage 2.  */
>     synthline = synth + synth_width;
>     for (x = 0; x < synth_width; x++)
>         synthline[x] += (synthline[x - synth_width]
>                        + synthline[x + synth_width]
>                        + 1) >> 1;
>     synthline = synth + (synth_width << 1);
>     for (y = 1; y < height - 1; y++) {
>         for (x = 0; x < synth_width; x++) {
>             synthline[x + synth_width] += (synthline[x]
>                                          + synthline[x + synth_width * 2]
>                                          + 1) >> 1;
>         }
>         synthline += (synth_width << 1);
>     }
>     synthline = synth + (synth_height - 1) * synth_width;
>     for (x = 0; x < synth_width; x++)
>         synthline[x] += (synthline[x - synth_width]
>                        + synthline[x - synth_width]
>                        + 1) >> 1;
> 
> 
>     /* Horizontal synthesis.  */
>     synthline = synth;
>     for (y = 0; y < synth_height; y++) {
> 
>         /* Lifting stage 1.  */
>         synthline[0] -= (synthline[1]
>                        + synthline[1]
>                        + 2) >> 2;
>         for (x = 1; x < width - 1; x++) {
>             synthline[2*x] -= (synthline[2*x - 1]
>                              + synthline[2*x + 1]
>                              + 2) >> 2;
>         }
>         synthline[synth_width - 2] -= (synthline[synth_width - 3]
>                                      + synthline[synth_width - 1]
>                                      + 2) >> 2;
> 
>         /* Lifting stage 2.  */
>         for (x = 0; x < width - 1; x++) {
>             synthline[2*x + 1] += (synthline[2*x]
>                                  + synthline[2*x + 2]
>                                  + 1) >> 1;
>         }
>         synthline[synth_width - 1] += (synthline[synth_width - 2]
>                                      + synthline[synth_width - 2]
>                                      + 1) >> 1;
>         synthline += synth_width;
>     }

as already said the stages can be interleaved so that only a single pass
is done over the data, this should significantly speed the code up
as it's likely limited by memory speed ...


[...]

>             for (i = 0; i <= 4; i++) {
>                 val += t[i] * (li1[x] + li2[x]);
> 
>                 li1 -= refframe->linesize[comp];
>                 li2 += refframe->linesize[comp];
>             }

i would unroll this loop by hand ...


> 
>             val += 128;

Initializing with "int val = 128;" above avoids this add.


[...]
> 
> /**
>  * Get a pixel from the halfpel interpolated frame
>  *
>  * @param refframe frame to grab the upconverted pixel from
>  * @param width    frame width
>  * @param height   frame height
>  * @param x        horizontal pixel position
>  * @param y        vertical pixel position
>  */
> static inline int get_halfpel(uint8_t *refframe, int width, int height,
>                               int x, int y) {
>     int xpos;
>     int ypos;
> 
>     xpos = av_clip(x, 0, width  - 1);
>     ypos = av_clip(y, 0, height - 1);
> 
>     return refframe[xpos + ypos * width];
> }
> 
> /**
>  * Upconvert pixel (qpel/eighth-pel)
>  *
>  * @param refframe frame to grab the upconverted pixel from
>  * @param width    frame width
>  * @param height   frame height
>  * @param x        horizontal pixel position
>  * @param y        vertical pixel position
>  * @param comp     component
>  */
> static int upconvert(DiracContext *s, uint8_t *refframe,
>                      int width, int height, int x, int y, int comp) {
>     int hx, hy;
>     int rx, ry;
>     int w00, w01, w10, w11;
>     int val = 0;
> 
>     if (s->frame_decoding.mv_precision == 0
>         || s->frame_decoding.mv_precision == 1)
>         return get_halfpel(refframe, width, height, x, y);
> 
>     hx = x >> (s->frame_decoding.mv_precision - 1);
>     hy = y >> (s->frame_decoding.mv_precision - 1);
>     rx = x - (hx << (s->frame_decoding.mv_precision - 1));
>     ry = y - (hy << (s->frame_decoding.mv_precision - 1));
> 
>     /* Calculate weights.  */
>     w00 = ((1 << (s->frame_decoding.mv_precision - 1)) - ry)
>         * ((1 << (s->frame_decoding.mv_precision - 1)) - rx);
>     w01 = ((1 << (s->frame_decoding.mv_precision - 1)) - ry) * rx;
>     w10 = ((1 << (s->frame_decoding.mv_precision - 1)) - rx) * ry;
>     w11 = ry * rx;
> 
>     val += w00 * get_halfpel(refframe, width, height, hx    , hy    );
>     val += w01 * get_halfpel(refframe, width, height, hx + 1, hy    );
>     val += w10 * get_halfpel(refframe, width, height, hx    , hy + 1);
>     val += w11 * get_halfpel(refframe, width, height, hx + 1, hy + 1);
>     val += 1 << (s->frame_decoding.mv_precision - 1);
> 
>     return val >> s->frame_decoding.mv_precision;
> }
> 
> /**
>  * Calculate WH or WV of the spatial weighting matrix
>  *
>  * @param i       block position
>  * @param x       current pixel
>  * @param bsep    block spacing
>  * @param blen    block length
>  * @param offset  xoffset/yoffset
>  * @param blocks  number of blocks
>  */
> static inline int spatial_wt(int i, int x, int bsep, int blen,
>                              int offset, int blocks) {
>     int pos = x - (i * bsep - offset);
>     int max;
> 
>     max = 2 * (blen - bsep);
>     if (i == 0 && pos < (blen >> 1))
>         return max;
>     else if (i == blocks - 1 && pos >= (blen >> 1))
>         return max;
>     else
>         return av_clip(blen - FFABS(2*pos - (blen - 1)), 0, max);
> }
> 
> /**
>  * Motion Compensation with two reference frames
>  *
>  * @param coeffs     coefficients to add the DC to
>  * @param i          horizontal position of the MC block
>  * @param j          vertical position of the MC block
>  * @param xstart     top left coordinate of the MC block
>  * @param ystop      top left coordinate of the MC block
>  * @param xstop      bottom right coordinate of the MC block
>  * @param ystop      bottom right coordinate of the MC block
>  * @param ref1       first reference frame
>  * @param ref2       second reference frame
>  * @param currblock  MC block to use
>  * @param comp       component
>  */
> static void motion_comp_block2refs(DiracContext *s, int16_t *coeffs,
>                                    int i, int j, int xstart, int xstop,
>                                    int ystart, int ystop, uint8_t *ref1,
>                                    uint8_t *ref2,
>                                    struct dirac_blockmotion *currblock,
>                                    int comp) {
>     int x, y;
>     int16_t *line;
>     int px1, py1;
>     int px2, py2;
>     int vect1[2];
>     int vect2[2];
> 
>     vect1[0] = currblock->vect[0][0];
>     vect1[1] = currblock->vect[0][1];
>     vect2[0] = currblock->vect[1][0];
>     vect2[1] = currblock->vect[1][1];
> 
>     if (comp != 0) {
>         if (s->chroma_hratio) {
>             vect1[0] >>= 1;
>             vect2[0] >>= 1;
>         }
>         if (s->chroma_vratio) {
>             vect1[1] >>= 1;
>             vect2[1] >>= 1;
>         }
>     }
> 
>     line = &coeffs[s->width * ystart];
>     for (y = ystart; y < ystop; y++) {
>         for (x = xstart; x < xstop; x++) {
>             int val1 = 0;
>             int val2 = 0;
>             int val = 0;
> 
>             if (s->frame_decoding.mv_precision > 0) {
>                 px1 = (x << s->frame_decoding.mv_precision) + vect1[0];
>                 py1 = (y << s->frame_decoding.mv_precision) + vect1[1];
>                 px2 = (x << s->frame_decoding.mv_precision) + vect2[0];
>                 py2 = (y << s->frame_decoding.mv_precision) + vect2[1];
>             } else {
>                 px1 = (x + vect1[0]) << 1;
>                 py1 = (y + vect1[1]) << 1;
>                 px2 = (x + vect2[0]) << 1;
>                 py2 = (y + vect2[1]) << 1;
>             }
> 
>             val1 = upconvert(s, ref1, s->ref1width, s->ref1height,
>                              px1, py1, comp);
>             val1 *= s->frame_decoding.picture_weight_ref1;
> 
>             val2 = upconvert(s, ref2, s->ref2width, s->ref2height,
>                              px2, py2, comp);
>             val2 *= s->frame_decoding.picture_weight_ref2;
> 
>             val = val1 + val2;
>             val = (val
>                    * spatial_wt(i, x, s->xbsep, s->xblen,
>                                 s->xoffset, s->current_blwidth)
>                    * spatial_wt(j, y, s->ybsep, s->yblen,
>                                 s->yoffset, s->current_blheight));
> 
>             line[x] += val;
>         }
>         line += s->width;
>     }
> }
> 
> /**
>  * Motion Compensation with one reference frame
>  *
>  * @param coeffs     coefficients to add the DC to
>  * @param i          horizontal position of the MC block
>  * @param j          vertical position of the MC block
>  * @param xstart     top left coordinate of the MC block
>  * @param ystop      top left coordinate of the MC block
>  * @param xstop      bottom right coordinate of the MC block
>  * @param ystop      bottom right coordinate of the MC block
>  * @param refframe   reference frame
>  * @param ref        0=first refframe 1=second refframe
>  * @param currblock  MC block to use
>  * @param comp       component
>  */
> static void motion_comp_block1ref(DiracContext *s, int16_t *coeffs,
>                                   int i, int j, int xstart, int xstop,
>                                   int ystart, int ystop, uint8_t *refframe,
>                                   int ref,
>                                   struct dirac_blockmotion *currblock,
>                                   int comp) {
>     int x, y;
>     int16_t *line;
>     int px, py;
>     int vect[2];
> 
>         vect[0] = currblock->vect[ref][0];
>         vect[1] = currblock->vect[ref][1];
> 
>     if (comp != 0) {
>         if (s->chroma_hratio)
>             vect[0] >>= 1;
>         if (s->chroma_vratio)
>             vect[1] >>= 1;
>     }
> 
>     line = &coeffs[s->width * ystart];
>     for (y = ystart; y < ystop; y++) {
>         for (x = xstart; x < xstop; x++) {
>             int val = 0;
> 
>             if (s->frame_decoding.mv_precision > 0) {
>                 px = (x << s->frame_decoding.mv_precision) + vect[0];
>                 py = (y << s->frame_decoding.mv_precision) + vect[1];
>             } else {
>                 px = (x + vect[0]) << 1;
>                 py = (y + vect[1]) << 1;
>             }
> 
>             val = upconvert(s, refframe, s->ref1width, s->ref1height,
>                             px, py, comp);
>             val *= s->frame_decoding.picture_weight_ref1
>                  + s->frame_decoding.picture_weight_ref2;
> 
>             val = (val
>                    * spatial_wt(i, x, s->xbsep, s->xblen,
>                                 s->xoffset, s->current_blwidth)
>                    * spatial_wt(j, y, s->ybsep, s->yblen,
>                                 s->yoffset, s->current_blheight));
> 
>             line[x] += val;
>         }
>         line += s->width;
>     }
> }

This is VERY inefficient; most of what is done here does not need to be done
per pixel, or can be precalculated.


[...]
>     if (cacheframe1)
>         s->refframes[refidx1].halfpel[comp] = s->ref1data;
>     else
>         av_free(s->ref1data);
> 
>     if (cacheframe2)
>         s->refframes[refidx2].halfpel[comp] = s->ref2data;
>     else
>         av_free(s->ref2data);

av_freep() might be safer here, to avoid mistakenly using freed memory.


[...]
>         if (comp == 0) {
>             width = s->sequence.luma_width;
>             height = s->sequence.luma_height;
>             s->padded_width = s->padded_luma_width;
>             s->padded_height = s->padded_luma_height;
>         } else {
>             width = s->sequence.chroma_width;
>             height = s->sequence.chroma_height;
>             s->padded_width = s->padded_chroma_width;
>             s->padded_height = s->padded_chroma_height;
>         }

width  = s->sequence.width [!!comp];
height = s->sequence.height[!!comp];
...

could be used if they were in an array[2].


[...]
>     if (s->refs) {
>         s->ref1 = dirac_get_se_golomb(gb) + s->picnum;
>         if (s->refs == 2)
>             s->ref2 = dirac_get_se_golomb(gb) + s->picnum;
>     }

for(i=0; i<s->refs; i++)
    s->ref[i] = dirac_get_se_golomb(gb) + s->picnum;


[...]
>             /* Override the default partitioning.  */
>             if (get_bits(gb, 1)) {
>                 for (i = 0; i <= s->frame_decoding.wavelet_depth; i++) {
>                     s->codeblocksh[i] = svq3_get_ue_golomb(gb);
>                     s->codeblocksv[i] = svq3_get_ue_golomb(gb);

These, and possibly others, should be checked for validity (> 0). Also,
svq3_get_ue_golomb() can return < 0 for too-large values, I think — or
codeblocks* should be unsigned.

[...]

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Why not whip the teacher when the pupil misbehaves? -- Diogenes of Sinope
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-soc/attachments/20070816/a1aa8b49/attachment.pgp>


More information about the FFmpeg-soc mailing list