[FFmpeg-devel] [PATCH] VP8 decoder

Alex Converse alex.converse
Tue Jun 22 18:53:02 CEST 2010


On Tue, Jun 22, 2010 at 12:50 PM, Vitor Sessak <vitor1001 at gmail.com> wrote:
> On 06/22/2010 04:43 PM, Ronald S. Bultje wrote:
>>
>> Hi,
>>
>> On Tue, Jun 15, 2010 at 10:22 PM, Michael Niedermayer<michaelni at gmx.at>
>> ?wrote:
>>>
>>> On Tue, Jun 15, 2010 at 09:12:30AM -0400, David Conrad wrote:
>>>>
>>>> +static void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x,
>>>> int mb_y,
>>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? uint8_t *intra4x4)
>>>> +{
>>>> + ? ?VP56RangeCoder *c =&s->c;
>>>> + ? ?int n;
>>>> +
>>>> + ? ?if (s->segmentation.update_map)
>>>> + ? ? ? ?mb->segment = vp8_rac_get_tree(c, vp8_segmentid_tree,
>>>> s->prob->segmentid);
>>>> +
>>>> + ? ?mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c,
>>>> s->prob->mbskip) : 0;
>>>> +
>>>> + ? ?if (s->keyframe) {
>>>> + ? ? ? ?mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra,
>>>> vp8_pred16x16_prob_intra);
>>>> +
>>>
>>>> + ? ? ? ?if (mb->mode == MODE_I4x4)
>>>> + ? ? ? ? ? ?decode_intra4x4_modes(c, intra4x4, s->b4_stride, 1);
>>>> + ? ? ? ?else
>>>
>>> {}
>>
>> Done.
>>
>>> also it might be worth to try patcheck
>>
>> Tried, didn't give me anything (I hope that's good).
>>
>>>> +static void vp8_luma_dc_wht_c(DCTELEM block[4][4][16], DCTELEM dc[16])
>>>> +{
>>>> + ? ?int i, t0, t1, t2, t3;
>>>> +
>>>> + ? ?for (i = 0; i< ?4; i++) {
>>>> + ? ? ? ?t0 = dc[0*4+i] + dc[3*4+i];
>>>> + ? ? ? ?t1 = dc[1*4+i] + dc[2*4+i];
>>>> + ? ? ? ?t2 = dc[1*4+i] - dc[2*4+i];
>>>> + ? ? ? ?t3 = dc[0*4+i] - dc[3*4+i];
>>>> +
>>>> + ? ? ? ?dc[0*4+i] = t0 + t1;
>>>> + ? ? ? ?dc[1*4+i] = t3 + t2;
>>>> + ? ? ? ?dc[2*4+i] = t0 - t1;
>>>> + ? ? ? ?dc[3*4+i] = t3 - t2;
>>>> + ? ?}
>>>> +
>>>> + ? ?for (i = 0; i< ?4; i++) {
>>>> + ? ? ? ?t0 = dc[i*4+0] + dc[i*4+3];
>>>> + ? ? ? ?t1 = dc[i*4+1] + dc[i*4+2];
>>>> + ? ? ? ?t2 = dc[i*4+1] - dc[i*4+2];
>>>> + ? ? ? ?t3 = dc[i*4+0] - dc[i*4+3];
>>>> +
>>>> + ? ? ? ?*block[i][0] = (t0 + t1 + 3)>> ?3;
>>>> + ? ? ? ?*block[i][1] = (t3 + t2 + 3)>> ?3;
>>>> + ? ? ? ?*block[i][2] = (t0 - t1 + 3)>> ?3;
>>>> + ? ? ? ?*block[i][3] = (t3 - t2 + 3)>> ?3;
>>>
>>> the +3 can be moved up and factored
>>
>> Done.
>>
>>>> +static av_always_inline void filter_common(uint8_t *p, int stride, int
>>>> is4tap)
>>>> +{
>>>> + ? ?LOAD_PIXELS
>>>> + ? ?int a, f1, f2;
>>>> +
>>>> + ? ?a = 3*(q0 - p0);
>>>> +
>>>> + ? ?if (is4tap)
>>>> + ? ? ? ?a += clip_int8(p1 - q1);
>>>> +
>>>> + ? ?a = clip_int8(a);
>>>> +
>>>
>>>> + ? ?// We deviate from the spec here with c(a+3)>> ?3
>>>> + ? ?// since that's what libvpx does.
>>>> + ? ?f1 = clip_int8(a+4)>> ?3;
>>>> + ? ?f2 = clip_int8(a+3)>> ?3;
>>>
>>> these only need cliping in one direction
>>> and or the clip_int8() could be avoided
>>
>> Changed to FFMIN(x, 127).
>>
>>>> + ? ?// Despite what the spec says, we do need to clamp here to
>>>> + ? ?// be bitexact with libvpx.
>>>> + ? ?p[-1*stride] = av_clip_uint8(p0 + f2);
>>>> + ? ?p[ 0*stride] = av_clip_uint8(q0 - f1);
>>>
>>> for the !is4tap i think theres a bit more cliping done than needed
>>
>> I looked through it and don't think this one can strictly be prevented.
>>
>>>> +static av_always_inline int simple_limit(uint8_t *p, int stride, int
>>>> flim)
>>>> +{
>>>> + ? ?LOAD_PIXELS
>>>> + ? ?return 2*FFABS(p0-q0) + FFABS(p1-q1)/2<= flim;
>>>
>>>>> 1 instead of /2 may be faster
>>
>> Changed.
>>
>>>> +static av_always_inline void filter_mbedge(uint8_t *p, int stride)
>>>> +{
>>>> + ? ?int a0, a1, a2, w;
>>>> +
>>>> + ? ?LOAD_PIXELS
>>>> +
>>>> + ? ?w = clip_int8(p1-q1);
>>>> + ? ?w = clip_int8(w + 3*(q0-p0));
>>>> +
>>>
>>>> + ? ?a0 = clip_int8((27*w + 63)>> ?7);
>>>> + ? ?a1 = clip_int8((18*w + 63)>> ?7);
>>>> + ? ?a2 = clip_int8(( 9*w + 63)>> ?7);
>>>
>>> the cliping is unneeded
>>
>> Indeed, removed.
>>
>>>> +static const uint8_t subpel_filters[7][6] = {
>>>> + ? ?{ 0, ? 6, 123, ?12, ? 1, ? 0 },
>>>> + ? ?{ 2, ?11, 108, ?36, ? 8, ? 1 },
>>>> + ? ?{ 0, ? 9, ?93, ?50, ? 6, ? 0 },
>>>> + ? ?{ 3, ?16, ?77, ?77, ?16, ? 3 },
>>>> + ? ?{ 0, ? 6, ?50, ?93, ? 9, ? 0 },
>>>> + ? ?{ 1, ? 8, ?36, 108, ?11, ? 2 },
>>>> + ? ?{ 0, ? 1, ?12, 123, ? 6, ? 0 },
>>>> +};
>>>> +
>>>> +
>>>> +#define FILTER_6TAP(src, F, stride) \
>>>> + ? ?av_clip_uint8((F[2]*src[x+0*stride] - F[1]*src[x-1*stride] +
>>>> F[0]*src[x-2*stride] + \
>>>> + ? ? ? ? ? ? ? ? ? F[3]*src[x+1*stride] - F[4]*src[x+2*stride] +
>>>> F[5]*src[x+3*stride] + 64)>> ?7)
>>>> +
>>>> +#define VP8_EPEL(SIZE) \
>>>> +static void put_vp8_epel ## SIZE ## _h_c(uint8_t *dst, uint8_t *src,
>>>> int stride, int h, int mx, int my) \
>>>> +{ \
>>>> + ? ?const uint8_t *filter = subpel_filters[mx-1]; \
>>>> + ? ?int x, y; \
>>>> +\
>>>> + ? ?for (y = 0; y< ?h; y++) { \
>>>> + ? ? ? ?for (x = 0; x< ?SIZE; x++) \
>>>> + ? ? ? ? ? ?dst[x] = FILTER_6TAP(src, filter, 1); \
>>>> + ? ? ? ?dst += stride; \
>>>> + ? ? ? ?src += stride; \
>>>> + ? ?} \
>>>> +} \
>>>> +\
>>>
>>> maybe it makes sense to write seperate functions for thr 4tap cases
>>> instead of 0*
>>
>> Added as suggested.
>>
>>> also i dont mind if you commit it to svn to continue development there.
>>> It surely is usefull alraedy to people if its faster than libvpx
>>
>> I have some x86 mmx optimizations already so I think this is a good
>> idea. Please let me know if the current patch is OK to apply.
>
> I risk to start a bikeshed, but should we mark it as experimental until it
> pass the conformance test suite?
>

IIRC experimental doesn't work that way on decoders



More information about the ffmpeg-devel mailing list