[FFmpeg-devel] [PATCH] VC1: merge put/add_pixels with IDCT8x8.
Ronald S. Bultje
rsbultje
Sat Feb 19 18:20:06 CET 2011
---
libavcodec/dsputil.c | 28 +++++++-------
libavcodec/dsputil.h | 8 ++++
libavcodec/vc1.c | 37 ++++++++++++++++++-
libavcodec/vc1dec.c | 101 ++++++++++++++++++++++---------------------------
libavcodec/vc1dsp.c | 58 ++++++++++++++++++++++++-----
libavcodec/vc1dsp.h | 8 +++-
6 files changed, 158 insertions(+), 82 deletions(-)
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index f7151b8..1f7bd4c 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -437,8 +437,8 @@ static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
}
-static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
- int line_size)
+void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
+ int line_size)
{
int i;
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
@@ -493,9 +493,9 @@ static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels
}
}
-static void put_signed_pixels_clamped_c(const DCTELEM *block,
- uint8_t *restrict pixels,
- int line_size)
+void ff_put_signed_pixels_clamped_c(const DCTELEM *block,
+ uint8_t *restrict pixels,
+ int line_size)
{
int i, j;
@@ -535,8 +535,8 @@ static void put_pixels_nonclamped_c(const DCTELEM *block, uint8_t *restrict pixe
}
}
-static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
- int line_size)
+void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
+ int line_size)
{
int i;
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
@@ -3961,22 +3961,22 @@ void ff_wmv2_idct_c(short * block){
static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
{
ff_wmv2_idct_c(block);
- put_pixels_clamped_c(block, dest, line_size);
+ ff_put_pixels_clamped_c(block, dest, line_size);
}
static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
{
ff_wmv2_idct_c(block);
- add_pixels_clamped_c(block, dest, line_size);
+ ff_add_pixels_clamped_c(block, dest, line_size);
}
static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
{
j_rev_dct (block);
- put_pixels_clamped_c(block, dest, line_size);
+ ff_put_pixels_clamped_c(block, dest, line_size);
}
static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
{
j_rev_dct (block);
- add_pixels_clamped_c(block, dest, line_size);
+ ff_add_pixels_clamped_c(block, dest, line_size);
}
static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
@@ -4135,10 +4135,10 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
c->get_pixels = get_pixels_c;
c->diff_pixels = diff_pixels_c;
- c->put_pixels_clamped = put_pixels_clamped_c;
- c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
+ c->put_pixels_clamped = ff_put_pixels_clamped_c;
+ c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_c;
c->put_pixels_nonclamped = put_pixels_nonclamped_c;
- c->add_pixels_clamped = add_pixels_clamped_c;
+ c->add_pixels_clamped = ff_add_pixels_clamped_c;
c->add_pixels8 = add_pixels8_c;
c->add_pixels4 = add_pixels4_c;
c->sum_abs_dctelem = sum_abs_dctelem_c;
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index f97b2b5..ac5b7e0 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -198,6 +198,14 @@ void ff_emulated_edge_mc(uint8_t *buf, const uint8_t *src, int linesize,
int block_w, int block_h,
int src_x, int src_y, int w, int h);
+void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict ptr,
+ int linesize);
+void ff_put_signed_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict ptr,
+ int linesize);
+void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict ptr,
+ int linesize);
+
+
/**
* DSPContext.
*/
diff --git a/libavcodec/vc1.c b/libavcodec/vc1.c
index 8bd6647..d73a2c9 100644
--- a/libavcodec/vc1.c
+++ b/libavcodec/vc1.c
@@ -30,6 +30,7 @@
#include "avcodec.h"
#include "mpegvideo.h"
#include "vc1.h"
+#include "vc1dsp.h"
#include "vc1data.h"
#include "msmpeg4data.h"
#include "unary.h"
@@ -280,6 +281,36 @@ static int vop_dquant_decoding(VC1Context *v)
static int decode_sequence_header_adv(VC1Context *v, GetBitContext *gb);
+/**
+ * Run ff_simple_idct() on block, then hand the transformed block to
+ * ff_put_signed_pixels_clamped_c() to be written at dest.
+ */
+static void simple_idct_put_signed(uint8_t *dest, int line_size,
+                                   DCTELEM *block)
+{
+    ff_simple_idct(block);
+    ff_put_signed_pixels_clamped_c(block, dest, line_size);
+}
+
+/**
+ * Range-reduced variant of the signed put: run ff_simple_idct() on block,
+ * double each of the 64 coefficients, then pass the block to
+ * ff_put_signed_pixels_clamped_c().
+ */
+static void simple_idct_put_signed_rangered(uint8_t *dest, int line_size,
+                                            DCTELEM *block)
+{
+    DCTELEM *coef = block;
+    DCTELEM *const end = block + 64;
+
+    ff_simple_idct(block);
+    /* scale every coefficient by two (range expansion) */
+    while (coef < end) {
+        *coef <<= 1;
+        coef++;
+    }
+    ff_put_signed_pixels_clamped_c(block, dest, line_size);
+}
+
+/**
+ * Run ff_simple_idct() on block, then hand the transformed block to
+ * ff_put_pixels_clamped_c() to be written at dest.
+ */
+static void simple_idct_put(uint8_t *dest, int line_size,
+                            DCTELEM *block)
+{
+    ff_simple_idct(block);
+    ff_put_pixels_clamped_c(block, dest, line_size);
+}
+
+/**
+ * Range-reduced variant of the unsigned put: run ff_simple_idct() on block,
+ * expand each of the 64 coefficients as (x - 64) << 1, then pass the block
+ * to ff_put_pixels_clamped_c().
+ *
+ * The "- 64" mirrors what the removed vc1_put_block() did before calling
+ * put_pixels_clamped(). A bare "<<= 1" is only correct on the signed path;
+ * on this path it would leave every output sample 128 too high.
+ */
+static void simple_idct_put_rangered(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    int i;
+
+    ff_simple_idct(block);
+    for (i = 0; i < 64; i++) block[i] = (block[i] - 64) << 1;
+    ff_put_pixels_clamped_c(block, dest, line_size);
+}
+
/**
* Decode Simple/Main Profiles sequence header
* @see Figure 7-8, p16-17
@@ -337,7 +368,11 @@ int vc1_decode_sequence_header(AVCodecContext *avctx, VC1Context *v, GetBitConte
v->res_fasttx = get_bits1(gb);
if (!v->res_fasttx)
{
- v->vc1dsp.vc1_inv_trans_8x8 = ff_simple_idct;
+ v->vc1dsp.vc1_inv_trans_8x8_add = ff_simple_idct_add;
+ v->vc1dsp.vc1_inv_trans_8x8_put[0] = simple_idct_put_signed;
+ v->vc1dsp.vc1_inv_trans_8x8_put[1] = simple_idct_put_signed_rangered;
+ v->vc1dsp.vc1_inv_trans_8x8_put[2] = simple_idct_put;
+ v->vc1dsp.vc1_inv_trans_8x8_put[3] = simple_idct_put_rangered;
v->vc1dsp.vc1_inv_trans_8x4 = ff_simple_idct84_add;
v->vc1dsp.vc1_inv_trans_4x8 = ff_simple_idct48_add;
v->vc1dsp.vc1_inv_trans_4x4 = ff_simple_idct44_add;
diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c
index 7eb9576..1704bcd 100644
--- a/libavcodec/vc1dec.c
+++ b/libavcodec/vc1dec.c
@@ -187,39 +187,6 @@ static void vc1_loop_filter_iblk(VC1Context *v, int pq)
}
}
-/** Put block onto picture
- */
-static void vc1_put_block(VC1Context *v, DCTELEM block[6][64])
-{
- uint8_t *Y;
- int ys, us, vs;
- DSPContext *dsp = &v->s.dsp;
-
- if(v->rangeredfrm) {
- int i, j, k;
- for(k = 0; k < 6; k++)
- for(j = 0; j < 8; j++)
- for(i = 0; i < 8; i++)
- block[k][i + j*8] = (block[k][i + j*8] - 64) << 1;
-
- }
- ys = v->s.current_picture.linesize[0];
- us = v->s.current_picture.linesize[1];
- vs = v->s.current_picture.linesize[2];
- Y = v->s.dest[0];
-
- dsp->put_pixels_clamped(block[0], Y, ys);
- dsp->put_pixels_clamped(block[1], Y + 8, ys);
- Y += ys * 8;
- dsp->put_pixels_clamped(block[2], Y, ys);
- dsp->put_pixels_clamped(block[3], Y + 8, ys);
-
- if(!(v->s.flags & CODEC_FLAG_GRAY)) {
- dsp->put_pixels_clamped(block[4], v->s.dest[1], us);
- dsp->put_pixels_clamped(block[5], v->s.dest[2], vs);
- }
-}
-
/** Do motion compensation over 1 macroblock
* Mostly adapted hpel_motion and qpel_motion from mpegvideo.c
*/
@@ -2042,8 +2009,7 @@ static int vc1_decode_p_block(VC1Context *v, DCTELEM block[64], int n, int mquan
if(i==1)
v->vc1dsp.vc1_inv_trans_8x8_dc(dst, linesize, block);
else{
- v->vc1dsp.vc1_inv_trans_8x8(block);
- s->dsp.add_pixels_clamped(block, dst, linesize);
+ v->vc1dsp.vc1_inv_trans_8x8_add(dst, linesize, block);
}
if(apply_filter && cbp_top & 0xC)
v->vc1dsp.vc1_v_loop_filter8(dst, linesize, v->pq);
@@ -2150,7 +2116,7 @@ static int vc1_decode_p_mb(VC1Context *v)
{
MpegEncContext *s = &v->s;
GetBitContext *gb = &s->gb;
- int i, j;
+ int i;
int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
int cbp; /* cbp decoding stuff */
int mqdiff, mquant; /* MB quantization */
@@ -2165,6 +2131,7 @@ static int vc1_decode_p_mb(VC1Context *v)
int skipped, fourmv;
int block_cbp = 0, pat;
int apply_loop_filter;
+ vc1_idct_func idct8x8_fn;
mquant = v->pq; /* Loosy initialization */
@@ -2216,6 +2183,7 @@ static int vc1_decode_p_mb(VC1Context *v)
VC1_TTMB_VLC_BITS, 2);
if(!s->mb_intra) vc1_mc_1mv(v, 0);
dst_idx = 0;
+ idct8x8_fn = v->vc1dsp.vc1_inv_trans_8x8_put[!!v->rangeredfrm];
for (i=0; i<6; i++)
{
s->dc_val[0][s->block_index[i]] = 0;
@@ -2233,9 +2201,9 @@ static int vc1_decode_p_mb(VC1Context *v)
vc1_decode_intra_block(v, s->block[i], i, val, mquant, (i&4)?v->codingset2:v->codingset);
if((i>3) && (s->flags & CODEC_FLAG_GRAY)) continue;
- v->vc1dsp.vc1_inv_trans_8x8(s->block[i]);
- if(v->rangeredfrm) for(j = 0; j < 64; j++) s->block[i][j] <<= 1;
- s->dsp.put_signed_pixels_clamped(s->block[i], s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
+ idct8x8_fn(s->dest[dst_idx] + off,
+ i & 4 ? s->uvlinesize : s->linesize,
+ s->block[i]);
if(v->pq >= 9 && v->overlap) {
if(v->c_avail)
v->vc1dsp.vc1_h_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
@@ -2349,6 +2317,7 @@ static int vc1_decode_p_mb(VC1Context *v)
}
if (!v->ttmbf && coded_inter)
ttmb = get_vlc2(gb, ff_vc1_ttmb_vlc[v->tt_index].table, VC1_TTMB_VLC_BITS, 2);
+ idct8x8_fn = v->vc1dsp.vc1_inv_trans_8x8_put[!!v->rangeredfrm];
for (i=0; i<6; i++)
{
dst_idx += i >> 2;
@@ -2364,9 +2333,9 @@ static int vc1_decode_p_mb(VC1Context *v)
vc1_decode_intra_block(v, s->block[i], i, is_coded[i], mquant, (i&4)?v->codingset2:v->codingset);
if((i>3) && (s->flags & CODEC_FLAG_GRAY)) continue;
- v->vc1dsp.vc1_inv_trans_8x8(s->block[i]);
- if(v->rangeredfrm) for(j = 0; j < 64; j++) s->block[i][j] <<= 1;
- s->dsp.put_signed_pixels_clamped(s->block[i], s->dest[dst_idx] + off, (i&4)?s->uvlinesize:s->linesize);
+ idct8x8_fn(s->dest[dst_idx] + off,
+ i & 4 ? s->uvlinesize : s->linesize,
+ s->block[i]);
if(v->pq >= 9 && v->overlap) {
if(v->c_avail)
v->vc1dsp.vc1_h_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
@@ -2442,7 +2411,7 @@ static void vc1_decode_b_mb(VC1Context *v)
{
MpegEncContext *s = &v->s;
GetBitContext *gb = &s->gb;
- int i, j;
+ int i;
int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
int cbp = 0; /* cbp decoding stuff */
int mqdiff, mquant; /* MB quantization */
@@ -2455,6 +2424,7 @@ static void vc1_decode_b_mb(VC1Context *v)
int skipped, direct;
int dmv_x[2], dmv_y[2];
int bmvtype = BMV_TYPE_BACKWARD;
+ vc1_idct_func idct8x8_fn;
mquant = v->pq; /* Loosy initialization */
s->mb_intra = 0;
@@ -2552,6 +2522,7 @@ static void vc1_decode_b_mb(VC1Context *v)
}
}
dst_idx = 0;
+ idct8x8_fn = v->vc1dsp.vc1_inv_trans_8x8_put[!!v->rangeredfrm];
for (i=0; i<6; i++)
{
s->dc_val[0][s->block_index[i]] = 0;
@@ -2569,9 +2540,9 @@ static void vc1_decode_b_mb(VC1Context *v)
vc1_decode_intra_block(v, s->block[i], i, val, mquant, (i&4)?v->codingset2:v->codingset);
if((i>3) && (s->flags & CODEC_FLAG_GRAY)) continue;
- v->vc1dsp.vc1_inv_trans_8x8(s->block[i]);
- if(v->rangeredfrm) for(j = 0; j < 64; j++) s->block[i][j] <<= 1;
- s->dsp.put_signed_pixels_clamped(s->block[i], s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
+ idct8x8_fn(s->dest[dst_idx] + off,
+ i & 4 ? s->uvlinesize : s->linesize,
+ s->block[i]);
} else if(val) {
vc1_decode_p_block(v, s->block[i], i, mquant, ttmb, first_block, s->dest[dst_idx] + off, (i&4)?s->uvlinesize:s->linesize, (i&4) && (s->flags & CODEC_FLAG_GRAY), 0, 0, 0);
if(!v->ttmbf && ttmb < 8) ttmb = -1;
@@ -2584,11 +2555,12 @@ static void vc1_decode_b_mb(VC1Context *v)
*/
static void vc1_decode_i_blocks(VC1Context *v)
{
- int k, j;
+ int k;
MpegEncContext *s = &v->s;
int cbp, val;
uint8_t *coded_val;
int mb_pos;
+ vc1_idct_func idct8x8_fn;
/* select codingmode used for VLC tables selection */
switch(v->y_ac_table_index){
@@ -2623,11 +2595,19 @@ static void vc1_decode_i_blocks(VC1Context *v)
s->mb_x = s->mb_y = 0;
s->mb_intra = 1;
s->first_slice_line = 1;
+ idct8x8_fn = v->vc1dsp.vc1_inv_trans_8x8_put[(!!v->rangeredfrm) | ((v->pq < 9 || !v->overlap) << 1)];
for(s->mb_y = 0; s->mb_y < s->mb_height; s->mb_y++) {
s->mb_x = 0;
ff_init_block_index(s);
for(; s->mb_x < s->mb_width; s->mb_x++) {
+ uint8_t *dst[6];
ff_update_block_index(s);
+ dst[0] = s->dest[0];
+ dst[1] = dst[0] + 8;
+ dst[2] = dst[0] + s->linesize * 8;
+ dst[3] = dst[2] + 8;
+ dst[4] = s->dest[1];
+ dst[5] = s->dest[2];
s->dsp.clear_blocks(s->block[0]);
mb_pos = s->mb_x + s->mb_y * s->mb_width;
s->current_picture.mb_type[mb_pos] = MB_TYPE_INTRA;
@@ -2651,13 +2631,12 @@ static void vc1_decode_i_blocks(VC1Context *v)
vc1_decode_i_block(v, s->block[k], k, val, (k<4)? v->codingset : v->codingset2);
- v->vc1dsp.vc1_inv_trans_8x8(s->block[k]);
- if(v->pq >= 9 && v->overlap) {
- for(j = 0; j < 64; j++) s->block[k][j] += 128;
- }
+ if(k > 3 && (v->s.flags & CODEC_FLAG_GRAY)) continue;
+ idct8x8_fn(dst[k],
+ k & 4 ? s->uvlinesize : s->linesize,
+ s->block[k]);
}
- vc1_put_block(v, s->block);
if(v->pq >= 9 && v->overlap) {
if(s->mb_x) {
v->vc1dsp.vc1_h_overlap(s->dest[0], s->linesize);
@@ -2704,7 +2683,7 @@ static void vc1_decode_i_blocks(VC1Context *v)
*/
static void vc1_decode_i_blocks_adv(VC1Context *v)
{
- int k, j;
+ int k;
MpegEncContext *s = &v->s;
int cbp, val;
uint8_t *coded_val;
@@ -2713,6 +2692,7 @@ static void vc1_decode_i_blocks_adv(VC1Context *v)
int mqdiff;
int overlap;
GetBitContext *gb = &s->gb;
+ vc1_idct_func idct8x8_fn;
/* select codingmode used for VLC tables selection */
switch(v->y_ac_table_index){
@@ -2743,11 +2723,19 @@ static void vc1_decode_i_blocks_adv(VC1Context *v)
s->mb_x = s->mb_y = 0;
s->mb_intra = 1;
s->first_slice_line = 1;
+ idct8x8_fn = v->vc1dsp.vc1_inv_trans_8x8_put[!!v->rangeredfrm];
for(s->mb_y = 0; s->mb_y < s->mb_height; s->mb_y++) {
s->mb_x = 0;
ff_init_block_index(s);
for(;s->mb_x < s->mb_width; s->mb_x++) {
+ uint8_t *dst[6];
ff_update_block_index(s);
+ dst[0] = s->dest[0];
+ dst[1] = dst[0] + 8;
+ dst[2] = dst[0] + s->linesize * 8;
+ dst[3] = dst[2] + 8;
+ dst[4] = s->dest[1];
+ dst[5] = s->dest[2];
s->dsp.clear_blocks(s->block[0]);
mb_pos = s->mb_x + s->mb_y * s->mb_stride;
s->current_picture.mb_type[mb_pos] = MB_TYPE_INTRA;
@@ -2791,11 +2779,12 @@ static void vc1_decode_i_blocks_adv(VC1Context *v)
vc1_decode_i_block_adv(v, s->block[k], k, val, (k<4)? v->codingset : v->codingset2, mquant);
- v->vc1dsp.vc1_inv_trans_8x8(s->block[k]);
- for(j = 0; j < 64; j++) s->block[k][j] += 128;
+ if(k > 3 && (v->s.flags & CODEC_FLAG_GRAY)) continue;
+ idct8x8_fn(dst[k],
+ k & 4 ? s->uvlinesize : s->linesize,
+ s->block[k]);
}
- vc1_put_block(v, s->block);
if(overlap) {
if(s->mb_x) {
v->vc1dsp.vc1_h_overlap(s->dest[0], s->linesize);
diff --git a/libavcodec/vc1dsp.c b/libavcodec/vc1dsp.c
index 000dad7..ea1c840 100644
--- a/libavcodec/vc1dsp.c
+++ b/libavcodec/vc1dsp.c
@@ -199,7 +199,7 @@ static void vc1_inv_trans_8x8_dc_c(uint8_t *dest, int linesize, DCTELEM *block)
}
}
-static void vc1_inv_trans_8x8_c(DCTELEM block[64])
+static av_always_inline void vc1_inv_trans_8x8_c(DCTELEM block[64], int shl)
{
int i;
register int t1,t2,t3,t4,t5,t6,t7,t8;
@@ -254,20 +254,54 @@ static void vc1_inv_trans_8x8_c(DCTELEM block[64])
t3 = 9 * src[ 8] - 16 * src[24] + 4 * src[40] + 15 * src[56];
t4 = 4 * src[ 8] - 9 * src[24] + 15 * src[40] - 16 * src[56];
- dst[ 0] = (t5 + t1) >> 7;
- dst[ 8] = (t6 + t2) >> 7;
- dst[16] = (t7 + t3) >> 7;
- dst[24] = (t8 + t4) >> 7;
- dst[32] = (t8 - t4 + 1) >> 7;
- dst[40] = (t7 - t3 + 1) >> 7;
- dst[48] = (t6 - t2 + 1) >> 7;
- dst[56] = (t5 - t1 + 1) >> 7;
+ dst[ 0] = ((t5 + t1 ) >> 7) << shl;
+ dst[ 8] = ((t6 + t2 ) >> 7) << shl;
+ dst[16] = ((t7 + t3 ) >> 7) << shl;
+ dst[24] = ((t8 + t4 ) >> 7) << shl;
+ dst[32] = ((t8 - t4 + 1) >> 7) << shl;
+ dst[40] = ((t7 - t3 + 1) >> 7) << shl;
+ dst[48] = ((t6 - t2 + 1) >> 7) << shl;
+ dst[56] = ((t5 - t1 + 1) >> 7) << shl;
src++;
dst++;
}
}
+/**
+ * Inverse-transform block with vc1_inv_trans_8x8_c() (no extra left shift),
+ * then pass the result to ff_add_pixels_clamped_c() together with dst.
+ */
+static void vc1_inv_trans_8x8_add_c(uint8_t *dst, int linesize,
+                                    DCTELEM block[64])
+{
+    vc1_inv_trans_8x8_c(block, 0);
+    ff_add_pixels_clamped_c(block, dst, linesize);
+}
+
+/**
+ * Inverse-transform block with vc1_inv_trans_8x8_c() (no extra left shift),
+ * then pass the result to ff_put_pixels_clamped_c() together with dst.
+ */
+static void vc1_inv_trans_8x8_put_c(uint8_t *dst, int linesize, DCTELEM block[64])
+{
+    vc1_inv_trans_8x8_c(block, 0);
+    ff_put_pixels_clamped_c(block, dst, linesize);
+}
+
+/**
+ * Range-reduced variant of the unsigned put: inverse-transform block, expand
+ * each of the 64 coefficients as (x - 64) << 1, then pass the block to
+ * ff_put_pixels_clamped_c().
+ *
+ * The transform is run with shl = 0 and the expansion is done here because
+ * the plain "<< 1" offered by the shl argument drops the "- 64" that the
+ * removed vc1_put_block() applied before put_pixels_clamped(); without it
+ * every output sample ends up 128 too high.
+ */
+static void vc1_inv_trans_8x8_put_rangered_c(uint8_t *dst, int linesize,
+                                             DCTELEM block[64])
+{
+    int i;
+
+    vc1_inv_trans_8x8_c(block, 0);
+    for (i = 0; i < 64; i++)
+        block[i] = (block[i] - 64) << 1;
+    ff_put_pixels_clamped_c(block, dst, linesize);
+}
+
+/**
+ * Inverse-transform block with vc1_inv_trans_8x8_c() (no extra left shift),
+ * then pass the result to ff_put_signed_pixels_clamped_c() together with dst.
+ */
+static void vc1_inv_trans_8x8_put_signed_c(uint8_t *dst, int linesize, DCTELEM block[64])
+{
+    vc1_inv_trans_8x8_c(block, 0);
+    ff_put_signed_pixels_clamped_c(block, dst, linesize);
+}
+
+/**
+ * Range-reduced variant of the signed put: inverse-transform block with
+ * vc1_inv_trans_8x8_c() asking for a left shift by one on each output, then
+ * pass the result to ff_put_signed_pixels_clamped_c() together with dst.
+ */
+static void vc1_inv_trans_8x8_put_signed_rangered_c(uint8_t *dst, int linesize, DCTELEM block[64])
+{
+    vc1_inv_trans_8x8_c(block, 1);
+    ff_put_signed_pixels_clamped_c(block, dst, linesize);
+}
+
/** Do inverse transform on 8x4 part of block
*/
static void vc1_inv_trans_8x4_dc_c(uint8_t *dest, int linesize, DCTELEM *block)
@@ -662,7 +696,11 @@ static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*a
}
av_cold void ff_vc1dsp_init(VC1DSPContext* dsp) {
- dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_c;
+ dsp->vc1_inv_trans_8x8_put[0] = vc1_inv_trans_8x8_put_signed_c;
+ dsp->vc1_inv_trans_8x8_put[1] = vc1_inv_trans_8x8_put_signed_rangered_c;
+ dsp->vc1_inv_trans_8x8_put[2] = vc1_inv_trans_8x8_put_c;
+ dsp->vc1_inv_trans_8x8_put[3] = vc1_inv_trans_8x8_put_rangered_c;
+ dsp->vc1_inv_trans_8x8_add = vc1_inv_trans_8x8_add_c;
dsp->vc1_inv_trans_4x8 = vc1_inv_trans_4x8_c;
dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_c;
dsp->vc1_inv_trans_4x4 = vc1_inv_trans_4x4_c;
diff --git a/libavcodec/vc1dsp.h b/libavcodec/vc1dsp.h
index a1f3d90..aac03ac 100644
--- a/libavcodec/vc1dsp.h
+++ b/libavcodec/vc1dsp.h
@@ -30,9 +30,15 @@
#include "dsputil.h"
+typedef void (*vc1_idct_func)(uint8_t *dst, int linesize, DCTELEM *block);
+
typedef struct VC1DSPContext {
/* vc1 functions */
- void (*vc1_inv_trans_8x8)(DCTELEM *b);
+ void (*vc1_inv_trans_8x8_add)(uint8_t *dest, int line_size, DCTELEM *b);
+ /* [0] is default idct, [1] is range reduction which does <<= 1 for each
+ * coeff after the idct itself. [2]/[3] are the same, but use
+ * put_pixels_clamped() instead of put_signed_pixels_clamped(). */
+ vc1_idct_func vc1_inv_trans_8x8_put[4];
void (*vc1_inv_trans_8x4)(uint8_t *dest, int line_size, DCTELEM *block);
void (*vc1_inv_trans_4x8)(uint8_t *dest, int line_size, DCTELEM *block);
void (*vc1_inv_trans_4x4)(uint8_t *dest, int line_size, DCTELEM *block);
--
1.7.2.1
More information about the ffmpeg-devel
mailing list