[FFmpeg-cvslog] r26329 - in trunk/libavcodec: h264.c h264idct.c
darkshikari
subversion
Fri Jan 14 20:05:00 CET 2011
Author: darkshikari
Date: Fri Jan 14 20:04:59 2011
New Revision: 26329
Log:
H.264: eliminate non-transposed scantable support.
It was an ugly hack to begin with and didn't give any performance benefit.
NOTE: this patch opens up some future simplifications to be made (such as
removing some of the scantables from H264Context) but doesn't take advantage
of them yet.
Modified:
trunk/libavcodec/h264.c
trunk/libavcodec/h264idct.c
Modified: trunk/libavcodec/h264.c
==============================================================================
--- trunk/libavcodec/h264.c Fri Jan 14 17:36:57 2011 (r26328)
+++ trunk/libavcodec/h264.c Fri Jan 14 20:04:59 2011 (r26329)
@@ -687,7 +687,6 @@ static void free_tables(H264Context *h){
static void init_dequant8_coeff_table(H264Context *h){
int i,q,x;
- const int transpose = (h->h264dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
h->dequant8_coeff[0] = h->dequant8_buffer[0];
h->dequant8_coeff[1] = h->dequant8_buffer[1];
@@ -701,7 +700,7 @@ static void init_dequant8_coeff_table(H2
int shift = div6[q];
int idx = rem6[q];
for(x=0; x<64; x++)
- h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
+ h->dequant8_coeff[i][q][(x>>3)|((x&7)<<3)] =
((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
h->pps.scaling_matrix8[i][x]) << shift;
}
@@ -710,7 +709,6 @@ static void init_dequant8_coeff_table(H2
static void init_dequant4_coeff_table(H264Context *h){
int i,j,q,x;
- const int transpose = (h->h264dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
for(i=0; i<6; i++ ){
h->dequant4_coeff[i] = h->dequant4_buffer[i];
for(j=0; j<i; j++){
@@ -726,7 +724,7 @@ static void init_dequant4_coeff_table(H2
int shift = div6[q] + 2;
int idx = rem6[q];
for(x=0; x<16; x++)
- h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
+ h->dequant4_coeff[i][q][(x>>2)|((x<<2)&0xF)] =
((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
h->pps.scaling_matrix4[i][x]) << shift;
}
@@ -1597,31 +1595,19 @@ static int init_poc(H264Context *h){
*/
static void init_scan_tables(H264Context *h){
int i;
- if(h->h264dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
- memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
- memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
- }else{
- for(i=0; i<16; i++){
+ for(i=0; i<16; i++){
#define T(x) (x>>2) | ((x<<2) & 0xF)
- h->zigzag_scan[i] = T(zigzag_scan[i]);
- h-> field_scan[i] = T( field_scan[i]);
+ h->zigzag_scan[i] = T(zigzag_scan[i]);
+ h-> field_scan[i] = T( field_scan[i]);
#undef T
- }
}
- if(h->h264dsp.h264_idct8_add == ff_h264_idct8_add_c){
- memcpy(h->zigzag_scan8x8, ff_zigzag_direct, 64*sizeof(uint8_t));
- memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
- memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
- memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
- }else{
- for(i=0; i<64; i++){
+ for(i=0; i<64; i++){
#define T(x) (x>>3) | ((x&7)<<3)
- h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]);
- h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
- h->field_scan8x8[i] = T(field_scan8x8[i]);
- h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
+ h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]);
+ h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
+ h->field_scan8x8[i] = T(field_scan8x8[i]);
+ h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
#undef T
- }
}
if(h->sps.transform_bypass){ //FIXME same ugly
h->zigzag_scan_q0 = zigzag_scan;
Modified: trunk/libavcodec/h264idct.c
==============================================================================
--- trunk/libavcodec/h264idct.c Fri Jan 14 17:36:57 2011 (r26328)
+++ trunk/libavcodec/h264idct.c Fri Jan 14 20:04:59 2011 (r26329)
@@ -34,23 +34,23 @@ static av_always_inline void idct_intern
block[0] += 1<<(shift-1);
for(i=0; i<4; i++){
- const int z0= block[0 + block_stride*i] + block[2 + block_stride*i];
- const int z1= block[0 + block_stride*i] - block[2 + block_stride*i];
- const int z2= (block[1 + block_stride*i]>>1) - block[3 + block_stride*i];
- const int z3= block[1 + block_stride*i] + (block[3 + block_stride*i]>>1);
-
- block[0 + block_stride*i]= z0 + z3;
- block[1 + block_stride*i]= z1 + z2;
- block[2 + block_stride*i]= z1 - z2;
- block[3 + block_stride*i]= z0 - z3;
- }
-
- for(i=0; i<4; i++){
const int z0= block[i + block_stride*0] + block[i + block_stride*2];
const int z1= block[i + block_stride*0] - block[i + block_stride*2];
const int z2= (block[i + block_stride*1]>>1) - block[i + block_stride*3];
const int z3= block[i + block_stride*1] + (block[i + block_stride*3]>>1);
+ block[i + block_stride*0]= z0 + z3;
+ block[i + block_stride*1]= z1 + z2;
+ block[i + block_stride*2]= z1 - z2;
+ block[i + block_stride*3]= z0 - z3;
+ }
+
+ for(i=0; i<4; i++){
+ const int z0= block[0 + block_stride*i] + block[2 + block_stride*i];
+ const int z1= block[0 + block_stride*i] - block[2 + block_stride*i];
+ const int z2= (block[1 + block_stride*i]>>1) - block[3 + block_stride*i];
+ const int z3= block[1 + block_stride*i] + (block[3 + block_stride*i]>>1);
+
dst[i + 0*stride]= cm[ add*dst[i + 0*stride] + ((z0 + z3) >> shift) ];
dst[i + 1*stride]= cm[ add*dst[i + 1*stride] + ((z1 + z2) >> shift) ];
dst[i + 2*stride]= cm[ add*dst[i + 2*stride] + ((z1 - z2) >> shift) ];
@@ -78,51 +78,51 @@ void ff_h264_idct8_add_c(uint8_t *dst, D
for( i = 0; i < 8; i++ )
{
- const int a0 = block[0+i*8] + block[4+i*8];
- const int a2 = block[0+i*8] - block[4+i*8];
- const int a4 = (block[2+i*8]>>1) - block[6+i*8];
- const int a6 = (block[6+i*8]>>1) + block[2+i*8];
+ const int a0 = block[i+0*8] + block[i+4*8];
+ const int a2 = block[i+0*8] - block[i+4*8];
+ const int a4 = (block[i+2*8]>>1) - block[i+6*8];
+ const int a6 = (block[i+6*8]>>1) + block[i+2*8];
const int b0 = a0 + a6;
const int b2 = a2 + a4;
const int b4 = a2 - a4;
const int b6 = a0 - a6;
- const int a1 = -block[3+i*8] + block[5+i*8] - block[7+i*8] - (block[7+i*8]>>1);
- const int a3 = block[1+i*8] + block[7+i*8] - block[3+i*8] - (block[3+i*8]>>1);
- const int a5 = -block[1+i*8] + block[7+i*8] + block[5+i*8] + (block[5+i*8]>>1);
- const int a7 = block[3+i*8] + block[5+i*8] + block[1+i*8] + (block[1+i*8]>>1);
+ const int a1 = -block[i+3*8] + block[i+5*8] - block[i+7*8] - (block[i+7*8]>>1);
+ const int a3 = block[i+1*8] + block[i+7*8] - block[i+3*8] - (block[i+3*8]>>1);
+ const int a5 = -block[i+1*8] + block[i+7*8] + block[i+5*8] + (block[i+5*8]>>1);
+ const int a7 = block[i+3*8] + block[i+5*8] + block[i+1*8] + (block[i+1*8]>>1);
const int b1 = (a7>>2) + a1;
const int b3 = a3 + (a5>>2);
const int b5 = (a3>>2) - a5;
const int b7 = a7 - (a1>>2);
- block[0+i*8] = b0 + b7;
- block[7+i*8] = b0 - b7;
- block[1+i*8] = b2 + b5;
- block[6+i*8] = b2 - b5;
- block[2+i*8] = b4 + b3;
- block[5+i*8] = b4 - b3;
- block[3+i*8] = b6 + b1;
- block[4+i*8] = b6 - b1;
+ block[i+0*8] = b0 + b7;
+ block[i+7*8] = b0 - b7;
+ block[i+1*8] = b2 + b5;
+ block[i+6*8] = b2 - b5;
+ block[i+2*8] = b4 + b3;
+ block[i+5*8] = b4 - b3;
+ block[i+3*8] = b6 + b1;
+ block[i+4*8] = b6 - b1;
}
for( i = 0; i < 8; i++ )
{
- const int a0 = block[i+0*8] + block[i+4*8];
- const int a2 = block[i+0*8] - block[i+4*8];
- const int a4 = (block[i+2*8]>>1) - block[i+6*8];
- const int a6 = (block[i+6*8]>>1) + block[i+2*8];
+ const int a0 = block[0+i*8] + block[4+i*8];
+ const int a2 = block[0+i*8] - block[4+i*8];
+ const int a4 = (block[2+i*8]>>1) - block[6+i*8];
+ const int a6 = (block[6+i*8]>>1) + block[2+i*8];
const int b0 = a0 + a6;
const int b2 = a2 + a4;
const int b4 = a2 - a4;
const int b6 = a0 - a6;
- const int a1 = -block[i+3*8] + block[i+5*8] - block[i+7*8] - (block[i+7*8]>>1);
- const int a3 = block[i+1*8] + block[i+7*8] - block[i+3*8] - (block[i+3*8]>>1);
- const int a5 = -block[i+1*8] + block[i+7*8] + block[i+5*8] + (block[i+5*8]>>1);
- const int a7 = block[i+3*8] + block[i+5*8] + block[i+1*8] + (block[i+1*8]>>1);
+ const int a1 = -block[3+i*8] + block[5+i*8] - block[7+i*8] - (block[7+i*8]>>1);
+ const int a3 = block[1+i*8] + block[7+i*8] - block[3+i*8] - (block[3+i*8]>>1);
+ const int a5 = -block[1+i*8] + block[7+i*8] + block[5+i*8] + (block[5+i*8]>>1);
+ const int a7 = block[3+i*8] + block[5+i*8] + block[1+i*8] + (block[1+i*8]>>1);
const int b1 = (a7>>2) + a1;
const int b3 = a3 + (a5>>2);
More information about the ffmpeg-cvslog
mailing list