[FFmpeg-cvslog] r22048 - in trunk/libavcodec: h264.c h264.h
michael
subversion
Thu Feb 25 05:11:34 CET 2010
Author: michael
Date: Thu Feb 25 05:11:33 2010
New Revision: 22048
Log:
Cut the size of mvd_table by yet another factor of 2.
The read/write code itself was 1 cycle faster; overall it's
likely more due to cache effects.
Modified:
trunk/libavcodec/h264.c
trunk/libavcodec/h264.h
Modified: trunk/libavcodec/h264.c
==============================================================================
--- trunk/libavcodec/h264.c Thu Feb 25 03:42:25 2010 (r22047)
+++ trunk/libavcodec/h264.c Thu Feb 25 05:11:33 2010 (r22048)
@@ -757,8 +757,8 @@ int ff_h264_alloc_tables(H264Context *h)
FF_ALLOCZ_OR_GOTO(h->s.avctx, h->cbp_table, big_mb_num * sizeof(uint16_t), fail)
FF_ALLOCZ_OR_GOTO(h->s.avctx, h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t), fail)
- FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[0], 32*big_mb_num * sizeof(uint8_t), fail);
- FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[1], 32*big_mb_num * sizeof(uint8_t), fail);
+ FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[0], 16*big_mb_num * sizeof(uint8_t), fail);
+ FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[1], 16*big_mb_num * sizeof(uint8_t), fail);
FF_ALLOCZ_OR_GOTO(h->s.avctx, h->direct_table, 32*big_mb_num * sizeof(uint8_t) , fail);
FF_ALLOCZ_OR_GOTO(h->s.avctx, h->list_counts, big_mb_num * sizeof(uint8_t), fail)
@@ -775,7 +775,7 @@ int ff_h264_alloc_tables(H264Context *h)
const int b8_xy= 2*x + 2*y*h->b8_stride;
h->mb2b_xy [mb_xy]= b_xy;
- h->mb2br_xy[mb_xy]= FMO ? b_xy : (b_xy % (8*h->b_stride));
+ h->mb2br_xy[mb_xy]= 8*(FMO ? mb_xy : (mb_xy % (2*s->mb_stride)));
h->mb2b8_xy[mb_xy]= b8_xy;
}
}
Modified: trunk/libavcodec/h264.h
==============================================================================
--- trunk/libavcodec/h264.h Thu Feb 25 03:42:25 2010 (r22047)
+++ trunk/libavcodec/h264.h Thu Feb 25 05:11:33 2010 (r22048)
@@ -1070,23 +1070,23 @@ static void fill_decode_caches(H264Conte
if( CABAC ) {
/* XXX beurk, Load mvd */
if(USES_LIST(top_type, list)){
- const int b_xy= h->mb2br_xy[top_xy] + 3*h->b_stride;
+ const int b_xy= h->mb2br_xy[top_xy];
AV_COPY64(h->mvd_cache[list][scan8[0] + 0 - 1*8], h->mvd_table[list][b_xy + 0]);
}else{
AV_ZERO64(h->mvd_cache[list][scan8[0] + 0 - 1*8]);
}
if(USES_LIST(left_type[0], list)){
- const int b_xy= h->mb2br_xy[left_xy[0]] + 3;
- AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 0*8], h->mvd_table[list][b_xy + h->b_stride*left_block[0]]);
- AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 1*8], h->mvd_table[list][b_xy + h->b_stride*left_block[1]]);
+ const int b_xy= h->mb2br_xy[left_xy[0]] + 6;
+ AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 0*8], h->mvd_table[list][b_xy - left_block[0]]);
+ AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 1*8], h->mvd_table[list][b_xy - left_block[1]]);
}else{
AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 0*8]);
AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 1*8]);
}
if(USES_LIST(left_type[1], list)){
- const int b_xy= h->mb2br_xy[left_xy[1]] + 3;
- AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 2*8], h->mvd_table[list][b_xy + h->b_stride*left_block[2]]);
- AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 3*8], h->mvd_table[list][b_xy + h->b_stride*left_block[3]]);
+ const int b_xy= h->mb2br_xy[left_xy[1]] + 6;
+ AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 2*8], h->mvd_table[list][b_xy - left_block[2]]);
+ AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 3*8], h->mvd_table[list][b_xy - left_block[3]]);
}else{
AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 2*8]);
AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 3*8]);
@@ -1424,13 +1424,15 @@ static inline void write_back_motion(H26
AV_COPY128(mv_dst + y*b_stride, mv_src + 8*y);
}
if( CABAC ) {
- uint8_t (*mvd_dst)[2] = &h->mvd_table[list][FMO ? b_xy : h->mb2br_xy[h->mb_xy]];
+ uint8_t (*mvd_dst)[2] = &h->mvd_table[list][FMO ? 8*h->mb_xy : h->mb2br_xy[h->mb_xy]];
uint8_t (*mvd_src)[2] = &h->mvd_cache[list][scan8[0]];
if(IS_SKIP(mb_type))
- fill_rectangle(mvd_dst, 4, 4, h->b_stride, 0, 2);
- else
- for(y=0; y<4; y++){
- AV_COPY64(mvd_dst + y*b_stride, mvd_src + 8*y);
+ AV_ZERO128(mvd_dst);
+ else{
+ AV_COPY64(mvd_dst, mvd_src + 8*3);
+ for(y=0; y<3; y++){
+ AV_COPY16(mvd_dst + 3 + 3 - y, mvd_src + 3 + 8*y);
+ }
}
}
More information about the ffmpeg-cvslog
mailing list