[FFmpeg-devel] [PATCH] h264: don't touch H264Context->ref_count[] during MB decoding.
Clément Bœsch
ubitux at gmail.com
Thu Oct 4 09:09:22 CEST 2012
On Thu, Oct 04, 2012 at 03:56:12AM +0200, Michael Niedermayer wrote:
> On Wed, Oct 03, 2012 at 04:25:14PM -0700, Ronald S. Bultje wrote:
> > From: "Ronald S. Bultje" <rsbultje at gmail.com>
> >
> > The variable is copied to subsequent threads at the same time, so this
> > may cause wrong ref_count[] values to be copied to subsequent threads.
> >
> > This bug was found using TSAN.
> > ---
> > libavcodec/h264_cabac.c | 41 ++++++++++++++++-------------------------
> > libavcodec/h264_cavlc.c | 33 +++++++++++++--------------------
> > 2 files changed, 29 insertions(+), 45 deletions(-)
> >
> > diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c
> > index 7e8947b..97e8128 100644
> > --- a/libavcodec/h264_cabac.c
> > +++ b/libavcodec/h264_cabac.c
> > @@ -2003,11 +2003,6 @@ decode_intra_mb:
> > return 0;
> > }
> >
> > - if(MB_MBAFF){
> > - h->ref_count[0] <<= 1;
> > - h->ref_count[1] <<= 1;
> > - }
> > -
> > fill_decode_caches(h, mb_type);
> >
> > if( IS_INTRA( mb_type ) ) {
>
> i suggest to add a priv/local/tls/whatever_ref_count[2]
> and set/change it as ref_count is before your patch
> that way the shift doesnt need to be done multiple times in the
> more inner loops. This should also keep the code smaller and reduce
> pressure on the code cache
>
Like attached?
--
Clément B.
-------------- next part --------------
From a96e345826b53bf93c6a7c99dc46fd44417679a0 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje at gmail.com>
Date: Thu, 4 Oct 2012 09:01:45 +0200
Subject: [PATCH] lavc/h264: don't touch H264Context->ref_count[] during MB
decoding.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The variable is copied to subsequent threads at the same time, so this
may cause wrong ref_count[] values to be copied to subsequent threads.
This bug was found using TSAN and Helgrind.
Original patch by Ronald, adapted with a local_ref_count by Clément,
following the suggestion of Michael Niedermayer.
Signed-off-by: Clément Bœsch <clement.boesch at smartjog.com>
---
libavcodec/h264_cabac.c | 36 +++++++++++++++++-------------------
libavcodec/h264_cavlc.c | 32 +++++++++++++++-----------------
2 files changed, 32 insertions(+), 36 deletions(-)
diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c
index a064292..def3624 100644
--- a/libavcodec/h264_cabac.c
+++ b/libavcodec/h264_cabac.c
@@ -1864,6 +1864,9 @@ int ff_h264_decode_mb_cabac(H264Context *h) {
int dct8x8_allowed= h->pps.transform_8x8_mode;
int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2;
const int pixel_shift = h->pixel_shift;
+ unsigned local_ref_count[2];
+
+ memcpy(local_ref_count, h->ref_count, sizeof(local_ref_count));
mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
@@ -2005,8 +2008,8 @@ decode_intra_mb:
}
if(MB_MBAFF){
- h->ref_count[0] <<= 1;
- h->ref_count[1] <<= 1;
+ local_ref_count[0] <<= 1;
+ local_ref_count[1] <<= 1;
}
fill_decode_caches(h, mb_type);
@@ -2077,10 +2080,10 @@ decode_intra_mb:
for( i = 0; i < 4; i++ ) {
if(IS_DIRECT(h->sub_mb_type[i])) continue;
if(IS_DIR(h->sub_mb_type[i], 0, list)){
- if( h->ref_count[list] > 1 ){
+ if (local_ref_count[list] > 1) {
ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
- if(ref[list][i] >= (unsigned)h->ref_count[list]){
- av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], h->ref_count[list]);
+ if (ref[list][i] >= (unsigned)local_ref_count[list]) {
+ av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], local_ref_count[list]);
return -1;
}
}else
@@ -2163,10 +2166,10 @@ decode_intra_mb:
for(list=0; list<h->list_count; list++){
if(IS_DIR(mb_type, 0, list)){
int ref;
- if(h->ref_count[list] > 1){
+ if (local_ref_count[list] > 1) {
ref= decode_cabac_mb_ref(h, list, 0);
- if(ref >= (unsigned)h->ref_count[list]){
- av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
+ if (ref >= (unsigned)local_ref_count[list]) {
+ av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, local_ref_count[list]);
return -1;
}
}else
@@ -2191,10 +2194,10 @@ decode_intra_mb:
for(i=0; i<2; i++){
if(IS_DIR(mb_type, i, list)){
int ref;
- if(h->ref_count[list] > 1){
+ if (local_ref_count[list] > 1) {
ref= decode_cabac_mb_ref( h, list, 8*i );
- if(ref >= (unsigned)h->ref_count[list]){
- av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
+ if (ref >= (unsigned)local_ref_count[list]) {
+ av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, local_ref_count[list]);
return -1;
}
}else
@@ -2226,10 +2229,10 @@ decode_intra_mb:
for(i=0; i<2; i++){
if(IS_DIR(mb_type, i, list)){ //FIXME optimize
int ref;
- if(h->ref_count[list] > 1){
+ if (local_ref_count[list] > 1) {
ref= decode_cabac_mb_ref( h, list, 4*i );
- if(ref >= (unsigned)h->ref_count[list]){
- av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
+ if (ref >= (unsigned)local_ref_count[list]) {
+ av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, local_ref_count[list]);
return -1;
}
}else
@@ -2402,10 +2405,5 @@ decode_intra_mb:
s->current_picture.f.qscale_table[mb_xy] = s->qscale;
write_back_non_zero_count(h);
- if(MB_MBAFF){
- h->ref_count[0] >>= 1;
- h->ref_count[1] >>= 1;
- }
-
return 0;
}
diff --git a/libavcodec/h264_cavlc.c b/libavcodec/h264_cavlc.c
index f5d403b..00d83be 100644
--- a/libavcodec/h264_cavlc.c
+++ b/libavcodec/h264_cavlc.c
@@ -700,6 +700,9 @@ int ff_h264_decode_mb_cavlc(H264Context *h){
int dct8x8_allowed= h->pps.transform_8x8_mode;
int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2;
const int pixel_shift = h->pixel_shift;
+ unsigned local_ref_count[2];
+
+ memcpy(local_ref_count, h->ref_count, sizeof(local_ref_count));
mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
@@ -786,8 +789,8 @@ decode_intra_mb:
}
if(MB_MBAFF){
- h->ref_count[0] <<= 1;
- h->ref_count[1] <<= 1;
+ local_ref_count[0] <<= 1;
+ local_ref_count[1] <<= 1;
}
fill_decode_neighbors(h, mb_type);
@@ -869,7 +872,7 @@ decode_intra_mb:
}
for(list=0; list<h->list_count; list++){
- int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
+ int ref_count= IS_REF0(mb_type) ? 1 : local_ref_count[list];
for(i=0; i<4; i++){
if(IS_DIRECT(h->sub_mb_type[i])) continue;
if(IS_DIR(h->sub_mb_type[i], 0, list)){
@@ -949,13 +952,13 @@ decode_intra_mb:
for(list=0; list<h->list_count; list++){
unsigned int val;
if(IS_DIR(mb_type, 0, list)){
- if(h->ref_count[list]==1){
+ if(local_ref_count[list]==1){
val= 0;
- }else if(h->ref_count[list]==2){
+ }else if(local_ref_count[list]==2){
val= get_bits1(&s->gb)^1;
}else{
val= get_ue_golomb_31(&s->gb);
- if(val >= h->ref_count[list]){
+ if(val >= local_ref_count[list]){
av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
return -1;
}
@@ -979,13 +982,13 @@ decode_intra_mb:
for(i=0; i<2; i++){
unsigned int val;
if(IS_DIR(mb_type, i, list)){
- if(h->ref_count[list] == 1){
+ if(local_ref_count[list] == 1){
val= 0;
- }else if(h->ref_count[list] == 2){
+ }else if(local_ref_count[list] == 2){
val= get_bits1(&s->gb)^1;
}else{
val= get_ue_golomb_31(&s->gb);
- if(val >= h->ref_count[list]){
+ if(val >= local_ref_count[list]){
av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
return -1;
}
@@ -1016,13 +1019,13 @@ decode_intra_mb:
for(i=0; i<2; i++){
unsigned int val;
if(IS_DIR(mb_type, i, list)){ //FIXME optimize
- if(h->ref_count[list]==1){
+ if(local_ref_count[list]==1){
val= 0;
- }else if(h->ref_count[list]==2){
+ }else if(local_ref_count[list]==2){
val= get_bits1(&s->gb)^1;
}else{
val= get_ue_golomb_31(&s->gb);
- if(val >= h->ref_count[list]){
+ if(val >= local_ref_count[list]){
av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
return -1;
}
@@ -1162,10 +1165,5 @@ decode_intra_mb:
s->current_picture.f.qscale_table[mb_xy] = s->qscale;
write_back_non_zero_count(h);
- if(MB_MBAFF){
- h->ref_count[0] >>= 1;
- h->ref_count[1] >>= 1;
- }
-
return 0;
}
--
1.7.10.4
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 490 bytes
Desc: not available
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20121004/01d0f070/attachment.asc>
More information about the ffmpeg-devel
mailing list