[FFmpeg-devel] [PATCH 2/2] vp8: make mv_min/max thread-local if using partition threading.
Ronald S. Bultje
rsbultje at gmail.com
Wed Apr 5 23:20:04 EEST 2017
Fixes tsan warnings like this in fate-vp8-test-vector-007:
WARNING: ThreadSanitizer: data race (pid=65909)
Write of size 4 at 0x7d8c0000e088 by thread T1:
#0 vp8_decode_mb_row_sliced vp8.c:2519 (ffmpeg:x86_64+0x100995ede)
[..]
Previous write of size 4 at 0x7d8c0000e088 by thread T2:
#0 vp8_decode_mb_row_sliced vp8.c:2519 (ffmpeg:x86_64+0x100995ede)
---
libavcodec/vp8.c | 53 ++++++++++++++++++++++++++++-------------------------
libavcodec/vp8.h | 19 ++++++++++++-------
2 files changed, 40 insertions(+), 32 deletions(-)
diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
index 9bc1d95..fe7aa23 100644
--- a/libavcodec/vp8.c
+++ b/libavcodec/vp8.c
@@ -772,7 +772,7 @@ static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_si
}
static av_always_inline
-void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
+void clamp_mv(VP8mvbounds *s, VP56mv *dst, const VP56mv *src)
{
dst->x = av_clip(src->x, av_clip(s->mv_min.x, INT16_MIN, INT16_MAX),
av_clip(s->mv_max.x, INT16_MIN, INT16_MAX));
@@ -1031,7 +1031,7 @@ void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
}
static av_always_inline
-void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb,
+void vp8_decode_mvs(VP8Context *s, VP8mvbounds *mv_bounds, VP8Macroblock *mb,
int mb_x, int mb_y, int layout)
{
VP8Macroblock *mb_edge[3] = { 0 /* top */,
@@ -1102,7 +1102,7 @@ void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb,
if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
/* Choose the best mv out of 0,0 and the nearest mv */
- clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
+ clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
(mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
(mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
@@ -1116,11 +1116,11 @@ void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb,
mb->bmv[0] = mb->mv;
}
} else {
- clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
+ clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAR]);
mb->bmv[0] = mb->mv;
}
} else {
- clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
+ clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAREST]);
mb->bmv[0] = mb->mv;
}
} else {
@@ -1166,7 +1166,8 @@ void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
}
static av_always_inline
-void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
+void decode_mb_mode(VP8Context *s, VP8mvbounds *mv_bounds,
+ VP8Macroblock *mb, int mb_x, int mb_y,
uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
{
VP56RangeCoder *c = &s->c;
@@ -1230,7 +1231,7 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
if (is_vp7)
vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
else
- vp8_decode_mvs(s, mb, mb_x, mb_y, layout);
+ vp8_decode_mvs(s, mv_bounds, mb, mb_x, mb_y, layout);
} else {
// intra MB, 16.1
mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
@@ -2205,8 +2206,8 @@ void vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
VP8Context *s = avctx->priv_data;
int mb_x, mb_y;
- s->mv_min.y = -MARGIN;
- s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
+ s->mv_bounds.mv_min.y = -MARGIN;
+ s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
VP8Macroblock *mb = s->macroblocks_base +
((s->mb_width + 1) * (mb_y + 1) + 1);
@@ -2214,20 +2215,20 @@ void vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
- s->mv_min.x = -MARGIN;
- s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
+ s->mv_bounds.mv_min.x = -MARGIN;
+ s->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
if (mb_y == 0)
AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
DC_PRED * 0x01010101);
- decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
+ decode_mb_mode(s, &s->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
prev_frame && prev_frame->seg_map ?
prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
- s->mv_min.x -= 64;
- s->mv_max.x -= 64;
+ s->mv_bounds.mv_min.x -= 64;
+ s->mv_bounds.mv_max.x -= 64;
}
- s->mv_min.y -= 64;
- s->mv_max.y -= 64;
+ s->mv_bounds.mv_min.y -= 64;
+ s->mv_bounds.mv_max.y -= 64;
}
}
@@ -2325,8 +2326,8 @@ static av_always_inline int decode_mb_row_no_filter(AVCodecContext *avctx, void
if (!is_vp7 || mb_y == 0)
memset(td->left_nnz, 0, sizeof(td->left_nnz));
- s->mv_min.x = -MARGIN;
- s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
+ td->mv_bounds.mv_min.x = -MARGIN;
+ td->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
if (c->end <= c->buffer && c->bits >= 0)
@@ -2350,7 +2351,7 @@ static av_always_inline int decode_mb_row_no_filter(AVCodecContext *avctx, void
dst[2] - dst[1], 2);
if (!s->mb_layout)
- decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
+ decode_mb_mode(s, &td->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
prev_frame && prev_frame->seg_map ?
prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);
@@ -2397,8 +2398,8 @@ static av_always_inline int decode_mb_row_no_filter(AVCodecContext *avctx, void
dst[0] += 16;
dst[1] += 8;
dst[2] += 8;
- s->mv_min.x -= 64;
- s->mv_max.x -= 64;
+ td->mv_bounds.mv_min.x -= 64;
+ td->mv_bounds.mv_max.x -= 64;
if (mb_x == s->mb_width + 1) {
update_pos(td, mb_y, s->mb_width + 3);
@@ -2504,6 +2505,8 @@ int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
int ret;
td->thread_nr = threadnr;
+ td->mv_bounds.mv_min.y = -MARGIN - 64 * threadnr;
+ td->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN - 64 * threadnr;
for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
atomic_store(&td->thread_mb_pos, mb_y << 16);
ret = s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
@@ -2515,8 +2518,8 @@ int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
s->filter_mb_row(avctx, tdata, jobnr, threadnr);
update_pos(td, mb_y, INT_MAX & 0xFFFF);
- s->mv_min.y -= 64;
- s->mv_max.y -= 64;
+ td->mv_bounds.mv_min.y -= 64 * num_jobs;
+ td->mv_bounds.mv_max.y -= 64 * num_jobs;
if (avctx->active_thread_type == FF_THREAD_FRAME)
ff_thread_report_progress(&curframe->tf, mb_y, 0);
@@ -2662,8 +2665,8 @@ int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
s->num_jobs = num_jobs;
s->curframe = curframe;
s->prev_frame = prev_frame;
- s->mv_min.y = -MARGIN;
- s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
+ s->mv_bounds.mv_min.y = -MARGIN;
+ s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
for (i = 0; i < MAX_THREADS; i++) {
VP8ThreadData *td = &s->thread_data[i];
atomic_init(&td->thread_mb_pos, 0);
diff --git a/libavcodec/vp8.h b/libavcodec/vp8.h
index d7e7680..8263997 100644
--- a/libavcodec/vp8.h
+++ b/libavcodec/vp8.h
@@ -93,6 +93,16 @@ typedef struct VP8Macroblock {
VP56mv bmv[16];
} VP8Macroblock;
+typedef struct VP8intmv {
+ int x;
+ int y;
+} VP8intmv;
+
+typedef struct VP8mvbounds {
+ VP8intmv mv_min;
+ VP8intmv mv_max;
+} VP8mvbounds;
+
typedef struct VP8ThreadData {
DECLARE_ALIGNED(16, int16_t, block)[6][4][16];
DECLARE_ALIGNED(16, int16_t, block_dc)[16];
@@ -122,6 +132,7 @@ typedef struct VP8ThreadData {
#define EDGE_EMU_LINESIZE 32
DECLARE_ALIGNED(16, uint8_t, edge_emu_buffer)[21 * EDGE_EMU_LINESIZE];
VP8FilterStrength *filter_strength;
+ VP8mvbounds mv_bounds;
} VP8ThreadData;
typedef struct VP8Frame {
@@ -129,11 +140,6 @@ typedef struct VP8Frame {
AVBufferRef *seg_map;
} VP8Frame;
-typedef struct VP8intmv {
- int x;
- int y;
-} VP8intmv;
-
#define MAX_THREADS 8
typedef struct VP8Context {
VP8ThreadData *thread_data;
@@ -152,8 +158,7 @@ typedef struct VP8Context {
uint8_t deblock_filter;
uint8_t mbskip_enabled;
uint8_t profile;
- VP8intmv mv_min;
- VP8intmv mv_max;
+ VP8mvbounds mv_bounds;
int8_t sign_bias[4]; ///< one state [0, 1] per ref frame type
int ref_count[3];
--
2.8.1
More information about the ffmpeg-devel
mailing list