[FFmpeg-devel] [PATCH] vp9: do unscaled MC in scaled path if size of this reference matches.
Ronald S. Bultje
rsbultje at gmail.com
Fri Sep 4 21:04:02 CEST 2015
This can happen if we do bidirectional MC, where one reference has the
same size as the current frame, but the other one doesn't.
---
libavcodec/vp9.c | 219 +++++++++++++++++++++++++++++--------------------------
1 file changed, 117 insertions(+), 102 deletions(-)
diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c
index 3bd2a0f..25a7b1d 100644
--- a/libavcodec/vp9.c
+++ b/libavcodec/vp9.c
@@ -2766,7 +2766,108 @@ static void intra_recon_16bpp(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv
intra_recon(ctx, y_off, uv_off, 2);
}
+static av_always_inline void mc_luma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
+ uint8_t *dst, ptrdiff_t dst_stride,
+ const uint8_t *ref, ptrdiff_t ref_stride,
+ ThreadFrame *ref_frame,
+ ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
+ int bw, int bh, int w, int h, int bytesperpixel)
+{
+ int mx = mv->x, my = mv->y, th;
+
+ y += my >> 3;
+ x += mx >> 3;
+ ref += y * ref_stride + x * bytesperpixel;
+ mx &= 7;
+ my &= 7;
+ // FIXME bilinear filter only needs 0/1 pixels, not 3/4
+ // we use +7 because the last 7 pixels of each sbrow can be changed in
+ // the longest loopfilter of the next sbrow
+ th = (y + bh + 4 * !!my + 7) >> 6;
+ ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
+ if (x < !!mx * 3 || y < !!my * 3 ||
+ x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
+ s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
+ ref - !!my * 3 * ref_stride - !!mx * 3 * bytesperpixel,
+ 160, ref_stride,
+ bw + !!mx * 7, bh + !!my * 7,
+ x - !!mx * 3, y - !!my * 3, w, h);
+ ref = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
+ ref_stride = 160;
+ }
+ mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
+}
+
+static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
+ uint8_t *dst_u, uint8_t *dst_v,
+ ptrdiff_t dst_stride,
+ const uint8_t *ref_u, ptrdiff_t src_stride_u,
+ const uint8_t *ref_v, ptrdiff_t src_stride_v,
+ ThreadFrame *ref_frame,
+ ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
+ int bw, int bh, int w, int h, int bytesperpixel)
+{
+ int mx = mv->x << !s->ss_h, my = mv->y << !s->ss_v, th;
+
+ y += my >> 4;
+ x += mx >> 4;
+ ref_u += y * src_stride_u + x * bytesperpixel;
+ ref_v += y * src_stride_v + x * bytesperpixel;
+ mx &= 15;
+ my &= 15;
+ // FIXME bilinear filter only needs 0/1 pixels, not 3/4
+ // we use +7 because the last 7 pixels of each sbrow can be changed in
+ // the longest loopfilter of the next sbrow
+ th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);
+ ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
+ if (x < !!mx * 3 || y < !!my * 3 ||
+ x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
+ s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
+ ref_u - !!my * 3 * src_stride_u - !!mx * 3 * bytesperpixel,
+ 160, src_stride_u,
+ bw + !!mx * 7, bh + !!my * 7,
+ x - !!mx * 3, y - !!my * 3, w, h);
+ ref_u = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
+ mc[!!mx][!!my](dst_u, dst_stride, ref_u, 160, bh, mx, my);
+
+ s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
+ ref_v - !!my * 3 * src_stride_v - !!mx * 3 * bytesperpixel,
+ 160, src_stride_v,
+ bw + !!mx * 7, bh + !!my * 7,
+ x - !!mx * 3, y - !!my * 3, w, h);
+ ref_v = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
+ mc[!!mx][!!my](dst_v, dst_stride, ref_v, 160, bh, mx, my);
+ } else {
+ mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
+ mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
+ }
+}
+
+#define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
+ px, py, pw, ph, bw, bh, w, h, i) \
+ mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
+ mv, bw, bh, w, h, bytesperpixel)
+#define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
+ row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
+ mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
+ row, col, mv, bw, bh, w, h, bytesperpixel)
+#define SCALED 0
+#define FN(x) x##_8bpp
+#define BYTES_PER_PIXEL 1
+#include "vp9_mc_template.c"
+#undef FN
+#undef BYTES_PER_PIXEL
+#define FN(x) x##_16bpp
+#define BYTES_PER_PIXEL 2
+#include "vp9_mc_template.c"
+#undef mc_luma_dir
+#undef mc_chroma_dir
+#undef FN
+#undef BYTES_PER_PIXEL
+#undef SCALED
+
static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
+ vp9_mc_func (*mc)[2],
uint8_t *dst, ptrdiff_t dst_stride,
const uint8_t *ref, ptrdiff_t ref_stride,
ThreadFrame *ref_frame,
@@ -2775,6 +2876,11 @@ static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func sm
int bw, int bh, int w, int h, int bytesperpixel,
const uint16_t *scale, const uint8_t *step)
{
+ if (s->frames[CUR_FRAME].tf.f->width == ref_frame->f->width &&
+ s->frames[CUR_FRAME].tf.f->height == ref_frame->f->height) {
+ mc_luma_unscaled(s, mc, dst, dst_stride, ref, ref_stride, ref_frame,
+ y, x, in_mv, bw, bh, w, h, bytesperpixel);
+ } else {
#define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14)
int mx, my;
int refbw_m1, refbh_m1;
@@ -2811,9 +2917,11 @@ static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func sm
ref_stride = 288;
}
smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
+ }
}
static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
+ vp9_mc_func (*mc)[2],
uint8_t *dst_u, uint8_t *dst_v,
ptrdiff_t dst_stride,
const uint8_t *ref_u, ptrdiff_t src_stride_u,
@@ -2824,6 +2932,12 @@ static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func
int bw, int bh, int w, int h, int bytesperpixel,
const uint16_t *scale, const uint8_t *step)
{
+ if (s->frames[CUR_FRAME].tf.f->width == ref_frame->f->width &&
+ s->frames[CUR_FRAME].tf.f->height == ref_frame->f->height) {
+ mc_chroma_unscaled(s, mc, dst_u, dst_v, dst_stride, ref_u, src_stride_u,
+ ref_v, src_stride_v, ref_frame,
+ y, x, in_mv, bw, bh, w, h, bytesperpixel);
+ } else {
int mx, my;
int refbw_m1, refbh_m1;
int th;
@@ -2879,16 +2993,17 @@ static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func
smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);
}
+ }
}
#define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
px, py, pw, ph, bw, bh, w, h, i) \
- mc_luma_scaled(s, s->dsp.s##mc, dst, dst_ls, src, src_ls, tref, row, col, \
+ mc_luma_scaled(s, s->dsp.s##mc, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
#define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
- mc_chroma_scaled(s, s->dsp.s##mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
+ mc_chroma_scaled(s, s->dsp.s##mc, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
row, col, mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
#define SCALED 1
@@ -2906,106 +3021,6 @@ static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func
#undef BYTES_PER_PIXEL
#undef SCALED
-static av_always_inline void mc_luma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
- uint8_t *dst, ptrdiff_t dst_stride,
- const uint8_t *ref, ptrdiff_t ref_stride,
- ThreadFrame *ref_frame,
- ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
- int bw, int bh, int w, int h, int bytesperpixel)
-{
- int mx = mv->x, my = mv->y, th;
-
- y += my >> 3;
- x += mx >> 3;
- ref += y * ref_stride + x * bytesperpixel;
- mx &= 7;
- my &= 7;
- // FIXME bilinear filter only needs 0/1 pixels, not 3/4
- // we use +7 because the last 7 pixels of each sbrow can be changed in
- // the longest loopfilter of the next sbrow
- th = (y + bh + 4 * !!my + 7) >> 6;
- ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
- if (x < !!mx * 3 || y < !!my * 3 ||
- x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
- s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
- ref - !!my * 3 * ref_stride - !!mx * 3 * bytesperpixel,
- 160, ref_stride,
- bw + !!mx * 7, bh + !!my * 7,
- x - !!mx * 3, y - !!my * 3, w, h);
- ref = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
- ref_stride = 160;
- }
- mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
-}
-
-static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
- uint8_t *dst_u, uint8_t *dst_v,
- ptrdiff_t dst_stride,
- const uint8_t *ref_u, ptrdiff_t src_stride_u,
- const uint8_t *ref_v, ptrdiff_t src_stride_v,
- ThreadFrame *ref_frame,
- ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
- int bw, int bh, int w, int h, int bytesperpixel)
-{
- int mx = mv->x << !s->ss_h, my = mv->y << !s->ss_v, th;
-
- y += my >> 4;
- x += mx >> 4;
- ref_u += y * src_stride_u + x * bytesperpixel;
- ref_v += y * src_stride_v + x * bytesperpixel;
- mx &= 15;
- my &= 15;
- // FIXME bilinear filter only needs 0/1 pixels, not 3/4
- // we use +7 because the last 7 pixels of each sbrow can be changed in
- // the longest loopfilter of the next sbrow
- th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);
- ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
- if (x < !!mx * 3 || y < !!my * 3 ||
- x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
- s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
- ref_u - !!my * 3 * src_stride_u - !!mx * 3 * bytesperpixel,
- 160, src_stride_u,
- bw + !!mx * 7, bh + !!my * 7,
- x - !!mx * 3, y - !!my * 3, w, h);
- ref_u = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
- mc[!!mx][!!my](dst_u, dst_stride, ref_u, 160, bh, mx, my);
-
- s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
- ref_v - !!my * 3 * src_stride_v - !!mx * 3 * bytesperpixel,
- 160, src_stride_v,
- bw + !!mx * 7, bh + !!my * 7,
- x - !!mx * 3, y - !!my * 3, w, h);
- ref_v = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
- mc[!!mx][!!my](dst_v, dst_stride, ref_v, 160, bh, mx, my);
- } else {
- mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
- mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
- }
-}
-
-#define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
- px, py, pw, ph, bw, bh, w, h, i) \
- mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
- mv, bw, bh, w, h, bytesperpixel)
-#define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
- row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
- mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
- row, col, mv, bw, bh, w, h, bytesperpixel)
-#define SCALED 0
-#define FN(x) x##_8bpp
-#define BYTES_PER_PIXEL 1
-#include "vp9_mc_template.c"
-#undef FN
-#undef BYTES_PER_PIXEL
-#define FN(x) x##_16bpp
-#define BYTES_PER_PIXEL 2
-#include "vp9_mc_template.c"
-#undef mc_luma_dir_dir
-#undef mc_chroma_dir_dir
-#undef FN
-#undef BYTES_PER_PIXEL
-#undef SCALED
-
static av_always_inline void inter_recon(AVCodecContext *ctx, int bytesperpixel)
{
VP9Context *s = ctx->priv_data;
--
2.1.2
More information about the ffmpeg-devel
mailing list