[FFmpeg-devel] [PATCH 09/10] diracdec: run the final decoding stage/idwt for every plane in parallel
Rostislav Pehlivanov
atomnuker at gmail.com
Thu Jun 30 16:19:10 CEST 2016
On 23 June 2016 at 18:07, Rostislav Pehlivanov <rpehlivanov at ob-encoder.com>
wrote:
> 27% performance increase for a 12bit 4k file.
>
> Signed-off-by: Rostislav Pehlivanov <rpehlivanov at obe.tv>
> ---
> libavcodec/diracdec.c | 152
> ++++++++++++++++++++++++++------------------------
> 1 file changed, 80 insertions(+), 72 deletions(-)
>
> diff --git a/libavcodec/diracdec.c b/libavcodec/diracdec.c
> index 63eb4d1..ec45132 100644
> --- a/libavcodec/diracdec.c
> +++ b/libavcodec/diracdec.c
> @@ -1804,99 +1804,107 @@ static int interpolate_refplane(DiracContext *s,
> DiracFrame *ref, int plane, int
> return 0;
> }
>
> -/**
> - * Dirac Specification ->
> - * 13.0 Transform data syntax. transform_data()
> - */
> -static int dirac_decode_frame_internal(DiracContext *s)
> +static int decode_plane(AVCodecContext *avctx, void *arg, int jobnr, int
> thread)
> {
> DWTContext d;
> - int y, i, comp, dsty;
> - int ret;
> + int i, y, ret, dsty;
> + DiracContext *s = avctx->priv_data;
> + Plane *p = &s->plane[jobnr];
> + uint8_t *frame = s->current_picture->avframe->data[jobnr];
>
> - if (s->low_delay) {
> - /* [DIRAC_STD] 13.5.1 low_delay_transform_data() */
> - for (comp = 0; comp < 3; comp++) {
> - Plane *p = &s->plane[comp];
> - memset(p->idwt.buf, 0, p->idwt.stride * p->idwt.height);
> - }
> - if (!s->zero_res) {
> - if ((ret = decode_lowdelay(s)) < 0)
> - return ret;
> - }
> + /* FIXME: small resolutions */
> + for (i = 0; i < 4; i++)
> + s->edge_emu_buffer[i] = s->edge_emu_buffer_base +
> i*FFALIGN(p->width, 16);
> +
> + if (!s->zero_res && !s->low_delay)
> + {
> + memset(p->idwt.buf, 0, p->idwt.stride * p->idwt.height);
> + decode_component(s, jobnr); /* [DIRAC_STD] 13.4.1
> core_transform_data() */
> }
> + ret = ff_spatial_idwt_init(&d, &p->idwt, s->wavelet_idx+2,
> + s->wavelet_depth, s->bit_depth);
> + if (ret < 0)
> + return ret;
>
> - for (comp = 0; comp < 3; comp++) {
> - Plane *p = &s->plane[comp];
> - uint8_t *frame = s->current_picture->avframe->data[comp];
> + if (!s->num_refs) { /* intra */
> + for (y = 0; y < p->height; y += 16) {
> + int idx = (s->bit_depth - 8) >> 1;
> + ff_spatial_idwt_slice2(&d, y+16); /* decode */
> + s->diracdsp.put_signed_rect_clamped[idx](frame + y*p->stride,
> + p->stride,
> + p->idwt.buf +
> y*p->idwt.stride,
> + p->idwt.stride,
> p->width, 16);
> + }
> + } else { /* inter */
> + int rowheight = p->ybsep*p->stride;
>
> - /* FIXME: small resolutions */
> - for (i = 0; i < 4; i++)
> - s->edge_emu_buffer[i] = s->edge_emu_buffer_base +
> i*FFALIGN(p->width, 16);
> + select_dsp_funcs(s, p->width, p->height, p->xblen, p->yblen);
>
> - if (!s->zero_res && !s->low_delay)
> - {
> - memset(p->idwt.buf, 0, p->idwt.stride * p->idwt.height);
> - decode_component(s, comp); /* [DIRAC_STD] 13.4.1
> core_transform_data() */
> + for (i = 0; i < s->num_refs; i++) {
> + int ret = interpolate_refplane(s, s->ref_pics[i], jobnr,
> p->width, p->height);
> + if (ret < 0)
> + return ret;
> }
> - ret = ff_spatial_idwt_init(&d, &p->idwt, s->wavelet_idx+2,
> - s->wavelet_depth, s->bit_depth);
> - if (ret < 0)
> - return ret;
>
> - if (!s->num_refs) { /* intra */
> - for (y = 0; y < p->height; y += 16) {
> - int idx = (s->bit_depth - 8) >> 1;
> - ff_spatial_idwt_slice2(&d, y+16); /* decode */
> - s->diracdsp.put_signed_rect_clamped[idx](frame +
> y*p->stride,
> - p->stride,
> - p->idwt.buf +
> y*p->idwt.stride,
> - p->idwt.stride,
> p->width, 16);
> - }
> - } else { /* inter */
> - int rowheight = p->ybsep*p->stride;
> + memset(s->mctmp, 0, 4*p->yoffset*p->stride);
>
> - select_dsp_funcs(s, p->width, p->height, p->xblen, p->yblen);
> + dsty = -p->yoffset;
> + for (y = 0; y < s->blheight; y++) {
> + int h = 0,
> + start = FFMAX(dsty, 0);
> + uint16_t *mctmp = s->mctmp + y*rowheight;
> + DiracBlock *blocks = s->blmotion + y*s->blwidth;
>
> - for (i = 0; i < s->num_refs; i++) {
> - int ret = interpolate_refplane(s, s->ref_pics[i], comp,
> p->width, p->height);
> - if (ret < 0)
> - return ret;
> - }
> + init_obmc_weights(s, p, y);
>
> - memset(s->mctmp, 0, 4*p->yoffset*p->stride);
> + if (y == s->blheight-1 || start+p->ybsep > p->height)
> + h = p->height - start;
> + else
> + h = p->ybsep - (start - dsty);
> + if (h < 0)
> + break;
>
> - dsty = -p->yoffset;
> - for (y = 0; y < s->blheight; y++) {
> - int h = 0,
> - start = FFMAX(dsty, 0);
> - uint16_t *mctmp = s->mctmp + y*rowheight;
> - DiracBlock *blocks = s->blmotion + y*s->blwidth;
> + memset(mctmp+2*p->yoffset*p->stride, 0, 2*rowheight);
> + mc_row(s, blocks, mctmp, jobnr, dsty);
>
> - init_obmc_weights(s, p, y);
> + mctmp += (start - dsty)*p->stride + p->xoffset;
> + ff_spatial_idwt_slice2(&d, start + h); /* decode */
> + /* NOTE: add_rect_clamped hasn't been templated hence the
> shifts.
> + * idwt.stride is passed as pixels, not in bytes as in the
> rest of the decoder */
> + s->diracdsp.add_rect_clamped(frame + start*p->stride, mctmp,
> p->stride,
> + (int16_t*)(p->idwt.buf) +
> start*(p->idwt.stride >> 1), (p->idwt.stride >> 1), p->width, h);
>
> - if (y == s->blheight-1 || start+p->ybsep > p->height)
> - h = p->height - start;
> - else
> - h = p->ybsep - (start - dsty);
> - if (h < 0)
> - break;
> + dsty += p->ybsep;
> + }
> + }
>
> - memset(mctmp+2*p->yoffset*p->stride, 0, 2*rowheight);
> - mc_row(s, blocks, mctmp, comp, dsty);
> + return 0;
> +}
>
> - mctmp += (start - dsty)*p->stride + p->xoffset;
> - ff_spatial_idwt_slice2(&d, start + h); /* decode */
> - /* NOTE: add_rect_clamped hasn't been templated hence the
> shifts.
> - * idwt.stride is passed as pixels, not in bytes as in
> the rest of the decoder */
> - s->diracdsp.add_rect_clamped(frame + start*p->stride,
> mctmp, p->stride,
> - (int16_t*)(p->idwt.buf) +
> start*(p->idwt.stride >> 1), (p->idwt.stride >> 1), p->width, h);
> +/**
> + * Dirac Specification ->
> + * 13.0 Transform data syntax. transform_data()
> + */
> +static int dirac_decode_frame_internal(DiracContext *s)
> +{
> + int ret, comp, res[3];
>
> - dsty += p->ybsep;
> - }
> + if (s->low_delay) {
> + /* [DIRAC_STD] 13.5.1 low_delay_transform_data() */
> + for (comp = 0; comp < 3; comp++) {
> + Plane *p = &s->plane[comp];
> + memset(p->idwt.buf, 0, p->idwt.stride * p->idwt.height);
> + }
> + if (!s->zero_res) {
> + if ((ret = decode_lowdelay(s)) < 0)
> + return ret;
> }
> }
>
> + s->avctx->execute2(s->avctx, decode_plane, NULL, res, 3);
> + for (comp = 0; comp < 3; comp++)
> + if (res[comp])
> + return res[comp];
>
> return 0;
> }
> --
> 2.8.1.369.geae769a
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
Please disregard this patch; it breaks regular Dirac files.
More information about the ffmpeg-devel mailing list