[FFmpeg-devel] [PATCH 4/4] avcodec/h264dec: apply H.274 film grain

Sat Aug 14 16:07:11 EEST 2021

On 8/14/2021 8:36 AM, Niklas Haas wrote:
> From: Niklas Haas <git at haasn.dev>
> 
> Because we need access to ref frames without film grain applied, we have
> to add an extra AVFrame to H264Picture to avoid messing with the
> original. This requires some amount of overhead to make the reference
> moves work out, but it allows us to benefit from frame multithreading
> for film grain application "for free".
> 
> Unfortunately, this approach requires twice as much RAM to be constantly
> allocated, due to the need for an extra buffer per H264Picture. In
> theory, we could get away with freeing up this memory as soon as it's no
> longer needed, but trying to call ff_thread_release_buffer() from
> output_frame() simply deadlocks the decoder and I haven't figured out
> why. Patches welcome(tm)
> 
> Tested on all three cases of (no fg), (fg present but exported) and (fg
> present and not exported), with and without threading.
> ---
>   libavcodec/h264_picture.c | 24 +++++++++++++++++++-
>   libavcodec/h264_slice.c   | 18 +++++++++++++--
>   libavcodec/h264dec.c      | 48 +++++++++++++++++++++++++++++++--------
>   libavcodec/h264dec.h      |  6 +++++
>   4 files changed, 83 insertions(+), 13 deletions(-)
> 
> diff --git a/libavcodec/h264_picture.c b/libavcodec/h264_picture.c
> index b79944b794..8284d12158 100644
> --- a/libavcodec/h264_picture.c
> +++ b/libavcodec/h264_picture.c
> @@ -44,7 +44,7 @@
>   
>   void ff_h264_unref_picture(H264Context *h, H264Picture *pic)
>   {
> -    int off = offsetof(H264Picture, tf) + sizeof(pic->tf);
> +    int off = offsetof(H264Picture, tf_grain) + sizeof(pic->tf_grain);
>       int i;
>   
>       if (!pic->f || !pic->f->buf[0])
> @@ -61,6 +61,9 @@ void ff_h264_unref_picture(H264Context *h, H264Picture *pic)
>           av_buffer_unref(&pic->ref_index_buf[i]);
>       }
>   
> +    if (pic->needs_fg)
> +        ff_thread_release_buffer(h->avctx, &pic->tf_grain);
> +
>       memset((uint8_t*)pic + off, 0, sizeof(*pic) - off);
>   }
>   
> @@ -109,6 +112,15 @@ int ff_h264_ref_picture(H264Context *h, H264Picture *dst, H264Picture *src)
>       if (ret < 0)
>           goto fail;
>   
> +    if (src->needs_fg) {
> +        av_assert0(src->tf_grain.f == src->f_grain);
> +        dst->tf_grain.f = dst->f_grain;
> +        ret = ff_thread_ref_frame(&dst->tf_grain, &src->tf_grain);
> +        if (ret < 0)
> +            goto fail;
> +        dst->needs_fg = 1;
> +    }
> +
>       dst->qscale_table_buf = av_buffer_ref(src->qscale_table_buf);
>       dst->mb_type_buf      = av_buffer_ref(src->mb_type_buf);
>       dst->pps_buf          = av_buffer_ref(src->pps_buf);
> @@ -160,6 +172,16 @@ int ff_h264_replace_picture(H264Context *h, H264Picture *dst, const H264Picture
>       if (ret < 0)
>           goto fail;
>   
> +    if (src->needs_fg) {
> +        av_assert0(src->tf_grain.f == src->f_grain);
> +        dst->tf_grain.f = dst->f_grain;
> +        ff_thread_release_buffer(h->avctx, &dst->tf_grain);
> +        ret = ff_thread_ref_frame(&dst->tf_grain, &src->tf_grain);
> +        if (ret < 0)
> +            goto fail;
> +        dst->needs_fg = 1;
> +    }
> +
>       ret  = av_buffer_replace(&dst->qscale_table_buf, src->qscale_table_buf);
>       ret |= av_buffer_replace(&dst->mb_type_buf, src->mb_type_buf);
>       ret |= av_buffer_replace(&dst->pps_buf, src->pps_buf);
> diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
> index 65d0259a71..2b85f807df 100644
> --- a/libavcodec/h264_slice.c
> +++ b/libavcodec/h264_slice.c
> @@ -197,6 +197,21 @@ static int alloc_picture(H264Context *h, H264Picture *pic)
>       if (ret < 0)
>           goto fail;
>   
> +    if (h->sei.film_grain_characteristics.present &&
> +        !(h->avctx->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN))
> +    {
> +        // Extra buffer required for film grain synthesis in the decoder
> +        pic->tf_grain.f = pic->f_grain;
> +        pic->f_grain->format = pic->f->format;
> +        pic->f_grain->width = pic->f->width;
> +        pic->f_grain->height = pic->f->height;
> +        ret = ff_thread_get_buffer(h->avctx, &pic->tf_grain, 0);
> +        if (ret < 0)
> +            goto fail;
> +        pic->needs_fg = 1;
> +    }
> +
> +
>       if (h->avctx->hwaccel) {
>           const AVHWAccel *hwaccel = h->avctx->hwaccel;
>           av_assert0(!pic->hwaccel_picture_private);
> @@ -1329,8 +1344,7 @@ static int h264_export_frame_props(H264Context *h)
>       }
>       h->sei.unregistered.nb_buf_ref = 0;
>   
> -    if (h->sei.film_grain_characteristics.present &&
> -        (h->avctx->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN)) {
> +    if (h->sei.film_grain_characteristics.present) {
>           H264SEIFilmGrainCharacteristics *fgc = &h->sei.film_grain_characteristics;
>           AVFilmGrainParams *fgp = av_film_grain_params_create_side_data(out);
>           if (!fgp)
> diff --git a/libavcodec/h264dec.c b/libavcodec/h264dec.c
> index 38f8967265..d5878b8a13 100644
> --- a/libavcodec/h264dec.c
> +++ b/libavcodec/h264dec.c
> @@ -275,9 +275,22 @@ int ff_h264_slice_context_init(H264Context *h, H264SliceContext *sl)
>       return 0;
>   }
>   
> +static int h264_init_pic(H264Picture *pic)
> +{
> +    pic->f = av_frame_alloc();
> +    if (!pic->f)
> +        return AVERROR(ENOMEM);
> +
> +    pic->f_grain = av_frame_alloc();
> +    if (!pic->f_grain)
> +        return AVERROR(ENOMEM);
> +
> +    return 0;
> +}
> +
>   static int h264_init_context(AVCodecContext *avctx, H264Context *h)
>   {
> -    int i;
> +    int i, ret;
>   
>       h->avctx                 = avctx;
>       h->cur_chroma_format_idc = -1;
> @@ -308,18 +321,15 @@ static int h264_init_context(AVCodecContext *avctx, H264Context *h)
>       }
>   
>       for (i = 0; i < H264_MAX_PICTURE_COUNT; i++) {
> -        h->DPB[i].f = av_frame_alloc();
> -        if (!h->DPB[i].f)
> -            return AVERROR(ENOMEM);
> +        if ((ret = h264_init_pic(&h->DPB[i])) < 0)
> +            return ret;
>       }
>   
> -    h->cur_pic.f = av_frame_alloc();
> -    if (!h->cur_pic.f)
> -        return AVERROR(ENOMEM);
> +    if ((ret = h264_init_pic(&h->cur_pic)) < 0)
> +        return ret;
>   
> -    h->last_pic_for_ec.f = av_frame_alloc();
> -    if (!h->last_pic_for_ec.f)
> -        return AVERROR(ENOMEM);
> +    if ((ret = h264_init_pic(&h->last_pic_for_ec)) < 0)
> +        return ret;
>   
>       for (i = 0; i < h->nb_slice_ctx; i++)
>           h->slice_ctx[i].h264 = h;
> @@ -826,6 +836,21 @@ static int output_frame(H264Context *h, AVFrame *dst, H264Picture *srcp)
>       AVFrame *src = srcp->f;
>       int ret;
>   
> +    if (srcp->needs_fg) {
> +        AVFrameSideData *sd = av_frame_get_side_data(src, AV_FRAME_DATA_FILM_GRAIN_PARAMS);
> +        av_assert0(sd);
> +        ret = ff_h274_apply_film_grain(srcp->f_grain, src, &h->h274db,
> +                                       (AVFilmGrainParams *) sd->data);
> +        if (!ret) {
> +            if ((ret = av_frame_copy_props(srcp->f_grain, src)) < 0)

TSAN reports a race here. update_context_from_thread() from the next 
thread is already running at this point as is trying to sync the 
H264Pictures, so you're replacing fields in srcp->f_grain at the same 
time they are being read to be copied onto another thread's context.

Since this threads owns dst, you'd can copy the props directly to it 
instead.

> +                return ret;
> +            src = srcp->f_grain;
> +        } else {
> +            av_log(h->avctx, AV_LOG_WARNING, "Failed synthesizing film "
> +                   "grain, ignoring: %s\n", av_err2str(ret));
> +        }
> +    }
> +
>       ret = av_frame_ref(dst, src);
>       if (ret < 0)
>           return ret;
> @@ -841,6 +866,9 @@ static int output_frame(H264Context *h, AVFrame *dst, H264Picture *srcp)
>               goto fail;
>       }
>   
> +    if (!(h->avctx->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN))
> +        av_frame_remove_side_data(dst, AV_FRAME_DATA_FILM_GRAIN_PARAMS);
> +
>       return 0;
>   fail:
>       av_frame_unref(dst);
> diff --git a/libavcodec/h264dec.h b/libavcodec/h264dec.h
> index 7c419de051..7d11d81f99 100644
> --- a/libavcodec/h264dec.h
> +++ b/libavcodec/h264dec.h
> @@ -43,6 +43,7 @@
>   #include "h264dsp.h"
>   #include "h264pred.h"
>   #include "h264qpel.h"
> +#include "h274.h"
>   #include "internal.h"
>   #include "mpegutils.h"
>   #include "parser.h"
> @@ -130,6 +131,9 @@ typedef struct H264Picture {
>       AVFrame *f;
>       ThreadFrame tf;
>   
> +    AVFrame *f_grain;   ///< extra buffer for film grain synthesis
> +    ThreadFrame tf_grain;
> +
>       AVBufferRef *qscale_table_buf;
>       int8_t *qscale_table;
>   
> @@ -162,6 +166,7 @@ typedef struct H264Picture {
>       int recovered;          ///< picture at IDR or recovery point + recovery count
>       int invalid_gap;
>       int sei_recovery_frame_cnt;
> +    int needs_fg;           ///< whether picture needs film grain synthesis (see `f_grain`)
>   
>       AVBufferRef *pps_buf;
>       const PPS   *pps;
> @@ -349,6 +354,7 @@ typedef struct H264Context {
>       H264DSPContext h264dsp;
>       H264ChromaContext h264chroma;
>       H264QpelContext h264qpel;
> +    H274FilmGrainDatabase h274db;
>   
>       H264Picture DPB[H264_MAX_PICTURE_COUNT];
>       H264Picture *cur_pic_ptr;
>