[FFmpeg-devel] [PATCH, RFC] lavc/phtread_frame: update context in child thread in multi-thread mode

Wed Jun 26 11:28:40 EEST 2019

> -----Original Message-----
> From: ffmpeg-devel [mailto:ffmpeg-devel-bounces at ffmpeg.org] On Behalf
> Of Hendrik Leppkes
> Sent: Wednesday, June 26, 2019 15:12
> To: FFmpeg development discussions and patches <ffmpeg-
> devel at ffmpeg.org>
> Subject: Re: [FFmpeg-devel] [PATCH, RFC] lavc/phtread_frame: update
> context in child thread in multi-thread mode
> 
> On Wed, Jun 26, 2019 at 9:06 AM Linjie Fu <linjie.fu at intel.com> wrote:
> >
> > Currently in ff_thread_decode_frame, context is updated from child
> thread
> > to main thread, and main thread releases the context in avcodec_close()
> > when decode finishes.
> >
> > However, when resolution/format change in vp9, ff_get_format was called,
> > and hwaccel_uninit() and hwaccel_init will be used to destroy and re-
> create
> > the context. Due to the async between main-thread and child-thread,
> > main-thread updated its context from child earlier than the context was
> > refreshed in child-thread. And it will lead to:
> >     1. memory leak in child-thread.
> >     2. double free in main-thread while calling avcodec_close().
> >
> > Can be reproduced with a resolution change case in vp9, and use -vframes
> > to terminate the decode between the dynamic resolution change frames:
> >
> > ffmpeg -hwaccel vaapi -hwaccel_device /dev/dri/renderD128 -v
> > verbose -i ./test2360_1672_4980.ivf -pix_fmt p010le -f rawvideo -vsync
> > passthrough -vframes 6 -y out.yuv
> >
> > Move update_context_from_thread from ff_thread_decode_frame(main
> thread)
> > to frame_worker_thread(child thread), update the context in child thread
> > right after the context was updated to avoid the async issue.
> >
> > Signed-off-by: Linjie Fu <linjie.fu at intel.com>
> > ---
> > Request for Comments, not quite familiar with the constraints.
> > Thanks in advance.
> >
> >  libavcodec/internal.h      |   5 ++
> >  libavcodec/pthread_frame.c | 164 ++++++++++++++++++++++++----------
> -----------
> >  2 files changed, 91 insertions(+), 78 deletions(-)
> >
> > diff --git a/libavcodec/internal.h b/libavcodec/internal.h
> > index 5096ffa..9f4ed0b 100644
> > --- a/libavcodec/internal.h
> > +++ b/libavcodec/internal.h
> > @@ -162,6 +162,11 @@ typedef struct AVCodecInternal {
> >
> >      void *thread_ctx;
> >
> > +    /**
> > +     * Main thread AVCodecContext pointer
> > +     */
> > +    void *main_thread;
> > +
> >      DecodeSimpleContext ds;
> >      DecodeFilterContext filter;
> >
> > diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c
> > index 36ac0ac..be42e98 100644
> > --- a/libavcodec/pthread_frame.c
> > +++ b/libavcodec/pthread_frame.c
> > @@ -159,82 +159,6 @@ static void async_unlock(FrameThreadContext
> *fctx)
> >  }
> >
> >  /**
> > - * Codec worker thread.
> > - *
> > - * Automatically calls ff_thread_finish_setup() if the codec does
> > - * not provide an update_thread_context method, or if the codec returns
> > - * before calling it.
> > - */
> > -static attribute_align_arg void *frame_worker_thread(void *arg)
> > -{
> > -    PerThreadContext *p = arg;
> > -    AVCodecContext *avctx = p->avctx;
> > -    const AVCodec *codec = avctx->codec;
> > -
> > -    pthread_mutex_lock(&p->mutex);
> > -    while (1) {
> > -        while (atomic_load(&p->state) == STATE_INPUT_READY && !p->die)
> > -            pthread_cond_wait(&p->input_cond, &p->mutex);
> > -
> > -        if (p->die) break;
> > -
> > -        if (!codec->update_thread_context &&
> THREAD_SAFE_CALLBACKS(avctx))
> > -            ff_thread_finish_setup(avctx);
> > -
> > -        /* If a decoder supports hwaccel, then it must call ff_get_format().
> > -         * Since that call must happen before ff_thread_finish_setup(), the
> > -         * decoder is required to implement update_thread_context() and call
> > -         * ff_thread_finish_setup() manually. Therefore the above
> > -         * ff_thread_finish_setup() call did not happen and hwaccel_serializing
> > -         * cannot be true here. */
> > -        av_assert0(!p->hwaccel_serializing);
> > -
> > -        /* if the previous thread uses hwaccel then we take the lock to ensure
> > -         * the threads don't run concurrently */
> > -        if (avctx->hwaccel) {
> > -            pthread_mutex_lock(&p->parent->hwaccel_mutex);
> > -            p->hwaccel_serializing = 1;
> > -        }
> > -
> > -        av_frame_unref(p->frame);
> > -        p->got_frame = 0;
> > -        p->result = codec->decode(avctx, p->frame, &p->got_frame, &p-
> >avpkt);
> > -
> > -        if ((p->result < 0 || !p->got_frame) && p->frame->buf[0]) {
> > -            if (avctx->internal->allocate_progress)
> > -                av_log(avctx, AV_LOG_ERROR, "A frame threaded decoder did
> not "
> > -                       "free the frame on failure. This is a bug, please report it.\n");
> > -            av_frame_unref(p->frame);
> > -        }
> > -
> > -        if (atomic_load(&p->state) == STATE_SETTING_UP)
> > -            ff_thread_finish_setup(avctx);
> > -
> > -        if (p->hwaccel_serializing) {
> > -            p->hwaccel_serializing = 0;
> > -            pthread_mutex_unlock(&p->parent->hwaccel_mutex);
> > -        }
> > -
> > -        if (p->async_serializing) {
> > -            p->async_serializing = 0;
> > -
> > -            async_unlock(p->parent);
> > -        }
> > -
> > -        pthread_mutex_lock(&p->progress_mutex);
> > -
> > -        atomic_store(&p->state, STATE_INPUT_READY);
> > -
> > -        pthread_cond_broadcast(&p->progress_cond);
> > -        pthread_cond_signal(&p->output_cond);
> > -        pthread_mutex_unlock(&p->progress_mutex);
> > -    }
> > -    pthread_mutex_unlock(&p->mutex);
> > -
> > -    return NULL;
> > -}
> > -
> > -/**
> >   * Update the next thread's AVCodecContext with values from the
> reference thread's context.
> >   *
> >   * @param dst The destination context.
> > @@ -313,6 +237,89 @@ FF_ENABLE_DEPRECATION_WARNINGS
> >      return err;
> >  }
> >
> > +
> > +
> > +/**
> > + * Codec worker thread.
> > + *
> > + * Automatically calls ff_thread_finish_setup() if the codec does
> > + * not provide an update_thread_context method, or if the codec returns
> > + * before calling it.
> > + */
> > +static attribute_align_arg void *frame_worker_thread(void *arg)
> > +{
> > +    PerThreadContext *p = arg;
> > +    AVCodecContext *avctx = p->avctx;
> > +    const AVCodec *codec = avctx->codec;
> > +
> > +    pthread_mutex_lock(&p->mutex);
> > +    while (1) {
> > +        while (atomic_load(&p->state) == STATE_INPUT_READY && !p->die)
> > +            pthread_cond_wait(&p->input_cond, &p->mutex);
> > +
> > +        if (p->die) break;
> > +
> > +        if (!codec->update_thread_context &&
> THREAD_SAFE_CALLBACKS(avctx))
> > +            ff_thread_finish_setup(avctx);
> > +
> > +        /* If a decoder supports hwaccel, then it must call ff_get_format().
> > +         * Since that call must happen before ff_thread_finish_setup(), the
> > +         * decoder is required to implement update_thread_context() and
> call
> > +         * ff_thread_finish_setup() manually. Therefore the above
> > +         * ff_thread_finish_setup() call did not happen and
> hwaccel_serializing
> > +         * cannot be true here. */
> > +        av_assert0(!p->hwaccel_serializing);
> > +
> > +        /* if the previous thread uses hwaccel then we take the lock to
> ensure
> > +         * the threads don't run concurrently */
> > +        if (avctx->hwaccel) {
> > +            pthread_mutex_lock(&p->parent->hwaccel_mutex);
> > +            p->hwaccel_serializing = 1;
> > +        }
> > +
> > +        av_frame_unref(p->frame);
> > +        p->got_frame = 0;
> > +        p->result = codec->decode(avctx, p->frame, &p->got_frame, &p-
> >avpkt);
> > +
> > +        if (p->avctx->internal->main_thread)
> > +            update_context_from_thread((AVCodecContext *)p->avctx-
> >internal->main_thread,
> > +                                                                            p->avctx, 1);
> > +
> > +
> > +        if ((p->result < 0 || !p->got_frame) && p->frame->buf[0]) {
> > +            if (avctx->internal->allocate_progress)
> > +                av_log(avctx, AV_LOG_ERROR, "A frame threaded decoder did
> not "
> > +                       "free the frame on failure. This is a bug, please report it.\n");
> > +            av_frame_unref(p->frame);
> > +        }
> > +
> > +        if (atomic_load(&p->state) == STATE_SETTING_UP)
> > +            ff_thread_finish_setup(avctx);
> > +
> > +        if (p->hwaccel_serializing) {
> > +            p->hwaccel_serializing = 0;
> > +            pthread_mutex_unlock(&p->parent->hwaccel_mutex);
> > +        }
> > +
> > +        if (p->async_serializing) {
> > +            p->async_serializing = 0;
> > +
> > +            async_unlock(p->parent);
> > +        }
> > +
> > +        pthread_mutex_lock(&p->progress_mutex);
> > +
> > +        atomic_store(&p->state, STATE_INPUT_READY);
> > +
> > +        pthread_cond_broadcast(&p->progress_cond);
> > +        pthread_cond_signal(&p->output_cond);
> > +        pthread_mutex_unlock(&p->progress_mutex);
> > +    }
> > +    pthread_mutex_unlock(&p->mutex);
> > +:
> > +    return NULL;
> > +}
> > +
> 
> Please don't move the entire function around, as it makes it
> impossible to spot any differences in the function itself. Instead,
> use a forward declaration of whichever function you want to call, so
> the diff is more clear.
> 
Thanks, updated to make it easier for review.