[FFmpeg-devel] [PATCH v3 9/9] [GSoC] lavfi/dnn: DNNAsyncExecModule Execution Failure Handling
Guo, Yejun
yejun.guo at intel.com
Mon Aug 9 15:36:41 EEST 2021
> -----Original Message-----
> From: ffmpeg-devel <ffmpeg-devel-bounces at ffmpeg.org> On Behalf Of Fu,
> Ting
> Sent: August 9, 2021 18:13
> To: FFmpeg development discussions and patches <ffmpeg-
> devel at ffmpeg.org>
> Subject: Re: [FFmpeg-devel] [PATCH v3 9/9] [GSoC] lavfi/dnn:
> DNNAsyncExecModule Execution Failure Handling
>
>
>
> > -----Original Message-----
> > From: ffmpeg-devel <ffmpeg-devel-bounces at ffmpeg.org> On Behalf Of
> > Shubhanshu Saxena
> > Sent: August 8, 2021 18:56
> > To: ffmpeg-devel at ffmpeg.org
> > Cc: Shubhanshu Saxena <shubhanshu.e01 at gmail.com>
> > Subject: [FFmpeg-devel] [PATCH v3 9/9] [GSoC] lavfi/dnn:
> > DNNAsyncExecModule Execution Failure Handling
> >
> > This commit adds handling for the case where the asynchronous execution
> > of a request fails: the exit status of the thread is checked when it is
> > joined, before another execution is started. On failure, the necessary
> > cleanup is performed as well.
> >
> > Signed-off-by: Shubhanshu Saxena <shubhanshu.e01 at gmail.com>
> > ---
> > libavfilter/dnn/dnn_backend_common.c | 23 +++++++++++++++++++----
> > libavfilter/dnn/dnn_backend_tf.c | 10 +++++++++-
> > 2 files changed, 28 insertions(+), 5 deletions(-)
> >
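For anyone skimming the mechanism: a pthread start routine's return value is
handed back to whoever joins the thread, so the routine can report success or
failure through the void * it returns. A minimal standalone illustration of
that pattern (names and values here are illustrative, not taken from the
patch; compiles with cc -pthread):

    #include <pthread.h>
    #include <stdio.h>

    #define WORK_OK   ((void *)0)
    #define WORK_FAIL ((void *)-1)

    /* The value returned here is what pthread_join() stores into
     * its second argument on the joining side. */
    static void *worker(void *arg)
    {
        int fail = *(int *)arg;
        return fail ? WORK_FAIL : WORK_OK;
    }

    int main(void)
    {
        pthread_t tid;
        void *status = NULL;
        int fail = 1;

        pthread_create(&tid, NULL, worker, &fail);
        pthread_join(tid, &status);   /* retrieves worker's return value */
        if (status == WORK_FAIL)
            fprintf(stderr, "worker reported failure\n");
        return 0;
    }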
> > diff --git a/libavfilter/dnn/dnn_backend_common.c
> > b/libavfilter/dnn/dnn_backend_common.c
> > index 470fffa2ae..426683b73d 100644
> > --- a/libavfilter/dnn/dnn_backend_common.c
> > +++ b/libavfilter/dnn/dnn_backend_common.c
> > @@ -23,6 +23,9 @@
> >
> > #include "dnn_backend_common.h"
> >
> > +#define DNN_ASYNC_SUCCESS (void *)0
> > +#define DNN_ASYNC_FAIL (void *)-1
> > +
> > int ff_check_exec_params(void *ctx, DNNBackendType backend, DNNFunctionType func_type, DNNExecBaseParams *exec_params)
> > {
> >     if (!exec_params) {
> > @@ -79,18 +82,25 @@ static void *async_thread_routine(void *args)
> >     DNNAsyncExecModule *async_module = args;
> >     void *request = async_module->args;
> >
> > -    async_module->start_inference(request);
> > +    if (async_module->start_inference(request) != DNN_SUCCESS) {
> > +        return DNN_ASYNC_FAIL;
> > +    }
> >     async_module->callback(request);
> > -    return NULL;
> > +    return DNN_ASYNC_SUCCESS;
> > }
> >
> > DNNReturnType ff_dnn_async_module_cleanup(DNNAsyncExecModule *async_module)
> > {
> > +    void *status = 0;
> >     if (!async_module) {
> >         return DNN_ERROR;
> >     }
> > #if HAVE_PTHREAD_CANCEL
> > -    pthread_join(async_module->thread_id, NULL);
> > +    pthread_join(async_module->thread_id, &status);
> > +    if (status == DNN_ASYNC_FAIL) {
> > +        av_log(NULL, AV_LOG_ERROR, "Last Inference Failed.\n");
> > +        return DNN_ERROR;
> > +    }
> > #endif
> >     async_module->start_inference = NULL;
> >     async_module->callback = NULL;
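The same join-and-check appears again in ff_dnn_start_inference_async below;
the join both serializes executions and surfaces the previous run's status
before a new thread is launched. Condensed to its core, the pattern is roughly
this (a sketch with abbreviated names, not the FFmpeg implementation; it
assumes *tid already refers to a joinable thread):

    #include <pthread.h>

    #define ASYNC_FAIL ((void *)-1)

    /* Relaunch a single worker thread, refusing to start a new run
     * if the previous one reported failure. */
    static int relaunch(pthread_t *tid, void *(*routine)(void *), void *args)
    {
        void *status = NULL;

        pthread_join(*tid, &status);   /* wait for the previous run */
        if (status == ASYNC_FAIL)
            return -1;                 /* previous inference failed */
        return pthread_create(tid, NULL, routine, args) == 0 ? 0 : -1;
    }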
> > @@ -101,6 +111,7 @@ DNNReturnType ff_dnn_async_module_cleanup(DNNAsyncExecModule *async_module)
> > DNNReturnType ff_dnn_start_inference_async(void *ctx, DNNAsyncExecModule *async_module)
> > {
> >     int ret;
> > +    void *status = 0;
> >
> >     if (!async_module) {
> >         av_log(ctx, AV_LOG_ERROR, "async_module is null when starting async inference.\n");
> > @@ -108,7 +119,11 @@ DNNReturnType ff_dnn_start_inference_async(void *ctx, DNNAsyncExecModule *async_
> >     }
> >
> > #if HAVE_PTHREAD_CANCEL
> > -    pthread_join(async_module->thread_id, NULL);
> > +    pthread_join(async_module->thread_id, &status);
> > +    if (status == DNN_ASYNC_FAIL) {
> > +        av_log(ctx, AV_LOG_ERROR, "Unable to start inference as previous inference failed.\n");
> > +        return DNN_ERROR;
> > +    }
> >     ret = pthread_create(&async_module->thread_id, NULL, async_thread_routine, async_module);
> >     if (ret != 0) {
> >         av_log(ctx, AV_LOG_ERROR, "Unable to start async inference.\n");
> > diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c
> > index fb3f6f5ea6..ffec1b1328 100644
> > --- a/libavfilter/dnn/dnn_backend_tf.c
> > +++ b/libavfilter/dnn/dnn_backend_tf.c
> > @@ -91,6 +91,7 @@ AVFILTER_DEFINE_CLASS(dnn_tensorflow);
> >
> > static DNNReturnType execute_model_tf(TFRequestItem *request, Queue *inference_queue);
> > static void infer_completion_callback(void *args);
> > +static inline void destroy_request_item(TFRequestItem **arg);
> >
> > static void free_buffer(void *data, size_t length)
> > {
> > @@ -172,6 +173,10 @@ static DNNReturnType tf_start_inference(void *args)
> >                   request->status);
> >     if (TF_GetCode(request->status) != TF_OK) {
> >         av_log(&tf_model->ctx, AV_LOG_ERROR, "%s", TF_Message(request->status));
> > +        tf_free_request(infer_request);
> > +        if (ff_safe_queue_push_back(tf_model->request_queue, request) < 0) {
> > +            destroy_request_item(&request);
> > +        }
> >         return DNN_ERROR;
> >     }
> >     return DNN_SUCCESS;
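The failure path added to tf_start_inference above follows a recycle-or-destroy
discipline: per-run state is released, the request item is pushed back onto the
model's request queue for reuse, and only if that push fails is the item torn
down entirely. A self-contained sketch of the same discipline (toy types and
names; the real code uses TFRequestItem, tf_free_request and
ff_safe_queue_push_back):

    #include <stdlib.h>

    typedef struct Item { int per_run_state; } Item;

    /* Toy single-slot "pool"; push fails when the slot is occupied. */
    typedef struct Pool { Item *slot; } Pool;

    static int pool_push(Pool *p, Item *it)
    {
        if (p->slot)
            return -1;            /* pool full: caller must destroy */
        p->slot = it;
        return 0;
    }

    static void item_destroy(Item **it)
    {
        free(*it);
        *it = NULL;
    }

    /* On failure: drop per-run state, try to recycle the item, and
     * destroy it if recycling is impossible, so nothing leaks. */
    static int run_request(Pool *pool, Item *item, int fail)
    {
        if (fail) {
            item->per_run_state = 0;
            if (pool_push(pool, item) < 0)
                item_destroy(&item);
            return -1;
        }
        return 0;
    }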
> > @@ -1095,7 +1100,10 @@ static DNNReturnType execute_model_tf(TFRequestItem *request, Queue *inference_q
> >     }
> >
> >     if (task->async) {
> > -        return ff_dnn_start_inference_async(ctx, &request->exec_module);
> > +        if (ff_dnn_start_inference_async(ctx, &request->exec_module) != DNN_SUCCESS) {
> > +            goto err;
> > +        }
> > +        return DNN_SUCCESS;
> >     } else {
> >         if (tf_start_inference(request) != DNN_SUCCESS) {
> >             goto err;
> > --
> > 2.25.1
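One more note on the last hunk: routing the async-start failure through the
function's existing err: label, instead of returning its status directly,
keeps execute_model_tf's cleanup in one place. For readers unfamiliar with the
idiom, its generic shape is roughly this (illustrative resources only; the
real err: path releases the function's own task and request state):

    #include <stdlib.h>

    static int start_async(void) { return -1; }   /* stand-in that fails */

    static int do_task(void)
    {
        int ret = 0;
        char *buf = malloc(64);
        if (!buf)
            return -1;

        if (start_async() != 0) {
            ret = -1;
            goto err;        /* jump to the shared cleanup */
        }
        /* ... use buf ... */
    err:
        free(buf);           /* cleanup lives in one place */
        return ret;
    }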
>
> LGTM. These patches work well, and the TensorFlow backend performs much
> better with them.
>
Thanks for the review; will push tomorrow.