[FFmpeg-devel] [V5 1/4] lavc/vaapi_encode: Change the slice/parameter buffers to dynamic alloc.

Mark Thompson sw at jkqxz.net
Sat Sep 9 15:00:11 EEST 2017


On 09/09/17 11:28, Mark Thompson wrote:
> On 06/09/17 03:35, Jun Zhao wrote:
>> On 2017/8/28 20:28, Mark Thompson wrote:
>>> On 24/08/17 02:13, Jun Zhao wrote:
>>>> V5: - In h265_vaapi encoder, when setting slice number > max slice number
>>>>       supported by driver, report error and return. Same as h264_vaapi.
>>>>     - Clean the logic when setting first_slice_segment_in_pic_flags  
>>>> V4: - Change the array malloc function.
>>>>     - Clean the pointless condition check when free the memory.
>>>>
>>>> V3: - Making pic->slices be VAAPIEncodeSlice* instead of
>>>> VAAPIEncodeSlice**. - Fix resource (vaBuffer) lead when realloc
>>>> pic->param_buffers fail. - Adjust max_slices location in
>>>> VAAPIEncodeContext. - Re-work distributing the macro-blocks for
>>>> multi-slices function. V2: - Change the slice/parameter buffers to
>>>> dynamic alloc and split the mutil-slice support for AVC/HEVC.
>>>>
>>>>
>>>> From a1078385915d53ec94559ed897eb253e537d1f65 Mon Sep 17 00:00:00 2001
>>>> From: Jun Zhao <jun.zhao at intel.com>
>>>> Date: Mon, 31 Jul 2017 22:46:23 -0400
>>>> Subject: [V5 1/4] lavc/vaapi_encode: Change the slice/parameter buffers to
>>>>  dynamic alloc.
>>>>
>>>> Change the slice/parameter buffers to be allocated dynamically.
>>>>
>>>> Signed-off-by: Wang, Yi A <yi.a.wang at intel.com>
>>>> Signed-off-by: Jun Zhao <jun.zhao at intel.com>
>>>> ---
>>>>  libavcodec/vaapi_encode.c | 44 ++++++++++++++++++++++++++++++++------------
>>>>  libavcodec/vaapi_encode.h |  6 ++----
>>>>  2 files changed, 34 insertions(+), 16 deletions(-)
>>>>
>>>> diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
>>>> index 22114bedbe..f49e0e3b91 100644
>>>> --- a/libavcodec/vaapi_encode.c
>>>> +++ b/libavcodec/vaapi_encode.c
>>>> @@ -36,13 +36,18 @@ static int vaapi_encode_make_packed_header(AVCodecContext *avctx,
>>>>      VAAPIEncodeContext *ctx = avctx->priv_data;
>>>>      VAStatus vas;
>>>>      VABufferID param_buffer, data_buffer;
>>>> +    VABufferID *tmp;
>>>>      VAEncPackedHeaderParameterBuffer params = {
>>>>          .type = type,
>>>>          .bit_length = bit_len,
>>>>          .has_emulation_bytes = 1,
>>>>      };
>>>>  
>>>> -    av_assert0(pic->nb_param_buffers + 2 <= MAX_PARAM_BUFFERS);
>>>> +    tmp = av_realloc_array(pic->param_buffers, sizeof(*tmp), pic->nb_param_buffers + 2);
>>>> +    if (!tmp) {
>>>> +        return AVERROR(ENOMEM);
>>>> +    }
>>>> +    pic->param_buffers = tmp;
>>>>  
>>>>      vas = vaCreateBuffer(ctx->hwctx->display, ctx->va_context,
>>>>                           VAEncPackedHeaderParameterBufferType,
>>>> @@ -77,9 +82,14 @@ static int vaapi_encode_make_param_buffer(AVCodecContext *avctx,
>>>>  {
>>>>      VAAPIEncodeContext *ctx = avctx->priv_data;
>>>>      VAStatus vas;
>>>> +    VABufferID *tmp;
>>>>      VABufferID buffer;
>>>>  
>>>> -    av_assert0(pic->nb_param_buffers + 1 <= MAX_PARAM_BUFFERS);
>>>> +    tmp = av_realloc_array(pic->param_buffers, sizeof(*tmp), pic->nb_param_buffers + 1);
>>>> +    if (!tmp) {
>>>> +        return AVERROR(ENOMEM);
>>>> +    }
>>>> +    pic->param_buffers = tmp;
>>>>  
>>>>      vas = vaCreateBuffer(ctx->hwctx->display, ctx->va_context,
>>>>                           type, len, 1, data, &buffer);
>>>> @@ -313,15 +323,14 @@ static int vaapi_encode_issue(AVCodecContext *avctx,
>>>>          }
>>>>      }
>>>>  
>>>> -    av_assert0(pic->nb_slices <= MAX_PICTURE_SLICES);
>>>> +    pic->slices = av_mallocz_array(pic->nb_slices, sizeof(*pic->slices));
>>>> +    if (!pic->slices) {
>>>> +        err = AVERROR(ENOMEM);
>>>> +        goto fail;
>>>> +    }
>>>>      for (i = 0; i < pic->nb_slices; i++) {
>>>> -        slice = av_mallocz(sizeof(*slice));
>>>> -        if (!slice) {
>>>> -            err = AVERROR(ENOMEM);
>>>> -            goto fail;
>>>> -        }
>>>> +        slice = &pic->slices[i];
>>>>          slice->index = i;
>>>> -        pic->slices[i] = slice;
>>>>  
>>>>          if (ctx->codec->slice_params_size > 0) {
>>>>              slice->codec_slice_params = av_mallocz(ctx->codec->slice_params_size);
>>>> @@ -425,8 +434,16 @@ fail_with_picture:
>>>>  fail:
>>>>      for(i = 0; i < pic->nb_param_buffers; i++)
>>>>          vaDestroyBuffer(ctx->hwctx->display, pic->param_buffers[i]);
>>>> +    for (i = 0; i < pic->nb_slices; i++) {
>>>> +        if (pic->slices) {
>>>> +            av_freep(&pic->slices[i].priv_data);
>>>> +            av_freep(&pic->slices[i].codec_slice_params);
>>>> +        }
>>>> +    }
>>>>  fail_at_end:
>>>>      av_freep(&pic->codec_picture_params);
>>>> +    av_freep(&pic->param_buffers);
>>>> +    av_freep(&pic->slices);
>>>>      av_frame_free(&pic->recon_image);
>>>>      av_buffer_unref(&pic->output_buffer_ref);
>>>>      pic->output_buffer = VA_INVALID_ID;
>>>> @@ -535,15 +552,18 @@ static int vaapi_encode_free(AVCodecContext *avctx,
>>>>          vaapi_encode_discard(avctx, pic);
>>>>  
>>>>      for (i = 0; i < pic->nb_slices; i++) {
>>>> -        av_freep(&pic->slices[i]->priv_data);
>>>> -        av_freep(&pic->slices[i]->codec_slice_params);
>>>> -        av_freep(&pic->slices[i]);
>>>> +        if (pic->slices) {
>>>> +            av_freep(&pic->slices[i].priv_data);
>>>> +            av_freep(&pic->slices[i].codec_slice_params);
>>>> +        }
>>>>      }
>>>>      av_freep(&pic->codec_picture_params);
>>>>  
>>>>      av_frame_free(&pic->input_image);
>>>>      av_frame_free(&pic->recon_image);
>>>>  
>>>> +    av_freep(&pic->param_buffers);
>>>> +    av_freep(&pic->slices);
>>>>      // Output buffer should already be destroyed.
>>>>      av_assert0(pic->output_buffer == VA_INVALID_ID);
>>>>  
>>>> diff --git a/libavcodec/vaapi_encode.h b/libavcodec/vaapi_encode.h
>>>> index 3bf0cc87c7..364b5b8028 100644
>>>> --- a/libavcodec/vaapi_encode.h
>>>> +++ b/libavcodec/vaapi_encode.h
>>>> @@ -35,8 +35,6 @@ enum {
>>>>      MAX_CONFIG_ATTRIBUTES  = 4,
>>>>      MAX_GLOBAL_PARAMS      = 4,
>>>>      MAX_PICTURE_REFERENCES = 2,
>>>> -    MAX_PICTURE_SLICES     = 112,
>>>> -    MAX_PARAM_BUFFERS      = 128,
>>>>      MAX_REORDER_DELAY      = 16,
>>>>      MAX_PARAM_BUFFER_SIZE  = 1024,
>>>>  };
>>>> @@ -73,7 +71,7 @@ typedef struct VAAPIEncodePicture {
>>>>      VASurfaceID     recon_surface;
>>>>  
>>>>      int          nb_param_buffers;
>>>> -    VABufferID      param_buffers[MAX_PARAM_BUFFERS];
>>>> +    VABufferID      *param_buffers;
>>>>  
>>>>      AVBufferRef    *output_buffer_ref;
>>>>      VABufferID      output_buffer;
>>>> @@ -85,7 +83,7 @@ typedef struct VAAPIEncodePicture {
>>>>      struct VAAPIEncodePicture *refs[MAX_PICTURE_REFERENCES];
>>>>  
>>>>      int          nb_slices;
>>>> -    VAAPIEncodeSlice *slices[MAX_PICTURE_SLICES];
>>>> +    VAAPIEncodeSlice *slices;
>>>>  } VAAPIEncodePicture;
>>>>  
>>>>  typedef struct VAAPIEncodeContext {
>>>> -- 
>>>> 2.11.0
>>>>
>>> Applied this one.
>>>
>>> The others look ok, but I'm still dubious about what the result should actually be with multiple slices.  Would you mind answering <3b7ccc48-25f7-71d3-ac98-8b40b8594cc7 at jkqxz.net>, particularly around cases (2) and (4), or any others applying to your use-case?  I don't think error resilience is relevant, since we aren't actually using any of the relevant features to make it effective (e.g. redundant coding).
>>>
>>> Thanks,
>>>
>>> - Mark
>> For 2). I think is yes, some SW decode can parallelism when encoder
>> support mutil-slice.
> 
> Do said decoders place any requirements on the slices?
> 
> From a bit of research I suspect that bluray slice requirements are actually for rectangular slices, but I don't have a reference for that.  Maybe someone else can confirm or deny that?

<http://www.blu-raydisc.com/assets/Downloadablefile/BD-ROM_Part3_V3.0_WhitePaper_150724.pdf>:

"A slice segment shall be composed of one or more rows of a coding tree block. A row of a coding tree block indicates all the coding tree blocks in a horizontal row of coding tree block."

Can't find an equivalent reference for H.264, though.

>> For 3). when send NALU with RTP, customer can use mutil-slice to
>> adaptive the coded slice size to different MTU size, this means when we
>> send NALU in RTP, we can save CPU power and give low latency in link 
>> because we don't need to  fragment the RTP packet in local CPU.
> 
> This (RFC 6184 in single NAL unit packet mode) requires support for arbitrary numbers of slices limited by MTU - that's not possible in current VAAPI, and even if it were then selection of slices by count only as here is not sufficient (complexity need not be uniformly distributed through the picture).
> 
> - Mark
> 


More information about the ffmpeg-devel mailing list