[FFmpeg-devel] [PATCH 2/6] avcodec/nvdec: avoid needless copy of output frame
Timo Rothenpieler
timo at rothenpieler.org
Tue May 8 18:49:55 EEST 2018
Am 08.05.2018 um 17:25 schrieb wm4:
> On Tue, 8 May 2018 15:31:28 +0200
> Timo Rothenpieler <timo at rothenpieler.org> wrote:
>
>> Replaces the data pointers with the mapped cuvid ones.
>> Adds buffer_refs to the frame to ensure the needed contexts stay alive
>> and the cuvid idx stays allocated.
>> Adds another buffer_ref to unmap the frame when it's unreferenced itself.
>> ---
>> libavcodec/nvdec.c | 83 +++++++++++++++++++++++++++++++++-------------
>> 1 file changed, 60 insertions(+), 23 deletions(-)
>>
>> diff --git a/libavcodec/nvdec.c b/libavcodec/nvdec.c
>> index ab3cb88b27..d98f9dd95e 100644
>> --- a/libavcodec/nvdec.c
>> +++ b/libavcodec/nvdec.c
>> @@ -308,7 +308,7 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)
>> params.CodecType = cuvid_codec_type;
>> params.ChromaFormat = cuvid_chroma_format;
>> params.ulNumDecodeSurfaces = frames_ctx->initial_pool_size;
>> - params.ulNumOutputSurfaces = 1;
>> + params.ulNumOutputSurfaces = frames_ctx->initial_pool_size;
>>
>> ret = nvdec_decoder_create(&ctx->decoder_ref, frames_ctx->device_ref, ¶ms, avctx);
>> if (ret < 0) {
>> @@ -354,6 +354,32 @@ static void nvdec_fdd_priv_free(void *priv)
>> av_freep(&priv);
>> }
>>
>> +static void nvdec_unmap_mapped_frame(void *opaque, uint8_t *data)
>> +{
>> + NVDECFrame *unmap_data = (NVDECFrame*)data;
>> + NVDECDecoder *decoder = (NVDECDecoder*)unmap_data->decoder_ref->data;
>> + CUdeviceptr devptr = (CUdeviceptr)opaque;
>> + CUresult err;
>> + CUcontext dummy;
>> +
>> + err = decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx);
>> + if (err != CUDA_SUCCESS) {
>> + av_log(NULL, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
>> + goto finish;
>> + }
>> +
>> + err = decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr);
>> + if (err != CUDA_SUCCESS)
>> + av_log(NULL, AV_LOG_ERROR, "cuvidUnmapVideoFrame failed\n");
>> +
>> + decoder->cudl->cuCtxPopCurrent(&dummy);
>> +
>> +finish:
>> + av_buffer_unref(&unmap_data->idx_ref);
>> + av_buffer_unref(&unmap_data->decoder_ref);
>> + av_free(unmap_data);
>> +}
>> +
>> static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
>> {
>> FrameDecodeData *fdd = (FrameDecodeData*)frame->private_ref->data;
>> @@ -361,6 +387,7 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
>> NVDECDecoder *decoder = (NVDECDecoder*)cf->decoder_ref->data;
>>
>> CUVIDPROCPARAMS vpp = { .progressive_frame = 1 };
>> + NVDECFrame *unmap_data = NULL;
>>
>> CUresult err;
>> CUcontext dummy;
>> @@ -383,32 +410,39 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
>> goto finish;
>> }
>>
>> - for (i = 0; frame->data[i]; i++) {
>> - CUDA_MEMCPY2D cpy = {
>> - .srcMemoryType = CU_MEMORYTYPE_DEVICE,
>> - .dstMemoryType = CU_MEMORYTYPE_DEVICE,
>> - .srcDevice = devptr,
>> - .dstDevice = (CUdeviceptr)frame->data[i],
>> - .srcPitch = pitch,
>> - .dstPitch = frame->linesize[i],
>> - .srcY = offset,
>> - .WidthInBytes = FFMIN(pitch, frame->linesize[i]),
>> - .Height = frame->height >> (i ? 1 : 0),
>> - };
>> -
>> - err = decoder->cudl->cuMemcpy2D(&cpy);
>> - if (err != CUDA_SUCCESS) {
>> - av_log(logctx, AV_LOG_ERROR, "Error copying decoded frame: %d\n",
>> - err);
>> - ret = AVERROR_UNKNOWN;
>> - goto copy_fail;
>> - }
>> + unmap_data = av_mallocz(sizeof(*unmap_data));
>> + if (!unmap_data) {
>> + ret = AVERROR(ENOMEM);
>> + goto copy_fail;
>> + }
>>
>> - offset += cpy.Height;
>> + frame->buf[1] = av_buffer_create((uint8_t *)unmap_data, sizeof(*unmap_data),
>> + nvdec_unmap_mapped_frame, (void*)devptr,
>> + AV_BUFFER_FLAG_READONLY);
>> + if (!frame->buf[1]) {
>> + ret = AVERROR(ENOMEM);
>> + goto copy_fail;
>> }
>>
>> + unmap_data->idx = cf->idx;
>> + unmap_data->idx_ref = av_buffer_ref(cf->idx_ref);
>> + unmap_data->decoder_ref = av_buffer_ref(cf->decoder_ref);
>> +
>> + for (i = 0; frame->linesize[i]; i++) {
>> + frame->data[i] = (uint8_t*)(devptr + offset);
>> + frame->linesize[i] = pitch;
>> + offset += pitch * (frame->height >> (i ? 1 : 0));
>> + }
>> +
>> + goto finish;
>> +
>> copy_fail:
>> - decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr);
>> + if (!frame->buf[1]) {
>> + decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr);
>> + av_freep(&unmap_data);
>> + } else {
>> + av_buffer_unref(&frame->buf[1]);
>> + }
>>
>> finish:
>> decoder->cudl->cuCtxPopCurrent(&dummy);
>> @@ -526,6 +560,7 @@ int ff_nvdec_frame_params(AVCodecContext *avctx,
>> int dpb_size)
>> {
>> AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hw_frames_ctx->data;
>> + AVCUDAFramesContext *hwctx = (AVCUDAFramesContext*)frames_ctx->hwctx;
>> const AVPixFmtDescriptor *sw_desc;
>> int cuvid_codec_type, cuvid_chroma_format;
>>
>> @@ -550,6 +585,8 @@ int ff_nvdec_frame_params(AVCodecContext *avctx,
>> frames_ctx->height = (avctx->coded_height + 1) & ~1;
>> frames_ctx->initial_pool_size = dpb_size;
>>
>> + hwctx->flags = AV_CUDA_HWFRAMES_DUMMY_MODE;
>> +
>> switch (sw_desc->comp[0].depth) {
>> case 8:
>> frames_ctx->sw_format = AV_PIX_FMT_NV12;
>
> Probably breaks if the API user doesn't use this dummy flag.
It intentionally stores the other buffer_ref in buf[1] so it doesn't
leak that memory if the context is supplied externally.
It'll just be pointlessly wasting memory.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: smime.p7s
Type: application/pkcs7-signature
Size: 3994 bytes
Desc: S/MIME Cryptographic Signature
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20180508/eff2cdc0/attachment.bin>
More information about the ffmpeg-devel
mailing list