[FFmpeg-devel] [PATCH] Optimize nvenc parameters, add 3 more presets: fast, medium, slow

Agatha Hu ahu at nvidia.com
Fri Sep 11 04:55:27 CEST 2015


在 2015/9/10 17:48, Timo Rothenpieler 写道:
> * PGP Signed by an unknown key
>
>> ---
>>   libavcodec/nvenc.c | 59
>> +++++++++++++++++++++++++++++++++++++++++++++++++-----
>>   1 file changed, 54 insertions(+), 5 deletions(-)
>>
>> diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c
>> index 5490652..7c683ea 100644
>> --- a/libavcodec/nvenc.c
>> +++ b/libavcodec/nvenc.c
>> @@ -610,8 +610,17 @@ static av_cold int nvenc_encode_init(AVCodecContext
>> *avctx)
>>       if (ctx->preset) {
>>           if (!strcmp(ctx->preset, "hp")) {
>>               encoder_preset = NV_ENC_PRESET_HP_GUID;
>> +        }else if (!strcmp(ctx->preset, "fast")) {
>
> It's missing a space here.
>
>> +            ctx->twopass = 0;
>> +            encoder_preset = NV_ENC_PRESET_HQ_GUID;
>>           } else if (!strcmp(ctx->preset, "hq")) {
>>               encoder_preset = NV_ENC_PRESET_HQ_GUID;
>> +        } else if (!strcmp(ctx->preset, "medium")) {
>> +            ctx->twopass = 0;
>> +            encoder_preset = NV_ENC_PRESET_HQ_GUID;
>> +        } else if (!strcmp(ctx->preset, "slow")) {
>> +            ctx->twopass = 1;
>> +            encoder_preset = NV_ENC_PRESET_HQ_GUID;
>>           } else if (!strcmp(ctx->preset, "bd")) {
>>               encoder_preset = NV_ENC_PRESET_BD_GUID;
>>           } else if (!strcmp(ctx->preset, "ll")) {
>> @@ -632,7 +641,7 @@ static av_cold int nvenc_encode_init(AVCodecContext
>> *avctx)
>>           } else if (!strcmp(ctx->preset, "default")) {
>>               encoder_preset = NV_ENC_PRESET_DEFAULT_GUID;
>>           } else {
>> -            av_log(avctx, AV_LOG_FATAL, "Preset \"%s\" is unknown!
>> Supported presets: hp, hq, bd, ll, llhp, llhq, lossless, losslesshp,
>> default\n", ctx->preset);
>> +            av_log(avctx, AV_LOG_FATAL, "Preset \"%s\" is unknown!
>> Supported presets: slow, medium, fast, hp, hq, bd, ll, llhp, llhq,
>> lossless, losslesshp, default\n", ctx->preset);
>>               res = AVERROR(EINVAL);
>>               goto error;
>>           }
>> @@ -710,6 +719,7 @@ static av_cold int nvenc_encode_init(AVCodecContext
>> *avctx)
>>           switch (avctx->codec->id) {
>>           case AV_CODEC_ID_H264:
>>
>> ctx->encode_config.encodeCodecConfig.h264Config.maxNumRefFrames =
>> avctx->refs;
>> +
>> ctx->encode_config.encodeCodecConfig.h264Config.hierarchicalPFrames = 1;
>>               break;
>>           case AV_CODEC_ID_H265:
>>
>> ctx->encode_config.encodeCodecConfig.hevcConfig.maxNumRefFramesInDPB =
>> avctx->refs;
>> @@ -770,7 +780,7 @@ static av_cold int nvenc_encode_init(AVCodecContext
>> *avctx)
>>           avctx->qmin = -1;
>>           avctx->qmax = -1;
>>       } else if (ctx->cbr) {
>> -        if (!ctx->twopass) {
>> +        if (!ctx->twopass < 1) {
>
> This doesn't seem right at all, what is it supposed to do?
> Keep in mind that twopass is a tristate, with the default being -1,
> which means autoselect.
>
>>               ctx->encode_config.rcParams.rateControlMode =
>> NV_ENC_PARAMS_RC_CBR;
>>           } else if (ctx->twopass == 1 || isLL) {
>>               ctx->encode_config.rcParams.rateControlMode =
>> NV_ENC_PARAMS_RC_2_PASS_QUALITY;
>> @@ -799,7 +809,7 @@ static av_cold int nvenc_encode_init(AVCodecContext
>> *avctx)
>>
>> ctx->encode_config.encodeCodecConfig.h264Config.fmoMode =
>> NV_ENC_H264_FMO_DISABLE;
>>               }
>>           } else {
>> -            ctx->encode_config.rcParams.rateControlMode =
>> NV_ENC_PARAMS_RC_VBR;
>> +            ctx->encode_config.rcParams.rateControlMode =
>> NV_ENC_PARAMS_RC_VBR_MINQP;
>>           }
>>
>>           ctx->encode_config.rcParams.enableMinQP = 1;
>> @@ -812,6 +822,45 @@ static av_cold int nvenc_encode_init(AVCodecContext
>> *avctx)
>>           ctx->encode_config.rcParams.maxQP.qpInterB = avctx->qmax;
>>           ctx->encode_config.rcParams.maxQP.qpInterP = avctx->qmax;
>>           ctx->encode_config.rcParams.maxQP.qpIntra = avctx->qmax;
>> +
>> +        {
>
> I'm not sure if this conforms with the ffmpeg code style guidelines.
>
>> +            uint32_t qpInterP = (avctx->qmax + 3*avctx->qmin)/4; //
>> biased towards Qmin
>> +            ctx->encode_config.rcParams.initialRCQP.qpInterP  =
>> qpInterP;
>> +            if(avctx->i_quant_factor != 0.0 && avctx->b_quant_factor !=
>> 0.0) {
>> +                ctx->encode_config.rcParams.initialRCQP.qpIntra =
>> qpInterP * fabs(avctx->i_quant_factor);
>> +                ctx->encode_config.rcParams.initialRCQP.qpIntra +=
>> qpInterP * (avctx->i_quant_offset);
>> +                ctx->encode_config.rcParams.initialRCQP.qpInterB =
>> qpInterP * fabs(avctx->b_quant_factor);
>> +                ctx->encode_config.rcParams.initialRCQP.qpInterB +=
>> qpInterP * (avctx->b_quant_offset);
>> +            } else {
>> +                ctx->encode_config.rcParams.initialRCQP.qpIntra =
>> qpInterP;
>> +                ctx->encode_config.rcParams.initialRCQP.qpInterB =
>> qpInterP;
>> +            }
>> +        }
>> +        ctx->encode_config.rcParams.enableInitialRCQP = 1;
>> +    } else {
>> +        if (ctx->twopass < 1) {
>
> This also seems a bit strange.
>
>> +            ctx->encode_config.rcParams.rateControlMode =
>> NV_ENC_PARAMS_RC_VBR;
>> +        } else {
>> +            ctx->encode_config.rcParams.rateControlMode =
>> NV_ENC_PARAMS_RC_2_PASS_VBR;
>> +        }
>> +
>> +        {
>> +            uint32_t qpInterP = 26; // default to 26
>> +            ctx->encode_config.rcParams.initialRCQP.qpInterP  =
>> qpInterP;
>> +
>> +            if(avctx->i_quant_factor != 0.0 && avctx->b_quant_factor !=
>> 0.0) {
>> +
>> +                ctx->encode_config.rcParams.initialRCQP.qpIntra =
>> qpInterP * fabs(avctx->i_quant_factor);
>> +                ctx->encode_config.rcParams.initialRCQP.qpIntra +=
>> qpInterP * (avctx->i_quant_offset);
>> +
>> +                ctx->encode_config.rcParams.initialRCQP.qpInterB =
>> qpInterP * fabs(avctx->b_quant_factor);
>> +                ctx->encode_config.rcParams.initialRCQP.qpInterB +=
>> qpInterP * (avctx->b_quant_offset);
>> +            } else {
>> +                ctx->encode_config.rcParams.initialRCQP.qpIntra =
>> qpInterP;
>> +                ctx->encode_config.rcParams.initialRCQP.qpInterB =
>> qpInterP;
>> +            }
>> +        }
>> +        ctx->encode_config.rcParams.enableInitialRCQP = 1;
>>       }
>
> Can't this second block be merged with the first one? It seems to be
> doing the same calculations, just with a different default value.
>
>>       if (avctx->rc_buffer_size > 0)
>> @@ -1415,12 +1464,12 @@ static const enum AVPixelFormat pix_fmts_nvenc[]
>> = {
>>   #define OFFSET(x) offsetof(NvencContext, x)
>>   #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
>>   static const AVOption options[] = {
>> -    { "preset", "Set the encoding preset (one of hq, hp, bd, ll, llhq,
>> llhp, default)", OFFSET(preset), AV_OPT_TYPE_STRING, { .str = "hq" }, 0,
>> 0, VE },
>> +    { "preset", "Set the encoding preset (one of one of slow=hq 2pass,
>> medium= hq, fast = hp, hq, hp, bd, ll, llhq, llhp, default)",
>> OFFSET(preset), AV_OPT_TYPE_STRING, { .str = "hq" }, 0, 0, VE },
>>       { "profile", "Set the encoding profile (high, main or baseline)",
>> OFFSET(profile), AV_OPT_TYPE_STRING, { 0 }, 0, 0, VE },
>>       { "level", "Set the encoding level restriction (auto, 1.0, 1.0b,
>> 1.1, 1.2, ..., 4.2, 5.0, 5.1)", OFFSET(level), AV_OPT_TYPE_STRING, { 0
>> }, 0, 0, VE },
>>       { "tier", "Set the encoding tier (main or high)", OFFSET(tier),
>> AV_OPT_TYPE_STRING, { 0 }, 0, 0, VE },
>>       { "cbr", "Use cbr encoding mode", OFFSET(cbr), AV_OPT_TYPE_INT, {
>> .i64 = 0 }, 0, 1, VE },
>> -    { "2pass", "Use 2pass cbr encoding mode", OFFSET(twopass),
>> AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1, VE },
>> +    { "2pass", "Use 2pass encoding mode", OFFSET(twopass),
>> AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
>>       { "gpu", "Selects which NVENC capable GPU to use. First GPU is 0,
>> second is 1, and so on.", OFFSET(gpu), AV_OPT_TYPE_INT, { .i64 = 0 }, 0,
>> INT_MAX, VE },
>>       { "delay", "Delays frame output by the given amount of frames.",
>> OFFSET(buffer_delay), AV_OPT_TYPE_INT, { .i64 = INT_MAX }, 0, INT_MAX,
>> VE },
>>       { NULL }
>
>
>
> * Unknown Key
> * 0xA6DA5D0F
>
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>

I'll answer these comments later. But there is a more urgent fix: I've sent
the patch, and it changes only one line.
It concerns the creation flag passed to cuCtxCreate. It is currently 0 (the
default), which causes the CPU thread to spin while waiting for the GPU to
return, thus decreasing CPU performance.
Changing it to 4 (CU_CTX_SCHED_BLOCKING_SYNC) solves this.

Agatha Hu


More information about the ffmpeg-devel mailing list