[FFmpeg-devel] [PATCH v3 1/3] avcodec/libvpxenc: add VP9 temporal scalability encoding option
Wonkap Jang
wonkap at google.com
Thu Jan 9 01:04:49 EET 2020
On Wed, Jan 8, 2020 at 3:00 PM Wonkap Jang <wonkap at google.com> wrote:
> This commit reuses the existing VP8 temporal scalability configuration
> options to enable temporal scalability for VP9 as well. It also adds a way
> to select one of three preset temporal structures (refer to the
> documentation for more detail) that can be used in offline encoding.
> ---
> libavcodec/libvpxenc.c | 251 +++++++++++++++++++++++++++++++++++++----
> 1 file changed, 228 insertions(+), 23 deletions(-)
>
> diff --git a/libavcodec/libvpxenc.c b/libavcodec/libvpxenc.c
> index 0b8a070304..14cc1e7158 100644
> --- a/libavcodec/libvpxenc.c
> +++ b/libavcodec/libvpxenc.c
> @@ -100,7 +100,9 @@ typedef struct VPxEncoderContext {
> int rc_undershoot_pct;
> int rc_overshoot_pct;
>
> - AVDictionary *vp8_ts_parameters;
> + AVDictionary *vpx_ts_parameters;
> + int *ts_layer_flags;
> + int current_temporal_idx;
>
> // VP9-only
> int lossless;
> @@ -137,6 +139,7 @@ static const char *const ctlidstr[] = {
> [VP8E_SET_CQ_LEVEL] = "VP8E_SET_CQ_LEVEL",
> [VP8E_SET_MAX_INTRA_BITRATE_PCT] = "VP8E_SET_MAX_INTRA_BITRATE_PCT",
> [VP8E_SET_SHARPNESS] = "VP8E_SET_SHARPNESS",
> + [VP8E_SET_TEMPORAL_LAYER_ID] = "VP8E_SET_TEMPORAL_LAYER_ID",
> #if CONFIG_LIBVPX_VP9_ENCODER
> [VP9E_SET_LOSSLESS] = "VP9E_SET_LOSSLESS",
> [VP9E_SET_TILE_COLUMNS] = "VP9E_SET_TILE_COLUMNS",
> @@ -144,6 +147,11 @@ static const char *const ctlidstr[] = {
> [VP9E_SET_FRAME_PARALLEL_DECODING] =
> "VP9E_SET_FRAME_PARALLEL_DECODING",
> [VP9E_SET_AQ_MODE] = "VP9E_SET_AQ_MODE",
> [VP9E_SET_COLOR_SPACE] = "VP9E_SET_COLOR_SPACE",
> + [VP9E_SET_SVC_LAYER_ID] = "VP9E_SET_SVC_LAYER_ID",
> +#if VPX_ENCODER_ABI_VERSION >= 12
> + [VP9E_SET_SVC_PARAMETERS] = "VP9E_SET_SVC_PARAMETERS",
> +#endif
> + [VP9E_SET_SVC] = "VP9E_SET_SVC",
> #if VPX_ENCODER_ABI_VERSION >= 11
> [VP9E_SET_COLOR_RANGE] = "VP9E_SET_COLOR_RANGE",
> #endif
> @@ -223,8 +231,16 @@ static av_cold void dump_enc_cfg(AVCodecContext
> *avctx,
> " %*s%u\n", width, "ts_number_layers:",
> cfg->ts_number_layers);
> av_log(avctx, level,
> "\n %*s", width, "ts_target_bitrate:");
> - for (i = 0; i < VPX_TS_MAX_LAYERS; i++)
> - av_log(avctx, level, "%u ", cfg->ts_target_bitrate[i]);
> + if (avctx->codec_id == AV_CODEC_ID_VP8) {
> + for (i = 0; i < VPX_TS_MAX_LAYERS; i++)
> + av_log(avctx, level, "%u ", cfg->ts_target_bitrate[i]);
> + }
> +#if (VPX_ENCODER_ABI_VERSION >= 12) && CONFIG_LIBVPX_VP9_ENCODER
> + if (avctx->codec_id == AV_CODEC_ID_VP9) {
> + for (i = 0; i < VPX_TS_MAX_LAYERS; i++)
> + av_log(avctx, level, "%u ", cfg->layer_target_bitrate[i]);
> + }
> +#endif
> av_log(avctx, level, "\n");
> av_log(avctx, level,
> "\n %*s", width, "ts_rate_decimator:");
> @@ -346,6 +362,8 @@ static av_cold int vpx_free(AVCodecContext *avctx)
> }
> #endif
>
> + av_freep(&ctx->ts_layer_flags);
> +
> vpx_codec_destroy(&ctx->encoder);
> if (ctx->is_alpha) {
> vpx_codec_destroy(&ctx->encoder_alpha);
> @@ -370,23 +388,154 @@ static void vp8_ts_parse_int_array(int *dest, char
> *value, size_t value_len, int
> }
> }
>
> -static int vp8_ts_param_parse(struct vpx_codec_enc_cfg *enccfg, char
> *key, char *value)
> +static void set_temporal_layer_pattern(int layering_mode,
> + vpx_codec_enc_cfg_t *cfg,
> + int *layer_flags,
> + int *flag_periodicity)
> +{
> + switch (layering_mode) {
> + case 2: {
> + /**
> + * 2-layers, 2-frame period.
> + */
> + int ids[2] = { 0, 1 };
> + cfg->ts_periodicity = 2;
> + *flag_periodicity = 2;
> + cfg->ts_number_layers = 2;
> + cfg->ts_rate_decimator[0] = 2;
> + cfg->ts_rate_decimator[1] = 1;
> + memcpy(cfg->ts_layer_id, ids, sizeof(ids));
> +
> + layer_flags[0] =
> + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
> + VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
> +
> + layer_flags[1] =
> + VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_GF |
> + VP8_EFLAG_NO_UPD_LAST |
> + VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_REF_GF;
> + break;
> + }
> + case 3: {
> + /**
> + * 3-layers structure with one reference frame.
> + * This works same as temporal_layering_mode 3.
> + *
> + * 3-layers, 4-frame period.
> + */
> + int ids[4] = { 0, 2, 1, 2 };
> + cfg->ts_periodicity = 4;
> + *flag_periodicity = 4;
> + cfg->ts_number_layers = 3;
> + cfg->ts_rate_decimator[0] = 4;
> + cfg->ts_rate_decimator[1] = 2;
> + cfg->ts_rate_decimator[2] = 1;
> + memcpy(cfg->ts_layer_id, ids, sizeof(ids));
> +
> + /**
> + * 0=L, 1=GF, 2=ARF,
> + * Intra-layer prediction disabled.
> + */
> + layer_flags[0] =
> + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
> + VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
> + layer_flags[2] =
> + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
> + VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST;
> + layer_flags[1] =
> + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
> + VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF |
> + VP8_EFLAG_NO_UPD_ARF;
> + layer_flags[3] =
> + VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_ARF |
> + VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF |
> + VP8_EFLAG_NO_UPD_ARF;
> + break;
> + }
> + case 4: {
> + /**
> + * 3-layers structure.
> + * added dependency between the two TL2 frames (on top of case 3).
> + * 3-layers, 4-frame period.
> + */
> + int ids[4] = { 0, 2, 1, 2 };
> + cfg->ts_periodicity = 4;
> + *flag_periodicity = 4;
> + cfg->ts_number_layers = 3;
> + cfg->ts_rate_decimator[0] = 4;
> + cfg->ts_rate_decimator[1] = 2;
> + cfg->ts_rate_decimator[2] = 1;
> + memcpy(cfg->ts_layer_id, ids, sizeof(ids));
> +
> + /**
> + * 0=L, 1=GF, 2=ARF, Intra-layer prediction disabled.
> + */
> + layer_flags[0] =
> + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
> + VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
> + layer_flags[2] =
> + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
> + VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST;
> + layer_flags[1] =
> + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
> + VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
> + layer_flags[3] =
> + VP8_EFLAG_NO_REF_LAST |
> + VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF |
> + VP8_EFLAG_NO_UPD_ARF;
> + break;
> + }
> + default:
> + /**
> + * do not change the layer_flags or the flag_periodicity in this
> case;
> + * it might be that the code is using external flags to be used.
> + */
> + break;
> +
> + }
> +}
> +
> +static int vpx_ts_param_parse(VPxContext *ctx,
> + struct vpx_codec_enc_cfg *enccfg,
> + char *key, char *value,
> + const enum AVCodecID codec_id)
> {
> size_t value_len = strlen(value);
> + int ts_layering_mode = 0;
>
> if (!value_len)
> return -1;
>
> if (!strcmp(key, "ts_number_layers"))
> enccfg->ts_number_layers = strtoul(value, &value, 10);
> - else if (!strcmp(key, "ts_target_bitrate"))
> - vp8_ts_parse_int_array(enccfg->ts_target_bitrate, value,
> value_len, VPX_TS_MAX_LAYERS);
> - else if (!strcmp(key, "ts_rate_decimator"))
> - vp8_ts_parse_int_array(enccfg->ts_rate_decimator, value, value_len,
> VPX_TS_MAX_LAYERS);
> - else if (!strcmp(key, "ts_periodicity"))
> + else if (!strcmp(key, "ts_target_bitrate")) {
> + if (codec_id == AV_CODEC_ID_VP8)
> + vp8_ts_parse_int_array(enccfg->ts_target_bitrate, value,
> value_len, VPX_TS_MAX_LAYERS);
> +#if (VPX_ENCODER_ABI_VERSION >= 12) && CONFIG_LIBVPX_VP9_ENCODER
> + if (codec_id == AV_CODEC_ID_VP9)
> + vp8_ts_parse_int_array(enccfg->layer_target_bitrate, value,
> value_len, VPX_TS_MAX_LAYERS);
> +#endif
> + } else if (!strcmp(key, "ts_rate_decimator")) {
> + vp8_ts_parse_int_array(enccfg->ts_rate_decimator, value,
> value_len, VPX_TS_MAX_LAYERS);
> + } else if (!strcmp(key, "ts_periodicity")) {
> enccfg->ts_periodicity = strtoul(value, &value, 10);
> - else if (!strcmp(key, "ts_layer_id"))
> + } else if (!strcmp(key, "ts_layer_id")) {
> vp8_ts_parse_int_array(enccfg->ts_layer_id, value, value_len,
> VPX_TS_MAX_PERIODICITY);
> + } else if (!strcmp(key, "ts_layering_mode")) {
> + /* option for pre-defined temporal structures in function
> set_temporal_layer_pattern. */
> + ts_layering_mode = strtoul(value, &value, 4);
> + }
> +
> +#if (VPX_ENCODER_ABI_VERSION >= 12) && CONFIG_LIBVPX_VP9_ENCODER
> + enccfg->temporal_layering_mode = 1; // only bypass mode is being
> supported for now.
> + enccfg->ss_number_layers = 1; // making sure the spatial scalability
> is off. Will support this in future.
> +#endif
> + if (ts_layering_mode) {
> + // make sure the ts_layering_mode comes at the end of the
> ts_parameter string to ensure that
> + // correct configuration is done.
> + ctx->ts_layer_flags = av_malloc(sizeof(int) *
> VPX_TS_MAX_PERIODICITY);
> + set_temporal_layer_pattern(ts_layering_mode, enccfg,
> ctx->ts_layer_flags, &enccfg->ts_periodicity);
> + }
>
> return 0;
> }
> @@ -590,7 +739,9 @@ static av_cold int vpx_init(AVCodecContext *avctx,
> vpx_img_fmt_t img_fmt = VPX_IMG_FMT_I420;
> #if CONFIG_LIBVPX_VP9_ENCODER
> vpx_codec_caps_t codec_caps = vpx_codec_get_caps(iface);
> + vpx_svc_extra_cfg_t svc_params;
> #endif
> + AVDictionaryEntry* en = NULL;
>
> av_log(avctx, AV_LOG_INFO, "%s\n", vpx_codec_version_str());
> av_log(avctx, AV_LOG_VERBOSE, "%s\n", vpx_codec_build_config());
> @@ -611,8 +762,8 @@ static av_cold int vpx_init(AVCodecContext *avctx,
> }
> #endif
>
> - if(!avctx->bit_rate)
> - if(avctx->rc_max_rate || avctx->rc_buffer_size ||
> avctx->rc_initial_buffer_occupancy) {
> + if (!avctx->bit_rate)
> + if (avctx->rc_max_rate || avctx->rc_buffer_size ||
> avctx->rc_initial_buffer_occupancy) {
> av_log( avctx, AV_LOG_ERROR, "Rate control parameters set
> without a bitrate\n");
> return AVERROR(EINVAL);
> }
> @@ -648,6 +799,9 @@ static av_cold int vpx_init(AVCodecContext *avctx,
> if (avctx->bit_rate) {
> enccfg.rc_target_bitrate = av_rescale_rnd(avctx->bit_rate, 1,
> 1000,
> AV_ROUND_NEAR_INF);
> +#if CONFIG_LIBVPX_VP9_ENCODER
> + enccfg.ss_target_bitrate[0] = enccfg.rc_target_bitrate;
> +#endif
> } else {
> // Set bitrate to default value. Also sets CRF to default if
> needed.
> set_vpx_defaults(avctx, &enccfg);
> @@ -757,14 +911,11 @@ FF_ENABLE_DEPRECATION_WARNINGS
>
> enccfg.g_error_resilient = ctx->error_resilient || ctx->flags &
> VP8F_ERROR_RESILIENT;
>
> - if (CONFIG_LIBVPX_VP8_ENCODER && avctx->codec_id == AV_CODEC_ID_VP8) {
> - AVDictionaryEntry* en = NULL;
> - while ((en = av_dict_get(ctx->vp8_ts_parameters, "", en,
> AV_DICT_IGNORE_SUFFIX))) {
> - if (vp8_ts_param_parse(&enccfg, en->key, en->value) < 0)
> - av_log(avctx, AV_LOG_WARNING,
> - "Error parsing option '%s = %s'.\n",
> - en->key, en->value);
> - }
> + while ((en = av_dict_get(ctx->vpx_ts_parameters, "", en,
> AV_DICT_IGNORE_SUFFIX))) {
> + if (vpx_ts_param_parse(ctx, &enccfg, en->key, en->value,
> avctx->codec_id) < 0)
> + av_log(avctx, AV_LOG_WARNING,
> + "Error parsing option '%s = %s'.\n",
> + en->key, en->value);
> }
>
> dump_enc_cfg(avctx, &enccfg);
> @@ -774,7 +925,21 @@ FF_ENABLE_DEPRECATION_WARNINGS
> log_encoder_error(avctx, "Failed to initialize encoder");
> return AVERROR(EINVAL);
> }
> -
> +#if CONFIG_LIBVPX_VP9_ENCODER
> + if (avctx->codec_id == AV_CODEC_ID_VP9 && enccfg.ts_number_layers >
> 1) {
> + memset(&svc_params, 0, sizeof(svc_params));
> + for (int i = 0; i < enccfg.ts_number_layers; ++i) {
> + svc_params.max_quantizers[i] = enccfg.rc_max_quantizer;
> + svc_params.min_quantizers[i] = enccfg.rc_min_quantizer;
> + }
> + svc_params.scaling_factor_num[0] = enccfg.g_h;
> + svc_params.scaling_factor_den[0] = enccfg.g_h;
> +#if VPX_ENCODER_ABI_VERSION >= 12
> + codecctl_int(avctx, VP9E_SET_SVC, 1);
> + codecctl_intp(avctx, VP9E_SET_SVC_PARAMETERS, (int *)
> &svc_params);
> +#endif
> + }
> +#endif
> if (ctx->is_alpha) {
> enccfg_alpha = enccfg;
> res = vpx_codec_enc_init(&ctx->encoder_alpha, iface,
> &enccfg_alpha, flags);
> @@ -1321,6 +1486,9 @@ static int vpx_encode(AVCodecContext *avctx,
> AVPacket *pkt,
> int64_t timestamp = 0;
> int res, coded_size;
> vpx_enc_frame_flags_t flags = 0;
> + const struct vpx_codec_enc_cfg *enccfg = ctx->encoder.config.enc;
> + vpx_svc_layer_id_t layer_id;
> + int layer_id_valid = 0;
>
> if (frame) {
> const AVFrameSideData *sd = av_frame_get_side_data(frame,
> AV_FRAME_DATA_REGIONS_OF_INTEREST);
> @@ -1368,6 +1536,42 @@ static int vpx_encode(AVCodecContext *avctx,
> AVPacket *pkt,
> }
> }
>
> + // this is for encoding with preset temporal layering patterns
> defined in
> + // set_temporal_layer_pattern function.
> + if (enccfg->ts_number_layers > 1 && ctx->ts_layer_flags) {
> + if (flags & VPX_EFLAG_FORCE_KF) {
> + // keyframe, reset temporal layering.
> + ctx->current_temporal_idx = 0;
> + flags = VPX_EFLAG_FORCE_KF;
> + } else {
> + flags = 0;
> + }
> +
> + /* get the flags from the temporal layer configuration. */
> + flags |= ctx->ts_layer_flags[ctx->current_temporal_idx];
> +
> + memset(&layer_id, 0, sizeof(layer_id));
> +#if VPX_ENCODER_ABI_VERSION >= 12
> + layer_id.spatial_layer_id = 0;
> +#endif
> + layer_id.temporal_layer_id =
> enccfg->ts_layer_id[ctx->current_temporal_idx];
> +#ifdef VPX_CTRL_VP9E_SET_MAX_INTER_BITRATE_PCT
> + layer_id.temporal_layer_id_per_spatial[0] =
> layer_id.temporal_layer_id;
> +#endif
> + layer_id_valid = 1;
> + }
> +
> + if (layer_id_valid) {
> + if (avctx->codec_id == AV_CODEC_ID_VP8) {
> + codecctl_int(avctx, VP8E_SET_TEMPORAL_LAYER_ID,
> layer_id.temporal_layer_id);
> + }
> +#if CONFIG_LIBVPX_VP9_ENCODER && VPX_ENCODER_ABI_VERSION >= 12
> + else if (avctx->codec_id == AV_CODEC_ID_VP9) {
> + codecctl_intp(avctx, VP9E_SET_SVC_LAYER_ID, (int *)
> &layer_id);
> + }
> +#endif
> + }
> +
> res = vpx_codec_encode(&ctx->encoder, rawimg, timestamp,
> avctx->ticks_per_frame, flags, ctx->deadline);
> if (res != VPX_CODEC_OK) {
> @@ -1397,6 +1601,8 @@ static int vpx_encode(AVCodecContext *avctx,
> AVPacket *pkt,
> }
> av_base64_encode(avctx->stats_out, b64_size,
> ctx->twopass_stats.buf,
> ctx->twopass_stats.sz);
> + } else if (enccfg->ts_number_layers > 1 && ctx->ts_layer_flags) {
> + ctx->current_temporal_idx = (ctx->current_temporal_idx + 1) %
> enccfg->ts_periodicity;
> }
>
> *got_packet = !!coded_size;
> @@ -1435,6 +1641,7 @@ static int vpx_encode(AVCodecContext *avctx,
> AVPacket *pkt,
> { "noise-sensitivity", "Noise sensitivity",
> OFFSET(noise_sensitivity), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 4, VE}, \
> { "undershoot-pct", "Datarate undershoot (min) target (%)",
> OFFSET(rc_undershoot_pct), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 100, VE }, \
> { "overshoot-pct", "Datarate overshoot (max) target (%)",
> OFFSET(rc_overshoot_pct), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1000, VE }, \
> + { "ts-parameters", "Temporal scaling configuration using a
> :-separated list of key=value parameters", OFFSET(vpx_ts_parameters),
> AV_OPT_TYPE_DICT, {.str=NULL}, 0, 0, VE}, \
>
> #define LEGACY_OPTIONS \
> {"speed", "", offsetof(VPxContext, cpu_used), AV_OPT_TYPE_INT, {.i64
> = 1}, -16, 16, VE}, \
> @@ -1454,8 +1661,6 @@ static const AVOption vp8_options[] = {
> { "auto-alt-ref", "Enable use of alternate reference "
> "frames (2-pass only)",
> OFFSET(auto_alt_ref), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 2, VE},
> { "cpu-used", "Quality/Speed ratio modifier",
> OFFSET(cpu_used), AV_OPT_TYPE_INT, {.i64 = 1}, -16, 16, VE},
> - { "ts-parameters", "Temporal scaling configuration using a "
> - ":-separated list of key=value parameters",
> OFFSET(vp8_ts_parameters), AV_OPT_TYPE_DICT, {.str=NULL}, 0, 0, VE},
> LEGACY_OPTIONS
> { NULL }
> };
> --
> 2.25.0.rc1.283.g88dfdc4193-goog
>
>
Changes from v2:
1. Fixed the versioning problem with older versions of the libvpx library.
2. Changed sizeof to take the variable rather than the type.
3. Merged two patches that depend on each other.
4. Trimmed the documentation patch so that it contains only the related changes.
Thank you,
Wonkap
More information about the ffmpeg-devel
mailing list