[FFmpeg-devel] [PATCH v3 4/5] libavcodec: VAAPI H.265 encoder

Mark Thompson sw at jkqxz.net
Mon Jan 18 23:52:36 CET 2016


---
 configure                   |    1 +
 libavcodec/Makefile         |    1 +
 libavcodec/allcodecs.c      |    1 +
 libavcodec/vaapi_enc_hevc.c | 1625 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 1628 insertions(+)
 create mode 100644 libavcodec/vaapi_enc_hevc.c

diff --git a/configure b/configure
index a31d65e..9da8e8b 100755
--- a/configure
+++ b/configure
@@ -2519,6 +2519,7 @@ hevc_dxva2_hwaccel_select="hevc_decoder"
 hevc_qsv_hwaccel_deps="libmfx"
 hevc_vaapi_hwaccel_deps="vaapi VAPictureParameterBufferHEVC"
 hevc_vaapi_hwaccel_select="hevc_decoder"
+hevc_vaapi_encoder_deps="vaapi"
 hevc_vdpau_hwaccel_deps="vdpau VdpPictureInfoHEVC"
 hevc_vdpau_hwaccel_select="hevc_decoder"
 mpeg_vdpau_decoder_deps="vdpau"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 06b3c48..a5e1cab 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -311,6 +311,7 @@ OBJS-$(CONFIG_HEVC_DECODER)            += hevc.o hevc_mvs.o hevc_ps.o hevc_sei.o
                                           hevcdsp.o hevc_filter.o hevc_parse.o hevc_data.o
 OBJS-$(CONFIG_HEVC_QSV_DECODER)        += qsvdec_h2645.o
 OBJS-$(CONFIG_HEVC_QSV_ENCODER)        += qsvenc_hevc.o hevc_ps_enc.o hevc_parse.o
+OBJS-$(CONFIG_HEVC_VAAPI_ENCODER)      += vaapi_enc_hevc.o
 OBJS-$(CONFIG_HNM4_VIDEO_DECODER)      += hnm4video.o
 OBJS-$(CONFIG_HQ_HQA_DECODER)          += hq_hqa.o hq_hqadata.o hq_hqadsp.o \
                                           canopus.o
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 0d07087..a25da5b 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -203,6 +203,7 @@ void avcodec_register_all(void)
     REGISTER_ENCDEC (HAP,               hap);
     REGISTER_DECODER(HEVC,              hevc);
     REGISTER_DECODER(HEVC_QSV,          hevc_qsv);
+    REGISTER_ENCODER(HEVC_VAAPI,        hevc_vaapi);
     REGISTER_DECODER(HNM4_VIDEO,        hnm4_video);
     REGISTER_DECODER(HQ_HQA,            hq_hqa);
     REGISTER_DECODER(HQX,               hqx);
diff --git a/libavcodec/vaapi_enc_hevc.c b/libavcodec/vaapi_enc_hevc.c
new file mode 100644
index 0000000..885f1c8
--- /dev/null
+++ b/libavcodec/vaapi_enc_hevc.c
@@ -0,0 +1,1625 @@
+/*
+ * VAAPI H.265 encoder.
+ *
+ * Copyright (C) 2016 Mark Thompson <mrt at jkqxz.net>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "golomb.h"
+#include "put_bits.h"
+
+#include "hevc.h"
+
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/vaapi.h"
+
+#define MAX_DPB_PICS     16  // Number of entries in the context's dpb[] array and in each per-RPS delta/flag array.
+#define INPUT_PICS       2   // NOTE(review): not referenced in the part of the file visible here - confirm it is used.
+
+#define bool unsigned char   // Storage type for one-bit syntax flags. NOTE(review): a macro named bool collides with <stdbool.h> if that is ever included after this point.
+#define MAX_ST_REF_PIC_SETS  32  // Capacity of the st_ref_pic_set[] table in the sequence parameters.
+#define MAX_LAYERS            1  // First dimension of the layer / sub-layer arrays in the sequence parameters.
+
+
+// This structure contains all possibly-useful per-sequence syntax elements
+// which are not already contained in the various VAAPI structures.  The VPS, SPS, PPS and slice-header writers read these fields directly.
+typedef struct VAAPIHEVCEncodeMiscSequenceParams {
+
+    // Parameter set IDs (the VPS id is written into the VPS and SPS, the SPS id into the SPS and PPS).
+    unsigned int video_parameter_set_id;
+    unsigned int seq_parameter_set_id;
+
+    // Layering.
+    unsigned int vps_max_layers_minus1;
+    unsigned int vps_max_sub_layers_minus1;
+    bool vps_temporal_id_nesting_flag;
+    unsigned int vps_max_layer_id;
+    unsigned int vps_num_layer_sets_minus1;
+    unsigned int sps_max_sub_layers_minus1;
+    bool sps_temporal_id_nesting_flag;
+    bool layer_id_included_flag[MAX_LAYERS][64];  // The VPS writer indexes this as [layer set][layer id], starting at layer set 1.
+
+    // Profile/tier/level parameters (written by vaapi_hevc_write_profile_tier_level).
+    bool general_profile_compatibility_flag[32];
+    bool general_progressive_source_flag;
+    bool general_interlaced_source_flag;
+    bool general_non_packed_constraint_flag;
+    bool general_frame_only_constraint_flag;
+    bool general_inbld_flag;
+
+    // Decode/display ordering parameters.
+    unsigned int log2_max_pic_order_cnt_lsb_minus4;  // Also sets the bit width of slice_pic_order_cnt_lsb in the slice header.
+    bool vps_sub_layer_ordering_info_present_flag;
+    unsigned int vps_max_dec_pic_buffering_minus1[MAX_LAYERS];  // The vps_/sps_ arrays below are indexed by the writers' sub-layer loop variable.
+    unsigned int vps_max_num_reorder_pics[MAX_LAYERS];
+    unsigned int vps_max_latency_increase_plus1[MAX_LAYERS];
+    bool sps_sub_layer_ordering_info_present_flag;
+    unsigned int sps_max_dec_pic_buffering_minus1[MAX_LAYERS];
+    unsigned int sps_max_num_reorder_pics[MAX_LAYERS];
+    unsigned int sps_max_latency_increase_plus1[MAX_LAYERS];
+
+    // Timing information (written into the VPS only when vps_timing_info_present_flag is set).
+    bool vps_timing_info_present_flag;
+    unsigned int vps_num_units_in_tick;  // Written as a zero bit followed by 31 bits, so the top bit is not representable.
+    unsigned int vps_time_scale;         // Same 1 + 31 bit encoding as vps_num_units_in_tick.
+    bool vps_poc_proportional_to_timing_flag;
+    unsigned int vps_num_ticks_poc_diff_minus1;
+
+    // Cropping information (offsets are written into the SPS only when conformance_window_flag is set).
+    bool conformance_window_flag;
+    unsigned int conf_win_left_offset;
+    unsigned int conf_win_right_offset;
+    unsigned int conf_win_top_offset;
+    unsigned int conf_win_bottom_offset;
+
+    // Short-term reference picture sets (each entry is emitted by vaapi_hevc_write_st_ref_pic_set).
+    unsigned int num_short_term_ref_pic_sets;
+    struct {
+        unsigned int num_negative_pics;
+        unsigned int num_positive_pics;
+
+        unsigned int delta_poc_s0_minus1[MAX_DPB_PICS];
+        bool used_by_curr_pic_s0_flag[MAX_DPB_PICS];
+
+        unsigned int delta_poc_s1_minus1[MAX_DPB_PICS];
+        bool used_by_curr_pic_s1_flag[MAX_DPB_PICS];
+    } st_ref_pic_set[MAX_ST_REF_PIC_SETS];
+
+    // Long-term reference pictures.  The SPS writer always emits a count of 0 and the slice-header writer asserts if the present flag is set.
+    bool long_term_ref_pics_present_flag;
+    unsigned int num_long_term_ref_pics_sps;
+    struct {
+        unsigned int lt_ref_pic_poc_lsb_sps;
+        bool used_by_curr_pic_lt_sps_flag;
+    } lt_ref_pic;
+
+    // Deblocking filter control (written into the PPS).
+    bool deblocking_filter_control_present_flag;
+    bool deblocking_filter_override_enabled_flag;
+    bool pps_deblocking_filter_disabled_flag;
+    int pps_beta_offset_div2;
+    int pps_tc_offset_div2;
+
+    // Video Usability Information (written by vaapi_hevc_write_vui_parameters when vui_parameters_present_flag is set).
+    bool vui_parameters_present_flag;
+    bool aspect_ratio_info_present_flag;
+    unsigned int aspect_ratio_idc;  // sar_width / sar_height are written only when this equals 255.
+    unsigned int sar_width;
+    unsigned int sar_height;
+    bool video_signal_type_present_flag;
+    unsigned int video_format;
+    bool video_full_range_flag;
+    bool colour_description_present_flag;
+    unsigned int colour_primaries;
+    unsigned int transfer_characteristics;
+    unsigned int matrix_coeffs;
+
+    // Oddments (PPS-level flags that also steer what the slice header contains).
+    bool uniform_spacing_flag;
+    bool output_flag_present_flag;
+    bool cabac_init_present_flag;
+    unsigned int num_extra_slice_header_bits;  // Written with 3 bits in the PPS; also the count of slice_reserved_flag bits per slice.
+    bool lists_modification_present_flag;
+    bool pps_slice_chroma_qp_offsets_present_flag;
+    bool pps_slice_chroma_offset_list_enabled_flag;  // Only read by the slice-header writer in the visible code.
+
+} VAAPIHEVCEncodeMiscSequenceParams;
+
+// This structure contains all possibly-useful per-slice syntax elements
+// which are not already contained in the various VAAPI structures.  It is read by vaapi_hevc_write_slice_header.
+typedef struct {
+    // Slice segments.
+    bool first_slice_segment_in_pic_flag;
+    unsigned int slice_segment_address;  // Written only when first_slice_segment_in_pic_flag is 0.
+
+    // Short-term reference picture sets.
+    bool short_term_ref_pic_set_sps_flag;  // The slice-header writer asserts if this is 0 (no in-slice RPS support).
+    unsigned int short_term_ref_pic_idx;
+
+    // Deblocking filter.
+    bool deblocking_filter_override_flag;
+
+    // Oddments.
+    bool slice_reserved_flag[8];
+    bool no_output_of_prior_pics_flag;
+    bool pic_output_flag;  // NOTE(review): the slice-header writer emits a constant 1 rather than this field.
+
+} VAAPIHEVCEncodeMiscPictureParams;
+
+typedef struct VAAPIHEVCEncodeFrame {  // Per-picture state; the context keeps MAX_DPB_PICS of these in dpb[].
+    AVFrame avframe;
+    VASurfaceID surface_id;
+
+    int poc;  // Picture order count; its low bits are written as slice_pic_order_cnt_lsb.
+    enum {
+        FRAME_TYPE_I = I_SLICE,
+        FRAME_TYPE_P = P_SLICE,
+        FRAME_TYPE_B = B_SLICE,
+    } type;  // Values mirror the I_SLICE / P_SLICE / B_SLICE constants.
+
+    VAPictureHEVC pic;
+
+    VAEncPictureParameterBufferHEVC pic_params;  // Read by the slice-header writer (nal_unit_type and picture-level flags).
+    VABufferID pic_params_id;
+
+    VAEncSliceParameterBufferHEVC slice_params;  // Read by the slice-header writer.
+    VABufferID slice_params_id;
+
+    VAAPIHEVCEncodeMiscPictureParams misc_params;
+
+    VABufferID coded_data_id;
+
+    struct VAAPIHEVCEncodeFrame *refa, *refb;  // NOTE(review): presumably the reference pictures for this frame; their use is outside the visible code.
+} VAAPIHEVCEncodeFrame;
+
+typedef struct VAAPIHEVCEncodeContext {  // Encoder instance state shared by all the header writers and render helpers.
+    const AVClass *class;
+    const AVCodecContext *avctx;
+
+    AVVAAPIHardwareContext *hardware_context;  // Supplies the VA display used for buffer creation and rendering.
+    AVVAAPIPipelineConfig va_config;
+    AVVAAPIPipelineContext va_codec;  // Supplies the VA context id used for buffer creation and rendering.
+
+    int input_is_vaapi;
+    AVVAAPISurfaceConfig input_config;
+    AVVAAPISurfaceConfig output_config;
+
+    VAProfile va_profile;
+    int level;
+    int rc_mode;
+    int fixed_qp;
+
+    int input_width;
+    int input_height;
+
+    int aligned_width;
+    int aligned_height;
+    int ctu_width;   // ctu_width * ctu_height sets the bit width of slice_segment_address.
+    int ctu_height;
+
+    VAEncSequenceParameterBufferHEVC seq_params;  // Source of the VAAPI-held SPS and profile/tier/level fields.
+    VABufferID seq_params_id;
+
+    VAEncMiscParameterRateControl rc_params;
+    VAEncMiscParameterBuffer rc_params_buffer;
+    VABufferID rc_params_id;
+
+    VAEncPictureParameterBufferHEVC pic_params;  // Source of the VAAPI-held PPS fields.
+    VABufferID pic_params_id;
+
+    VAAPIHEVCEncodeMiscSequenceParams misc_params;
+
+    int poc;
+
+    VAAPIHEVCEncodeFrame dpb[MAX_DPB_PICS];
+    int current_frame;
+    int previous_frame;
+
+    struct {
+        int64_t hardware_context;
+
+        const char *profile;
+        const char *level;
+        int qp;
+        int idr_interval;
+    } options;  // NOTE(review): presumably the user-settable encoder options; the option table is outside the visible code.
+
+} VAAPIHEVCEncodeContext;
+
+
+// Set to 1 to log a full trace of all bitstream output (debugging only).  Each trace helper logs the bit position, value and element name, then writes the value.
+#if 0
+static void trace_hevc_write_u(PutBitContext *s, unsigned int width,
+                               unsigned int value, const char *name)
+{
+    av_log(0, AV_LOG_INFO, "H.265 bitstream [%3d]:  %4u  u(%u) / %s\n",
+           put_bits_count(s), value, width, name);
+    put_bits(s, width, value);
+}
+static void trace_hevc_write_ue(PutBitContext *s,
+                                unsigned int value, const char *name)
+{
+    av_log(0, AV_LOG_INFO, "H.265 bitstream [%3d]:  %4u  ue(v) / %s\n",
+           put_bits_count(s), value, name);
+    set_ue_golomb(s, value);
+}
+static void trace_hevc_write_se(PutBitContext *s,
+                                int value, const char *name)
+{
+    av_log(0, AV_LOG_INFO, "H.265 bitstream [%3d]:  %+4d  se(v) / %s\n",
+           put_bits_count(s), value, name);
+    set_se_golomb(s, value);
+}
+// In trace mode the element name argument is stringified and passed to the helpers above.
+#define hevc_write_u(pbc, width, value, name) \
+    trace_hevc_write_u(pbc, width, value, #name)
+#define hevc_write_ue(pbc, value, name) \
+    trace_hevc_write_ue(pbc, value, #name)
+#define hevc_write_se(pbc, value, name) \
+    trace_hevc_write_se(pbc, value, #name)
+#else  // Normal build: the name argument is dropped and the value goes straight to the bit writer.
+#define hevc_write_u(pbc, width, value, name) put_bits(pbc, width, value)
+#define hevc_write_ue(pbc, value, name) set_ue_golomb(pbc, value)
+#define hevc_write_se(pbc, value, name) set_se_golomb(pbc, value)
+#endif
+// Shorthand writers: u() is fixed-width, ue()/se() are Exp-Golomb.  They require a PutBitContext pointer named s in scope.
+#define u(width, ...) hevc_write_u(s, width, __VA_ARGS__)
+#define ue(...) hevc_write_ue(s, __VA_ARGS__)
+#define se(...) hevc_write_se(s, __VA_ARGS__)
+// Each *_var / *_field macro expands to "value, name", reading from a local pointer with a fixed name (seq, pic, slice, misc or miscs).
+#define seq_var(name)     seq->name, name
+#define seq_field(name)   seq->seq_fields.bits.name, name
+#define pic_var(name)     pic->name, name
+#define pic_field(name)   pic->pic_fields.bits.name, name
+#define slice_var(name)   slice->name, name
+#define slice_field(name) slice->slice_fields.bits.name, name
+#define misc_var(name)    misc->name, name
+#define miscs_var(name)   miscs->name, name
+
+// Write the 16-bit NAL unit header: a zero bit, the 6-bit NAL unit type,
+// layer id 0 and temporal id 0 (coded as nuh_temporal_id_plus1 = 1).
+static void vaapi_hevc_write_nal_unit_header(PutBitContext *s,
+                                             int nal_unit_type)
+{
+    hevc_write_u(s, 1, 0,             forbidden_zero_bit);
+    hevc_write_u(s, 6, nal_unit_type, nal_unit_type);
+    hevc_write_u(s, 6, 0,             nuh_layer_id);
+    hevc_write_u(s, 3, 1,             nuh_temporal_id_plus1);
+}
+
+// Terminate an RBSP: write the stop bit, then pad with zero bits until the
+// write position is a multiple of eight bits.
+static void vaapi_hevc_write_rbsp_trailing_bits(PutBitContext *s)
+{
+    int padding;
+
+    hevc_write_u(s, 1, 1, rbsp_stop_one_bit);
+
+    // Number of zero bits needed to reach the next byte boundary (0..7).
+    padding = (8 - (put_bits_count(s) & 7)) & 7;
+    for (; padding > 0; padding--)
+        hevc_write_u(s, 1, 0, rbsp_alignment_zero_bit);
+}
+
+// Write the profile_tier_level() structure shared by the VPS and SPS.
+// Only the general (non-sub-layer) part is emitted; tier, profile and level
+// come from the VAAPI sequence parameters, the remaining flags from the
+// miscellaneous sequence parameters.
+static void vaapi_hevc_write_profile_tier_level(PutBitContext *s,
+                                                VAAPIHEVCEncodeContext *ctx)
+{
+    VAAPIHEVCEncodeMiscSequenceParams *misc = &ctx->misc_params;
+    VAEncSequenceParameterBufferHEVC *seq = &ctx->seq_params;
+    int j;
+
+    // General profile space, tier and profile.
+    u(2, 0, general_profile_space);
+    u(1, seq->general_tier_flag, general_tier_flag);
+    u(5, seq->general_profile_idc, general_profile_idc);
+
+    for (j = 0; j < 32; j++)
+        u(1, misc_var(general_profile_compatibility_flag[j]));
+
+    // Source and constraint flags.
+    u(1, misc_var(general_progressive_source_flag));
+    u(1, misc_var(general_interlaced_source_flag));
+    u(1, misc_var(general_non_packed_constraint_flag));
+    u(1, misc_var(general_frame_only_constraint_flag));
+
+    // The extended constraint flags of the non-main profiles are not
+    // supported, so the 43 reserved bits are always zero.  They are split
+    // into two writes because put_bits only handles up to 31 bits.
+    u(23, 0, general_reserved_zero_43bits);
+    u(20, 0, general_reserved_zero_43bits);
+
+    if (seq->general_profile_idc < 1 || seq->general_profile_idc > 5)
+        u(1, 0, general_reserved_zero_bit);
+    else
+        u(1, misc_var(general_inbld_flag));
+
+    u(8, seq->general_level_idc, general_level_idc);
+
+    // No sublayers.
+}
+
+// Write a complete video parameter set NAL unit (header, VPS syntax and
+// RBSP trailing bits) into s.  All values come from ctx->misc_params, plus
+// the profile/tier/level fields of ctx->seq_params.
+static void vaapi_hevc_write_vps(PutBitContext *s,
+                                 VAAPIHEVCEncodeContext *ctx)
+{
+    VAAPIHEVCEncodeMiscSequenceParams *misc = &ctx->misc_params;
+    int i, j;
+
+    // layer_id_included_flag[] has only MAX_LAYERS rows and is indexed by
+    // layer set number below, so reject anything that would read past it.
+    av_assert0(misc->vps_num_layer_sets_minus1 < MAX_LAYERS);
+
+    vaapi_hevc_write_nal_unit_header(s, NAL_VPS);
+
+    u(4, misc->video_parameter_set_id, vps_video_parameter_set_id);
+
+    u(1, 1, vps_base_layer_internal_flag);
+    u(1, 1, vps_base_layer_available_flag);
+    u(6, misc_var(vps_max_layers_minus1));
+    u(3, misc_var(vps_max_sub_layers_minus1));
+    u(1, misc_var(vps_temporal_id_nesting_flag));
+
+    u(16, 0xffff, vps_reserved_0xffff_16bits);
+
+    vaapi_hevc_write_profile_tier_level(s, ctx);
+
+    // With the present flag clear only the highest sub-layer is signalled.
+    u(1, misc_var(vps_sub_layer_ordering_info_present_flag));
+    for(i = (misc->vps_sub_layer_ordering_info_present_flag ?
+             0 : misc->vps_max_sub_layers_minus1);
+        i <= misc->vps_max_sub_layers_minus1; i++) {
+        ue(misc_var(vps_max_dec_pic_buffering_minus1[i]));
+        ue(misc_var(vps_max_num_reorder_pics[i]));
+        ue(misc_var(vps_max_latency_increase_plus1[i]));
+    }
+
+    u(6, misc_var(vps_max_layer_id));
+    ue(misc_var(vps_num_layer_sets_minus1));
+    for(i = 1; i <= misc->vps_num_layer_sets_minus1; i++) {
+        // One flag per layer id from 0 to vps_max_layer_id inclusive.
+        for(j = 0; j <= misc->vps_max_layer_id; j++)
+            u(1, misc_var(layer_id_included_flag[i][j]));
+    }
+
+    u(1, misc_var(vps_timing_info_present_flag));
+    if(misc->vps_timing_info_present_flag) {
+        // The 32-bit fields are written as a zero bit plus 31 bits because
+        // put_bits only handles up to 31 bits; values must be below 2^31.
+        u(1, 0, put_bits_hack_zero_bit);
+        u(31, misc_var(vps_num_units_in_tick));
+        u(1, 0, put_bits_hack_zero_bit);
+        u(31, misc_var(vps_time_scale));
+        u(1, misc_var(vps_poc_proportional_to_timing_flag));
+        if(misc->vps_poc_proportional_to_timing_flag) {
+            ue(misc_var(vps_num_ticks_poc_diff_minus1));
+        }
+        ue(0, vps_num_hrd_parameters);
+    }
+
+    u(1, 0, vps_extension_flag);
+
+    vaapi_hevc_write_rbsp_trailing_bits(s);
+}
+
+// Write short-term reference picture set number st_rps_idx from
+// ctx->misc_params.  Sets are always coded explicitly: inter-RPS prediction
+// is never used, so the prediction flag (present for every set but the
+// first) is written as 0.
+static void vaapi_hevc_write_st_ref_pic_set(PutBitContext *s,
+                                            VAAPIHEVCEncodeContext *ctx,
+                                            int st_rps_idx)
+{
+    VAAPIHEVCEncodeMiscSequenceParams *misc = &ctx->misc_params;
+#define strps_var(name) misc->st_ref_pic_set[st_rps_idx].name, name
+    unsigned int negative_count, positive_count;
+    int i;
+
+    if (st_rps_idx)
+        u(1, 0, inter_ref_pic_set_prediction_flag);
+
+    negative_count = misc->st_ref_pic_set[st_rps_idx].num_negative_pics;
+    positive_count = misc->st_ref_pic_set[st_rps_idx].num_positive_pics;
+
+    ue(strps_var(num_negative_pics));
+    ue(strps_var(num_positive_pics));
+
+    // Pictures preceding the current one in output order.
+    for (i = 0; i < negative_count; i++) {
+        ue(strps_var(delta_poc_s0_minus1[i]));
+        u(1, strps_var(used_by_curr_pic_s0_flag[i]));
+    }
+
+    // Pictures following the current one in output order.
+    for (i = 0; i < positive_count; i++) {
+        ue(strps_var(delta_poc_s1_minus1[i]));
+        u(1, strps_var(used_by_curr_pic_s1_flag[i]));
+    }
+}
+
+// Write the vui_parameters() structure of the SPS.  Aspect ratio and video
+// signal type are taken from ctx->misc_params; every other optional VUI
+// section is signalled as absent.
+static void vaapi_hevc_write_vui_parameters(PutBitContext *s,
+                                            VAAPIHEVCEncodeContext *ctx)
+{
+    VAAPIHEVCEncodeMiscSequenceParams *misc = &ctx->misc_params;
+
+    u(1, misc_var(aspect_ratio_info_present_flag));
+    if(misc->aspect_ratio_info_present_flag) {
+        u(8, misc_var(aspect_ratio_idc));
+        // 255 selects an explicit sample aspect ratio.
+        if(misc->aspect_ratio_idc == 255) {
+            u(16, misc_var(sar_width));
+            u(16, misc_var(sar_height));
+        }
+    }
+
+    u(1, 0, overscan_info_present_flag);
+
+    u(1, misc_var(video_signal_type_present_flag));
+    if(misc->video_signal_type_present_flag) {
+        u(3, misc_var(video_format));
+        u(1, misc_var(video_full_range_flag));
+        u(1, misc_var(colour_description_present_flag));
+        if(misc->colour_description_present_flag) {
+            u(8, misc_var(colour_primaries));
+            u(8, misc_var(transfer_characteristics));
+            u(8, misc_var(matrix_coeffs));
+        }
+    }
+
+    u(1, 0, chroma_loc_info_present_flag);
+    u(1, 0, neutral_chroma_indication_flag);
+    u(1, 0, field_seq_flag);
+    u(1, 0, frame_field_info_present_flag);
+    u(1, 0, default_display_window_flag);
+    u(1, 0, vui_timing_info_present_flag);
+    // Element name fixed (was "bitstream_restriction_flag_flag"); it is only
+    // visible in trace output.
+    u(1, 0, bitstream_restriction_flag);
+}
+
+// Write a complete sequence parameter set NAL unit (header, SPS syntax and
+// RBSP trailing bits) into s.  Fields the hardware also needs come from
+// ctx->seq_params; everything else comes from ctx->misc_params.
+static void vaapi_hevc_write_sps(PutBitContext *s,
+                                 VAAPIHEVCEncodeContext *ctx)
+{
+    VAEncSequenceParameterBufferHEVC *seq = &ctx->seq_params;
+    VAAPIHEVCEncodeMiscSequenceParams *misc = &ctx->misc_params;
+    int i;
+
+    vaapi_hevc_write_nal_unit_header(s, NAL_SPS);
+
+    u(4, misc->video_parameter_set_id, sps_video_parameter_set_id);
+
+    u(3, misc_var(sps_max_sub_layers_minus1));
+    u(1, misc_var(sps_temporal_id_nesting_flag));
+
+    vaapi_hevc_write_profile_tier_level(s, ctx);
+
+    ue(misc->seq_parameter_set_id, sps_seq_parameter_set_id);
+    ue(seq_field(chroma_format_idc));
+    // Signal the same value that is handed to the hardware and that the
+    // slice-header writer tests, so the SPS and slice headers always agree.
+    if(seq->seq_fields.bits.chroma_format_idc == 3)
+        u(1, seq_field(separate_colour_plane_flag));
+
+    ue(seq_var(pic_width_in_luma_samples));
+    ue(seq_var(pic_height_in_luma_samples));
+
+    u(1, misc_var(conformance_window_flag));
+    if(misc->conformance_window_flag) {
+        ue(misc_var(conf_win_left_offset));
+        ue(misc_var(conf_win_right_offset));
+        ue(misc_var(conf_win_top_offset));
+        ue(misc_var(conf_win_bottom_offset));
+    }
+
+    ue(seq_field(bit_depth_luma_minus8));
+    ue(seq_field(bit_depth_chroma_minus8));
+
+    ue(misc_var(log2_max_pic_order_cnt_lsb_minus4));
+
+    // With the present flag clear only the highest sub-layer is signalled.
+    u(1, misc_var(sps_sub_layer_ordering_info_present_flag));
+    for(i = (misc->sps_sub_layer_ordering_info_present_flag ?
+             0 : misc->sps_max_sub_layers_minus1);
+        i <= misc->sps_max_sub_layers_minus1; i++) {
+        ue(misc_var(sps_max_dec_pic_buffering_minus1[i]));
+        ue(misc_var(sps_max_num_reorder_pics[i]));
+        ue(misc_var(sps_max_latency_increase_plus1[i]));
+    }
+
+    // Coding and transform block size limits.
+    ue(seq_var(log2_min_luma_coding_block_size_minus3));
+    ue(seq_var(log2_diff_max_min_luma_coding_block_size));
+    ue(seq_var(log2_min_transform_block_size_minus2));
+    ue(seq_var(log2_diff_max_min_transform_block_size));
+    ue(seq_var(max_transform_hierarchy_depth_inter));
+    ue(seq_var(max_transform_hierarchy_depth_intra));
+
+    // Scaling lists may be enabled, but explicit list data is never sent.
+    u(1, seq_field(scaling_list_enabled_flag));
+    if(seq->seq_fields.bits.scaling_list_enabled_flag) {
+        u(1, 0, sps_scaling_list_data_present_flag);
+    }
+
+    u(1, seq_field(amp_enabled_flag));
+    u(1, seq_field(sample_adaptive_offset_enabled_flag));
+
+    u(1, seq_field(pcm_enabled_flag));
+    if(seq->seq_fields.bits.pcm_enabled_flag) {
+        u(4, seq_var(pcm_sample_bit_depth_luma_minus1));
+        u(4, seq_var(pcm_sample_bit_depth_chroma_minus1));
+        ue(seq_var(log2_min_pcm_luma_coding_block_size_minus3));
+        // VAAPI stores the maximum; the bitstream carries max minus min.
+        ue(seq->log2_max_pcm_luma_coding_block_size_minus3 -
+           seq->log2_min_pcm_luma_coding_block_size_minus3,
+           log2_diff_max_min_pcm_luma_coding_block_size);
+        u(1, seq_field(pcm_loop_filter_disabled_flag));
+    }
+
+    ue(misc_var(num_short_term_ref_pic_sets));
+    for(i = 0; i < misc->num_short_term_ref_pic_sets; i++)
+        vaapi_hevc_write_st_ref_pic_set(s, ctx, i);
+
+    // Long-term references may be flagged, but no SPS candidates are sent.
+    u(1, misc_var(long_term_ref_pics_present_flag));
+    if(misc->long_term_ref_pics_present_flag) {
+        ue(0, num_long_term_ref_pics_sps);
+    }
+
+    u(1, seq_field(sps_temporal_mvp_enabled_flag));
+    u(1, seq_field(strong_intra_smoothing_enabled_flag));
+
+    u(1, misc_var(vui_parameters_present_flag));
+    if(misc->vui_parameters_present_flag) {
+        vaapi_hevc_write_vui_parameters(s, ctx);
+    }
+
+    u(1, 0, sps_extension_present_flag);
+
+    vaapi_hevc_write_rbsp_trailing_bits(s);
+}
+
+// Write a complete picture parameter set NAL unit (header, PPS syntax and
+// RBSP trailing bits) into s, using ctx->pic_params for the fields held by
+// VAAPI and ctx->misc_params for the rest.
+static void vaapi_hevc_write_pps(PutBitContext *s,
+                                 VAAPIHEVCEncodeContext *ctx)
+{
+    VAAPIHEVCEncodeMiscSequenceParams *misc = &ctx->misc_params;
+    VAEncPictureParameterBufferHEVC *pic = &ctx->pic_params;
+    int i;
+
+    vaapi_hevc_write_nal_unit_header(s, NAL_PPS);
+
+    // Identifiers of this PPS and of the SPS it refers to.
+    ue(pic->slice_pic_parameter_set_id, pps_pic_parameter_set_id);
+    ue(misc->seq_parameter_set_id, pps_seq_parameter_set_id);
+
+    // Flags controlling which elements appear in slice headers.
+    u(1, pic_field(dependent_slice_segments_enabled_flag));
+    u(1, misc_var(output_flag_present_flag));
+    u(3, misc_var(num_extra_slice_header_bits));
+    u(1, pic_field(sign_data_hiding_enabled_flag));
+    u(1, misc_var(cabac_init_present_flag));
+
+    ue(pic_var(num_ref_idx_l0_default_active_minus1));
+    ue(pic_var(num_ref_idx_l1_default_active_minus1));
+
+    // Quantisation: VAAPI holds the absolute initial QP, the bitstream
+    // carries it relative to 26.
+    se(pic->pic_init_qp - 26, init_qp_minus26);
+
+    u(1, pic_field(constrained_intra_pred_flag));
+    u(1, pic_field(transform_skip_enabled_flag));
+
+    u(1, pic_field(cu_qp_delta_enabled_flag));
+    if (pic->pic_fields.bits.cu_qp_delta_enabled_flag) {
+        ue(pic_var(diff_cu_qp_delta_depth));
+    }
+
+    se(pic_var(pps_cb_qp_offset));
+    se(pic_var(pps_cr_qp_offset));
+    u(1, misc_var(pps_slice_chroma_qp_offsets_present_flag));
+
+    // Prediction and parallelism tools.
+    u(1, pic_field(weighted_pred_flag));
+    u(1, pic_field(weighted_bipred_flag));
+    u(1, pic_field(transquant_bypass_enabled_flag));
+    u(1, pic_field(tiles_enabled_flag));
+    u(1, pic_field(entropy_coding_sync_enabled_flag));
+
+    if (pic->pic_fields.bits.tiles_enabled_flag) {
+        ue(pic_var(num_tile_columns_minus1));
+        ue(pic_var(num_tile_rows_minus1));
+        u(1, misc_var(uniform_spacing_flag));
+        if (misc->uniform_spacing_flag == 0) {
+            // Explicit sizes for every column and row except the last.
+            for (i = 0; i < pic->num_tile_columns_minus1; i++) {
+                ue(pic_var(column_width_minus1[i]));
+            }
+            for (i = 0; i < pic->num_tile_rows_minus1; i++) {
+                ue(pic_var(row_height_minus1[i]));
+            }
+        }
+        u(1, pic_field(loop_filter_across_tiles_enabled_flag));
+    }
+
+    // In-loop filter control.
+    u(1, pic_field(pps_loop_filter_across_slices_enabled_flag));
+    u(1, misc_var(deblocking_filter_control_present_flag));
+    if (misc->deblocking_filter_control_present_flag) {
+        u(1, misc_var(deblocking_filter_override_enabled_flag));
+        u(1, misc_var(pps_deblocking_filter_disabled_flag));
+        if (misc->pps_deblocking_filter_disabled_flag == 0) {
+            se(misc_var(pps_beta_offset_div2));
+            se(misc_var(pps_tc_offset_div2));
+        }
+    }
+
+    // No scaling list data.
+    u(1, 0, pps_scaling_list_data_present_flag);
+
+    u(1, misc_var(lists_modification_present_flag));
+    ue(pic_var(log2_parallel_merge_level_minus2));
+    u(1, 0, slice_segment_header_extension_present_flag);
+    u(1, 0, pps_extension_present_flag);
+
+    vaapi_hevc_write_rbsp_trailing_bits(s);
+}
+
+// Write the NAL unit header and slice segment header for the picture
+// "current" into s, ending with the byte-alignment bits.
+//
+// The element order and the conditions follow slice_segment_header() in
+// H.265 section 7.3.6.1.  Syntax that this encoder does not generate
+// (in-slice reference picture sets, long-term references, reference list
+// modification, prediction weight tables) triggers av_assert0(0).
+// Elements that are not written take their inferred value for the
+// conditions that follow, rather than whatever the VAAPI structure holds.
+static void vaapi_hevc_write_slice_header(PutBitContext *s,
+                                          VAAPIHEVCEncodeContext *ctx,
+                                          VAAPIHEVCEncodeFrame *current)
+{
+    VAEncSequenceParameterBufferHEVC *seq = &ctx->seq_params;
+    VAEncPictureParameterBufferHEVC *pic = &current->pic_params;
+    VAAPIHEVCEncodeMiscSequenceParams *misc = &ctx->misc_params;
+    VAEncSliceParameterBufferHEVC *slice = &current->slice_params;
+    VAAPIHEVCEncodeMiscPictureParams *miscs = &current->misc_params;
+    int i;
+
+    vaapi_hevc_write_nal_unit_header(s, pic->nal_unit_type);
+
+    u(1, miscs_var(first_slice_segment_in_pic_flag));
+    // Present for NAL unit types from NAL_BLA_W_LP up to 23.
+    if(pic->nal_unit_type >= NAL_BLA_W_LP &&
+       pic->nal_unit_type <= 23)
+        u(1, miscs_var(no_output_of_prior_pics_flag));
+
+    ue(slice_var(slice_pic_parameter_set_id));
+
+    if(!miscs->first_slice_segment_in_pic_flag) {
+        if(pic->pic_fields.bits.dependent_slice_segments_enabled_flag)
+            u(1, slice_field(dependent_slice_segment_flag));
+        // The address width is derived from the number of CTUs in the picture.
+        u(av_log2((ctx->ctu_width * ctx->ctu_height) - 1) + 1,
+          miscs_var(slice_segment_address));
+    }
+    if(!slice->slice_fields.bits.dependent_slice_segment_flag) {
+        // Inferred values of elements which may be absent from the header.
+        int temporal_mvp_enabled = 0;
+        int sao_luma = 0, sao_chroma = 0;
+        int deblocking_override = 0;
+        int deblocking_disabled =
+            misc->deblocking_filter_control_present_flag &&
+            misc->pps_deblocking_filter_disabled_flag;
+
+        for(i = 0; i < misc->num_extra_slice_header_bits; i++)
+            u(1, miscs_var(slice_reserved_flag[i]));
+
+        ue(slice_var(slice_type));
+        if(misc->output_flag_present_flag)
+            u(1, 1, pic_output_flag);
+        if(seq->seq_fields.bits.separate_colour_plane_flag)
+            u(2, slice_field(colour_plane_id));
+
+        // Picture order count and reference picture set selection are
+        // absent from IDR slices; nothing else is conditional on this.
+        if(pic->nal_unit_type != NAL_IDR_W_RADL &&
+           pic->nal_unit_type != NAL_IDR_N_LP) {
+            u(4 + misc->log2_max_pic_order_cnt_lsb_minus4,
+              current->poc & ((1 << (misc->log2_max_pic_order_cnt_lsb_minus4 + 4)) - 1),
+              slice_pic_order_cnt_lsb);
+
+            u(1, miscs_var(short_term_ref_pic_set_sps_flag));
+            if(!miscs->short_term_ref_pic_set_sps_flag) {
+                // Reference picture sets coded in the slice are unsupported.
+                av_assert0(0);
+            } else if(misc->num_short_term_ref_pic_sets > 1) {
+                u(av_log2(misc->num_short_term_ref_pic_sets - 1) + 1,
+                  miscs_var(short_term_ref_pic_idx));
+            }
+
+            if(misc->long_term_ref_pics_present_flag) {
+                // Long-term reference pictures are unsupported.
+                av_assert0(0);
+            }
+
+            if(seq->seq_fields.bits.sps_temporal_mvp_enabled_flag) {
+                u(1, slice_field(slice_temporal_mvp_enabled_flag));
+                temporal_mvp_enabled =
+                    slice->slice_fields.bits.slice_temporal_mvp_enabled_flag;
+            }
+        }
+
+        if(seq->seq_fields.bits.sample_adaptive_offset_enabled_flag) {
+            u(1, slice_field(slice_sao_luma_flag));
+            sao_luma = slice->slice_fields.bits.slice_sao_luma_flag;
+            if(!seq->seq_fields.bits.separate_colour_plane_flag &&
+               seq->seq_fields.bits.chroma_format_idc != 0) {
+                u(1, slice_field(slice_sao_chroma_flag));
+                sao_chroma = slice->slice_fields.bits.slice_sao_chroma_flag;
+            }
+        }
+
+        if(slice->slice_type == P_SLICE || slice->slice_type == B_SLICE) {
+            // Active reference counts: PPS defaults unless overridden here.
+            unsigned int num_ref_idx_l0_minus1 =
+                pic->num_ref_idx_l0_default_active_minus1;
+            unsigned int num_ref_idx_l1_minus1 =
+                pic->num_ref_idx_l1_default_active_minus1;
+            // Inferred to be 1 when not present (that is, for P slices).
+            int collocated_from_l0 = 1;
+
+            u(1, slice_field(num_ref_idx_active_override_flag));
+            if(slice->slice_fields.bits.num_ref_idx_active_override_flag) {
+                ue(slice_var(num_ref_idx_l0_active_minus1));
+                num_ref_idx_l0_minus1 = slice->num_ref_idx_l0_active_minus1;
+                if(slice->slice_type == B_SLICE) {
+                    ue(slice_var(num_ref_idx_l1_active_minus1));
+                    num_ref_idx_l1_minus1 = slice->num_ref_idx_l1_active_minus1;
+                }
+            }
+
+            if(misc->lists_modification_present_flag) {
+                // ref_pic_lists_modification() is unsupported.
+                av_assert0(0);
+            }
+            if(slice->slice_type == B_SLICE) {
+                u(1, slice_field(mvd_l1_zero_flag));
+            }
+            if(misc->cabac_init_present_flag) {
+                u(1, slice_field(cabac_init_flag));
+            }
+            if(temporal_mvp_enabled) {
+                if(slice->slice_type == B_SLICE) {
+                    u(1, slice_field(collocated_from_l0_flag));
+                    collocated_from_l0 =
+                        slice->slice_fields.bits.collocated_from_l0_flag;
+                }
+                // The index is only coded when the selected list holds more
+                // than one active reference.
+                if((collocated_from_l0 && num_ref_idx_l0_minus1 > 0) ||
+                   (!collocated_from_l0 && num_ref_idx_l1_minus1 > 0))
+                    ue(pic->collocated_ref_pic_index, collocated_ref_idx);
+            }
+            if((pic->pic_fields.bits.weighted_pred_flag &&
+                slice->slice_type == P_SLICE) ||
+               (pic->pic_fields.bits.weighted_bipred_flag &&
+                slice->slice_type == B_SLICE)) {
+                // pred_weight_table() is unsupported.
+                av_assert0(0);
+            }
+            // Always present for P and B slices.
+            ue(5 - slice->max_num_merge_cand, five_minus_max_num_merge_cand);
+        }
+
+        se(slice_var(slice_qp_delta));
+        if(misc->pps_slice_chroma_qp_offsets_present_flag) {
+            se(slice_var(slice_cb_qp_offset));
+            se(slice_var(slice_cr_qp_offset));
+        }
+        if(misc->pps_slice_chroma_offset_list_enabled_flag) {
+            u(1, 0, cu_chroma_qp_offset_enabled_flag);
+        }
+        if(misc->deblocking_filter_override_enabled_flag) {
+            u(1, miscs_var(deblocking_filter_override_flag));
+            deblocking_override = miscs->deblocking_filter_override_flag;
+        }
+        if(deblocking_override) {
+            u(1, slice_field(slice_deblocking_filter_disabled_flag));
+            deblocking_disabled =
+                slice->slice_fields.bits.slice_deblocking_filter_disabled_flag;
+            if(!deblocking_disabled) {
+                se(slice_var(slice_beta_offset_div2));
+                se(slice_var(slice_tc_offset_div2));
+            }
+        }
+        // Only coded when some in-loop filter is active for this slice.
+        if(pic->pic_fields.bits.pps_loop_filter_across_slices_enabled_flag &&
+           (sao_luma || sao_chroma || !deblocking_disabled)) {
+            u(1, slice_field(slice_loop_filter_across_slices_enabled_flag));
+        }
+    }
+
+    // NOTE(review): num_entry_point_offsets is required when tiles or
+    // entropy coding sync are enabled, but it is not written here.
+    // No slice segment header extension is written (the PPS writer
+    // signals slice_segment_header_extension_present_flag as 0).
+
+    u(1, 1, alignment_bit_equal_to_one);
+    while(put_bits_count(s) & 7)
+        u(1, 0, alignment_bit_equal_to_zero);
+}
+
+// Convert one raw NAL unit (len bytes at src) to byte-stream format at dst:
+// a four-byte start code followed by the payload, with an emulation
+// prevention byte (0x03) inserted before any byte in the range 0..3 that
+// follows two consecutive zero bytes.  Returns the number of bytes written.
+// dst is not bounds-checked; the output can be up to 4 + len + len / 2 bytes.
+static size_t vaapi_hevc_nal_unit_to_byte_stream(uint8_t *dst, uint8_t *src, size_t len)
+{
+    size_t out = 0, in;
+    int zeros = 0;
+
+    // Start code.
+    dst[out++] = 0;
+    dst[out++] = 0;
+    dst[out++] = 0;
+    dst[out++] = 1;
+
+    for (in = 0; in < len; in++) {
+        const uint8_t byte = src[in];
+
+        if (zeros >= 2) {
+            // emulation_prevention_three_byte
+            if (byte <= 3)
+                dst[out++] = 3;
+            zeros = (byte == 0);
+        } else if (byte == 0) {
+            zeros++;
+        } else {
+            zeros = 0;
+        }
+        dst[out++] = byte;
+    }
+
+    return out;
+}
+
+// Submit one packed header to the VA encode context.
+//
+// Two VA buffers are created - a VAEncPackedHeaderParameterBuffer describing
+// the header (type, length in bits, emulation prevention already applied)
+// and a data buffer holding the (bit_len + 7) / 8 bytes at data - and both
+// are passed to vaRenderPicture().
+//
+// Returns 0 on success, or AVERROR_EXTERNAL if any VA call fails.
+static int vaapi_hevc_render_packed_header(VAAPIHEVCEncodeContext *ctx, int type,
+                                           char *data, size_t bit_len)
+{
+    VAStatus vas;
+    VABufferID id_list[2];
+    VAEncPackedHeaderParameterBuffer buffer = {
+        .type = type,
+        .bit_length = bit_len,
+        .has_emulation_bytes = 1,
+    };
+
+    // The size must be that of the structure itself, not of a pointer to it.
+    vas = vaCreateBuffer(ctx->hardware_context->display, ctx->va_codec.context_id,
+                         VAEncPackedHeaderParameterBufferType,
+                         sizeof(buffer), 1, &buffer, &id_list[0]);
+    if(vas != VA_STATUS_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to create parameter buffer for packed "
+               "header (type %d): %d (%s).\n", type, vas, vaErrorStr(vas));
+        return AVERROR_EXTERNAL;
+    }
+
+    vas = vaCreateBuffer(ctx->hardware_context->display, ctx->va_codec.context_id,
+                         VAEncPackedHeaderDataBufferType,
+                         (bit_len + 7) / 8, 1, data, &id_list[1]);
+    if(vas != VA_STATUS_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to create data buffer for packed "
+               "header (type %d): %d (%s).\n", type, vas, vaErrorStr(vas));
+        // Do not leak the parameter buffer created above.
+        vaDestroyBuffer(ctx->hardware_context->display, id_list[0]);
+        return AVERROR_EXTERNAL;
+    }
+
+    av_log(ctx, AV_LOG_DEBUG, "Packed header buffer (%d) is %#x/%#x "
+           "(%zu bits).\n", type, id_list[0], id_list[1], bit_len);
+
+    vas = vaRenderPicture(ctx->hardware_context->display, ctx->va_codec.context_id,
+                          id_list, 2);
+    if(vas != VA_STATUS_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to render packed "
+               "header (type %d): %d (%s).\n", type, vas, vaErrorStr(vas));
+        return AVERROR_EXTERNAL;
+    }
+
+    return 0;
+}
+
+// Serialise the VPS and SPS and submit them together as one packed
+// sequence header.
+//
+// Each parameter set is written into tmp with the bit writer, then converted
+// to byte-stream form (start code + emulation prevention) and appended to
+// buf.  Returns the result of vaapi_hevc_render_packed_header().
+static int vaapi_hevc_render_packed_vps_sps(VAAPIHEVCEncodeContext *ctx)
+{
+    PutBitContext pbc, *s = &pbc;
+    uint8_t tmp[256];
+    // Room for two NAL units in the worst case: each needs a four-byte start
+    // code plus the payload expanded by at most one escape byte per two
+    // payload bytes.
+    uint8_t buf[2 * (4 + sizeof(tmp) * 3 / 2)];
+    size_t byte_len, nal_len;
+
+    init_put_bits(s, tmp, sizeof(tmp));
+    vaapi_hevc_write_vps(s, ctx);
+    nal_len = put_bits_count(s);
+    flush_put_bits(s);
+    byte_len = vaapi_hevc_nal_unit_to_byte_stream(buf, tmp, nal_len / 8);
+
+    init_put_bits(s, tmp, sizeof(tmp));
+    vaapi_hevc_write_sps(s, ctx);
+    nal_len = put_bits_count(s);
+    flush_put_bits(s);
+    byte_len += vaapi_hevc_nal_unit_to_byte_stream(buf + byte_len, tmp, nal_len / 8);
+
+    return vaapi_hevc_render_packed_header(ctx, VAEncPackedHeaderSequence,
+                                           (char*)buf, byte_len * 8);
+}
+
+// Serialise the PPS and submit it as a packed picture header.
+//
+// The PPS is written into a scratch buffer, converted to byte-stream form
+// (start code + emulation prevention) and handed on to
+// vaapi_hevc_render_packed_header(), whose result is returned.
+static int vaapi_hevc_render_packed_pps(VAAPIHEVCEncodeContext *ctx)
+{
+    uint8_t raw_nal[256];
+    uint8_t stream[512];
+    PutBitContext writer;
+    size_t raw_bits, stream_bytes;
+
+    init_put_bits(&writer, raw_nal, sizeof(raw_nal));
+    vaapi_hevc_write_pps(&writer, ctx);
+    // Take the bit count before flushing, as the other packed header
+    // writers do.
+    raw_bits = put_bits_count(&writer);
+    flush_put_bits(&writer);
+
+    stream_bytes = vaapi_hevc_nal_unit_to_byte_stream(stream, raw_nal,
+                                                      raw_bits / 8);
+
+    return vaapi_hevc_render_packed_header(ctx, VAEncPackedHeaderPicture,
+                                           stream, stream_bytes * 8);
+}
+
+// Serialise the slice segment header for the given frame and submit it as a
+// packed slice header.
+//
+// The header is written into a scratch buffer, converted to byte-stream form
+// (start code + emulation prevention) and handed on to
+// vaapi_hevc_render_packed_header(), whose result is returned.
+static int vaapi_hevc_render_packed_slice(VAAPIHEVCEncodeContext *ctx,
+                                          VAAPIHEVCEncodeFrame *current)
+{
+    uint8_t raw_nal[256];
+    uint8_t stream[512];
+    PutBitContext writer;
+    size_t raw_bits, stream_bytes;
+
+    init_put_bits(&writer, raw_nal, sizeof(raw_nal));
+    vaapi_hevc_write_slice_header(&writer, ctx, current);
+    // Take the bit count before flushing, as the other packed header
+    // writers do.
+    raw_bits = put_bits_count(&writer);
+    flush_put_bits(&writer);
+
+    stream_bytes = vaapi_hevc_nal_unit_to_byte_stream(stream, raw_nal,
+                                                      raw_bits / 8);
+
+    return vaapi_hevc_render_packed_header(ctx, VAEncPackedHeaderSlice,
+                                           stream, stream_bytes * 8);
+}
+
+// Upload the sequence parameter structure and attach it to the picture
+// currently being built.
+//
+// The buffer ID is stored in ctx->seq_params_id.  Returns 0 on success or
+// AVERROR_EXTERNAL if creating or rendering the buffer fails.
+static int vaapi_hevc_render_sequence(VAAPIHEVCEncodeContext *ctx)
+{
+    VADisplay display = ctx->hardware_context->display;
+    VAContextID context = ctx->va_codec.context_id;
+    VAStatus status;
+
+    status = vaCreateBuffer(display, context,
+                            VAEncSequenceParameterBufferType,
+                            sizeof(ctx->seq_params), 1, &ctx->seq_params,
+                            &ctx->seq_params_id);
+    if(status != VA_STATUS_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to create buffer for sequence "
+               "parameters: %d (%s).\n", status, vaErrorStr(status));
+        return AVERROR_EXTERNAL;
+    }
+
+    av_log(ctx, AV_LOG_DEBUG, "Sequence parameter buffer is %#x.\n",
+           ctx->seq_params_id);
+
+    status = vaRenderPicture(display, context, &ctx->seq_params_id, 1);
+    if(status == VA_STATUS_SUCCESS)
+        return 0;
+
+    av_log(ctx, AV_LOG_ERROR, "Failed to send sequence parameters: "
+           "%d (%s).\n", status, vaErrorStr(status));
+    return AVERROR_EXTERNAL;
+}
+
+// Upload the picture parameter structure of the given frame and attach it
+// to the picture currently being built.
+//
+// The buffer ID is stored in ctx->pic_params_id (on the context, not on the
+// frame).  Returns 0 on success or AVERROR_EXTERNAL if creating or rendering
+// the buffer fails.
+static int vaapi_hevc_render_picture(VAAPIHEVCEncodeContext *ctx,
+                                     VAAPIHEVCEncodeFrame *current)
+{
+    VADisplay display = ctx->hardware_context->display;
+    VAContextID context = ctx->va_codec.context_id;
+    VAStatus status;
+
+    status = vaCreateBuffer(display, context,
+                            VAEncPictureParameterBufferType,
+                            sizeof(current->pic_params), 1,
+                            &current->pic_params, &ctx->pic_params_id);
+    if(status != VA_STATUS_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to create buffer for picture "
+               "parameters: %d (%s).\n", status, vaErrorStr(status));
+        return AVERROR_EXTERNAL;
+    }
+
+    av_log(ctx, AV_LOG_DEBUG, "Picture parameter buffer is %#x.\n",
+           ctx->pic_params_id);
+
+    status = vaRenderPicture(display, context, &ctx->pic_params_id, 1);
+    if(status == VA_STATUS_SUCCESS)
+        return 0;
+
+    av_log(ctx, AV_LOG_ERROR, "Failed to send picture parameters: "
+           "%d (%s).\n", status, vaErrorStr(status));
+    return AVERROR_EXTERNAL;
+}
+
+// Upload the slice parameter structure of the given frame and attach it to
+// the picture currently being built.
+//
+// The buffer ID is stored in current->slice_params_id.  Returns 0 on success
+// or AVERROR_EXTERNAL if creating or rendering the buffer fails.
+static int vaapi_hevc_render_slice(VAAPIHEVCEncodeContext *ctx,
+                                   VAAPIHEVCEncodeFrame *current)
+{
+    VADisplay display = ctx->hardware_context->display;
+    VAContextID context = ctx->va_codec.context_id;
+    VAStatus status;
+
+    status = vaCreateBuffer(display, context,
+                            VAEncSliceParameterBufferType,
+                            sizeof(current->slice_params), 1,
+                            &current->slice_params, &current->slice_params_id);
+    if(status != VA_STATUS_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to create buffer for slice "
+               "parameters: %d (%s).\n", status, vaErrorStr(status));
+        return AVERROR_EXTERNAL;
+    }
+
+    av_log(ctx, AV_LOG_DEBUG, "Slice buffer is %#x.\n", current->slice_params_id);
+
+    status = vaRenderPicture(display, context, &current->slice_params_id, 1);
+    if(status == VA_STATUS_SUCCESS)
+        return 0;
+
+    av_log(ctx, AV_LOG_ERROR, "Failed to send slice parameters: "
+           "%d (%s).\n", status, vaErrorStr(status));
+    return AVERROR_EXTERNAL;
+}
+
+// Fill in the stream-level parameters used for every picture.
+//
+// Three structures on the context are set up from the values already stored
+// there (level, aligned/input size, fixed QP) and from the codec context
+// (time base, sample aspect ratio, colour properties):
+//  - ctx->seq_params:  the VAAPI sequence parameters (zeroed first);
+//  - ctx->pic_params:  the picture parameter template copied for each
+//                      frame (zeroed first);
+//  - ctx->misc_params: the additional fields needed to write the VPS/SPS
+//                      which have no home in the VAAPI structures.
+// Always returns 0.
+static av_cold int vaapi_hevc_encode_init_stream(VAAPIHEVCEncodeContext *ctx)
+{
+    VAEncSequenceParameterBufferHEVC *seq = &ctx->seq_params;
+    VAEncPictureParameterBufferHEVC *pic = &ctx->pic_params;
+    VAAPIHEVCEncodeMiscSequenceParams *misc = &ctx->misc_params;
+    const AVRational sar = ctx->avctx->sample_aspect_ratio;
+    int i;
+
+    memset(seq, 0, sizeof(*seq));
+    memset(pic, 0, sizeof(*pic));
+
+    //
+    // Sequence parameters.
+    //
+
+    // general_profile_space == 0.
+    seq->general_profile_idc = 1; // Main profile.
+    seq->general_tier_flag   = 0;
+    seq->general_level_idc   = ctx->level * 3;
+
+    seq->intra_period     = 0;
+    seq->intra_idr_period = 0;
+    seq->ip_period        = 0;
+
+    seq->pic_width_in_luma_samples  = ctx->aligned_width;
+    seq->pic_height_in_luma_samples = ctx->aligned_height;
+
+    // 8-bit 4:2:0; all other misc flags stay zero.
+    seq->seq_fields.bits.chroma_format_idc          = 1;
+    seq->seq_fields.bits.separate_colour_plane_flag = 0;
+    seq->seq_fields.bits.bit_depth_luma_minus8      = 0;
+    seq->seq_fields.bits.bit_depth_chroma_minus8    = 0;
+
+    // The block sizes ought to come from the capabilities of the encoder,
+    // but there is no way to query them, so these are hardcoded values
+    // which were found to work.
+    // CTB size from 8x8 to 32x32.
+    seq->log2_min_luma_coding_block_size_minus3   = 0;
+    seq->log2_diff_max_min_luma_coding_block_size = 2;
+    // Transform size from 4x4 to 32x32.
+    seq->log2_min_transform_block_size_minus2   = 0;
+    seq->log2_diff_max_min_transform_block_size = 3;
+    // Full transform hierarchy allowed (2-5).
+    seq->max_transform_hierarchy_depth_inter = 3;
+    seq->max_transform_hierarchy_depth_intra = 3;
+
+    seq->vui_parameters_present_flag = 0;
+
+    //
+    // Picture parameter template.
+    //
+
+    // No references until a frame fills them in.
+    for(i = 0; i < FF_ARRAY_ELEMS(pic->reference_frames); i++) {
+        pic->reference_frames[i].picture_id = VA_INVALID_ID;
+        pic->reference_frames[i].flags      = VA_PICTURE_HEVC_INVALID;
+    }
+
+    pic->collocated_ref_pic_index = 0xff;
+    pic->last_picture             = 0;
+    pic->pic_init_qp              = ctx->fixed_qp;
+
+    pic->diff_cu_qp_delta_depth = 0;
+    pic->pps_cb_qp_offset       = 0;
+    pic->pps_cr_qp_offset       = 0;
+
+    // tiles_enabled_flag == 0, so ignore num_tile_(rows|columns)_minus1.
+
+    pic->log2_parallel_merge_level_minus2 = 0;
+
+    // No limit on size.
+    pic->ctu_max_bitsize_allowed = 0;
+
+    pic->num_ref_idx_l0_default_active_minus1 = 0;
+    pic->num_ref_idx_l1_default_active_minus1 = 0;
+
+    pic->slice_pic_parameter_set_id = 0;
+
+    pic->pic_fields.bits.screen_content_flag            = 0;
+    pic->pic_fields.bits.enable_gpu_weighted_prediction = 0;
+    // cu_qp_delta_enabled_flag is deliberately left at zero.
+
+    //
+    // Extra VPS/SPS fields.
+    //
+
+    misc->video_parameter_set_id = 5;
+    misc->seq_parameter_set_id   = 5;
+
+    misc->vps_max_layers_minus1        = 0;
+    misc->vps_max_sub_layers_minus1    = 0;
+    misc->vps_temporal_id_nesting_flag = 1;
+    misc->sps_max_sub_layers_minus1    = 0;
+    misc->sps_temporal_id_nesting_flag = 1;
+
+    // Compatible only with the profile actually in use.
+    for(i = 0; i < 32; i++)
+        misc->general_profile_compatibility_flag[i] =
+            (i == seq->general_profile_idc);
+
+    misc->general_progressive_source_flag    = 1;
+    misc->general_interlaced_source_flag     = 0;
+    misc->general_non_packed_constraint_flag = 0;
+    misc->general_frame_only_constraint_flag = 1;
+    misc->general_inbld_flag                 = 0;
+
+    misc->log2_max_pic_order_cnt_lsb_minus4 = 4;
+
+    misc->vps_sub_layer_ordering_info_present_flag = 0;
+    misc->vps_max_dec_pic_buffering_minus1[0]      = 0;
+    misc->vps_max_num_reorder_pics[0]              = 0;
+    misc->vps_max_latency_increase_plus1[0]        = 0;
+
+    misc->sps_sub_layer_ordering_info_present_flag = 0;
+    misc->sps_max_dec_pic_buffering_minus1[0]      = 0;
+    misc->sps_max_num_reorder_pics[0]              = 0;
+    misc->sps_max_latency_increase_plus1[0]        = 0;
+
+    // Timing follows the codec time base, one tick per POC step.
+    misc->vps_timing_info_present_flag        = 1;
+    misc->vps_num_units_in_tick               = ctx->avctx->time_base.num;
+    misc->vps_time_scale                      = ctx->avctx->time_base.den;
+    misc->vps_poc_proportional_to_timing_flag = 1;
+    misc->vps_num_ticks_poc_diff_minus1       = 0;
+
+    // Crop the alignment padding off the right and bottom edges.
+    if(ctx->input_width  == ctx->aligned_width &&
+       ctx->input_height == ctx->aligned_height) {
+        misc->conformance_window_flag = 0;
+    } else {
+        misc->conformance_window_flag = 1;
+        misc->conf_win_left_offset    = 0;
+        misc->conf_win_right_offset   =
+            (ctx->aligned_width - ctx->input_width) / 2;
+        misc->conf_win_top_offset     = 0;
+        misc->conf_win_bottom_offset  =
+            (ctx->aligned_height - ctx->input_height) / 2;
+    }
+
+    // A single short-term RPS: one reference, the immediately preceding
+    // picture.
+    misc->num_short_term_ref_pic_sets = 1;
+    misc->st_ref_pic_set[0].num_negative_pics           = 1;
+    misc->st_ref_pic_set[0].num_positive_pics           = 0;
+    misc->st_ref_pic_set[0].delta_poc_s0_minus1[0]      = 0;
+    misc->st_ref_pic_set[0].used_by_curr_pic_s0_flag[0] = 1;
+
+    misc->vui_parameters_present_flag = 1;
+
+    if(sar.num != 0) {
+        misc->aspect_ratio_info_present_flag = 1;
+        if(sar.num != sar.den) {
+            misc->aspect_ratio_idc = 255; // Extended SAR.
+            misc->sar_width  = sar.num;
+            misc->sar_height = sar.den;
+        } else {
+            misc->aspect_ratio_idc = 1;
+        }
+    }
+
+    // Always signalled.  Should this be conditional on some of these being
+    // set?
+    misc->video_signal_type_present_flag  = 1;
+    misc->video_format                    = 5; // Unspecified.
+    misc->video_full_range_flag           = 0;
+    misc->colour_description_present_flag = 1;
+    misc->colour_primaries         = ctx->avctx->color_primaries;
+    misc->transfer_characteristics = ctx->avctx->color_trc;
+    misc->matrix_coeffs            = ctx->avctx->colorspace;
+
+    return 0;
+}
+
+// Fill in the per-frame parameter structures for the given frame.
+//
+// current->pic_params starts as a copy of the stream-level template in
+// ctx->pic_params and is then given the reconstructed picture, the reference
+// pictures (refa for non-I frames, additionally refb for B frames), the
+// coded data buffer and the picture type.  current->slice_params and
+// current->misc_params are zeroed and set up for a single slice covering
+// the whole picture.  Always returns 0.
+static int vaapi_hevc_encode_init_picture(VAAPIHEVCEncodeContext *ctx,
+                                          VAAPIHEVCEncodeFrame *current)
+{
+    VAEncPictureParameterBufferHEVC *pic = &current->pic_params;
+    VAEncSliceParameterBufferHEVC *slice = &current->slice_params;
+    VAAPIHEVCEncodeMiscPictureParams *misc = &current->misc_params;
+    // Every I frame is coded as an IDR picture.
+    const int is_idr = (current->type == FRAME_TYPE_I);
+
+    *pic = ctx->pic_params;
+    memset(slice, 0, sizeof(*slice));
+    memset(misc, 0, sizeof(*misc));
+
+    // Picture parameters.
+    memcpy(&pic->decoded_curr_pic, &current->pic, sizeof(VAPictureHEVC));
+
+    if(!is_idr)
+        memcpy(&pic->reference_frames[0],
+               &current->refa->pic, sizeof(VAPictureHEVC));
+    if(current->type == FRAME_TYPE_B)
+        memcpy(&pic->reference_frames[1],
+               &current->refb->pic, sizeof(VAPictureHEVC));
+
+    pic->coded_buf = current->coded_data_id;
+
+    if(is_idr) {
+        pic->nal_unit_type                = NAL_IDR_W_RADL;
+        pic->pic_fields.bits.coding_type  = 1;
+    } else {
+        pic->nal_unit_type                = NAL_TRAIL_R;
+        pic->pic_fields.bits.coding_type  = 2;
+    }
+    pic->pic_fields.bits.idr_pic_flag       = is_idr;
+    pic->pic_fields.bits.reference_pic_flag = 1;
+
+    // Slice parameters: one slice containing every CTU of the picture.
+    slice->slice_segment_address = 0;
+    slice->num_ctu_in_slice      = ctx->ctu_width * ctx->ctu_height;
+
+    slice->slice_type                 = current->type;
+    slice->slice_pic_parameter_set_id = 0;
+
+    slice->num_ref_idx_l0_active_minus1 = 0;
+    slice->num_ref_idx_l1_active_minus1 = 0;
+    // Both lists are simply copies of the picture's reference frame array.
+    memcpy(slice->ref_pic_list0, pic->reference_frames, sizeof(pic->reference_frames));
+    memcpy(slice->ref_pic_list1, pic->reference_frames, sizeof(pic->reference_frames));
+
+    slice->max_num_merge_cand = 5;
+    slice->slice_qp_delta     = 0;
+
+    slice->slice_fields.bits.last_slice_of_pic_flag = 1;
+
+    // Extra slice header fields: use the RPS from the SPS.
+    misc->first_slice_segment_in_pic_flag = 1;
+    misc->short_term_ref_pic_set_sps_flag = 1;
+    misc->short_term_ref_pic_idx          = 0;
+
+    return 0;
+}
+
+// Encode one input frame (the encode2 callback).
+//
+// If the input frame is already a VAAPI surface it is used directly;
+// otherwise a surface is taken from the input pool and the frame is copied
+// into it.  A surface for the reconstructed picture is then allocated, the
+// next DPB slot is set up (I frame at the start and every idr_interval
+// frames, P frames referencing the previous frame otherwise), the parameter
+// buffers and packed headers are submitted, and the function waits for the
+// hardware to finish before copying the coded data into pkt.
+//
+// On success returns 0, with *got_packet set to 1 if coded data was
+// produced.  On failure returns a negative AVERROR code.  The hardware
+// context lock is held for the duration of the call.
+static int vaapi_hevc_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
+                                     const AVFrame *pic, int *got_packet)
+{
+    VAAPIHEVCEncodeContext *ctx = avctx->priv_data;
+    AVVAAPISurface *input, *recon;
+    VAAPIHEVCEncodeFrame *current;
+    // Both start out null so that the common exit path can free them
+    // unconditionally.
+    AVFrame *input_image = NULL, *recon_image = NULL;
+    VACodedBufferSegment *buf_list, *buf;
+    VAStatus vas;
+    size_t total_size, offset;
+    int err;
+
+    av_log(ctx, AV_LOG_DEBUG, "New frame: format %s, size %ux%u.\n",
+           av_get_pix_fmt_name(pic->format), pic->width, pic->height);
+
+    av_vaapi_lock_hardware_context(ctx->hardware_context);
+
+    if(pic->format == AV_PIX_FMT_VAAPI) {
+        input = (AVVAAPISurface*)pic->buf[0]->data;
+
+    } else {
+        input_image = av_frame_alloc();
+        if(!input_image) {
+            err = AVERROR(ENOMEM);
+            goto end;
+        }
+
+        err = ff_vaapi_get_input_surface(&ctx->va_codec, input_image);
+        if(err) {
+            av_log(ctx, AV_LOG_ERROR, "Failed to allocate surface to "
+                   "copy input frame: %d (%s).\n", err, av_err2str(err));
+            goto end;
+        }
+
+        input = (AVVAAPISurface*)input_image->buf[0]->data;
+
+        err = ff_vaapi_map_surface(input, 0);
+        if(err) {
+            av_log(ctx, AV_LOG_ERROR, "Failed to map input surface: "
+                   "%d (%s).\n", err, av_err2str(err));
+            goto end;
+        }
+
+        err = ff_vaapi_copy_to_surface(pic, input);
+        if(err) {
+            av_log(ctx, AV_LOG_ERROR, "Failed to copy to input surface: "
+                   "%d (%s).\n", err, av_err2str(err));
+            goto end;
+        }
+
+        err = ff_vaapi_unmap_surface(input, 1);
+        if(err) {
+            av_log(ctx, AV_LOG_ERROR, "Failed to unmap input surface: "
+                   "%d (%s).\n", err, av_err2str(err));
+            goto end;
+        }
+    }
+    av_log(ctx, AV_LOG_DEBUG, "Using surface %#x for input image.\n",
+           input->id);
+
+    recon_image = av_frame_alloc();
+    if(!recon_image) {
+        err = AVERROR(ENOMEM);
+        goto end;
+    }
+
+    err = ff_vaapi_get_output_surface(&ctx->va_codec, recon_image);
+    if(err) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to allocate surface for "
+               "reconstructed frame: %d (%s).\n", err, av_err2str(err));
+        goto end;
+    }
+    recon = (AVVAAPISurface*)recon_image->buf[0]->data;
+    av_log(ctx, AV_LOG_DEBUG, "Using surface %#x for reconstructed image.\n",
+           recon->id);
+
+    // The frame before the previous one is no longer needed as a reference.
+    if(ctx->previous_frame != ctx->current_frame) {
+        av_frame_unref(&ctx->dpb[ctx->previous_frame].avframe);
+    }
+
+    ctx->previous_frame = ctx->current_frame;
+    ctx->current_frame = (ctx->current_frame + 1) % MAX_DPB_PICS;
+    {
+        current = &ctx->dpb[ctx->current_frame];
+
+        // poc < 0 only before the first frame.
+        if(ctx->poc < 0 ||
+           ctx->poc == ctx->options.idr_interval)
+            current->type = FRAME_TYPE_I;
+        else
+            current->type = FRAME_TYPE_P;
+
+        if(current->type == FRAME_TYPE_I)
+            ctx->poc = 0;
+        else
+            ++ctx->poc;
+        current->poc = ctx->poc;
+
+        if(current->type == FRAME_TYPE_I) {
+            current->refa = 0;
+            current->refb = 0;
+        } else if(current->type == FRAME_TYPE_P) {
+            current->refa = &ctx->dpb[ctx->previous_frame];
+            current->refb = 0;
+        } else {
+            av_assert0(0);
+        }
+
+        memset(&current->pic, 0, sizeof(VAPictureHEVC));
+        current->pic.picture_id = recon->id;
+        current->pic.pic_order_cnt = current->poc;
+
+        // Hand the surface reference over to the DPB slot through the frame
+        // API (rather than copying the structure bytes), then release the
+        // now-empty temporary frame.  The slot is expected to be clean here:
+        // it is either unused so far or was unreferenced on an earlier call.
+        av_frame_move_ref(&current->avframe, recon_image);
+        av_frame_free(&recon_image);
+    }
+    av_log(ctx, AV_LOG_DEBUG, "Encoding frame as %s (%d).\n",
+           current->type == FRAME_TYPE_I ? "I" :
+           current->type == FRAME_TYPE_P ? "P" : "B", current->poc);
+
+    vas = vaBeginPicture(ctx->hardware_context->display, ctx->va_codec.context_id,
+                         input->id);
+    if(vas != VA_STATUS_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to attach new picture: "
+               "%d (%s).\n", vas, vaErrorStr(vas));
+        err = AVERROR_EXTERNAL;
+        goto end;
+    }
+
+    err = vaapi_hevc_encode_init_picture(ctx, current);
+    if(err) goto end;
+
+    // Sequence-level parameters and headers are only sent with I frames.
+    if(current->type == FRAME_TYPE_I) {
+        err = vaapi_hevc_render_sequence(ctx);
+        if(err) goto end;
+    }
+
+    err = vaapi_hevc_render_picture(ctx, current);
+    if(err) goto end;
+
+    if(current->type == FRAME_TYPE_I) {
+        err = vaapi_hevc_render_packed_vps_sps(ctx);
+        if(err) goto end;
+
+        err = vaapi_hevc_render_packed_pps(ctx);
+        if(err) goto end;
+    }
+
+    err = vaapi_hevc_render_packed_slice(ctx, current);
+    if(err) goto end;
+
+    err = vaapi_hevc_render_slice(ctx, current);
+    if(err) goto end;
+
+    vas = vaEndPicture(ctx->hardware_context->display, ctx->va_codec.context_id);
+    if(vas != VA_STATUS_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to start picture processing: "
+               "%d (%s).\n", vas, vaErrorStr(vas));
+        err = AVERROR_EXTERNAL;
+        goto end;
+    }
+
+    vas = vaSyncSurface(ctx->hardware_context->display, input->id);
+    if(vas != VA_STATUS_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to sync to picture completion: "
+               "%d (%s).\n", vas, vaErrorStr(vas));
+        err = AVERROR_EXTERNAL;
+        goto end;
+    }
+
+    buf_list = 0;
+    vas = vaMapBuffer(ctx->hardware_context->display, current->coded_data_id,
+                      (void**)&buf_list);
+    if(vas != VA_STATUS_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to map output buffers: "
+               "%d (%s).\n", vas, vaErrorStr(vas));
+        err = AVERROR_EXTERNAL;
+        goto end;
+    }
+
+    if(buf_list) {
+        // The coded data may be split over several segments: size the
+        // packet for all of them and concatenate, so that no segment
+        // replaces (and leaks) the packet made for an earlier one.
+        total_size = 0;
+        for(buf = buf_list; buf; buf = buf->next) {
+            av_log(ctx, AV_LOG_DEBUG, "Output buffer: %u bytes.\n", buf->size);
+            total_size += buf->size;
+        }
+
+        if(total_size > INT_MAX)
+            err = AVERROR(EINVAL);
+        else
+            err = av_new_packet(pkt, (int)total_size);
+        if(err) {
+            av_log(ctx, AV_LOG_ERROR, "Failed to make output buffer "
+                   "(%zu bytes).\n", total_size);
+            // Do not leave the coded buffer mapped on the way out.
+            vaUnmapBuffer(ctx->hardware_context->display,
+                          current->coded_data_id);
+            goto end;
+        }
+
+        offset = 0;
+        for(buf = buf_list; buf; buf = buf->next) {
+            memcpy(pkt->data + offset, buf->buf, buf->size);
+            offset += buf->size;
+        }
+
+        if(current->type == FRAME_TYPE_I)
+            pkt->flags |= AV_PKT_FLAG_KEY;
+
+        pkt->pts = pic->pts;
+
+        *got_packet = 1;
+    }
+
+    vas = vaUnmapBuffer(ctx->hardware_context->display, current->coded_data_id);
+    if(vas != VA_STATUS_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to unmap output buffers: "
+               "%d (%s).\n", vas, vaErrorStr(vas));
+        err = AVERROR_EXTERNAL;
+        goto end;
+    }
+
+    err = 0;
+  end:
+    // Shared by the success and failure paths.  input_image is null when
+    // the caller supplied a VAAPI surface; recon_image is null once it has
+    // been moved into the DPB.
+    av_frame_free(&input_image);
+    av_frame_free(&recon_image);
+    av_vaapi_unlock_hardware_context(ctx->hardware_context);
+    return err;
+}
+
+// Configuration attributes passed to the pipeline when creating the encoder:
+// 4:2:0 surfaces, constant-QP rate control.
+// NOTE(review): the packed headers attribute is 0 although this encoder
+// submits packed sequence, picture and slice headers - presumably the
+// corresponding VA_ENC_PACKED_HEADER_* bits belong here; confirm against
+// the libva documentation and the target driver.
+static VAConfigAttrib config_attributes[] = {
+    { .type = VAConfigAttribRTFormat,         .value = VA_RT_FORMAT_YUV420 },
+    { .type = VAConfigAttribRateControl,      .value = VA_RC_CQP           },
+    { .type = VAConfigAttribEncPackedHeaders, .value = 0                   },
+};
+
+// Initialise the encoder (the init callback).
+//
+// Validates the options (a hardware context is mandatory; the level must be
+// an integer in 0..63), derives the aligned picture and CTU dimensions, then
+// with the hardware context locked creates the VAAPI pipeline and one coded
+// data buffer per DPB slot.  Finally the stream-level parameters are set up.
+//
+// Returns 0 on success or a negative AVERROR code.  If a step fails after
+// the pipeline was created, the coded buffers made so far and the pipeline
+// are released again before returning.
+static av_cold int vaapi_hevc_encode_init(AVCodecContext *avctx)
+{
+    VAAPIHEVCEncodeContext *ctx = avctx->priv_data;
+    VAStatus vas;
+    int i, err;
+    int pipeline_ready = 0; // Set once ff_vaapi_pipeline_init() succeeded.
+    int coded_buffers  = 0; // Number of coded data buffers created so far.
+
+    if(ctx->options.hardware_context == 0) {
+        av_log(ctx, AV_LOG_ERROR, "VAAPI encode requires hardware context.\n");
+        return AVERROR(EINVAL);
+    }
+    ctx->hardware_context =
+        (AVVAAPIHardwareContext*)ctx->options.hardware_context;
+
+    ctx->avctx = avctx;
+
+    ctx->va_profile = VAProfileHEVCMain;
+    ctx->level = -1;
+    if(sscanf(ctx->options.level, "%d", &ctx->level) <= 0 ||
+       ctx->level < 0 || ctx->level > 63) {
+        av_log(ctx, AV_LOG_ERROR, "Invalid level '%s'.\n", ctx->options.level);
+        return AVERROR(EINVAL);
+    }
+
+    // Constant-QP is the only rate control mode used; fall back to QP 26
+    // when none was requested.
+    ctx->rc_mode = VA_RC_CQP;
+    if(ctx->options.qp < 0)
+        ctx->options.qp = 26;
+    av_log(ctx, AV_LOG_INFO, "Using constant-QP mode at %d.\n",
+           ctx->options.qp);
+
+    ctx->input_width  = avctx->width;
+    ctx->input_height = avctx->height;
+
+    // Surfaces are padded to a multiple of 16; the CTU counts are in units
+    // of 32x32 blocks.
+    ctx->aligned_width  = (ctx->input_width  + 15) / 16 * 16;
+    ctx->aligned_height = (ctx->input_height + 15) / 16 * 16;
+    ctx->ctu_width  = (ctx->aligned_width  + 31) / 32;
+    ctx->ctu_height = (ctx->aligned_height + 31) / 32;
+
+    ctx->fixed_qp = ctx->options.qp;
+
+    // Negative POC marks "no frame encoded yet".
+    ctx->poc = -1;
+
+    {
+        AVVAAPIPipelineConfig *config = &ctx->va_config;
+
+        config->profile    = ctx->va_profile;
+        config->entrypoint = VAEntrypointEncSlice;
+
+        config->attribute_count = FF_ARRAY_ELEMS(config_attributes);
+        config->attributes = config_attributes;
+    }
+
+    {
+        // Reconstructed pictures, one per DPB slot.
+        AVVAAPISurfaceConfig *config = &ctx->output_config;
+
+        config->rt_format = VA_RT_FORMAT_YUV420;
+        config->av_format = AV_PIX_FMT_VAAPI;
+
+        config->image_format.fourcc = VA_FOURCC_NV12;
+        config->image_format.bits_per_pixel = 12;
+
+        config->count  = MAX_DPB_PICS;
+        config->width  = ctx->aligned_width;
+        config->height = ctx->aligned_height;
+
+        config->attribute_count = 0;
+    }
+
+    if(avctx->pix_fmt == AV_PIX_FMT_VAAPI) {
+        // Just use the input surfaces directly.
+        ctx->input_is_vaapi = 1;
+
+    } else {
+        // Surfaces to copy software input frames into.
+        AVVAAPISurfaceConfig *config = &ctx->input_config;
+
+        config->rt_format = VA_RT_FORMAT_YUV420;
+        config->av_format = AV_PIX_FMT_VAAPI;
+
+        config->image_format.fourcc = VA_FOURCC_NV12;
+        config->image_format.bits_per_pixel = 12;
+
+        config->count  = INPUT_PICS;
+        config->width  = ctx->aligned_width;
+        config->height = ctx->aligned_height;
+
+        config->attribute_count = 0;
+
+        ctx->input_is_vaapi = 0;
+    }
+
+    av_vaapi_lock_hardware_context(ctx->hardware_context);
+
+    err = ff_vaapi_pipeline_init(&ctx->va_codec, ctx->hardware_context,
+                                 &ctx->va_config,
+                                 ctx->input_is_vaapi ? 0 : &ctx->input_config,
+                                 &ctx->output_config);
+    if(err) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to create codec: %d (%s).\n",
+               err, av_err2str(err));
+        goto fail;
+    }
+    pipeline_ready = 1;
+
+    for(i = 0; i < MAX_DPB_PICS; i++) {
+        vas = vaCreateBuffer(ctx->hardware_context->display,
+                             ctx->va_codec.context_id,
+                             VAEncCodedBufferType,
+                             1048576, 1, 0, &ctx->dpb[i].coded_data_id);
+        if(vas != VA_STATUS_SUCCESS) {
+            av_log(ctx, AV_LOG_ERROR, "Failed to create buffer for "
+                   "coded data: %d (%s).\n", vas, vaErrorStr(vas));
+            err = AVERROR_EXTERNAL;
+            goto fail;
+        }
+        coded_buffers = i + 1;
+        av_log(ctx, AV_LOG_TRACE, "Coded data buffer %d is %#x.\n",
+               i, ctx->dpb[i].coded_data_id);
+    }
+
+    av_vaapi_unlock_hardware_context(ctx->hardware_context);
+
+    av_log(ctx, AV_LOG_INFO, "Started VAAPI H.265 encoder.\n");
+
+    vaapi_hevc_encode_init_stream(ctx);
+
+    return 0;
+
+  fail:
+    // Undo whatever was created before the failure; the lock is still held.
+    for(i = 0; i < coded_buffers; i++)
+        vaDestroyBuffer(ctx->hardware_context->display,
+                        ctx->dpb[i].coded_data_id);
+    if(pipeline_ready)
+        ff_vaapi_pipeline_uninit(&ctx->va_codec);
+    av_vaapi_unlock_hardware_context(ctx->hardware_context);
+    return err;
+}
+
+// Tear down the encoder (the close callback).
+//
+// With the hardware context locked, releases the reconstructed frames still
+// held in the DPB and the coded data buffers created at init time, then
+// destroys the pipeline.  Failures are logged but not propagated: the
+// function always returns 0.
+static av_cold int vaapi_hevc_encode_close(AVCodecContext *avctx)
+{
+    VAAPIHEVCEncodeContext *ctx = avctx->priv_data;
+    VAStatus vas;
+    int i, err;
+
+    av_vaapi_lock_hardware_context(ctx->hardware_context);
+
+    // Release everything which belongs to the pipeline before it goes away.
+    for(i = 0; i < MAX_DPB_PICS; i++) {
+        av_frame_unref(&ctx->dpb[i].avframe);
+
+        vas = vaDestroyBuffer(ctx->hardware_context->display,
+                              ctx->dpb[i].coded_data_id);
+        if(vas != VA_STATUS_SUCCESS) {
+            av_log(ctx, AV_LOG_WARNING, "Failed to destroy coded data "
+                   "buffer %d: %d (%s).\n", i, vas, vaErrorStr(vas));
+        }
+    }
+
+    err = ff_vaapi_pipeline_uninit(&ctx->va_codec);
+    if(err) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to destroy codec: %d (%s).\n",
+               err, av_err2str(err));
+    }
+
+    av_vaapi_unlock_hardware_context(ctx->hardware_context);
+
+    return 0;
+}
+
+// User-settable options; each is stored in the options member of the
+// private context.
+#define OFFSET(member) offsetof(VAAPIHEVCEncodeContext, options.member)
+#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
+static const AVOption vaapi_hevc_options[] = {
+    // Passed as an integer; init casts it to an AVVAAPIHardwareContext
+    // pointer and rejects zero.
+    {
+        "hardware_context", "VAAPI hardware context",
+        OFFSET(hardware_context), AV_OPT_TYPE_INT64, { .i64 = 0 },
+        INT64_MIN, INT64_MAX, AV_OPT_FLAG_VIDEO_PARAM
+    },
+    // Parsed as a decimal integer by init.
+    {
+        "level", "Set H.265 level",
+        OFFSET(level), AV_OPT_TYPE_STRING, { .str = "52" },
+        0, 0, FLAGS
+    },
+    // -1 (the default) lets init choose the QP.
+    {
+        "qp", "Use constant quantisation parameter",
+        OFFSET(qp), AV_OPT_TYPE_INT, { .i64 = -1 },
+        -1, MAX_QP, FLAGS
+    },
+    {
+        "idr_interval", "Number of frames between IDR frames (0 = all intra)",
+        OFFSET(idr_interval), AV_OPT_TYPE_INT, { .i64 = -1 },
+        -1, INT_MAX, FLAGS
+    },
+    { NULL }
+};
+
+// AVClass for the private context, giving access to the option table and
+// the name used for this encoder's log messages.
+static const AVClass vaapi_hevc_class = {
+    .class_name = "VAAPI/H.265",
+    .version    = LIBAVUTIL_VERSION_INT,
+    .item_name  = av_default_item_name,
+    .option     = vaapi_hevc_options,
+};
+
+// Codec registration entry.  Input may be either VAAPI surfaces, which are
+// used directly, or NV12 frames in system memory, which are copied to a
+// surface first.
+AVCodec ff_hevc_vaapi_encoder = {
+    .name           = "vaapi_hevc",
+    .long_name      = NULL_IF_CONFIG_SMALL("H.265 (VAAPI)"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_HEVC,
+    .priv_class     = &vaapi_hevc_class,
+    .priv_data_size = sizeof(VAAPIHEVCEncodeContext),
+    .init           = vaapi_hevc_encode_init,
+    .encode2        = vaapi_hevc_encode_picture,
+    .close          = vaapi_hevc_encode_close,
+    .pix_fmts       = (const enum AVPixelFormat[]) {
+        AV_PIX_FMT_VAAPI, AV_PIX_FMT_NV12, AV_PIX_FMT_NONE,
+    },
+};
-- 
2.6.4




More information about the ffmpeg-devel mailing list