--- configure | 2 + libavcodec/Makefile | 1 + libavcodec/allcodecs.c | 1 + libavcodec/vaapi_enc_hevc.c | 1665 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 1669 insertions(+) create mode 100644 libavcodec/vaapi_enc_hevc.c
diff --git a/configure b/configure index 9f8d9d4..62eca15 100755 --- a/configure +++ b/configure @@ -2517,6 +2517,7 @@ hevc_dxva2_hwaccel_select="hevc_decoder" hevc_qsv_hwaccel_deps="libmfx" hevc_vaapi_hwaccel_deps="vaapi VAPictureParameterBufferHEVC" hevc_vaapi_hwaccel_select="hevc_decoder" +hevc_vaapi_encoder_deps="vaapi_recent VAEncPictureParameterBufferHEVC" hevc_vdpau_hwaccel_deps="vdpau VdpPictureInfoHEVC" hevc_vdpau_hwaccel_select="hevc_decoder" mpeg_vdpau_decoder_deps="vdpau" @@ -5382,6 +5383,7 @@ check_type "d3d9.h dxva2api.h" DXVA2_ConfigPictureDecode -D_WIN32_WINNT=0x0602 check_type "va/va.h" "VAPictureParameterBufferHEVC" check_type "va/va.h" "VADecPictureParameterBufferVP9" check_type "va/va.h" "VAEncPictureParameterBufferH264" +check_type "va/va.h" "VAEncPictureParameterBufferHEVC" check_type "vdpau/vdpau.h" "VdpPictureInfoHEVC" diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 0b0a4e4..a15ac31 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -313,6 +313,7 @@ OBJS-$(CONFIG_HEVC_DECODER) += hevc.o hevc_mvs.o hevc_ps.o hevc_sei.o hevcdsp.o hevc_filter.o hevc_parse.o hevc_data.o OBJS-$(CONFIG_HEVC_QSV_DECODER) += qsvdec_h2645.o OBJS-$(CONFIG_HEVC_QSV_ENCODER) += qsvenc_hevc.o hevc_ps_enc.o hevc_parse.o +OBJS-$(CONFIG_HEVC_VAAPI_ENCODER) += vaapi_enc_hevc.o OBJS-$(CONFIG_HNM4_VIDEO_DECODER) += hnm4video.o OBJS-$(CONFIG_HQ_HQA_DECODER) += hq_hqa.o hq_hqadata.o hq_hqadsp.o \ canopus.o diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c index f27c099..7bda0dc 100644 --- a/libavcodec/allcodecs.c +++ b/libavcodec/allcodecs.c @@ -206,6 +206,7 @@ void avcodec_register_all(void) REGISTER_ENCDEC (HAP, hap); REGISTER_DECODER(HEVC, hevc); REGISTER_DECODER(HEVC_QSV, hevc_qsv); + REGISTER_ENCODER(HEVC_VAAPI, hevc_vaapi); REGISTER_DECODER(HNM4_VIDEO, hnm4_video); REGISTER_DECODER(HQ_HQA, hq_hqa); REGISTER_DECODER(HQX, hqx); diff --git a/libavcodec/vaapi_enc_hevc.c b/libavcodec/vaapi_enc_hevc.c new file mode 100644 index 0000000..a57eb94 
--- /dev/null +++ b/libavcodec/vaapi_enc_hevc.c @@ -0,0 +1,1665 @@ +/* + * VAAPI H.265 encoder. + * + * Copyright (C) 2016 Mark Thompson <m...@jkqxz.net> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/opt.h" +#include "libavutil/pixdesc.h" + +#include "avcodec.h" +#include "golomb.h" +#include "hevc.h" +#include "put_bits.h" +#include "vaapi_support.h" + +#define MAX_DPB_PICS 16 +#define INPUT_PICS 2 + +#define bool unsigned char +#define MAX_ST_REF_PIC_SETS 32 +#define MAX_LAYERS 1 + + +// This structure contains all possibly-useful per-sequence syntax elements +// which are not already contained in the various VAAPI structures. +typedef struct VAAPIHEVCEncodeMiscSequenceParams { + + // Parameter set IDs. + unsigned int video_parameter_set_id; + unsigned int seq_parameter_set_id; + + // Layering. + unsigned int vps_max_layers_minus1; + unsigned int vps_max_sub_layers_minus1; + bool vps_temporal_id_nesting_flag; + unsigned int vps_max_layer_id; + unsigned int vps_num_layer_sets_minus1; + unsigned int sps_max_sub_layers_minus1; + bool sps_temporal_id_nesting_flag; + bool layer_id_included_flag[MAX_LAYERS][64]; + + // Profile/tier/level parameters. 
+ bool general_profile_compatibility_flag[32]; + bool general_progressive_source_flag; + bool general_interlaced_source_flag; + bool general_non_packed_constraint_flag; + bool general_frame_only_constraint_flag; + bool general_inbld_flag; + + // Decode/display ordering parameters. + unsigned int log2_max_pic_order_cnt_lsb_minus4; + bool vps_sub_layer_ordering_info_present_flag; + unsigned int vps_max_dec_pic_buffering_minus1[MAX_LAYERS]; + unsigned int vps_max_num_reorder_pics[MAX_LAYERS]; + unsigned int vps_max_latency_increase_plus1[MAX_LAYERS]; + bool sps_sub_layer_ordering_info_present_flag; + unsigned int sps_max_dec_pic_buffering_minus1[MAX_LAYERS]; + unsigned int sps_max_num_reorder_pics[MAX_LAYERS]; + unsigned int sps_max_latency_increase_plus1[MAX_LAYERS]; + + // Timing information. + bool vps_timing_info_present_flag; + unsigned int vps_num_units_in_tick; + unsigned int vps_time_scale; + bool vps_poc_proportional_to_timing_flag; + unsigned int vps_num_ticks_poc_diff_minus1; + + // Cropping information. + bool conformance_window_flag; + unsigned int conf_win_left_offset; + unsigned int conf_win_right_offset; + unsigned int conf_win_top_offset; + unsigned int conf_win_bottom_offset; + + // Short-term reference picture sets. + unsigned int num_short_term_ref_pic_sets; + struct { + unsigned int num_negative_pics; + unsigned int num_positive_pics; + + unsigned int delta_poc_s0_minus1[MAX_DPB_PICS]; + bool used_by_curr_pic_s0_flag[MAX_DPB_PICS]; + + unsigned int delta_poc_s1_minus1[MAX_DPB_PICS]; + bool used_by_curr_pic_s1_flag[MAX_DPB_PICS]; + } st_ref_pic_set[MAX_ST_REF_PIC_SETS]; + + // Long-term reference pictures. + bool long_term_ref_pics_present_flag; + unsigned int num_long_term_ref_pics_sps; + struct { + unsigned int lt_ref_pic_poc_lsb_sps; + bool used_by_curr_pic_lt_sps_flag; + } lt_ref_pic; + + // Deblocking filter control. 
+ bool deblocking_filter_control_present_flag; + bool deblocking_filter_override_enabled_flag; + bool pps_deblocking_filter_disabled_flag; + int pps_beta_offset_div2; + int pps_tc_offset_div2; + + // Video Usability Information. + bool vui_parameters_present_flag; + bool aspect_ratio_info_present_flag; + unsigned int aspect_ratio_idc; + unsigned int sar_width; + unsigned int sar_height; + bool video_signal_type_present_flag; + unsigned int video_format; + bool video_full_range_flag; + bool colour_description_present_flag; + unsigned int colour_primaries; + unsigned int transfer_characteristics; + unsigned int matrix_coeffs; + + // Oddments. + bool uniform_spacing_flag; + bool output_flag_present_flag; + bool cabac_init_present_flag; + unsigned int num_extra_slice_header_bits; + bool lists_modification_present_flag; + bool pps_slice_chroma_qp_offsets_present_flag; + bool pps_slice_chroma_offset_list_enabled_flag; + +} VAAPIHEVCEncodeMiscSequenceParams; + +// This structure contains all possibly-useful per-slice syntax elements +// which are not already contained in the various VAAPI structures. +typedef struct { + // Slice segments. + bool first_slice_segment_in_pic_flag; + unsigned int slice_segment_address; + + // Short-term reference picture sets. + bool short_term_ref_pic_set_sps_flag; + unsigned int short_term_ref_pic_idx; + + // Deblocking filter. + bool deblocking_filter_override_flag; + + // Oddments. 
+ bool slice_reserved_flag[8]; + bool no_output_of_prior_pics_flag; + bool pic_output_flag; + +} VAAPIHEVCEncodeMiscPictureParams; + +typedef struct VAAPIHEVCEncodeFrame { + AVFrame *frame; + VASurfaceID surface_id; + + int poc; + enum { + FRAME_TYPE_I = I_SLICE, + FRAME_TYPE_P = P_SLICE, + FRAME_TYPE_B = B_SLICE, + } type; + + VAPictureHEVC pic; + + VAEncPictureParameterBufferHEVC pic_params; + VABufferID pic_params_id; + + VAEncSliceParameterBufferHEVC slice_params; + VABufferID slice_params_id; + + VAAPIHEVCEncodeMiscPictureParams misc_params; + + VABufferID coded_data_id; + + struct VAAPIHEVCEncodeFrame *refa, *refb; +} VAAPIHEVCEncodeFrame; + +typedef struct VAAPIHEVCEncodeContext { + const AVClass *class; + const AVCodecContext *avctx; + + AVVAAPIHardwareContext *hardware_context; + + AVVAAPIPipelineConfig codec_config; + AVVAAPIPipelineContext codec; + + AVVAAPISurfaceConfig input_config; + AVVAAPISurfacePool input_pool; + AVVAAPISurfaceConfig recon_config; + AVVAAPISurfacePool recon_pool; + + int input_is_vaapi; + + VAProfile va_profile; + int level; + int rc_mode; + int fixed_qp; + + int input_width; + int input_height; + + int aligned_width; + int aligned_height; + int ctu_width; + int ctu_height; + + VAEncSequenceParameterBufferHEVC seq_params; + VABufferID seq_params_id; + + VAEncMiscParameterRateControl rc_params; + VAEncMiscParameterBuffer rc_params_buffer; + VABufferID rc_params_id; + + VAEncPictureParameterBufferHEVC pic_params; + VABufferID pic_params_id; + + VAAPIHEVCEncodeMiscSequenceParams misc_params; + + int poc; + + VAAPIHEVCEncodeFrame dpb[MAX_DPB_PICS]; + int current_frame; + int previous_frame; + + struct { + int64_t hardware_context; + + const char *profile; + const char *level; + int qp; + int idr_interval; + } options; + +} VAAPIHEVCEncodeContext; + + +// Set to 1 to log a full trace of all bitstream output (debugging only). 
+#if 0 +static void trace_hevc_write_u(PutBitContext *s, unsigned int width, + unsigned int value, const char *name) +{ + av_log(0, AV_LOG_DEBUG, "H.265 bitstream [%3d]: %4u u(%u) / %s\n", + put_bits_count(s), value, width, name); + put_bits(s, width, value); +} +static void trace_hevc_write_ue(PutBitContext *s, + unsigned int value, const char *name) +{ + av_log(0, AV_LOG_DEBUG, "H.265 bitstream [%3d]: %4u ue(v) / %s\n", + put_bits_count(s), value, name); + set_ue_golomb(s, value); +} +static void trace_hevc_write_se(PutBitContext *s, + int value, const char *name) +{ + av_log(0, AV_LOG_DEBUG, "H.265 bitstream [%3d]: %+4d se(v) / %s\n", + put_bits_count(s), value, name); + set_se_golomb(s, value); +} + +#define hevc_write_u(pbc, width, value, name) \ + trace_hevc_write_u(pbc, width, value, #name) +#define hevc_write_ue(pbc, value, name) \ + trace_hevc_write_ue(pbc, value, #name) +#define hevc_write_se(pbc, value, name) \ + trace_hevc_write_se(pbc, value, #name) +#else +#define hevc_write_u(pbc, width, value, name) put_bits(pbc, width, value) +#define hevc_write_ue(pbc, value, name) set_ue_golomb(pbc, value) +#define hevc_write_se(pbc, value, name) set_se_golomb(pbc, value) +#endif + +#define u(width, ...) hevc_write_u(s, width, __VA_ARGS__) +#define ue(...) hevc_write_ue(s, __VA_ARGS__) +#define se(...) 
hevc_write_se(s, __VA_ARGS__) + +#define seq_var(name) seq->name, name +#define seq_field(name) seq->seq_fields.bits.name, name +#define pic_var(name) pic->name, name +#define pic_field(name) pic->pic_fields.bits.name, name +#define slice_var(name) slice->name, name +#define slice_field(name) slice->slice_fields.bits.name, name +#define misc_var(name) misc->name, name +#define miscs_var(name) miscs->name, name + +static void vaapi_hevc_write_nal_unit_header(PutBitContext *s, + int nal_unit_type) +{ + u(1, 0, forbidden_zero_bit); + u(6, nal_unit_type, nal_unit_type); + u(6, 0, nuh_layer_id); + u(3, 1, nuh_temporal_id_plus1); +} + +static void vaapi_hevc_write_rbsp_trailing_bits(PutBitContext *s) +{ + u(1, 1, rbsp_stop_one_bit); + while(put_bits_count(s) & 7) + u(1, 0, rbsp_alignment_zero_bit); +} + +static void vaapi_hevc_write_profile_tier_level(PutBitContext *s, + VAAPIHEVCEncodeContext *ctx) +{ + VAEncSequenceParameterBufferHEVC *seq = &ctx->seq_params; + VAAPIHEVCEncodeMiscSequenceParams *misc = &ctx->misc_params; + int j; + + if(1) { + u(2, 0, general_profile_space); + u(1, seq->general_tier_flag, general_tier_flag); + u(5, seq->general_profile_idc, general_profile_idc); + + for(j = 0; j < 32; j++) { + u(1, misc_var(general_profile_compatibility_flag[j])); + } + + u(1, misc_var(general_progressive_source_flag)); + u(1, misc_var(general_interlaced_source_flag)); + u(1, misc_var(general_non_packed_constraint_flag)); + u(1, misc_var(general_frame_only_constraint_flag)); + + if(0) { + // Not main profile. + // Lots of extra constraint flags. + } else { + // put_bits only handles up to 31 bits. + u(23, 0, general_reserved_zero_43bits); + u(20, 0, general_reserved_zero_43bits); + } + + if(seq->general_profile_idc >= 1 && seq->general_profile_idc <= 5) { + u(1, misc_var(general_inbld_flag)); + } else { + u(1, 0, general_reserved_zero_bit); + } + } + + u(8, seq->general_level_idc, general_level_idc); + + // No sublayers. 
+}
+
+// Write a complete video parameter set NAL unit into s: NAL unit header,
+// the VPS syntax elements taken from ctx->misc_params (plus the shared
+// profile_tier_level() block), then the RBSP trailing bits.
+// Only syntax that the rest of this file fills in is supported: there are
+// no HRD parameters and no VPS extension.
+static void vaapi_hevc_write_vps(PutBitContext *s,
+                                 VAAPIHEVCEncodeContext *ctx)
+{
+    VAAPIHEVCEncodeMiscSequenceParams *misc = &ctx->misc_params;
+    int i, j;
+
+    vaapi_hevc_write_nal_unit_header(s, NAL_VPS);
+
+    u(4, misc->video_parameter_set_id, vps_video_parameter_set_id);
+
+    u(1, 1, vps_base_layer_internal_flag);
+    u(1, 1, vps_base_layer_available_flag);
+    u(6, misc_var(vps_max_layers_minus1));
+    u(3, misc_var(vps_max_sub_layers_minus1));
+    u(1, misc_var(vps_temporal_id_nesting_flag));
+
+    u(16, 0xffff, vps_reserved_0xffff_16bits);
+
+    vaapi_hevc_write_profile_tier_level(s, ctx);
+
+    // When the ordering info is not signalled per sub-layer, only the entry
+    // for the highest sub-layer is written.
+    u(1, misc_var(vps_sub_layer_ordering_info_present_flag));
+    for(i = (misc->vps_sub_layer_ordering_info_present_flag ?
+             0 : misc->vps_max_sub_layers_minus1);
+        i <= misc->vps_max_sub_layers_minus1; i++) {
+        ue(misc_var(vps_max_dec_pic_buffering_minus1[i]));
+        ue(misc_var(vps_max_num_reorder_pics[i]));
+        ue(misc_var(vps_max_latency_increase_plus1[i]));
+    }
+
+    u(6, misc_var(vps_max_layer_id));
+    ue(misc_var(vps_num_layer_sets_minus1));
+    // layer_id_included_flag[] has only MAX_LAYERS rows and the loop below
+    // indexes rows 1..vps_num_layer_sets_minus1, so refuse to run off the end.
+    av_assert0(misc->vps_num_layer_sets_minus1 < MAX_LAYERS);
+    for(i = 1; i <= misc->vps_num_layer_sets_minus1; i++) {
+        // One flag per layer ID in 0..vps_max_layer_id, bounds inclusive.
+        for(j = 0; j <= misc->vps_max_layer_id; j++)
+            u(1, misc_var(layer_id_included_flag[i][j]));
+    }
+
+    u(1, misc_var(vps_timing_info_present_flag));
+    if(misc->vps_timing_info_present_flag) {
+        // These are 32-bit fields, written as a zero bit followed by the low
+        // 31 bits because put_bits cannot emit 32 bits in one call.
+        u(1, 0, put_bits_hack_zero_bit);
+        u(31, misc_var(vps_num_units_in_tick));
+        u(1, 0, put_bits_hack_zero_bit);
+        u(31, misc_var(vps_time_scale));
+        u(1, misc_var(vps_poc_proportional_to_timing_flag));
+        if(misc->vps_poc_proportional_to_timing_flag) {
+            ue(misc_var(vps_num_ticks_poc_diff_minus1));
+        }
+        ue(0, vps_num_hrd_parameters);
+    }
+
+    u(1, 0, vps_extension_flag);
+
+    vaapi_hevc_write_rbsp_trailing_bits(s);
+}
+
+static void vaapi_hevc_write_st_ref_pic_set(PutBitContext *s,
+                                            VAAPIHEVCEncodeContext *ctx,
+                                            int st_rps_idx)
+{
+    VAAPIHEVCEncodeMiscSequenceParams *misc = &ctx->misc_params;
+#define strps_var(name) misc->st_ref_pic_set[st_rps_idx].name, name
+    int i;
+
+    if(st_rps_idx != 0)
+        u(1,
0, inter_ref_pic_set_prediction_flag); + + if(0) { + // Inter ref pic set prediction. + } else { + ue(strps_var(num_negative_pics)); + ue(strps_var(num_positive_pics)); + + for(i = 0; i < + misc->st_ref_pic_set[st_rps_idx].num_negative_pics; i++) { + ue(strps_var(delta_poc_s0_minus1[i])); + u(1, strps_var(used_by_curr_pic_s0_flag[i])); + } + for(i = 0; i < + misc->st_ref_pic_set[st_rps_idx].num_positive_pics; i++) { + ue(strps_var(delta_poc_s1_minus1[i])); + u(1, strps_var(used_by_curr_pic_s1_flag[i])); + } + } +} + +static void vaapi_hevc_write_vui_parameters(PutBitContext *s, + VAAPIHEVCEncodeContext *ctx) +{ + VAAPIHEVCEncodeMiscSequenceParams *misc = &ctx->misc_params; + + u(1, misc_var(aspect_ratio_info_present_flag)); + if(misc->aspect_ratio_info_present_flag) { + u(8, misc_var(aspect_ratio_idc)); + if(misc->aspect_ratio_idc == 255) { + u(16, misc_var(sar_width)); + u(16, misc_var(sar_height)); + } + } + + u(1, 0, overscan_info_present_flag); + + u(1, misc_var(video_signal_type_present_flag)); + if(misc->video_signal_type_present_flag) { + u(3, misc_var(video_format)); + u(1, misc_var(video_full_range_flag)); + u(1, misc_var(colour_description_present_flag)); + if(misc->colour_description_present_flag) { + u(8, misc_var(colour_primaries)); + u(8, misc_var(transfer_characteristics)); + u(8, misc_var(matrix_coeffs)); + } + } + + u(1, 0, chroma_loc_info_present_flag); + u(1, 0, neutral_chroma_indication_flag); + u(1, 0, field_seq_flag); + u(1, 0, frame_field_info_present_flag); + u(1, 0, default_display_window_flag); + u(1, 0, vui_timing_info_present_flag); + u(1, 0, bitstream_restriction_flag_flag); +} + +static void vaapi_hevc_write_sps(PutBitContext *s, + VAAPIHEVCEncodeContext *ctx) +{ + VAEncSequenceParameterBufferHEVC *seq = &ctx->seq_params; + VAAPIHEVCEncodeMiscSequenceParams *misc = &ctx->misc_params; + int i; + + vaapi_hevc_write_nal_unit_header(s, NAL_SPS); + + u(4, misc->video_parameter_set_id, sps_video_parameter_set_id); + + u(3, 
misc_var(sps_max_sub_layers_minus1)); + u(1, misc_var(sps_temporal_id_nesting_flag)); + + vaapi_hevc_write_profile_tier_level(s, ctx); + + ue(misc->seq_parameter_set_id, sps_seq_parameter_set_id); + ue(seq_field(chroma_format_idc)); + if(seq->seq_fields.bits.chroma_format_idc == 3) + u(1, 0, separate_colour_plane_flag); + + ue(seq_var(pic_width_in_luma_samples)); + ue(seq_var(pic_height_in_luma_samples)); + + u(1, misc_var(conformance_window_flag)); + if(misc->conformance_window_flag) { + ue(misc_var(conf_win_left_offset)); + ue(misc_var(conf_win_right_offset)); + ue(misc_var(conf_win_top_offset)); + ue(misc_var(conf_win_bottom_offset)); + } + + ue(seq_field(bit_depth_luma_minus8)); + ue(seq_field(bit_depth_chroma_minus8)); + + ue(misc_var(log2_max_pic_order_cnt_lsb_minus4)); + + u(1, misc_var(sps_sub_layer_ordering_info_present_flag)); + for(i = (misc->sps_sub_layer_ordering_info_present_flag ? + 0 : misc->sps_max_sub_layers_minus1); + i <= misc->sps_max_sub_layers_minus1; i++) { + ue(misc_var(sps_max_dec_pic_buffering_minus1[i])); + ue(misc_var(sps_max_num_reorder_pics[i])); + ue(misc_var(sps_max_latency_increase_plus1[i])); + } + + ue(seq_var(log2_min_luma_coding_block_size_minus3)); + ue(seq_var(log2_diff_max_min_luma_coding_block_size)); + ue(seq_var(log2_min_transform_block_size_minus2)); + ue(seq_var(log2_diff_max_min_transform_block_size)); + ue(seq_var(max_transform_hierarchy_depth_inter)); + ue(seq_var(max_transform_hierarchy_depth_intra)); + + u(1, seq_field(scaling_list_enabled_flag)); + if(seq->seq_fields.bits.scaling_list_enabled_flag) { + u(1, 0, sps_scaling_list_data_present_flag); + } + + u(1, seq_field(amp_enabled_flag)); + u(1, seq_field(sample_adaptive_offset_enabled_flag)); + + u(1, seq_field(pcm_enabled_flag)); + if(seq->seq_fields.bits.pcm_enabled_flag) { + u(4, seq_var(pcm_sample_bit_depth_luma_minus1)); + u(4, seq_var(pcm_sample_bit_depth_chroma_minus1)); + ue(seq_var(log2_min_pcm_luma_coding_block_size_minus3)); + 
ue(seq->log2_max_pcm_luma_coding_block_size_minus3 - + seq->log2_min_pcm_luma_coding_block_size_minus3, + log2_diff_max_min_pcm_luma_coding_block_size); + u(1, seq_field(pcm_loop_filter_disabled_flag)); + } + + ue(misc_var(num_short_term_ref_pic_sets)); + for(i = 0; i < misc->num_short_term_ref_pic_sets; i++) + vaapi_hevc_write_st_ref_pic_set(s, ctx, i); + + u(1, misc_var(long_term_ref_pics_present_flag)); + if(misc->long_term_ref_pics_present_flag) { + ue(0, num_long_term_ref_pics_sps); + } + + u(1, seq_field(sps_temporal_mvp_enabled_flag)); + u(1, seq_field(strong_intra_smoothing_enabled_flag)); + + u(1, misc_var(vui_parameters_present_flag)); + if(misc->vui_parameters_present_flag) { + vaapi_hevc_write_vui_parameters(s, ctx); + } + + u(1, 0, sps_extension_present_flag); + + vaapi_hevc_write_rbsp_trailing_bits(s); +} + +static void vaapi_hevc_write_pps(PutBitContext *s, + VAAPIHEVCEncodeContext *ctx) +{ + VAEncPictureParameterBufferHEVC *pic = &ctx->pic_params; + VAAPIHEVCEncodeMiscSequenceParams *misc = &ctx->misc_params; + int i; + + vaapi_hevc_write_nal_unit_header(s, NAL_PPS); + + ue(pic->slice_pic_parameter_set_id, pps_pic_parameter_set_id); + ue(misc->seq_parameter_set_id, pps_seq_parameter_set_id); + + u(1, pic_field(dependent_slice_segments_enabled_flag)); + u(1, misc_var(output_flag_present_flag)); + u(3, misc_var(num_extra_slice_header_bits)); + u(1, pic_field(sign_data_hiding_enabled_flag)); + u(1, misc_var(cabac_init_present_flag)); + + ue(pic_var(num_ref_idx_l0_default_active_minus1)); + ue(pic_var(num_ref_idx_l1_default_active_minus1)); + + se(pic->pic_init_qp - 26, init_qp_minus26); + + u(1, pic_field(constrained_intra_pred_flag)); + u(1, pic_field(transform_skip_enabled_flag)); + + u(1, pic_field(cu_qp_delta_enabled_flag)); + if(pic->pic_fields.bits.cu_qp_delta_enabled_flag) + ue(pic_var(diff_cu_qp_delta_depth)); + + se(pic_var(pps_cb_qp_offset)); + se(pic_var(pps_cr_qp_offset)); + + u(1, misc_var(pps_slice_chroma_qp_offsets_present_flag)); + u(1, 
pic_field(weighted_pred_flag)); + u(1, pic_field(weighted_bipred_flag)); + u(1, pic_field(transquant_bypass_enabled_flag)); + u(1, pic_field(tiles_enabled_flag)); + u(1, pic_field(entropy_coding_sync_enabled_flag)); + + if(pic->pic_fields.bits.tiles_enabled_flag) { + ue(pic_var(num_tile_columns_minus1)); + ue(pic_var(num_tile_rows_minus1)); + u(1, misc_var(uniform_spacing_flag)); + if(!misc->uniform_spacing_flag) { + for(i = 0; i < pic->num_tile_columns_minus1; i++) + ue(pic_var(column_width_minus1[i])); + for(i = 0; i < pic->num_tile_rows_minus1; i++) + ue(pic_var(row_height_minus1[i])); + } + u(1, pic_field(loop_filter_across_tiles_enabled_flag)); + } + + u(1, pic_field(pps_loop_filter_across_slices_enabled_flag)); + u(1, misc_var(deblocking_filter_control_present_flag)); + if(misc->deblocking_filter_control_present_flag) { + u(1, misc_var(deblocking_filter_override_enabled_flag)); + u(1, misc_var(pps_deblocking_filter_disabled_flag)); + if(!misc->pps_deblocking_filter_disabled_flag) { + se(misc_var(pps_beta_offset_div2)); + se(misc_var(pps_tc_offset_div2)); + } + } + + u(1, 0, pps_scaling_list_data_present_flag); + // No scaling list data. 
+
+    u(1, misc_var(lists_modification_present_flag));
+    ue(pic_var(log2_parallel_merge_level_minus2));
+    u(1, 0, slice_segment_header_extension_present_flag);
+    u(1, 0, pps_extension_present_flag);
+
+    vaapi_hevc_write_rbsp_trailing_bits(s);
+}
+
+// Write the NAL unit header and slice segment header for the frame
+// `current` into s, ending byte-aligned (a one bit followed by zero bits).
+// Values come from the frame's VAAPI picture/slice parameter structures and
+// from the sequence-level and per-picture misc parameter structures.
+// Syntax this encoder never generates is not implemented and trips
+// av_assert0(0) if the corresponding flags are set: slice-local short-term
+// RPS, long-term reference pictures, reference list modification and
+// pred_weight_table().
+static void vaapi_hevc_write_slice_header(PutBitContext *s,
+                                          VAAPIHEVCEncodeContext *ctx,
+                                          VAAPIHEVCEncodeFrame *current)
+{
+    VAEncSequenceParameterBufferHEVC *seq = &ctx->seq_params;
+    VAEncPictureParameterBufferHEVC *pic = &current->pic_params;
+    VAAPIHEVCEncodeMiscSequenceParams *misc = &ctx->misc_params;
+    VAEncSliceParameterBufferHEVC *slice = &current->slice_params;
+    VAAPIHEVCEncodeMiscPictureParams *miscs = &current->misc_params;
+    int i;
+
+    vaapi_hevc_write_nal_unit_header(s, pic->nal_unit_type);
+
+    u(1, miscs_var(first_slice_segment_in_pic_flag));
+    // Only present for the NAL unit types from NAL_BLA_W_LP up to 23.
+    if(pic->nal_unit_type >= NAL_BLA_W_LP &&
+       pic->nal_unit_type <= 23)
+        u(1, miscs_var(no_output_of_prior_pics_flag));
+
+    ue(slice_var(slice_pic_parameter_set_id));
+
+    if(!miscs->first_slice_segment_in_pic_flag) {
+        if(pic->pic_fields.bits.dependent_slice_segments_enabled_flag)
+            u(1, slice_field(dependent_slice_segment_flag));
+        // Field width is the number of bits needed to address any CTU.
+        u(av_log2((ctx->ctu_width * ctx->ctu_height) - 1) + 1,
+          miscs_var(slice_segment_address));
+    }
+    if(!slice->slice_fields.bits.dependent_slice_segment_flag) {
+        for(i = 0; i < misc->num_extra_slice_header_bits; i++)
+            u(1, miscs_var(slice_reserved_flag[i]));
+
+        ue(slice_var(slice_type));
+        if(misc->output_flag_present_flag)
+            u(1, 1, pic_output_flag);
+        if(seq->seq_fields.bits.separate_colour_plane_flag)
+            u(2, slice_field(colour_plane_id));
+
+        // POC and reference picture set signalling is omitted for IDR
+        // pictures.  Everything after this block applies to every slice,
+        // IDR included, so the block must end here.
+        if(pic->nal_unit_type != NAL_IDR_W_RADL &&
+           pic->nal_unit_type != NAL_IDR_N_LP) {
+            u(4 + misc->log2_max_pic_order_cnt_lsb_minus4,
+              current->poc & ((1 << (misc->log2_max_pic_order_cnt_lsb_minus4 + 4)) - 1),
+              slice_pic_order_cnt_lsb);
+
+            u(1, miscs_var(short_term_ref_pic_set_sps_flag));
+            if(!miscs->short_term_ref_pic_set_sps_flag) {
+                av_assert0(0);
+                // vaapi_hevc_write_st_ref_pic_set(ctx->num_short_term_ref_pic_sets);
+            } else if(misc->num_short_term_ref_pic_sets > 1) {
+                u(av_log2(misc->num_short_term_ref_pic_sets - 1) + 1,
+                  miscs_var(short_term_ref_pic_idx));
+            }
+
+            if(misc->long_term_ref_pics_present_flag) {
+                av_assert0(0);
+            }
+
+            if(seq->seq_fields.bits.sps_temporal_mvp_enabled_flag) {
+                u(1, slice_field(slice_temporal_mvp_enabled_flag));
+            }
+        }
+
+        if(seq->seq_fields.bits.sample_adaptive_offset_enabled_flag) {
+            u(1, slice_field(slice_sao_luma_flag));
+            if(!seq->seq_fields.bits.separate_colour_plane_flag &&
+               seq->seq_fields.bits.chroma_format_idc != 0) {
+                u(1, slice_field(slice_sao_chroma_flag));
+            }
+        }
+
+        if(slice->slice_type == P_SLICE || slice->slice_type == B_SLICE) {
+            u(1, slice_field(num_ref_idx_active_override_flag));
+            if(slice->slice_fields.bits.num_ref_idx_active_override_flag) {
+                ue(slice_var(num_ref_idx_l0_active_minus1));
+                if(slice->slice_type == B_SLICE) {
+                    ue(slice_var(num_ref_idx_l1_active_minus1));
+                }
+            }
+
+            if(misc->lists_modification_present_flag) {
+                av_assert0(0);
+                // ref_pic_lists_modification()
+            }
+            if(slice->slice_type == B_SLICE) {
+                u(1, slice_field(mvd_l1_zero_flag));
+            }
+            if(misc->cabac_init_present_flag) {
+                u(1, slice_field(cabac_init_flag));
+            }
+            if(slice->slice_fields.bits.slice_temporal_mvp_enabled_flag) {
+                if(slice->slice_type == B_SLICE)
+                    u(1, slice_field(collocated_from_l0_flag));
+                // NOTE(review): the specification only codes collocated_ref_idx
+                // when the selected list has more than one active entry; it
+                // is written unconditionally here -- confirm against callers.
+                ue(pic->collocated_ref_pic_index, collocated_ref_idx);
+            }
+            if((pic->pic_fields.bits.weighted_pred_flag &&
+                slice->slice_type == P_SLICE) ||
+               (pic->pic_fields.bits.weighted_bipred_flag &&
+                slice->slice_type == B_SLICE)) {
+                av_assert0(0);
+                // pred_weight_table()
+            }
+            // Present in every P and B slice, independent of weighted
+            // prediction.
+            ue(5 - slice->max_num_merge_cand, five_minus_max_num_merge_cand);
+        }
+
+        se(slice_var(slice_qp_delta));
+        if(misc->pps_slice_chroma_qp_offsets_present_flag) {
+            se(slice_var(slice_cb_qp_offset));
+            se(slice_var(slice_cr_qp_offset));
+        }
+        if(misc->pps_slice_chroma_offset_list_enabled_flag) {
+            u(1, 0, cu_chroma_qp_offset_enabled_flag);
+        }
+        if(misc->deblocking_filter_override_enabled_flag) {
+            u(1, miscs_var(deblocking_filter_override_flag));
+        }
+        if(miscs->deblocking_filter_override_flag) {
+            u(1, slice_field(slice_deblocking_filter_disabled_flag));
+            if(!slice->slice_fields.bits.slice_deblocking_filter_disabled_flag) {
+                se(slice_var(slice_beta_offset_div2));
+                se(slice_var(slice_tc_offset_div2));
+            }
+        }
+        // The flag is only coded when some in-loop filter is active for this
+        // slice: SAO on either component, or deblocking NOT disabled.
+        if(pic->pic_fields.bits.pps_loop_filter_across_slices_enabled_flag &&
+           (slice->slice_fields.bits.slice_sao_luma_flag ||
+            slice->slice_fields.bits.slice_sao_chroma_flag ||
+            !slice->slice_fields.bits.slice_deblocking_filter_disabled_flag)) {
+            u(1, slice_field(slice_loop_filter_across_slices_enabled_flag));
+        }
+
+        if(pic->pic_fields.bits.tiles_enabled_flag ||
+           pic->pic_fields.bits.entropy_coding_sync_enabled_flag) {
+            // num_entry_point_offsets
+        }
+
+        if(0) {
+            // slice_segment_header_extension_length
+        }
+    }
+
+    u(1, 1, alignment_bit_equal_to_one);
+    while(put_bits_count(s) & 7)
+        u(1, 0, alignment_bit_equal_to_zero);
+}
+
+static size_t vaapi_hevc_nal_unit_to_byte_stream(uint8_t *dst, uint8_t *src, size_t len)
+{
+    size_t dp, sp;
+    int zero_run = 0;
+
+    // Start code.
+    dst[0] = dst[1] = dst[2] = 0;
+    dst[3] = 1;
+    dp = 4;
+
+    // Copy the payload, tracking how many zero bytes were just emitted
+    // (saturating at two).
+    for(sp = 0; sp < len; sp++) {
+        if(zero_run < 2) {
+            if(src[sp] == 0)
+                ++zero_run;
+            else
+                zero_run = 0;
+        } else {
+            // Two zeroes followed by a byte in 0..3 would look like a start
+            // code prefix, so break the sequence with an inserted 0x03.
+            if((src[sp] & ~3) == 0) {
+                // emulation_prevention_three_byte
+                dst[dp++] = 3;
+            }
+            zero_run = src[sp] == 0;
+        }
+        dst[dp++] = src[sp];
+    }
+
+    // Number of bytes written to dst, start code included.
+    return dp;
+}
+
+// Submit one packed header to the encode context: a
+// VAEncPackedHeaderParameterBuffer describing it (type, length in bits,
+// emulation prevention already applied) followed by the header bytes
+// themselves, both passed to vaRenderPicture().
+// data holds at least (bit_len + 7) / 8 bytes.
+// Returns 0 on success, AVERROR_EXTERNAL if any libva call fails.
+static int vaapi_hevc_render_packed_header(VAAPIHEVCEncodeContext *ctx, int type,
+                                           char *data, size_t bit_len)
+{
+    VAStatus vas;
+    VABufferID id_list[2];
+    VAEncPackedHeaderParameterBuffer buffer = {
+        .type = type,
+        .bit_length = bit_len,
+        .has_emulation_bytes = 1,
+    };
+
+    // The buffer size must be that of the structure itself, not of a pointer
+    // to it.
+    vas = vaCreateBuffer(ctx->hardware_context->display, ctx->codec.context_id,
+                         VAEncPackedHeaderParameterBufferType,
+                         sizeof(buffer), 1, &buffer, &id_list[0]);
+    if(vas != VA_STATUS_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to create parameter buffer for packed "
+               "header (type %d): %d (%s).\n", type, vas, vaErrorStr(vas));
+        return AVERROR_EXTERNAL;
+    }
+
+    vas = vaCreateBuffer(ctx->hardware_context->display, ctx->codec.context_id,
+                         VAEncPackedHeaderDataBufferType,
+                         (bit_len + 7) / 8, 1, data, &id_list[1]);
+    if(vas != VA_STATUS_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to create data buffer for packed "
+               "header (type %d): %d (%s).\n", type, vas, vaErrorStr(vas));
+        // The parameter buffer was never submitted, so it is still ours to
+        // release.
+        vaDestroyBuffer(ctx->hardware_context->display, id_list[0]);
+        return AVERROR_EXTERNAL;
+    }
+
+    av_log(ctx, AV_LOG_DEBUG, "Packed header buffer (%d) is %#x/%#x "
+           "(%zu bits).\n", type, id_list[0], id_list[1], bit_len);
+
+    vas = vaRenderPicture(ctx->hardware_context->display, ctx->codec.context_id,
+                          id_list, 2);
+    if(vas != VA_STATUS_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to render packed "
+               "header (type %d): %d (%s).\n", type, vas, vaErrorStr(vas));
+        return AVERROR_EXTERNAL;
+    }
+
+    return 0;
+}
+
+static int vaapi_hevc_render_packed_vps_sps(VAAPIHEVCEncodeContext *ctx)
+{
+    PutBitContext pbc, *s = &pbc;
+    uint8_t tmp[256];
+    uint8_t buf[512];
+    size_t byte_len, nal_len;
+
+    init_put_bits(s, tmp, sizeof(tmp));
+
vaapi_hevc_write_vps(s, ctx); + nal_len = put_bits_count(s); + flush_put_bits(s); + byte_len = vaapi_hevc_nal_unit_to_byte_stream(buf, tmp, nal_len / 8); + + init_put_bits(s, tmp, sizeof(tmp)); + vaapi_hevc_write_sps(s, ctx); + nal_len = put_bits_count(s); + flush_put_bits(s); + byte_len += vaapi_hevc_nal_unit_to_byte_stream(buf + byte_len, tmp, nal_len / 8); + + return vaapi_hevc_render_packed_header(ctx, VAEncPackedHeaderSequence, + buf, byte_len * 8); +} + +static int vaapi_hevc_render_packed_pps(VAAPIHEVCEncodeContext *ctx) +{ + PutBitContext pbc, *s = &pbc; + uint8_t tmp[256]; + uint8_t buf[512]; + size_t byte_len, nal_len; + + init_put_bits(s, tmp, sizeof(tmp)); + vaapi_hevc_write_pps(s, ctx); + nal_len = put_bits_count(s); + flush_put_bits(s); + byte_len = vaapi_hevc_nal_unit_to_byte_stream(buf, tmp, nal_len / 8); + + return vaapi_hevc_render_packed_header(ctx, VAEncPackedHeaderPicture, + buf, byte_len * 8); +} + +static int vaapi_hevc_render_packed_slice(VAAPIHEVCEncodeContext *ctx, + VAAPIHEVCEncodeFrame *current) +{ + PutBitContext pbc, *s = &pbc; + uint8_t tmp[256]; + uint8_t buf[512]; + size_t byte_len, nal_len; + + init_put_bits(s, tmp, sizeof(tmp)); + vaapi_hevc_write_slice_header(s, ctx, current); + nal_len = put_bits_count(s); + flush_put_bits(s); + byte_len = vaapi_hevc_nal_unit_to_byte_stream(buf, tmp, nal_len / 8); + + return vaapi_hevc_render_packed_header(ctx, VAEncPackedHeaderSlice, + buf, byte_len * 8); +} + +static int vaapi_hevc_render_sequence(VAAPIHEVCEncodeContext *ctx) +{ + VAStatus vas; + VAEncSequenceParameterBufferHEVC *seq = &ctx->seq_params; + + vas = vaCreateBuffer(ctx->hardware_context->display, ctx->codec.context_id, + VAEncSequenceParameterBufferType, + sizeof(*seq), 1, seq, &ctx->seq_params_id); + if(vas != VA_STATUS_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Failed to create buffer for sequence " + "parameters: %d (%s).\n", vas, vaErrorStr(vas)); + return AVERROR_EXTERNAL; + } + av_log(ctx, AV_LOG_DEBUG, "Sequence parameter 
buffer is %#x.\n",
+           ctx->seq_params_id);
+
+    vas = vaRenderPicture(ctx->hardware_context->display, ctx->codec.context_id,
+                          &ctx->seq_params_id, 1);
+    if(vas != VA_STATUS_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to send sequence parameters: "
+               "%d (%s).\n", vas, vaErrorStr(vas));
+        return AVERROR_EXTERNAL;
+    }
+
+    return 0;
+}
+
+// Create a VA picture parameter buffer from current->pic_params and submit
+// it with vaRenderPicture().  The buffer ID is stored in the context
+// (ctx->pic_params_id), not in the frame.
+// Returns 0 on success, AVERROR_EXTERNAL if either libva call fails.
+static int vaapi_hevc_render_picture(VAAPIHEVCEncodeContext *ctx,
+                                     VAAPIHEVCEncodeFrame *current)
+{
+    VAStatus vas;
+    VAEncPictureParameterBufferHEVC *pic = &current->pic_params;
+
+    vas = vaCreateBuffer(ctx->hardware_context->display, ctx->codec.context_id,
+                         VAEncPictureParameterBufferType,
+                         sizeof(*pic), 1, pic, &ctx->pic_params_id);
+    if(vas != VA_STATUS_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to create buffer for picture "
+               "parameters: %d (%s).\n", vas, vaErrorStr(vas));
+        return AVERROR_EXTERNAL;
+    }
+    av_log(ctx, AV_LOG_DEBUG, "Picture parameter buffer is %#x.\n",
+           ctx->pic_params_id);
+
+    vas = vaRenderPicture(ctx->hardware_context->display, ctx->codec.context_id,
+                          &ctx->pic_params_id, 1);
+    if(vas != VA_STATUS_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to send picture parameters: "
+               "%d (%s).\n", vas, vaErrorStr(vas));
+        return AVERROR_EXTERNAL;
+    }
+
+    return 0;
+}
+
+// Create a VA slice parameter buffer from current->slice_params and submit
+// it with vaRenderPicture().  The buffer ID is stored in the frame
+// (current->slice_params_id).
+// Returns 0 on success, AVERROR_EXTERNAL if either libva call fails.
+static int vaapi_hevc_render_slice(VAAPIHEVCEncodeContext *ctx,
+                                   VAAPIHEVCEncodeFrame *current)
+{
+    VAStatus vas;
+    VAEncSliceParameterBufferHEVC *slice = &current->slice_params;
+
+    vas = vaCreateBuffer(ctx->hardware_context->display, ctx->codec.context_id,
+                         VAEncSliceParameterBufferType,
+                         sizeof(*slice), 1, slice, &current->slice_params_id);
+    if(vas != VA_STATUS_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to create buffer for slice "
+               "parameters: %d (%s).\n", vas, vaErrorStr(vas));
+        return AVERROR_EXTERNAL;
+    }
+    av_log(ctx, AV_LOG_DEBUG, "Slice buffer is %#x.\n", current->slice_params_id);
+
+    vas = vaRenderPicture(ctx->hardware_context->display, ctx->codec.context_id,
+                          &current->slice_params_id, 1);
+    if(vas != VA_STATUS_SUCCESS) {
+        av_log(ctx,
AV_LOG_ERROR, "Failed to send slice parameters: " + "%d (%s).\n", vas, vaErrorStr(vas)); + return AVERROR_EXTERNAL; + } + + return 0; +} + +static av_cold int vaapi_hevc_encode_init_stream(VAAPIHEVCEncodeContext *ctx) +{ + VAEncSequenceParameterBufferHEVC *seq = &ctx->seq_params; + VAEncPictureParameterBufferHEVC *pic = &ctx->pic_params; + VAAPIHEVCEncodeMiscSequenceParams *misc = &ctx->misc_params; + int i; + + memset(seq, 0, sizeof(*seq)); + memset(pic, 0, sizeof(*pic)); + + { + // general_profile_space == 0. + seq->general_profile_idc = 1; // Main profile. + seq->general_tier_flag = 0; + + seq->general_level_idc = ctx->level * 3; + + seq->intra_period = 0; + seq->intra_idr_period = 0; + seq->ip_period = 0; + + seq->pic_width_in_luma_samples = ctx->aligned_width; + seq->pic_height_in_luma_samples = ctx->aligned_height; + + seq->seq_fields.bits.chroma_format_idc = 1; // 4:2:0. + seq->seq_fields.bits.separate_colour_plane_flag = 0; + seq->seq_fields.bits.bit_depth_luma_minus8 = 0; // 8-bit luma. + seq->seq_fields.bits.bit_depth_chroma_minus8 = 0; // 8-bit chroma. + // Other misc flags all zero. + + // These have to come from the capabilities of the encoder. We have + // no way to query it, so just hardcode ones which worked for me... + // CTB size from 8x8 to 32x32. + seq->log2_min_luma_coding_block_size_minus3 = 0; + seq->log2_diff_max_min_luma_coding_block_size = 2; + // Transform size from 4x4 to 32x32. + seq->log2_min_transform_block_size_minus2 = 0; + seq->log2_diff_max_min_transform_block_size = 3; + // Full transform hierarchy allowed (2-5). 
+ seq->max_transform_hierarchy_depth_inter = 3; + seq->max_transform_hierarchy_depth_intra = 3; + + seq->vui_parameters_present_flag = 0; + } + + { + for(i = 0; i < FF_ARRAY_ELEMS(pic->reference_frames); i++) { + pic->reference_frames[i].picture_id = VA_INVALID_ID; + pic->reference_frames[i].flags = VA_PICTURE_HEVC_INVALID; + } + + pic->collocated_ref_pic_index = 0xff; + + pic->last_picture = 0; + + pic->pic_init_qp = ctx->fixed_qp; + + pic->diff_cu_qp_delta_depth = 0; + pic->pps_cb_qp_offset = 0; + pic->pps_cr_qp_offset = 0; + + // tiles_enabled_flag == 0, so ignore num_tile_(rows|columns)_minus1. + + pic->log2_parallel_merge_level_minus2 = 0; + + // No limit on size. + pic->ctu_max_bitsize_allowed = 0; + + pic->num_ref_idx_l0_default_active_minus1 = 0; + pic->num_ref_idx_l1_default_active_minus1 = 0; + + pic->slice_pic_parameter_set_id = 0; + + pic->pic_fields.bits.screen_content_flag = 0; + pic->pic_fields.bits.enable_gpu_weighted_prediction = 0; + + //pic->pic_fields.bits.cu_qp_delta_enabled_flag = 1; + } + + { + misc->video_parameter_set_id = 5; + misc->seq_parameter_set_id = 5; + + misc->vps_max_layers_minus1 = 0; + misc->vps_max_sub_layers_minus1 = 0; + misc->vps_temporal_id_nesting_flag = 1; + misc->sps_max_sub_layers_minus1 = 0; + misc->sps_temporal_id_nesting_flag = 1; + + for(i = 0; i < 32; i++) { + misc->general_profile_compatibility_flag[i] = + (i == seq->general_profile_idc); + } + + misc->general_progressive_source_flag = 1; + misc->general_interlaced_source_flag = 0; + misc->general_non_packed_constraint_flag = 0; + misc->general_frame_only_constraint_flag = 1; + misc->general_inbld_flag = 0; + + misc->log2_max_pic_order_cnt_lsb_minus4 = 4; + misc->vps_sub_layer_ordering_info_present_flag = 0; + misc->vps_max_dec_pic_buffering_minus1[0] = 0; + misc->vps_max_num_reorder_pics[0] = 0; + misc->vps_max_latency_increase_plus1[0] = 0; + misc->sps_sub_layer_ordering_info_present_flag = 0; + misc->sps_max_dec_pic_buffering_minus1[0] = 0; + 
misc->sps_max_num_reorder_pics[0] = 0; + misc->sps_max_latency_increase_plus1[0] = 0; + + misc->vps_timing_info_present_flag = 1; + misc->vps_num_units_in_tick = ctx->avctx->time_base.num; + misc->vps_time_scale = ctx->avctx->time_base.den; + misc->vps_poc_proportional_to_timing_flag = 1; + misc->vps_num_ticks_poc_diff_minus1 = 0; + + if(ctx->input_width != ctx->aligned_width || + ctx->input_height != ctx->aligned_height) { + misc->conformance_window_flag = 1; + misc->conf_win_left_offset = 0; + misc->conf_win_right_offset = + (ctx->aligned_width - ctx->input_width) / 2; + misc->conf_win_top_offset = 0; + misc->conf_win_bottom_offset = + (ctx->aligned_height - ctx->input_height) / 2; + } else { + misc->conformance_window_flag = 0; + } + + misc->num_short_term_ref_pic_sets = 1; + misc->st_ref_pic_set[0].num_negative_pics = 1; + misc->st_ref_pic_set[0].num_positive_pics = 0; + misc->st_ref_pic_set[0].delta_poc_s0_minus1[0] = 0; + misc->st_ref_pic_set[0].used_by_curr_pic_s0_flag[0] = 1; + + misc->vui_parameters_present_flag = 1; + if(ctx->avctx->sample_aspect_ratio.num != 0) { + misc->aspect_ratio_info_present_flag = 1; + if(ctx->avctx->sample_aspect_ratio.num == + ctx->avctx->sample_aspect_ratio.den) { + misc->aspect_ratio_idc = 1; + } else { + misc->aspect_ratio_idc = 255; // Extended SAR. + misc->sar_width = ctx->avctx->sample_aspect_ratio.num; + misc->sar_height = ctx->avctx->sample_aspect_ratio.den; + } + } + if(1) { + // Should this be conditional on some of these being set? + misc->video_signal_type_present_flag = 1; + misc->video_format = 5; // Unspecified. 
+ misc->video_full_range_flag = 0; + misc->colour_description_present_flag = 1; + misc->colour_primaries = ctx->avctx->color_primaries; + misc->transfer_characteristics = ctx->avctx->color_trc; + misc->matrix_coeffs = ctx->avctx->colorspace; + } + } + + return 0; +} + +static int vaapi_hevc_encode_init_picture(VAAPIHEVCEncodeContext *ctx, + VAAPIHEVCEncodeFrame *current) +{ + VAEncPictureParameterBufferHEVC *pic = &current->pic_params; + VAEncSliceParameterBufferHEVC *slice = &current->slice_params; + VAAPIHEVCEncodeMiscPictureParams *misc = &current->misc_params; + int idr = current->type == FRAME_TYPE_I; + + memcpy(pic, &ctx->pic_params, sizeof(*pic)); + memset(slice, 0, sizeof(*slice)); + memset(misc, 0, sizeof(*misc)); + + { + memcpy(&pic->decoded_curr_pic, &current->pic, sizeof(VAPictureHEVC)); + + if(current->type != FRAME_TYPE_I) { + memcpy(&pic->reference_frames[0], + &current->refa->pic, sizeof(VAPictureHEVC)); + } + if(current->type == FRAME_TYPE_B) { + memcpy(&pic->reference_frames[1], + &current->refb->pic, sizeof(VAPictureHEVC)); + } + + pic->coded_buf = current->coded_data_id; + + pic->nal_unit_type = (idr ? NAL_IDR_W_RADL : NAL_TRAIL_R); + + pic->pic_fields.bits.idr_pic_flag = (idr ? 1 : 0); + pic->pic_fields.bits.coding_type = (idr ? 
1 : 2); + + pic->pic_fields.bits.reference_pic_flag = 1; + } + + { + slice->slice_segment_address = 0; + slice->num_ctu_in_slice = ctx->ctu_width * ctx->ctu_height; + + slice->slice_type = current->type; + slice->slice_pic_parameter_set_id = 0; + + slice->num_ref_idx_l0_active_minus1 = 0; + slice->num_ref_idx_l1_active_minus1 = 0; + memcpy(slice->ref_pic_list0, pic->reference_frames, sizeof(pic->reference_frames)); + memcpy(slice->ref_pic_list1, pic->reference_frames, sizeof(pic->reference_frames)); + + slice->max_num_merge_cand = 5; + slice->slice_qp_delta = 0; + + slice->slice_fields.bits.last_slice_of_pic_flag = 1; + } + + { + misc->first_slice_segment_in_pic_flag = 1; + + misc->short_term_ref_pic_set_sps_flag = 1; + misc->short_term_ref_pic_idx = 0; + } + + return 0; +} + +static int vaapi_hevc_encode_picture(AVCodecContext *avctx, AVPacket *pkt, + const AVFrame *input_image, + int *got_packet) +{ + VAAPIHEVCEncodeContext *ctx = avctx->priv_data; + VASurfaceID input_surface, recon_surface; + VAAPIHEVCEncodeFrame *current; + AVFrame *source_image, *recon_image; + VACodedBufferSegment *buf_list, *buf; + VAStatus vas; + int err; + + av_log(ctx, AV_LOG_DEBUG, "Encode frame: %s, %ux%u (%"PRId64").\n", + av_get_pix_fmt_name(input_image->format), + input_image->width, input_image->height, input_image->pts); + + av_vaapi_lock_hardware_context(ctx->hardware_context); + + if(input_image->format == AV_PIX_FMT_VAAPI) { + source_image = 0; + input_surface = (VASurfaceID)input_image->data[3]; + + } else { + source_image = av_frame_alloc(); + if(!source_image) { + av_log(ctx, AV_LOG_ERROR, "Failed to allocate input frame."); + err = AVERROR(ENOMEM); + goto fail; + } + + err = av_vaapi_surface_pool_get(&ctx->input_pool, source_image); + if(err < 0) { + av_log(ctx, AV_LOG_ERROR, "Failed to allocate input frame " + "from surface pool: %d (%s).\n", err, av_err2str(err)); + goto fail; + } + + source_image->format = AV_PIX_FMT_VAAPI; + source_image->width = input_image->width; + 
source_image->height = input_image->height; + + err = av_vaapi_copy_to_surface(source_image, input_image); + if(err < 0) { + av_log(ctx, AV_LOG_ERROR, "Failed to copy to input surface: " + "%d (%s).\n", err, av_err2str(err)); + goto fail; + } + + input_surface = (VASurfaceID)source_image->data[3]; + } + av_log(ctx, AV_LOG_DEBUG, "Using surface %#x for input image.\n", + input_surface); + + recon_image = av_frame_alloc(); + if(!recon_image) { + av_log(ctx, AV_LOG_ERROR, "Failed to allocate reconstructed frame."); + err = AVERROR(ENOMEM); + goto fail; + } + + err = av_vaapi_surface_pool_get(&ctx->recon_pool, recon_image); + if(err < 0) { + av_log(ctx, AV_LOG_ERROR, "Failed to allocate reconstructed frame " + "from surface pool: %d (%s).\n", err, av_err2str(err)); + goto fail; + } + + recon_surface = (VASurfaceID)recon_image->data[3]; + av_log(ctx, AV_LOG_DEBUG, "Using surface %#x for reconstructed image.\n", + recon_surface); + + if(ctx->previous_frame != ctx->current_frame) { + av_frame_free(&ctx->dpb[ctx->previous_frame].frame); + } + + ctx->previous_frame = ctx->current_frame; + ctx->current_frame = (ctx->current_frame + 1) % MAX_DPB_PICS; + { + current = &ctx->dpb[ctx->current_frame]; + + if(ctx->poc < 0 || + ctx->poc == ctx->options.idr_interval) + current->type = FRAME_TYPE_I; + else + current->type = FRAME_TYPE_P; + + if(current->type == FRAME_TYPE_I) + ctx->poc = 0; + else + ++ctx->poc; + current->poc = ctx->poc; + + if(current->type == FRAME_TYPE_I) { + current->refa = 0; + current->refb = 0; + } else if(current->type == FRAME_TYPE_P) { + current->refa = &ctx->dpb[ctx->previous_frame]; + current->refb = 0; + } else { + av_assert0(0); + } + + memset(&current->pic, 0, sizeof(VAPictureHEVC)); + current->pic.picture_id = recon_surface; + current->pic.pic_order_cnt = current->poc; + + current->frame = recon_image; + } + av_log(ctx, AV_LOG_DEBUG, "Encoding as frame as %s (%d).\n", + current->type == FRAME_TYPE_I ? "I" : + current->type == FRAME_TYPE_P ? 
"P" : "B", current->poc); + + vas = vaBeginPicture(ctx->hardware_context->display, ctx->codec.context_id, + input_surface); + if(vas != VA_STATUS_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Failed to attach new picture: " + "%d (%s).\n", vas, vaErrorStr(vas)); + err = AVERROR_EXTERNAL; + goto fail; + } + + vaapi_hevc_encode_init_picture(ctx, current); + + if(current->type == FRAME_TYPE_I) { + err = vaapi_hevc_render_sequence(ctx); + if(err < 0) goto fail; + } + + err = vaapi_hevc_render_picture(ctx, current); + if(err < 0) goto fail; + + if(current->type == FRAME_TYPE_I) { + err = vaapi_hevc_render_packed_vps_sps(ctx); + if(err < 0) goto fail; + + err = vaapi_hevc_render_packed_pps(ctx); + if(err < 0) goto fail; + } + + err = vaapi_hevc_render_packed_slice(ctx, current); + if(err < 0) goto fail; + + err = vaapi_hevc_render_slice(ctx, current); + if(err < 0) goto fail; + + vas = vaEndPicture(ctx->hardware_context->display, ctx->codec.context_id); + if(vas != VA_STATUS_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Failed to start picture processing: " + "%d (%s).\n", vas, vaErrorStr(vas)); + err = AVERROR_EXTERNAL; + goto fail; + } + + vas = vaSyncSurface(ctx->hardware_context->display, input_surface); + if(vas != VA_STATUS_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Failed to sync to picture completion: " + "%d (%s).\n", vas, vaErrorStr(vas)); + err = AVERROR_EXTERNAL; + goto fail; + } + + buf_list = 0; + vas = vaMapBuffer(ctx->hardware_context->display, current->coded_data_id, + (void**)&buf_list); + if(vas != VA_STATUS_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Failed to map output buffers: " + "%d (%s).\n", vas, vaErrorStr(vas)); + err = AVERROR_EXTERNAL; + goto fail; + } + + for(buf = buf_list; buf; buf = buf->next) { + av_log(ctx, AV_LOG_DEBUG, "Output buffer: %u bytes.\n", buf->size); + + err = av_new_packet(pkt, buf->size); + if(err < 0) { + av_log(ctx, AV_LOG_ERROR, "Failed to make output buffer " + "(%u bytes).\n", buf->size); + goto fail; + } + + memcpy(pkt->data, 
buf->buf, buf->size); + + if(current->type == FRAME_TYPE_I) + pkt->flags |= AV_PKT_FLAG_KEY; + + pkt->pts = input_image->pts; + + *got_packet = 1; + } + + vas = vaUnmapBuffer(ctx->hardware_context->display, current->coded_data_id); + if(vas != VA_STATUS_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Failed to unmap output buffers: " + "%d (%s).\n", vas, vaErrorStr(vas)); + err = AVERROR_EXTERNAL; + goto fail; + } + + if(input_image->format != AV_PIX_FMT_VAAPI) + av_frame_free(&source_image); + + err = 0; + fail: + av_vaapi_unlock_hardware_context(ctx->hardware_context); + return err; +} + +static VAConfigAttrib config_attributes[] = { + { .type = VAConfigAttribRTFormat, + .value = VA_RT_FORMAT_YUV420 }, + { .type = VAConfigAttribRateControl, + .value = VA_RC_CQP }, + { .type = VAConfigAttribEncPackedHeaders, + .value = 0 }, +}; + +static av_cold int vaapi_hevc_encode_init(AVCodecContext *avctx) +{ + VAAPIHEVCEncodeContext *ctx = avctx->priv_data; + VAStatus vas; + int i, err; + + if(ctx->options.hardware_context == 0) { + av_log(ctx, AV_LOG_ERROR, "VAAPI encode requires hardware context.\n"); + return AVERROR(EINVAL); + } + ctx->hardware_context = + (AVVAAPIHardwareContext*)ctx->options.hardware_context; + + ctx->avctx = avctx; + + ctx->va_profile = VAProfileHEVCMain; + ctx->level = -1; + if(sscanf(ctx->options.level, "%d", &ctx->level) <= 0 || + ctx->level < 0 || ctx->level > 63) { + av_log(ctx, AV_LOG_ERROR, "Invaid level '%s'.\n", ctx->options.level); + return AVERROR(EINVAL); + } + + if(ctx->options.qp >= 0) { + ctx->rc_mode = VA_RC_CQP; + } else { + // Default to fixed-QP 26. 
+ ctx->rc_mode = VA_RC_CQP; + ctx->options.qp = 26; + } + av_log(ctx, AV_LOG_VERBOSE, "Using constant-QP mode at %d.\n", + ctx->options.qp); + + ctx->input_width = avctx->width; + ctx->input_height = avctx->height; + + ctx->aligned_width = FFALIGN(ctx->input_width, 16); + ctx->aligned_height = FFALIGN(ctx->input_height, 16); + ctx->ctu_width = FFALIGN(ctx->aligned_width, 32) / 32; + ctx->ctu_height = FFALIGN(ctx->aligned_height, 32) / 32; + + av_log(ctx, AV_LOG_VERBOSE, "Input %ux%u -> Aligned %ux%u -> CTU %ux%u.\n", + ctx->input_width, ctx->input_height, ctx->aligned_width, + ctx->aligned_height, ctx->ctu_width, ctx->ctu_height); + + ctx->fixed_qp = ctx->options.qp; + + ctx->poc = -1; + + av_vaapi_lock_hardware_context(ctx->hardware_context); + + if(avctx->pix_fmt == AV_PIX_FMT_VAAPI) { + // Just use the input surfaces directly. + ctx->input_is_vaapi = 1; + + } else { + AVVAAPISurfaceConfig *config = &ctx->input_config; + + config->rt_format = VA_RT_FORMAT_YUV420; + config->av_format = AV_PIX_FMT_VAAPI; + + config->image_format.fourcc = VA_FOURCC_NV12; + config->image_format.bits_per_pixel = 12; + + config->width = ctx->aligned_width; + config->height = ctx->aligned_height; + + config->attribute_count = 0; + + ctx->input_is_vaapi = 0; + + err = av_vaapi_surface_pool_init(&ctx->input_pool, + ctx->hardware_context, + config, INPUT_PICS); + if(err < 0) { + av_log(ctx, AV_LOG_ERROR, "Failed to create input surface pool: " + "%d (%s).\n", err, av_err2str(err)); + goto fail; + } + } + + { + AVVAAPISurfaceConfig *config = &ctx->recon_config; + + config->rt_format = VA_RT_FORMAT_YUV420; + config->av_format = AV_PIX_FMT_VAAPI; + + config->image_format.fourcc = VA_FOURCC_NV12; + config->image_format.bits_per_pixel = 12; + + config->width = ctx->aligned_width; + config->height = ctx->aligned_height; + + config->attribute_count = 0; + + err = av_vaapi_surface_pool_init(&ctx->recon_pool, + ctx->hardware_context, + config, MAX_DPB_PICS); + if(err < 0) { + av_log(ctx, 
AV_LOG_ERROR, "Failed to create recon surface pool: " + "%d (%s).\n", err, av_err2str(err)); + goto fail; + } + } + + { + AVVAAPIPipelineConfig *config = &ctx->codec_config; + + config->profile = ctx->va_profile; + config->entrypoint = VAEntrypointEncSlice; + + config->width = ctx->aligned_width; + config->height = ctx->aligned_height; + + config->attribute_count = FF_ARRAY_ELEMS(config_attributes); + config->attributes = config_attributes; + } + + err = av_vaapi_pipeline_init(&ctx->codec, ctx->hardware_context, + &ctx->codec_config, &ctx->recon_pool); + if(err < 0) { + av_log(ctx, AV_LOG_ERROR, "Failed to create codec: %d (%s).\n", + err, av_err2str(err)); + goto fail; + } + + for(i = 0; i < MAX_DPB_PICS; i++) { + vas = vaCreateBuffer(ctx->hardware_context->display, + ctx->codec.context_id, + VAEncCodedBufferType, + 1048576, 1, 0, &ctx->dpb[i].coded_data_id); + if(vas != VA_STATUS_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Failed to create buffer for " + "coded data: %d (%s).\n", vas, vaErrorStr(vas)); + err = AVERROR_EXTERNAL; + goto fail; + } + av_log(ctx, AV_LOG_TRACE, "Coded data buffer %d is %#x.\n", + i, ctx->dpb[i].coded_data_id); + } + + av_vaapi_unlock_hardware_context(ctx->hardware_context); + + av_log(ctx, AV_LOG_VERBOSE, "Started VAAPI H.265 encoder.\n"); + + vaapi_hevc_encode_init_stream(ctx); + + return 0; + + fail: + av_vaapi_unlock_hardware_context(ctx->hardware_context); + return err; +} + +static av_cold int vaapi_hevc_encode_close(AVCodecContext *avctx) +{ + VAAPIHEVCEncodeContext *ctx = avctx->priv_data; + int err; + + av_vaapi_lock_hardware_context(ctx->hardware_context); + + err = av_vaapi_pipeline_uninit(&ctx->codec); + if(err < 0) { + av_log(ctx, AV_LOG_ERROR, "Failed to destroy codec: %d (%s).\n", + err, av_err2str(err)); + } + + err = av_vaapi_surface_pool_uninit(&ctx->recon_pool); + if(err < 0) { + av_log(ctx, AV_LOG_ERROR, "Failed to uninitialise recon " + "surface pool: %d (%s).\n", err, av_err2str(err)); + } + + if(!ctx->input_is_vaapi) 
{ + err = av_vaapi_surface_pool_uninit(&ctx->input_pool); + if(err < 0) { + av_log(ctx, AV_LOG_ERROR, "Failed to uninitialise input " + "surface pool: %d (%s).\n", err, av_err2str(err)); + } + } + + av_vaapi_unlock_hardware_context(ctx->hardware_context); + + return 0; +} + +#define OFFSET(member) offsetof(VAAPIHEVCEncodeContext, options.member) +#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM) +static const AVOption vaapi_hevc_options[] = { + { "hardware_context", "VAAPI hardware context", + OFFSET(hardware_context), AV_OPT_TYPE_INT64, + { .i64 = 0 }, INT64_MIN, INT64_MAX, AV_OPT_FLAG_VIDEO_PARAM }, + { "level", "Set H.265 level", + OFFSET(level), AV_OPT_TYPE_STRING, + { .str = "52" }, 0, 0, FLAGS }, + { "qp", "Use constant quantisation parameter", + OFFSET(qp), AV_OPT_TYPE_INT, + { .i64 = -1 }, -1, MAX_QP, FLAGS }, + { "idr_interval", "Number of frames between IDR frames (0 = all intra)", + OFFSET(idr_interval), AV_OPT_TYPE_INT, + { .i64 = -1 }, -1, INT_MAX, FLAGS }, + { 0 } +}; + +static const AVClass vaapi_hevc_class = { + .class_name = "vaapi_hevc", + .item_name = av_default_item_name, + .option = vaapi_hevc_options, + .version = LIBAVUTIL_VERSION_INT, +}; + +AVCodec ff_hevc_vaapi_encoder = { + .name = "vaapi_hevc", + .long_name = NULL_IF_CONFIG_SMALL("H.265/HEVC (VAAPI)"), + .type = AVMEDIA_TYPE_VIDEO, + .id = AV_CODEC_ID_HEVC, + .priv_data_size = sizeof(VAAPIHEVCEncodeContext), + .init = &vaapi_hevc_encode_init, + .encode2 = &vaapi_hevc_encode_picture, + .close = &vaapi_hevc_encode_close, + .priv_class = &vaapi_hevc_class, + .pix_fmts = (const enum AVPixelFormat[]) { + AV_PIX_FMT_VAAPI, + AV_PIX_FMT_NV12, + AV_PIX_FMT_NONE, + }, +}; -- 2.7.0.rc3 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel