vulkan_av1: port to the new stable API

Lynne Sun, 11 Feb 2024 07:13:19 -0800

Feb 11, 2024, 12:58 by [email protected]:

> On 10/02/2024 22:39, Lynne wrote:
>
>>  11 files changed, 303 insertions(+), 669 deletions(-)
>>  delete mode 100644 libavcodec/vulkan_video_codec_av1std.h
>>  delete mode 100644 libavcodec/vulkan_video_codec_av1std_decode.h
>>
>
> You need something in configure to detect whether the AV1 header is available 
> so it can build.  (Currently this passes configure and errors out at build 
> time for me with 1.3.275 headers.)
>


This affects multiple files, and it's a bit too much to add an ifdef,
so I've added a version check (1.3.277).


>> -    StdVideoAV1MESATile            tiles[MAX_TILES];
>> -    StdVideoAV1MESATileList        tile_list;
>> -    const uint32_t                *tile_offsets;
>> +    uint32_t tile_count;
>> +    uint32_t tile_offsets_s[MAX_TILES];
>> +    uint32_t tile_sizes[MAX_TILES];
>>
>
> I don't see any check before writing things to this array.  What happens if 
> you get a non-conforming stream with more than 256 tiles?
>

Fixed, added a check in vk_av1_decode_slice().


>> +
>> +    for (int i = 0; i < AV1_TOTAL_REFS_PER_FRAME; i++) {
>> +        vkav1_std_ref->SavedOrderHints[i] = cbs_ctx->order_hints[i];
>>
>
> "SavedOrderHints is interpreted as defined in section 7.20 of the AV1 
> Specification."
>
> This doesn't look right for frames in the DPB.  SavedOrderHints in 7.20 is a 
> two-dimensional array containing OrderHints when that frame was decoded, not 
> the reference order hints of the current frame.
>

SavedOrderHints is a stack that is simply pushed at the time of decoding,
isn't it?
I think this is correct, as these are derived from the bitstream values before
decoding.


>> +        vkav1_std_ref->RefFrameSignBias |= cbs_ctx->ref_frame_sign_bias[i] 
>> << i;
>> +    }
>> +
>> +    *vkav1_ref = (VkVideoDecodeAV1DpbSlotInfoKHR) {
>> +        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_DPB_SLOT_INFO_KHR,
>> +        .pStdReferenceInfo = vkav1_std_ref,
>>  };
>>
>>  for (unsigned i = 0; i < 7; i++) {
>>  const int idx = pic->raw_frame_header->ref_frame_idx[i];
>> -        vkav1_ref->ref_order_hint[i] = 
>> pic->raw_frame_header->ref_order_hint[idx];
>> +        vkav1_std_ref->SavedOrderHints[i] = 
>> pic->raw_frame_header->ref_order_hint[idx];
>>
>
> Is this overwriting some of the above?
>

Yes, removed.


>> +    for (int i = 0; i < STD_VIDEO_AV1_MAX_SEGMENTS; i++) {
>> +        for (int j = 0; j < STD_VIDEO_AV1_SEG_LVL_MAX; j++) {
>> +            ap->segmentation.FeatureEnabled[i] |= 
>> frame_header->feature_enabled[i][j] << j;
>>
>
> "the elements of FeatureEnabled are bitmasks where bit index i of element j 
> corresponds to FeatureEnabled[i][j] as defined in section 6.8.13 of the AV1 
> Specification"
>
> This looks like you have i and j swapped?
>

The header define it as:
uint8_t FeatureEnabled[STD_VIDEO_AV1_MAX_SEGMENTS];
We define it as:
uint8_t feature_enabled[AV1_MAX_SEGMENTS][AV1_SEG_LVL_MAX];

So FeatureEnabled[i] |= feature_enabled[i][j] << j;
should be correct.


>> +            ap->segmentation.FeatureData[i][j] = 
>> frame_header->feature_value[i][j];
>> +        }
>> +    }
>> +
>> +    ap->loop_filter = (StdVideoAV1LoopFilter) {
>> +        .flags = (StdVideoAV1LoopFilterFlags) {
>> +            .loop_filter_delta_enabled = 
>> frame_header->loop_filter_delta_enabled,
>> +            .loop_filter_delta_update = 
>> frame_header->loop_filter_delta_update,
>> +        },
>> +        .loop_filter_sharpness = frame_header->loop_filter_sharpness,
>> +    };
>> +
>> +    for (int i = 0; i < STD_VIDEO_AV1_MAX_LOOP_FILTER_STRENGTHS; i++)
>> +        ap->loop_filter.loop_filter_level[i] = 
>> frame_header->loop_filter_level[i];
>> +    for (int i = 0; i < STD_VIDEO_AV1_LOOP_FILTER_ADJUSTMENTS; i++)
>> +        ap->loop_filter.loop_filter_mode_deltas[i] = 
>> frame_header->loop_filter_mode_deltas[i];
>> +
>> +    ap->cdef = (StdVideoAV1CDEF) {
>> +        .cdef_damping_minus_3 = frame_header->cdef_damping_minus_3,
>> +        .cdef_bits = frame_header->cdef_bits,
>> +    };
>> +
>> +    uses_lr = frame_header->lr_type[0] || frame_header->lr_type[1] || 
>> frame_header->lr_type[2],
>> +    ap->loop_restoration = (StdVideoAV1LoopRestoration) {
>> +        .FrameRestorationType[0] = remap_lr_type[frame_header->lr_type[0]],
>> +        .FrameRestorationType[1] = remap_lr_type[frame_header->lr_type[1]],
>> +        .FrameRestorationType[2] = remap_lr_type[frame_header->lr_type[2]],
>> +        .LoopRestorationSize[0] = 1 + frame_header->lr_unit_shift,
>> +        .LoopRestorationSize[1] = 1 + frame_header->lr_unit_shift - 
>> frame_header->lr_uv_shift,
>> +        .LoopRestorationSize[2] = 1 + frame_header->lr_unit_shift - 
>> frame_header->lr_uv_shift,
>>
>
> The standard has:
>
> LoopRestorationSize[ 0 ] = RESTORATION_TILESIZE_MAX >> (2 - lr_unit_shift)
> LoopRestorationSize[ 1 ] = LoopRestorationSize[ 0 ] >> lr_uv_shift
> LoopRestorationSize[ 2 ] = LoopRestorationSize[ 0 ] >> lr_uv_shift
>
> The values here look like log2 of these rather than what they are meant to be?
>

Correct, fixed. Added an AV1_RESTORATION_TILESIZE_MAX constant to av1.h
and calculated them as the spec requires.


>> +    };
>> +
>> +    ap->film_grain = (StdVideoAV1FilmGrain) {
>> +        .flags = (StdVideoAV1FilmGrainFlags) {
>> +            .chroma_scaling_from_luma = 
>> film_grain->chroma_scaling_from_luma,
>> +            .overlap_flag = film_grain->overlap_flag,
>> +            .clip_to_restricted_range = 
>> film_grain->clip_to_restricted_range,
>> +        },
>> +        .grain_scaling_minus_8 = film_grain->grain_scaling_minus_8,
>> +        .ar_coeff_lag = film_grain->ar_coeff_lag,
>> +        .ar_coeff_shift_minus_6 = film_grain->ar_coeff_shift_minus_6,
>> +        .grain_scale_shift = film_grain->grain_scale_shift,
>> +        .grain_seed = film_grain->grain_seed,
>>
>
> .film_grain_params_ref_idx got lost here.
>

Fixed.


>>
>>  if (apply_grain) {
>>  for (int i = 0; i < 14; i++) {
>>
>
> The Vulkan headers provide some nice constants for you to use here, use them 
> rather than magic numbers.
>
> (Maybe we should add our own versions of these in av1.h?)
>

Fixed. Did not add our own versions, used the header's.


>>
>> -        ap->av1_frame_header.film_grain.ar_coeffs_cb_plus_128[24] = 
>> film_grain->ar_coeffs_cb_plus_128[24];
>> -        ap->av1_frame_header.film_grain.ar_coeffs_cr_plus_128[24] = 
>> film_grain->ar_coeffs_cr_plus_128[24];
>> +        ap->film_grain.ar_coeffs_cb_plus_128[24] = 
>> film_grain->ar_coeffs_cb_plus_128[24];
>> +        ap->film_grain.ar_coeffs_cr_plus_128[24] = 
>> film_grain->ar_coeffs_cr_plus_128[24];
>>  }
>>
>>  /* Workaround for a spec issue. */
>>
>
> Is whatever this is still present?
>

Yes. Removed the comment, added a small TODO at the top.
Sad, all the hardware had the same limitations of requiring a
unique index for each frame, despite supporting image addresses
for the output image.
I think it's fixable with us as the indices should correspond to e.g. 
LAST_FRAME,
but since we don't have threading on right now, we can fix it later.


>> @@ -480,26 +528,22 @@ static int vk_av1_decode_slice(AVCodecContext *avctx,
>>  FFVulkanDecodePicture *vp = &ap->vp;
>>
>>  for (int i = s->tg_start; i <= s->tg_end; i++) {
>> -        ap->tiles[ap->tile_list.nb_tiles] = (StdVideoAV1MESATile) {
>> -            .size     = s->tile_group_info[i].tile_size,
>> -            .offset   = s->tile_group_info[i].tile_offset,
>> -            .row      = s->tile_group_info[i].tile_row,
>> -            .column   = s->tile_group_info[i].tile_column,
>> -            .tg_start = s->tg_start,
>> -            .tg_end   = s->tg_end,
>> -        };
>> +
>> +        ap->tile_offsets_s[ap->tile_count] = 
>> s->tile_group_info[i].tile_offset;
>> +        ap->tile_sizes[ap->tile_count] = s->tile_group_info[i].tile_size;
>>
>
> I'm confused by why this is indexed by ap->tile_count rather than, say, i - 
> s->tg_start?  Isn't this repeatedly overwriting one entry in the array and 
> never touching the others?
>

No, the call to ff_vk_decode_add_slice() modifies ap->tile_count.
It's how it's handled in H26x as well.
The logic is tiles are only added if the function call succeeds.


>>
>> err = ff_vk_decode_add_slice(avctx, vp,
>>  data + s->tile_group_info[i].tile_offset,
>>  s->tile_group_info[i].tile_size, 0,
>> -                                     &ap->tile_list.nb_tiles,
>> +                                     &ap->tile_count,
>>  &ap->tile_offsets);
>>  if (err < 0)
>>  return err;
>>
>> -        ap->tiles[ap->tile_list.nb_tiles - 1].offset = 
>> ap->tile_offsets[ap->tile_list.nb_tiles - 1];
>> +        ap->tile_offsets_s[ap->tile_count - 1] = 
>> ap->tile_offsets[ap->tile_count - 1];
>>
>
> And it's not at all clear what this is doing.
>

Leftover code. Removed, along with tile_offsets_s.


>>  av_log(avctx, AV_LOG_VERBOSE, "Decoder capabilities for %s profile 
>> \"%s\":\n",
>> @@ -911,7 +911,7 @@ static int vulkan_decode_get_profile(AVCodecContext 
>> *avctx, AVBufferRef *frames_
>>  /* TODO: make dedicated_dpb tunable */
>>  dec->dedicated_dpb = !(dec_caps->flags & 
>> VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR);
>>  dec->layered_dpb = !(caps->flags & 
>> VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR);
>> -    dec->external_fg = av1_caps.flags & 
>> VK_VIDEO_DECODE_AV1_CAPABILITY_EXTERNAL_FILM_GRAIN_MESA;
>> +    //    dec->external_fg = av1_caps.flags & 
>> VK_VIDEO_DECODE_AV1_CAPABILITY_EXTERNAL_FILM_GRAIN_KHR;
>>
>
> What does this mean for the film grain capabilities of the current setup?
>

No film grain on Intel.

Drivers will likely error out on profiles with filmGrainSupport = VK_TRUE,
so avctx->export_side_data must have AV_CODEC_EXPORT_DATA_FILM_GRAIN
for filmGrainSupport to be VK_FALSE, and the application will have to support
applying film grain by itself to be compliant.

We'll have to copy libplacebo's film grain generation eventually,
but I think erroring out is fine for now.

We could force side data to be exported in case the hardware doesn't
support it in a later patch.


>> +int ff_vk_av1_level_to_av(StdVideoAV1Level level)
>> +{
>> +    switch (level) {
>> +    case STD_VIDEO_AV1_LEVEL_2_0: return 20;
>> +    case STD_VIDEO_AV1_LEVEL_2_1: return 21;
>> +    case STD_VIDEO_AV1_LEVEL_2_2: return 22;
>> +    case STD_VIDEO_AV1_LEVEL_2_3: return 23;
>> +    case STD_VIDEO_AV1_LEVEL_3_0: return 30;
>> +    case STD_VIDEO_AV1_LEVEL_3_1: return 31;
>> +    case STD_VIDEO_AV1_LEVEL_3_2: return 32;
>> +    case STD_VIDEO_AV1_LEVEL_3_3: return 33;
>> +    case STD_VIDEO_AV1_LEVEL_4_0: return 40;
>> +    case STD_VIDEO_AV1_LEVEL_4_1: return 41;
>> +    case STD_VIDEO_AV1_LEVEL_4_2: return 42;
>> +    case STD_VIDEO_AV1_LEVEL_4_3: return 43;
>> +    case STD_VIDEO_AV1_LEVEL_5_0: return 50;
>> +    case STD_VIDEO_AV1_LEVEL_5_1: return 51;
>> +    case STD_VIDEO_AV1_LEVEL_5_2: return 52;
>> +    case STD_VIDEO_AV1_LEVEL_5_3: return 53;
>> +    case STD_VIDEO_AV1_LEVEL_6_0: return 60;
>> +    case STD_VIDEO_AV1_LEVEL_6_1: return 61;
>> +    case STD_VIDEO_AV1_LEVEL_6_2: return 62;
>> +    case STD_VIDEO_AV1_LEVEL_6_3: return 63;
>> +    case STD_VIDEO_AV1_LEVEL_7_0: return 70;
>> +    case STD_VIDEO_AV1_LEVEL_7_1: return 71;
>> +    case STD_VIDEO_AV1_LEVEL_7_2: return 72;
>> +    default:
>> +    case STD_VIDEO_AV1_LEVEL_7_3: return 73;
>> +    }
>> +}
>>
>
> Er, what?  Where have the numbers on the RHS of this come from?  The vulkan 
> header shows the numbers on the left as correctly matching the spec.
>

It's how lavc represents levels for H26x codecs, so
I thought it's the same for AV1. But checking that now, it isn't the case,
so removed it.

Thanks for the review, this caught a lot of issues.
Will send v4 to the ML shortly.
_______________________________________________
ffmpeg-devel mailing list
[email protected]
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
[email protected] with subject "unsubscribe".

Re: [FFmpeg-devel] [PATCH v3 2/2] lavc/vulkan_av1: port to the new stable API

Reply via email to