The branch, master has been updated
via d00f41f21347a93cbcf533dceccd7532bd925067 (commit)
from 28461f2c4332e34cd50ed0cfd947b8ac1bc5fe08 (commit)
- Log -----------------------------------------------------------------
commit d00f41f21347a93cbcf533dceccd7532bd925067
Author: averne <[email protected]>
AuthorDate: Sat Nov 8 19:57:37 2025 +0100
Commit: Lynne <[email protected]>
CommitDate: Sat Nov 8 22:31:21 2025 +0000
vulkan/prores: forward quantization parameter to the IDCT shader
The qScale value, derived from the slice's quantization index syntax element,
has a maximum value of 512, so the coefficient * qScale product would overflow
the 16-bit store from the VLD shader in extreme cases.
This fixes that edge case by forwarding the quantization index in a storage
buffer, and applying the inverse quantization fully in the IDCT shader.
diff --git a/libavcodec/vulkan/prores_idct.comp b/libavcodec/vulkan/prores_idct.comp
index 645cb02979..4b39b3d8ae 100644
--- a/libavcodec/vulkan/prores_idct.comp
+++ b/libavcodec/vulkan/prores_idct.comp
@@ -87,7 +87,7 @@ void main(void)
uint chroma_shift = comp != 0 ? log2_chroma_w : 0;
bool act = gid.x < mb_width << (4 - chroma_shift);
- /* Coalesced load of DCT coeffs in shared memory, second part of inverse
quantization */
+ /* Coalesced load of DCT coeffs in shared memory, inverse quantization */
if (act) {
/**
* According to spec indexing an array in push constant memory with
@@ -95,9 +95,14 @@ void main(void)
* so copy the whole matrix locally.
*/
uint8_t[64] qmat = comp == 0 ? qmat_luma : qmat_chroma;
+
+ /* Table 15 */
+ uint8_t qidx = quant_idx[(gid.y >> 1) * mb_width + (gid.x >> 4)];
+ int qscale = qidx > 128 ? (qidx - 96) << 2 : qidx;
+
[[unroll]] for (uint i = 0; i < 8; ++i) {
- int v = sign_extend(int(get_px(comp, ivec2(gid.x, (gid.y << 3) |
i))), 16);
- blocks[block][i * 9 + idx] = float(v * int(qmat[(i << 3) + idx]));
+ int v = sign_extend(int(get_px(comp, ivec2(gid.x, (gid.y << 3) +
i))), 16);
+ blocks[block][i * 9 + idx] = float(v * qscale * int(qmat[(i << 3)
+ idx]));
}
}
@@ -117,7 +122,7 @@ void main(void)
if (act) {
[[unroll]] for (uint i = 0; i < 8; ++i) {
float v = blocks[block][i * 9 + idx] * fact + off;
- put_px(comp, ivec2(gid.x, (gid.y << 3) | i), clamp(int(v), 0,
maxv));
+ put_px(comp, ivec2(gid.x, (gid.y << 3) + i), clamp(int(v), 0,
maxv));
}
}
}
diff --git a/libavcodec/vulkan/prores_vld.comp b/libavcodec/vulkan/prores_vld.comp
index 00e78e08ff..298a5baf4c 100644
--- a/libavcodec/vulkan/prores_vld.comp
+++ b/libavcodec/vulkan/prores_vld.comp
@@ -22,9 +22,9 @@
void put_px(uint tex_idx, ivec2 pos, uint v)
{
#ifndef INTERLACED
- imageStore(dst[tex_idx], pos, uvec4(v));
+ imageStore(dst[tex_idx], pos, uvec4(uint16_t(v)));
#else
- imageStore(dst[tex_idx], ivec2(pos.x, (pos.y << 1) + bottom_field),
uvec4(v));
+ imageStore(dst[tex_idx], ivec2(pos.x, (pos.y << 1) + bottom_field),
uvec4(uint16_t(v)));
#endif
}
@@ -57,7 +57,7 @@ uint decode_codeword(inout GetBitContext gb, int codebook)
}
}
-void decode_comp(in GetBitContext gb, uvec2 mb_pos, uint mb_count, uint qscale)
+void decode_comp(in GetBitContext gb, uvec2 mb_pos, uint mb_count)
{
uvec3 gid = gl_GlobalInvocationID;
uint is_luma = uint(gid.z == 0);
@@ -70,7 +70,7 @@ void decode_comp(in GetBitContext gb, uvec2 mb_pos, uint
mb_count, uint qscale)
{
/* First coeff */
uint c = to_signed(decode_codeword(gb, 0x650));
- put_px(gid.z, base_pos, c * qscale & 0xffff);
+ put_px(gid.z, base_pos, c);
/**
* Table 9, encoded as (last_rice_q << 0) | (krice or kexp << 4) |
((kexp or kexp + 1) << 8)
@@ -89,7 +89,7 @@ void decode_comp(in GetBitContext gb, uvec2 mb_pos, uint
mb_count, uint qscale)
int s = int(prev_dc_diff) >> 31;
c += prev_dc_diff = (to_signed(cw) ^ s) - s;
- put_px(gid.z, base_pos + pos_to_block(i, is_luma), c * qscale &
0xffff);
+ put_px(gid.z, base_pos + pos_to_block(i, is_luma), c);
}
}
@@ -152,7 +152,7 @@ void decode_comp(in GetBitContext gb, uvec2 mb_pos, uint
mb_count, uint qscale)
ivec2 bpos = ivec2(scan & 0xf, scan >> 4);
uint c = ((level + 1) ^ -s) + s;
- put_px(gid.z, base_pos + spos + bpos, c * qscale & 0xffff);
+ put_px(gid.z, base_pos + spos + bpos, c);
}
}
}
@@ -218,7 +218,7 @@ void decode_alpha(in GetBitContext gb, uvec2 mb_pos, uint
mb_count)
*/
uint val = (alpha << alpha_rescale_lshift) | (alpha >>
alpha_rescale_rshift);
for (uint end = pos + run; pos < end; ++pos)
- put_px(3, base_pos + ivec2(pos & block_mask, pos >> block_shift),
val & 0xffff);
+ put_px(3, base_pos + ivec2(pos & block_mask, pos >> block_shift),
val);
}
}
@@ -235,13 +235,8 @@ void main(void)
u8buf bs = u8buf(slice_data + slice_off);
/* Decode slice header */
- uint hdr_size, y_size, u_size, v_size, a_size;
- hdr_size = bs[0].v >> 3;
-
- /* Table 15 */
- uint qidx = clamp(bs[1].v, 1, 224),
- qscale = qidx > 128 ? (qidx - 96) << 2 : qidx;
-
+ uint hdr_size, qidx, y_size, u_size, v_size, a_size;
+ hdr_size = bs[0].v >> 3, qidx = clamp(bs[1].v, 1, 224);
y_size = (uint(bs[2].v) << 8) | bs[3].v;
u_size = (uint(bs[4].v) << 8) | bs[5].v;
@@ -308,10 +303,17 @@ void main(void)
uint mb_count = 1 << log2_width;
if (gid.z < 3) {
- /* Color entropy decoding, inverse scanning, first part of inverse
quantization */
- decode_comp(gb, uvec2(mb_x, mb_y), mb_count, qscale);
+ /* Color entropy decoding, inverse scanning */
+ decode_comp(gb, uvec2(mb_x, mb_y), mb_count);
} else {
/* Alpha entropy decoding */
decode_alpha(gb, uvec2(mb_x, mb_y), mb_count);
}
+
+ /* Forward the quantization index to the IDCT shader */
+ if (gid.z == 0) {
+ uint base = mb_y * mb_width + mb_x;
+ for (uint i = 0; i < mb_count; ++i)
+ quant_idx[base + i] = uint8_t(qidx);
+ }
}
diff --git a/libavcodec/vulkan_prores.c b/libavcodec/vulkan_prores.c
index 2602be112b..8849e337c3 100644
--- a/libavcodec/vulkan_prores.c
+++ b/libavcodec/vulkan_prores.c
@@ -37,11 +37,13 @@ const FFVulkanDecodeDescriptor ff_vk_dec_prores_desc = {
typedef struct ProresVulkanDecodePicture {
FFVulkanDecodePicture vp;
- AVBufferRef *slice_offset_buf;
- uint32_t slice_num;
+ AVBufferRef *metadata_buf;
uint32_t bitstream_start;
uint32_t bitstream_size;
+ uint32_t slice_num;
+
+ uint32_t slice_offsets_sz, mb_params_sz;
} ProresVulkanDecodePicture;
typedef struct ProresVulkanDecodeContext {
@@ -51,7 +53,7 @@ typedef struct ProresVulkanDecodeContext {
FFVulkanShader idct;
} shaders[2]; /* Progressive/interlaced */
- AVBufferPool *slice_offset_pool;
+ AVBufferPool *metadata_pool;
} ProresVulkanDecodeContext;
typedef struct ProresVkParameters {
@@ -88,6 +90,9 @@ static int vk_prores_start_frame(AVCodecContext
*avctx,
int err;
+ pp->slice_offsets_sz = (pr->slice_count + 1) * sizeof(uint32_t);
+ pp->mb_params_sz = pr->mb_width * pr->mb_height * sizeof(uint8_t);
+
/* Host map the input slices data if supported */
if (!vp->slices_buf && ctx->s.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY)
RET(ff_vk_host_map_buffer(&ctx->s, &vp->slices_buf, buffer_ref->data,
@@ -96,11 +101,10 @@ static int vk_prores_start_frame(AVCodecContext
*avctx,
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT));
/* Allocate slice offsets buffer */
- RET(ff_vk_get_pooled_buffer(&ctx->s, &pv->slice_offset_pool,
- &pp->slice_offset_buf,
- VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
- VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
- NULL, (pr->slice_count + 1) * sizeof(uint32_t),
+ RET(ff_vk_get_pooled_buffer(&ctx->s, &pv->metadata_pool,
+ &pp->metadata_buf,
+ VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
+ NULL, pp->slice_offsets_sz + pp->mb_params_sz,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
@@ -123,7 +127,7 @@ static int vk_prores_decode_slice(AVCodecContext *avctx,
ProresVulkanDecodePicture *pp = pr->hwaccel_picture_private;
FFVulkanDecodePicture *vp = &pp->vp;
- FFVkBuffer *slice_offset = (FFVkBuffer *)pp->slice_offset_buf->data;
+ FFVkBuffer *slice_offset = (FFVkBuffer *)pp->metadata_buf->data;
FFVkBuffer *slices_buf = vp->slices_buf ? (FFVkBuffer
*)vp->slices_buf->data : NULL;
/* Skip picture header */
@@ -158,7 +162,7 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
FFVulkanDecodePicture *vp = &pp->vp;
ProresVkParameters pd;
- FFVkBuffer *slice_data, *slice_offsets;
+ FFVkBuffer *slice_data, *metadata;
struct ProresVulkanShaderVariants *shaders;
VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS];
VkBufferMemoryBarrier2 buf_bar[2];
@@ -172,8 +176,8 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
if (!pix_desc)
return AVERROR(EINVAL);
- slice_data = (FFVkBuffer *)vp->slices_buf->data;
- slice_offsets = (FFVkBuffer *)pp->slice_offset_buf->data;
+ slice_data = (FFVkBuffer *)vp->slices_buf->data;
+ metadata = (FFVkBuffer *)pp->metadata_buf->data;
shaders = &pv->shaders[pr->frame_type != 0];
@@ -209,13 +213,13 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
pr->frame));
RET(ff_vk_exec_add_dep_buf(&ctx->s, exec,
- (AVBufferRef *[]){ vp->slices_buf,
pp->slice_offset_buf },
+ (AVBufferRef *[]){ vp->slices_buf,
pp->metadata_buf, },
2, 0));
/* Transfer ownership to the exec context */
- vp->slices_buf = pp->slice_offset_buf = NULL;
+ vp->slices_buf = pp->metadata_buf = NULL;
- /* Input frame barrier */
+ /* Input barrier */
ff_vk_frame_barrier(&ctx->s, exec, pr->frame, img_bar, &nb_img_bar,
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
@@ -223,6 +227,21 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
VK_IMAGE_LAYOUT_GENERAL,
VK_QUEUE_FAMILY_IGNORED);
+ buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
+ .srcStageMask = metadata->stage,
+ .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+ .srcAccessMask = metadata->access,
+ .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .buffer = metadata->buf,
+ .offset = pp->slice_offsets_sz,
+ .size = pp->mb_params_sz,
+ };
+ metadata->stage = buf_bar[0].dstStageMask;
+ metadata->access = buf_bar[0].dstAccessMask;
+
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
.pBufferMemoryBarriers = buf_bar,
@@ -267,12 +286,17 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
/* Entropy decode */
ff_vk_shader_update_desc_buffer(&ctx->s, exec, &shaders->vld,
0, 0, 0,
- slice_offsets,
- 0, (pp->slice_num + 1) * sizeof(uint32_t),
+ metadata, 0,
+ pp->slice_offsets_sz,
+ VK_FORMAT_UNDEFINED);
+ ff_vk_shader_update_desc_buffer(&ctx->s, exec, &shaders->vld,
+ 0, 1, 0,
+ metadata, pp->slice_offsets_sz,
+ pp->mb_params_sz,
VK_FORMAT_UNDEFINED);
ff_vk_shader_update_img_array(&ctx->s, exec, &shaders->vld,
pr->frame, vp->view.out,
- 0, 1,
+ 0, 2,
VK_IMAGE_LAYOUT_GENERAL,
VK_NULL_HANDLE);
@@ -286,7 +310,6 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
3 + !!pr->alpha_info);
/* Synchronize vld and idct shaders */
- nb_img_bar = 0;
ff_vk_frame_barrier(&ctx->s, exec, pr->frame, img_bar, &nb_img_bar,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
@@ -294,6 +317,21 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
VK_IMAGE_LAYOUT_GENERAL,
VK_QUEUE_FAMILY_IGNORED);
+ buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
+ .srcStageMask = metadata->stage,
+ .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+ .srcAccessMask = metadata->access,
+ .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .buffer = metadata->buf,
+ .offset = pp->slice_offsets_sz,
+ .size = pp->mb_params_sz,
+ };
+ metadata->stage = buf_bar[0].dstStageMask;
+ metadata->access = buf_bar[0].dstAccessMask;
+
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
.pBufferMemoryBarriers = buf_bar,
@@ -304,9 +342,14 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
nb_img_bar = nb_buf_bar = 0;
/* Inverse transform */
+ ff_vk_shader_update_desc_buffer(&ctx->s, exec, &shaders->idct,
+ 0, 0, 0,
+ metadata, pp->slice_offsets_sz,
+ pp->mb_params_sz,
+ VK_FORMAT_UNDEFINED);
ff_vk_shader_update_img_array(&ctx->s, exec, &shaders->idct,
pr->frame, vp->view.out,
- 0, 0,
+ 0, 1,
VK_IMAGE_LAYOUT_GENERAL,
VK_NULL_HANDLE);
@@ -406,23 +449,23 @@ static void vk_decode_prores_uninit(FFVulkanDecodeShared
*ctx)
ff_vk_shader_free(&ctx->s, &pv->shaders[i].idct);
}
- av_buffer_pool_uninit(&pv->slice_offset_pool);
+ av_buffer_pool_uninit(&pv->metadata_pool);
av_freep(&pv);
}
static int vk_decode_prores_init(AVCodecContext *avctx)
{
- FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
- FFVulkanDecodeShared *ctx = NULL;
+ FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+ FFVulkanDecodeShared *ctx = NULL;
AVHWFramesContext *out_frames_ctx;
ProresVulkanDecodeContext *pv;
FFVkSPIRVCompiler *spv;
FFVulkanDescriptorSetBinding *desc_set;
- int max_num_slices, i, err;
+ int max_num_mbs, i, err;
- max_num_slices = (avctx->coded_width >> 4) * (avctx->coded_height >> 4);
+ max_num_mbs = (avctx->coded_width >> 4) * (avctx->coded_height >> 4);
spv = ff_vk_spirv_init();
if (!spv) {
@@ -471,7 +514,15 @@ static int vk_decode_prores_init(AVCodecContext *avctx)
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
.mem_quali = "readonly",
.buf_content = "uint32_t slice_offsets",
- .buf_elems = max_num_slices + 1,
+ .buf_elems = max_num_mbs + 1,
+ },
+ {
+ .name = "quant_idx_buf",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .mem_quali = "writeonly",
+ .buf_content = "uint8_t quant_idx",
+ .buf_elems = max_num_mbs,
},
{
.name = "dst",
@@ -485,10 +536,18 @@ static int vk_decode_prores_init(AVCodecContext *avctx)
},
};
RET(init_shader(avctx, &ctx->s, &ctx->exec_pool, spv, &shaders->vld,
- "prores_dec_vld", "main", desc_set, 2,
+ "prores_dec_vld", "main", desc_set, 3,
ff_source_prores_vld_comp, 0x080801, i));
desc_set = (FFVulkanDescriptorSetBinding []) {
+ {
+ .name = "quant_idx_buf",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .mem_quali = "readonly",
+ .buf_content = "uint8_t quant_idx",
+ .buf_elems = max_num_mbs,
+ },
{
.name = "dst",
.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
@@ -500,7 +559,7 @@ static int vk_decode_prores_init(AVCodecContext *avctx)
},
};
RET(init_shader(avctx, &ctx->s, &ctx->exec_pool, spv, &shaders->idct,
- "prores_dec_idct", "main", desc_set, 1,
+ "prores_dec_idct", "main", desc_set, 2,
ff_source_prores_idct_comp, 0x200201, i));
}
-----------------------------------------------------------------------
Summary of changes:
libavcodec/vulkan/prores_idct.comp | 13 +++--
libavcodec/vulkan/prores_vld.comp | 34 +++++------
libavcodec/vulkan_prores.c | 115 ++++++++++++++++++++++++++++---------
3 files changed, 114 insertions(+), 48 deletions(-)
hooks/post-receive
--
_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]