--- libavcodec/ffv1enc_vulkan.c | 21 --------- libavcodec/vulkan/ffv1_enc_setup.comp | 65 +++++++++++---------------- libavcodec/vulkan/rangecoder.comp | 28 +++++++----- 3 files changed, 42 insertions(+), 72 deletions(-)
diff --git a/libavcodec/ffv1enc_vulkan.c b/libavcodec/ffv1enc_vulkan.c index f4b54b8375..d78ba3aca8 100644 --- a/libavcodec/ffv1enc_vulkan.c +++ b/libavcodec/ffv1enc_vulkan.c @@ -88,9 +88,6 @@ typedef struct VulkanEncodeFFv1Context { AVBufferPool *out_data_pool; AVBufferPool *pkt_data_pool; - /* Temporary data buffer */ - AVBufferPool *tmp_data_pool; - /* Slice results buffer */ AVBufferPool *results_data_pool; @@ -303,11 +300,6 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx, AVFrame *intermediate_frame = NULL; - /* Temporary data */ - size_t tmp_data_size; - AVBufferRef *tmp_data_ref; - FFVkBuffer *tmp_data_buf; - /* Slice data */ AVBufferRef *slice_data_ref; FFVkBuffer *slice_data_buf; @@ -352,17 +344,6 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx, f->slice_count = f->max_slice_count; - /* Allocate temporary data buffer */ - tmp_data_size = f->slice_count*CONTEXT_SIZE; - RET(ff_vk_get_pooled_buffer(&fv->s, &fv->tmp_data_pool, - &tmp_data_ref, - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | - VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, - NULL, tmp_data_size, - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)); - tmp_data_buf = (FFVkBuffer *)tmp_data_ref->data; - ff_vk_exec_add_dep_buf(&fv->s, exec, &tmp_data_ref, 1, 0); - /* Allocate slice buffer data */ if (f->ac == AC_GOLOMB_RICE) plane_state_size = 8; @@ -481,7 +462,6 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx, ff_vk_exec_bind_shader(&fv->s, exec, &fv->setup); pd = (FFv1VkParameters) { .slice_state = slice_data_buf->address + f->slice_count*256, - .scratch_data = tmp_data_buf->address, .out_data = out_data_buf->address, .bits_per_raw_sample = f->bits_per_raw_sample, .sar[0] = pict->sample_aspect_ratio.num, @@ -1698,7 +1678,6 @@ static av_cold int vulkan_encode_ffv1_close(AVCodecContext *avctx) av_buffer_pool_uninit(&fv->out_data_pool); av_buffer_pool_uninit(&fv->pkt_data_pool); - av_buffer_pool_uninit(&fv->tmp_data_pool); 
av_buffer_unref(&fv->keyframe_slice_data_ref); av_buffer_pool_uninit(&fv->slice_data_pool); diff --git a/libavcodec/vulkan/ffv1_enc_setup.comp b/libavcodec/vulkan/ffv1_enc_setup.comp index 44c13404d8..d395770ba8 100644 --- a/libavcodec/vulkan/ffv1_enc_setup.comp +++ b/libavcodec/vulkan/ffv1_enc_setup.comp @@ -20,6 +20,8 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +uint8_t state[CONTEXT_SIZE]; + void init_slice(out SliceContext sc, const uint slice_idx) { /* Set coordinates */ @@ -45,67 +47,54 @@ void init_slice(out SliceContext sc, const uint slice_idx) slice_size_max); } -void put_rac_full(inout RangeCoder c, uint64_t state, bool bit) -{ - put_rac_norenorm(c, state, bit); - if (c.range < 0x100) - renorm_encoder_full(c); -} - -void put_symbol_unsigned(inout RangeCoder c, uint64_t state, uint v) +void put_usymbol(inout RangeCoder c, uint v) { bool is_nil = (v == 0); - put_rac_full(c, state, is_nil); + put_rac(c, state[0], is_nil); if (is_nil) return; const int e = findMSB(v); - state += 1; for (int i = 0; i < e; i++) - put_rac_full(c, state + min(i, 9), true); - put_rac_full(c, state + min(e, 9), false); + put_rac(c, state[1 + min(i, 9)], true); + put_rac(c, state[1 + min(e, 9)], false); - state += 21; for (int i = e - 1; i >= 0; i--) - put_rac_full(c, state + min(i, 9), bool(bitfieldExtract(v, i, 1))); + put_rac(c, state[22 + min(i, 9)], bool(bitfieldExtract(v, i, 1))); } -void write_slice_header(inout SliceContext sc, uint64_t state) +void write_slice_header(inout SliceContext sc) { - u8buf sb = u8buf(state); - [[unroll]] for (int i = 0; i < CONTEXT_SIZE; i++) - sb[i].v = uint8_t(128); + state[i] = uint8_t(128); - put_symbol_unsigned(sc.c, state, gl_WorkGroupID.x); - put_symbol_unsigned(sc.c, state, gl_WorkGroupID.y); - put_symbol_unsigned(sc.c, state, 0); - put_symbol_unsigned(sc.c, state, 0); + put_usymbol(sc.c, gl_WorkGroupID.x); + put_usymbol(sc.c, gl_WorkGroupID.y); + put_usymbol(sc.c, 0); + put_usymbol(sc.c, 0); for 
(int i = 0; i < codec_planes; i++) - put_symbol_unsigned(sc.c, state, sc.quant_table_idx[i]); + put_usymbol(sc.c, sc.quant_table_idx[i]); - put_symbol_unsigned(sc.c, state, pic_mode); - put_symbol_unsigned(sc.c, state, sar.x); - put_symbol_unsigned(sc.c, state, sar.y); + put_usymbol(sc.c, pic_mode); + put_usymbol(sc.c, sar.x); + put_usymbol(sc.c, sar.y); if (version >= 4) { - put_rac_full(sc.c, state, sc.slice_reset_contexts); - put_symbol_unsigned(sc.c, state, sc.slice_coding_mode); + put_rac(sc.c, state[0], sc.slice_reset_contexts); + put_usymbol(sc.c, sc.slice_coding_mode); if (sc.slice_coding_mode != 1 && colorspace == 1) { - put_symbol_unsigned(sc.c, state, sc.slice_rct_coef.y); - put_symbol_unsigned(sc.c, state, sc.slice_rct_coef.x); + put_usymbol(sc.c, sc.slice_rct_coef.y); + put_usymbol(sc.c, sc.slice_rct_coef.x); } } } -void write_frame_header(inout SliceContext sc, uint64_t state) +void write_frame_header(inout SliceContext sc) { - u8buf sb = u8buf(state); - sb.v = uint8_t(128); - put_rac_full(sc.c, state, bool(key_frame)); + put_rac_equi(sc.c, bool(key_frame)); } #ifdef GOLOMB @@ -122,16 +111,12 @@ void main(void) { const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x; - /* Write slice data */ - uint64_t scratch_state = uint64_t(scratch_data) + slice_idx*CONTEXT_SIZE; - u8buf sb = u8buf(scratch_state); - init_slice(slice_ctx[slice_idx], slice_idx); if (slice_idx == 0) - write_frame_header(slice_ctx[slice_idx], scratch_state); + write_frame_header(slice_ctx[slice_idx]); - write_slice_header(slice_ctx[slice_idx], scratch_state); + write_slice_header(slice_ctx[slice_idx]); #ifdef GOLOMB init_golomb(slice_ctx[slice_idx]); diff --git a/libavcodec/vulkan/rangecoder.comp b/libavcodec/vulkan/rangecoder.comp index 256b5f0e79..1db42e1dc9 100644 --- a/libavcodec/vulkan/rangecoder.comp +++ b/libavcodec/vulkan/rangecoder.comp @@ -91,15 +91,13 @@ void renorm_encoder(inout RangeCoder c) bs[i].v = fill; } -void put_rac_norenorm(inout RangeCoder 
c, uint64_t state, bool bit) +void put_rac_direct(inout RangeCoder c, uint8_t state, bool bit) { - u8buf sb = u8buf(state); - uint val = uint(sb.v); - int range1 = uint16_t((c.range * val) >> 8); + int range1 = uint16_t((c.range * state) >> 8); #ifdef DEBUG - if (val == 0) - debugPrintfEXT("Error: state is zero (addr: 0x%lx)", uint64_t(sb)); + if (state == 0) + debugPrintfEXT("Error: state is zero"); if (range1 >= c.range) debugPrintfEXT("Error: range1 >= c.range"); if (range1 <= 0) @@ -113,13 +111,21 @@ void put_rac_norenorm(inout RangeCoder c, uint64_t state, bool bit) } else { c.range = diff; } +} - sb.v = zero_one_state[(uint(bit) << 8) + val]; +void put_rac_norenorm(inout RangeCoder c, uint64_t state, bool bit) +{ + put_rac_direct(c, u8buf(state).v, bit); -#ifdef DEBUG - if (sb.v == 0) - debugPrintfEXT("Error: inserted zero state from tab %i idx %i", bit, val); -#endif + u8buf(state).v = zero_one_state[(uint(bit) << 8) + u8buf(state).v]; +} + +void put_rac(inout RangeCoder c, inout uint8_t state, bool bit) +{ + put_rac_direct(c, state, bit); + if (c.range < 0x100) + renorm_encoder_full(c); + state = zero_one_state[(uint(bit) << 8) + state]; } /* Equiprobable bit */ -- 2.49.0.395.g12beb8f557c _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".