This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit 9872cb8c0e908db9dd2d834799e4e0de23f75bc0 Author: Lynne <[email protected]> AuthorDate: Fri Jun 12 14:30:26 2026 +0900 Commit: Lynne <[email protected]> CommitDate: Thu Jul 2 16:45:08 2026 +0900 avcodec/vulkan_apv: decode VLC codes branchlessly from a single window apv_read_vlc() dispatched on the three APV code layouts with two data-dependent branches and, for the long form, a second show/skip. The longest legal code plus its optional sign bit fits in a single 32-bit window, so compute all three candidate (value, length) pairs unconditionally and select with no divergent branches: one show_bits(32) and one skip_bits per code. apv_read_vlc_sign() additionally folds the trailing sign bit into the same window, removing the separate get_bit() refill that DC and AC levels did. Clamps on the long-form length guard corrupt streams without affecting legal decodes. --- libavcodec/vulkan/apv_decode.comp.glsl | 106 +++++++++++++++++++++++---------- 1 file changed, 76 insertions(+), 30 deletions(-) diff --git a/libavcodec/vulkan/apv_decode.comp.glsl b/libavcodec/vulkan/apv_decode.comp.glsl index af54ae2f29..fba896f696 100644 --- a/libavcodec/vulkan/apv_decode.comp.glsl +++ b/libavcodec/vulkan/apv_decode.comp.glsl @@ -56,34 +56,84 @@ layout (push_constant, scalar) uniform pushConstants { GetBitContext gb; +/* + * Decode one VLC code from a single 32-bit window, branchlessly. The longest + * legal APV code is 18 bits (3 + 2*5 + 5), so one show + one skip covers every + * case. 
The three layouts: + * 1xxx len 1+k, val = k bits after the 1 + * 00xxx len 2+k, val = (1<<k) + k bits + * 01 0^n 1 xxx len 3+2n+k, val = (((1<<n)+1)<<k) + (n+k bits) + */ int apv_read_vlc(int k) { - /* Top 32 bits, longest valid APV code is 1 + 2*5 + 5 = 16 bits */ + /* Top 32 bits; the longest legal code is 31 bits */ uint bits = show_bits(gb, 32); uint mask = (1u << k) - 1u; - /* 1xxx: short, length 1+k, value = next k bits */ - if (bits >= 0x80000000u) { - skip_bits(gb, 1 + k); - return int((bits >> (31 - k)) & mask); - } + bool caseA = bits >= 0x80000000u; + bool caseB = bits < 0x40000000u; - /* 00xxx: short, length 2+k, value = (1<<k) + next k bits */ - if (bits < 0x40000000u) { - skip_bits(gb, 2 + k); - return int((bits >> (30 - k)) & mask) + (1 << k); - } + int valA = int((bits >> (31 - k)) & mask); + int valB = int((bits >> (30 - k)) & mask) + (1 << k); - /* 01 prefix + (n leading zeros) + 1 + (n+k value bits), - * after shifting out the 01 prefix, findMSB tells us n */ + /* clamp guards corrupt streams: legal codes have n <= 14 and + * 3 + 2n + k <= 31, so legal decodes are unaffected */ uint suffix = bits << 2; + int n = clamp(31 - findMSB(suffix), 0, 14); + int valC = (((1 << n) + 1) << k) + + int((bits >> max(29 - 2 * n - k, 0)) & ((1u << (n + k)) - 1u)); if (suffix == 0u) - return APV_MAX_TRANS_COEFF + 1; + valC = APV_MAX_TRANS_COEFF + 1; + + int lenA = 1 + k; + int lenB = 2 + k; + int lenC = min(3 + 2 * n + k, 32); + + int val = caseA ? valA : (caseB ? valB : valC); + int len = caseA ? lenA : (caseB ? lenB : lenC); + + skip_bits(gb, len); + return val; +} + +/* + * As above, with the trailing sign bit folded into the same window. The caller + * says whether a sign bit is present (DC: when val != 0; AC levels: always); + * sign is only valid when it is. 
+ */ +int apv_read_vlc_sign(int k, bool sign_always, out bool sign) +{ + /* Top 32 bits; the longest legal code is 31 bits, +1 for the sign */ + uint bits = show_bits(gb, 32); + uint mask = (1u << k) - 1u; + + bool caseA = bits >= 0x80000000u; + bool caseB = bits < 0x40000000u; - int n = 31 - findMSB(suffix); - skip_bits(gb, 3 + n); - /* (2<<k) + ((1<<n)-1) * (1<<k) is equal to ((1<<n) + 1) << k */ - return (((1 << n) + 1) << k) + int(get_bits(gb, n + k)); + int valA = int((bits >> (31 - k)) & mask); + int valB = int((bits >> (30 - k)) & mask) + (1 << k); + + uint suffix = bits << 2; + int n = clamp(31 - findMSB(suffix), 0, 14); + int valC = (((1 << n) + 1) << k) + + int((bits >> max(29 - 2 * n - k, 0)) & ((1u << (n + k)) - 1u)); + bool badC = suffix == 0u; + + int lenA = 1 + k; + int lenB = 2 + k; + int lenC = min(3 + 2 * n + k, 31); + + int val = caseA ? valA : (caseB ? valB : valC); + int len = caseA ? lenA : (caseB ? lenB : lenC); + + bool has_sign = sign_always || val != 0; + sign = has_sign && bool((bits >> (31 - len)) & 1u); + len += has_sign ? 1 : 0; + + skip_bits(gb, len); + if (!caseA && !caseB && badC) + val = APV_MAX_TRANS_COEFF + 1; + return val; } /* ff_zigzag_direct, packed: each byte is the raster index (y*8 + x). */ @@ -113,16 +163,11 @@ int prev_1st_ac_level; void decode_block(uint cbase, int cstride, ivec2 pos, uint comp) { int dc_coeff; - int abs_diff = apv_read_vlc(prev_k_dc); - - if (abs_diff != 0) { - if (get_bit(gb)) - dc_coeff = prev_dc - abs_diff; - else - dc_coeff = prev_dc + abs_diff; - } else { - dc_coeff = prev_dc; - } + bool dc_sign; + + int abs_diff = apv_read_vlc_sign(prev_k_dc, false, dc_sign); + + dc_coeff = prev_dc + (dc_sign ? 
-abs_diff : abs_diff); if (dc_coeff < APV_MIN_TRANS_COEFF || dc_coeff > APV_MAX_TRANS_COEFF) @@ -154,10 +199,11 @@ void decode_block(uint cbase, int cstride, ivec2 pos, uint comp) if (scan_pos < APV_BLK_COEFFS) { int abs_ac_coeff_minus1; int level; + bool sign_ac_coeff; k_param = clamp(prev_level >> 2, 0, 4); - abs_ac_coeff_minus1 = apv_read_vlc(k_param); - bool sign_ac_coeff = get_bit(gb); + abs_ac_coeff_minus1 = apv_read_vlc_sign(k_param, true, + sign_ac_coeff); if (sign_ac_coeff) level = -abs_ac_coeff_minus1 - 1; _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
