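Precompute on the host, once per frame, whether each quantization table
uses the extended context, and pass the result to the decode shader as
push data instead of testing the quant table entries in the shader.

This gives an 8% speedup on NVIDIA when decoding 4K content.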
---
 libavcodec/vulkan/ffv1_dec.comp | 3 +--
 libavcodec/vulkan_ffv1.c        | 6 ++++++
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/libavcodec/vulkan/ffv1_dec.comp b/libavcodec/vulkan/ffv1_dec.comp
index a6272d4832..4cc3b9987f 100644
--- a/libavcodec/vulkan/ffv1_dec.comp
+++ b/libavcodec/vulkan/ffv1_dec.comp
@@ -47,8 +47,7 @@ ivec2 get_pred(ivec2 sp, ivec2 off, int p, int sw, uint8_t quant_table_idx)
                quant_table[quant_table_idx][1][(top[0] - top[1]) & MAX_QUANT_TABLE_MASK] +
                quant_table[quant_table_idx][2][(top[1] - top[2]) & MAX_QUANT_TABLE_MASK];
 
-    if ((quant_table[quant_table_idx][3][127] != 0) ||
-        (quant_table[quant_table_idx][4][127] != 0)) {
+    if (extend_lookup[quant_table_idx] > 0) {
         TYPE cur2 = TYPE(0);
         if (off.x > 0) {
             const ivec2 yoff_border2 = off.x == 1 ? ivec2(-1, -1) : ivec2(-2, 0);
diff --git a/libavcodec/vulkan_ffv1.c b/libavcodec/vulkan_ffv1.c
index aaebcd53b5..72cacb1678 100644
--- a/libavcodec/vulkan_ffv1.c
+++ b/libavcodec/vulkan_ffv1.c
@@ -88,6 +88,7 @@ typedef struct FFv1VkParameters {
     uint32_t crcref;
     int rct_offset;
 
+    uint8_t extend_lookup[8];
     uint8_t bits_per_raw_sample;
     uint8_t quant_table_count;
     uint8_t version;
@@ -120,6 +121,7 @@ static void add_push_data(FFVulkanShader *shd)
     GLSLC(1,    uint32_t crcref;                                    );
     GLSLC(1,    int rct_offset;                                     );
     GLSLC(0,                                                        );
+    GLSLC(1,    uint8_t extend_lookup[8];                           );
     GLSLC(1,    uint8_t bits_per_raw_sample;                        );
     GLSLC(1,    uint8_t quant_table_count;                          );
     GLSLC(1,    uint8_t version;                                    );
@@ -456,6 +458,10 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
         .golomb = f->ac == AC_GOLOMB_RICE,
         .check_crc = !!(avctx->err_recognition & AV_EF_CRCCHECK),
     };
+    for (int i = 0; i < f->quant_table_count; i++)
+        pd.extend_lookup[i] = (f->quant_tables[i][3][127] != 0) ||
+                              (f->quant_tables[i][4][127] != 0);
+
 
     /* For some reason the C FFv1 encoder/decoder treats these differently */
     if (sw_format == AV_PIX_FMT_GBRP10 || sw_format == AV_PIX_FMT_GBRP12 ||
-- 
2.47.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

Reply via email to