[FFmpeg-devel] [PATCH 14/18] vulkan_ffv1: externalize extended lookup check

Lynne dev at lynne.ee
Sat Apr 12 10:22:45 EEST 2025


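Compute the extended-lookup flag for each quant table on the host in
vk_ffv1_end_frame() and pass it to the shader as push data, instead of
testing the quant tables inside get_pred() in the shader.

8% speedup on NVIDIA at 4K.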
---
 libavcodec/vulkan/ffv1_dec.comp | 3 +--
 libavcodec/vulkan_ffv1.c        | 6 ++++++
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/libavcodec/vulkan/ffv1_dec.comp b/libavcodec/vulkan/ffv1_dec.comp
index a6272d4832..4cc3b9987f 100644
--- a/libavcodec/vulkan/ffv1_dec.comp
+++ b/libavcodec/vulkan/ffv1_dec.comp
@@ -47,8 +47,7 @@ ivec2 get_pred(ivec2 sp, ivec2 off, int p, int sw, uint8_t quant_table_idx)
                quant_table[quant_table_idx][1][(top[0] - top[1]) & MAX_QUANT_TABLE_MASK] +
                quant_table[quant_table_idx][2][(top[1] - top[2]) & MAX_QUANT_TABLE_MASK];
 
-    if ((quant_table[quant_table_idx][3][127] != 0) ||
-        (quant_table[quant_table_idx][4][127] != 0)) {
+    if (extend_lookup[quant_table_idx] > 0) {
         TYPE cur2 = TYPE(0);
         if (off.x > 0) {
             const ivec2 yoff_border2 = off.x == 1 ? ivec2(-1, -1) : ivec2(-2, 0);
diff --git a/libavcodec/vulkan_ffv1.c b/libavcodec/vulkan_ffv1.c
index aaebcd53b5..72cacb1678 100644
--- a/libavcodec/vulkan_ffv1.c
+++ b/libavcodec/vulkan_ffv1.c
@@ -88,6 +88,7 @@ typedef struct FFv1VkParameters {
     uint32_t crcref;
     int rct_offset;
 
+    uint8_t extend_lookup[8];
     uint8_t bits_per_raw_sample;
     uint8_t quant_table_count;
     uint8_t version;
@@ -120,6 +121,7 @@ static void add_push_data(FFVulkanShader *shd)
     GLSLC(1,    uint32_t crcref;                                    );
     GLSLC(1,    int rct_offset;                                     );
     GLSLC(0,                                                        );
+    GLSLC(1,    uint8_t extend_lookup[8];                           );
     GLSLC(1,    uint8_t bits_per_raw_sample;                        );
     GLSLC(1,    uint8_t quant_table_count;                          );
     GLSLC(1,    uint8_t version;                                    );
@@ -456,6 +458,10 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
         .golomb = f->ac == AC_GOLOMB_RICE,
         .check_crc = !!(avctx->err_recognition & AV_EF_CRCCHECK),
     };
+    for (int i = 0; i < f->quant_table_count; i++)
+        pd.extend_lookup[i] = (f->quant_tables[i][3][127] != 0) ||
+                              (f->quant_tables[i][4][127] != 0);
+
 
     /* For some reason the C FFv1 encoder/decoder treats these differently */
     if (sw_format == AV_PIX_FMT_GBRP10 || sw_format == AV_PIX_FMT_GBRP12 ||
-- 
2.47.2


More information about the ffmpeg-devel mailing list