[FFmpeg-cvslog] vulkan_ffv1: externalize extended lookup check
Lynne
git at videolan.org
Mon Apr 14 07:16:27 EEST 2025
ffmpeg | branch: master | Lynne <dev at lynne.ee> | Sun Apr 6 07:45:44 2025 +0000| [8ceabb677c2d7796c36ec5318be8feb5847ffd1a] | committer: Lynne
vulkan_ffv1: externalize extended lookup check
8% speedup on NVIDIA at 4K.
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=8ceabb677c2d7796c36ec5318be8feb5847ffd1a
---
libavcodec/vulkan/ffv1_dec.comp | 3 +--
libavcodec/vulkan_ffv1.c | 6 ++++++
2 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/libavcodec/vulkan/ffv1_dec.comp b/libavcodec/vulkan/ffv1_dec.comp
index a6272d4832..4cc3b9987f 100644
--- a/libavcodec/vulkan/ffv1_dec.comp
+++ b/libavcodec/vulkan/ffv1_dec.comp
@@ -47,8 +47,7 @@ ivec2 get_pred(ivec2 sp, ivec2 off, int p, int sw, uint8_t quant_table_idx)
quant_table[quant_table_idx][1][(top[0] - top[1]) & MAX_QUANT_TABLE_MASK] +
quant_table[quant_table_idx][2][(top[1] - top[2]) & MAX_QUANT_TABLE_MASK];
- if ((quant_table[quant_table_idx][3][127] != 0) ||
- (quant_table[quant_table_idx][4][127] != 0)) {
+ if (extend_lookup[quant_table_idx] > 0) {
TYPE cur2 = TYPE(0);
if (off.x > 0) {
const ivec2 yoff_border2 = off.x == 1 ? ivec2(-1, -1) : ivec2(-2, 0);
diff --git a/libavcodec/vulkan_ffv1.c b/libavcodec/vulkan_ffv1.c
index aaebcd53b5..72cacb1678 100644
--- a/libavcodec/vulkan_ffv1.c
+++ b/libavcodec/vulkan_ffv1.c
@@ -88,6 +88,7 @@ typedef struct FFv1VkParameters {
uint32_t crcref;
int rct_offset;
+ uint8_t extend_lookup[8];
uint8_t bits_per_raw_sample;
uint8_t quant_table_count;
uint8_t version;
@@ -120,6 +121,7 @@ static void add_push_data(FFVulkanShader *shd)
GLSLC(1, uint32_t crcref; );
GLSLC(1, int rct_offset; );
GLSLC(0, );
+ GLSLC(1, uint8_t extend_lookup[8]; );
GLSLC(1, uint8_t bits_per_raw_sample; );
GLSLC(1, uint8_t quant_table_count; );
GLSLC(1, uint8_t version; );
@@ -456,6 +458,10 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
.golomb = f->ac == AC_GOLOMB_RICE,
.check_crc = !!(avctx->err_recognition & AV_EF_CRCCHECK),
};
+ for (int i = 0; i < f->quant_table_count; i++)
+ pd.extend_lookup[i] = (f->quant_tables[i][3][127] != 0) ||
+ (f->quant_tables[i][4][127] != 0);
+
/* For some reason the C FFv1 encoder/decoder treats these differently */
if (sw_format == AV_PIX_FMT_GBRP10 || sw_format == AV_PIX_FMT_GBRP12 ||
More information about the ffmpeg-cvslog mailing list