[FFmpeg-devel] [PATCH 4/4] lavc/h264dsp: update R-V V intra luma loop filter
Rémi Denis-Courmont
remi at remlab.net
Mon Jul 1 20:08:07 EEST 2024
Note that the performance reported by checkasm is slightly worse.
This is expected, since the assembly code is now doing more work.
---
libavcodec/riscv/h264dsp_init.c | 3 ++-
libavcodec/riscv/h264dsp_rvv.S | 6 ++++--
2 files changed, 6 insertions(+), 3 deletions(-)
diff --git a/libavcodec/riscv/h264dsp_init.c b/libavcodec/riscv/h264dsp_init.c
index ab412a9924..9650cae66b 100644
--- a/libavcodec/riscv/h264dsp_init.c
+++ b/libavcodec/riscv/h264dsp_init.c
@@ -30,7 +30,8 @@
void ff_h264_v_loop_filter_luma_8_rvv(uint8_t *pix, ptrdiff_t stride,
int alpha, int beta, int8_t *tc0);
void ff_h264_h_loop_filter_luma_8_rvv(uint8_t *pix, ptrdiff_t stride,
- int alpha, int beta, int8_t *tc0);
+ int alpha, int beta, const int8_t *tc0,
+ const int16_t *bS);
void ff_h264_h_loop_filter_luma_mbaff_8_rvv(uint8_t *pix, ptrdiff_t stride,
int alpha, int beta, int8_t *tc0);
diff --git a/libavcodec/riscv/h264dsp_rvv.S b/libavcodec/riscv/h264dsp_rvv.S
index 96a8a0a8a3..6bc5406ba3 100644
--- a/libavcodec/riscv/h264dsp_rvv.S
+++ b/libavcodec/riscv/h264dsp_rvv.S
@@ -126,9 +126,11 @@ func ff_h264_v_loop_filter_luma_8_rvv, zve32x
endfunc
func ff_h264_h_loop_filter_luma_8_rvv, zve32x
- vsetivli zero, 4, e32, m1, ta, ma
- vle8.v v4, (a4)
+ vsetivli zero, 4, e8, mf4, ta, ma
+ vle16.v v8, (a5)
li t0, 0x01010101
+ vluxei16.v v4, (a4), v8
+ vsetivli zero, 4, e32, m1, ta, ma
vzext.vf4 v6, v4
addi a0, a0, -3
vmul.vx v6, v6, t0
--
2.45.2
More information about the ffmpeg-devel mailing list