[FFmpeg-devel] [PATCH 4/4] lavc/h264dsp: update R-V V intra luma loop filter

Rémi Denis-Courmont remi at remlab.net
Mon Jul 1 20:08:07 EEST 2024


Note that the performance reported by checkasm is slightly worse.
This is expected since the assembly code is now doing more work
(an extra vector load followed by an indexed load, instead of a single
unit-stride load).
---
 libavcodec/riscv/h264dsp_init.c | 3 ++-
 libavcodec/riscv/h264dsp_rvv.S  | 6 ++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/libavcodec/riscv/h264dsp_init.c b/libavcodec/riscv/h264dsp_init.c
index ab412a9924..9650cae66b 100644
--- a/libavcodec/riscv/h264dsp_init.c
+++ b/libavcodec/riscv/h264dsp_init.c
@@ -30,7 +30,8 @@
 void ff_h264_v_loop_filter_luma_8_rvv(uint8_t *pix, ptrdiff_t stride,
                                       int alpha, int beta, int8_t *tc0);
 void ff_h264_h_loop_filter_luma_8_rvv(uint8_t *pix, ptrdiff_t stride,
-                                      int alpha, int beta, int8_t *tc0);
+                                      int alpha, int beta, const int8_t *tc0,
+                                      const int16_t *bS);
 void ff_h264_h_loop_filter_luma_mbaff_8_rvv(uint8_t *pix, ptrdiff_t stride,
                                             int alpha, int beta, int8_t *tc0);
 
diff --git a/libavcodec/riscv/h264dsp_rvv.S b/libavcodec/riscv/h264dsp_rvv.S
index 96a8a0a8a3..6bc5406ba3 100644
--- a/libavcodec/riscv/h264dsp_rvv.S
+++ b/libavcodec/riscv/h264dsp_rvv.S
@@ -126,9 +126,11 @@ func ff_h264_v_loop_filter_luma_8_rvv, zve32x
 endfunc
 
 func ff_h264_h_loop_filter_luma_8_rvv, zve32x
-        vsetivli    zero, 4, e32, m1, ta, ma
-        vle8.v      v4, (a4)
+        vsetivli    zero, 4, e8, mf4, ta, ma
+        vle16.v     v8, (a5)
         li          t0, 0x01010101
+        vluxei16.v  v4, (a4), v8
+        vsetivli    zero, 4, e32, m1, ta, ma
         vzext.vf4   v6, v4
         addi        a0, a0, -3
         vmul.vx     v6, v6, t0
-- 
2.45.2



More information about the ffmpeg-devel mailing list