[FFmpeg-devel] [PATCH] lavc/rv34dsp: optimise R-V V idct_dc_add

Rémi Denis-Courmont remi at remlab.net
Wed May 22 23:28:54 EEST 2024


This removes one stray LI and reworks the vector arithmetic to avoid
changing the vector configuration. On K230, this takes the cycle
count down from 46.5 to 43.5.
---
 libavcodec/riscv/rv34dsp_rvv.S | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/libavcodec/riscv/rv34dsp_rvv.S b/libavcodec/riscv/rv34dsp_rvv.S
index f1f6345012..e8aff7e570 100644
--- a/libavcodec/riscv/rv34dsp_rvv.S
+++ b/libavcodec/riscv/rv34dsp_rvv.S
@@ -36,16 +36,15 @@ func ff_rv34_idct_dc_add_rvv, zve32x
         vsetivli      zero, 4, e8, mf4, ta, ma
         vlse32.v      v0, (a0), a1
         li            t1, 169
+        li            t2, 128
         mul           t1, t1, a2
-        li            a2, 255
+        vsetivli      zero, 4*4, e8, m1, ta, ma
+        vwsubu.vx     v2, v0, t2
         addi          t1, t1, 512
         srai          t1, t1, 10
-        vsetivli      zero, 4*4, e16, m2, ta, ma
-        vzext.vf2     v2, v0
-        vadd.vx       v2, v2, t1
-        vmax.vx       v2, v2, zero
-        vsetvli       zero, zero, e8, m1, ta, ma
-        vnclipu.wi    v0, v2, 0
+        vwadd.wx      v2, v2, t1
+        vnclip.wi     v0, v2, 0
+        vxor.vx       v0, v0, t2
         vsetivli      zero, 4, e8, mf4, ta, ma
         vsse32.v      v0, (a0), a1
 
-- 
2.45.1



More information about the ffmpeg-devel mailing list