[FFmpeg-devel] [PATCH 2/2] riscv: allow passing addend to vtype_vli macro
Rémi Denis-Courmont
remi at remlab.net
Mon May 27 18:59:46 EEST 2024
A constant (-1) is added to the length value, so we can have an addend
for free, and optimise the addition away if the addend is exactly 1.
---
libavcodec/riscv/lpc_rvv.S | 2 +-
libavutil/riscv/asm.S | 9 ++++++---
2 files changed, 7 insertions(+), 4 deletions(-)
diff --git a/libavcodec/riscv/lpc_rvv.S b/libavcodec/riscv/lpc_rvv.S
index 8cf79963f1..fe80305d9a 100644
--- a/libavcodec/riscv/lpc_rvv.S
+++ b/libavcodec/riscv/lpc_rvv.S
@@ -87,8 +87,8 @@ func ff_lpc_apply_welch_window_rvv, zve64d
endfunc
func ff_lpc_compute_autocorr_rvv, zve64d, zbb
+ vtype_vli t1, a2, t2, e64, ta, ma, 1
addi a2, a2, 1
- vtype_vli t1, a2, t2, e64, ta, ma
li t0, 1
vsetvl zero, a2, t1
fcvt.d.l ft0, t0
diff --git a/libavutil/riscv/asm.S b/libavutil/riscv/asm.S
index 1e6358dcb5..2cf4f7b7ab 100644
--- a/libavutil/riscv/asm.S
+++ b/libavutil/riscv/asm.S
@@ -196,18 +196,21 @@
* @param ew element width: e8, e16, e32 or e64
* @param tp tail policy: tu or ta
* @param mp mask policty: mu or ma
+ * @param addend optional addend for the vector length register
*/
- .macro vtype_vli rd, rs, tmp, ew, tp=tu, mp=mu
+ .macro vtype_vli rd, rs, tmp, ew, tp=tu, mp=mu, addend=0
parse_vtype \ew, \tp, \mp
/*
* The difference between the CLZ's notionally equals the VLMUL value
* for 4-bit elements. But we want the value for SEW_MAX-bit elements.
*/
slli \tmp, \rs, 1 + VSEW_MAX
+ .if \addend - 1
+ addi \tmp, \tmp, \addend - 1
+ .endif
csrr \rd, vlenb
- addi \tmp, \tmp, -1
- clz \rd, \rd
clz \tmp, \tmp
+ clz \rd, \rd
sub \rd, \rd, \tmp
max \rd, \rd, zero // VLMUL must be >= VSEW - VSEW_MAX
.if vsew < VSEW_MAX
--
2.45.1
More information about the ffmpeg-devel
mailing list