[FFmpeg-devel] [PATCH 3/3] lavc/flacdsp: optimise RVV vector type for lpc32
Rémi Denis-Courmont
remi at remlab.net
Wed May 15 23:16:19 EEST 2024
This is pretty much the same as for lpc16, though the improvement only
covers prediction orders half as large. With 128-bit vectors, this gives:
order      C   V old   V new
    1   69.2   181.5    95.5
    2  107.7   180.7    95.2
    3  145.5   180.0   103.5
    4  183.0   179.2   102.7
    5  220.7   178.5   128.0
    6  257.7   194.0   127.5
    7  294.5   193.7   126.7
    8  331.0   193.0   126.5
Larger prediction orders see no significant changes at that size.
The code is pretty ugly, so clean-up suggestions are most welcome.
---
libavcodec/riscv/flacdsp_init.c | 15 ++++++++-------
libavcodec/riscv/flacdsp_rvv.S | 25 ++++++++++++++++++++-----
2 files changed, 28 insertions(+), 12 deletions(-)
diff --git a/libavcodec/riscv/flacdsp_init.c b/libavcodec/riscv/flacdsp_init.c
index 735aec0691..830ae36534 100644
--- a/libavcodec/riscv/flacdsp_init.c
+++ b/libavcodec/riscv/flacdsp_init.c
@@ -71,17 +71,18 @@ av_cold void ff_flacdsp_init_riscv(FLACDSPContext *c, enum AVSampleFormat fmt,
if ((flags & AV_CPU_FLAG_RVV_I32) && (flags & AV_CPU_FLAG_RVB_ADDR)) {
int vlenb = ff_get_rv_vlenb();
- if ((flags & AV_CPU_FLAG_RVB_BASIC) && vlenb >= 16)
+ if ((flags & AV_CPU_FLAG_RVB_BASIC) && vlenb >= 16) {
c->lpc16 = ff_flac_lpc16_rvv;
# if (__riscv_xlen >= 64)
- if (flags & AV_CPU_FLAG_RVV_I64) {
- if (vlenb > 16)
- c->lpc32 = ff_flac_lpc32_rvv_simple;
- else
- c->lpc32 = ff_flac_lpc32_rvv;
- }
+ if (flags & AV_CPU_FLAG_RVV_I64) {
+ if (vlenb > 16)
+ c->lpc32 = ff_flac_lpc32_rvv_simple;
+ else
+ c->lpc32 = ff_flac_lpc32_rvv;
+ }
# endif
+ }
c->wasted32 = ff_flac_wasted32_rvv;
diff --git a/libavcodec/riscv/flacdsp_rvv.S b/libavcodec/riscv/flacdsp_rvv.S
index 7d83909335..b292c15c8c 100644
--- a/libavcodec/riscv/flacdsp_rvv.S
+++ b/libavcodec/riscv/flacdsp_rvv.S
@@ -20,6 +20,12 @@
#include "libavutil/riscv/asm.S"
+ .macro vnarrow rd, rs
+ xori \rd, \rs, 4
+ addi \rd, \rd, -9
+ xori \rd, \rd, 4
+ .endm
+
func ff_flac_lpc16_rvv, zve32x, zbb
csrr t0, vlenb
addi t2, a2, -1
@@ -83,22 +89,31 @@ func ff_flac_lpc32_rvv, zve64x
ret
endfunc
-func ff_flac_lpc32_rvv_simple, zve64x
- vsetivli zero, 1, e64, m1, ta, ma
+func ff_flac_lpc32_rvv_simple, zve64x, zbb
+ csrr t0, vlenb
+ addi t2, a2, -1
+ clz t0, t0
+ clz t2, t2
+ addi t0, t0, (VTYPE_E64 | VTYPE_M8 | VTYPE_TA | VTYPE_MA) + 1
+ li t1, VTYPE_E64 | VTYPE_M1 | VTYPE_TA | VTYPE_MA
+ sub t0, t0, t2 // t0 += log2(next_power_of_two(len) / vlenb) - 1
+ max t3, t0, t1
+ vnarrow t2, t3
+ vsetvl zero, a2, t3 // e64
vmv.s.x v0, zero
- vsetvli zero, a2, e32, m4, ta, ma
+ vsetvl zero, zero, t2 // e32
vle32.v v8, (a1)
sub a4, a4, a2
vle32.v v16, (a0)
sh2add a0, a2, a0
1:
vwmul.vv v24, v8, v16
- vsetvli zero, zero, e64, m8, ta, ma
+ vsetvl zero, zero, t3 // e64
vredsum.vs v24, v24, v0
lw t0, (a0)
addi a4, a4, -1
vmv.x.s t1, v24
- vsetvli zero, zero, e32, m4, ta, ma
+ vsetvl zero, zero, t2 // e32
sra t1, t1, a3
add t0, t0, t1
vslide1down.vx v16, v16, t0
--
2.43.0
More information about the ffmpeg-devel
mailing list