[FFmpeg-devel] [PATCH] lavc/opusdsp: R-V V deemphasis function
Rémi Denis-Courmont
remi at remlab.net
Sat Nov 11 20:18:21 EET 2023
Considering the marginality of the measured performance gains (3-4%),
I suppose that we should not merge this. Furthermore those measurements
are not expected to improve with large vector sizes, since the code
uses only 32 bits per vector no matter what.
deemphasis_c: 7703.2
deemphasis_rvv_f32: 7452.0
---
libavcodec/riscv/opusdsp_init.c | 10 +++++---
libavcodec/riscv/opusdsp_rvv.S | 43 +++++++++++++++++++++++++++++++++
2 files changed, 50 insertions(+), 3 deletions(-)
diff --git a/libavcodec/riscv/opusdsp_init.c b/libavcodec/riscv/opusdsp_init.c
index 88d8e77f0e..8d363aaf37 100644
--- a/libavcodec/riscv/opusdsp_init.c
+++ b/libavcodec/riscv/opusdsp_init.c
@@ -26,14 +26,18 @@
#include "libavcodec/opusdsp.h"
void ff_opus_postfilter_rvv(float *data, int period, float *g, int len);
+float ff_opus_deemphasis_rvv(float *y, float *x, float coeff, int len);
av_cold void ff_opus_dsp_init_riscv(OpusDSP *d)
{
#if HAVE_RVV
int flags = av_get_cpu_flags();
- if ((flags & AV_CPU_FLAG_RVV_F32) && (flags & AV_CPU_FLAG_RVB_ADDR) &&
- (flags & AV_CPU_FLAG_RVB_BASIC))
- d->postfilter = ff_opus_postfilter_rvv;
+ if (flags & AV_CPU_FLAG_RVV_F32) { /* RVV F32 gates both kernels below */
+ if ((flags & AV_CPU_FLAG_RVB_ADDR) && (flags & AV_CPU_FLAG_RVB_BASIC)) /* postfilter additionally requires both RVB flags */
+ d->postfilter = ff_opus_postfilter_rvv;
+ if (ff_get_rv_vlenb() >= 8) /* deemphasis kernel is marked "FIXME: Zvl64b"; presumably vlenb is VLEN in bytes, so 8 == 64 bits */
+ d->deemphasis = ff_opus_deemphasis_rvv;
+ }
#endif
}
diff --git a/libavcodec/riscv/opusdsp_rvv.S b/libavcodec/riscv/opusdsp_rvv.S
index 79ae86c30e..839edfa4b0 100644
--- a/libavcodec/riscv/opusdsp_rvv.S
+++ b/libavcodec/riscv/opusdsp_rvv.S
@@ -64,3 +64,46 @@ func ff_opus_postfilter_rvv, zve32f
ret
endfunc
+
+// Opus de-emphasis: y[i] = x[i] + 0.85 * y[i - 1], seeded with y[-1] = coeff (a0 = y, a1 = x, then coeff, len).
+// Returns the final filter state, i.e. the last y written, or coeff unchanged when len == 0.
+// FIXME: Zvl64b
+func ff_opus_deemphasis_rvv, zve32f
+        li      t0, 0x3f599a00 // 0.85f
+        li      t1, 8 // samples consumed per vector iteration
+NOHWF   fmv.w.x fa0, a2 // without hard-float ABI: coeff arrives in a2...
+NOHWF   mv      a2, a3 // ...and len in a3; normalise to fa0/a2
+        vsetivli zero, 1, e32, mf2, ta, ma
+        vmv.s.x v8, t0 // v8[0] = 0.85f bit pattern, multiplicand for the incoming state
+        fmv.w.x ft0, t0 // ft0 = 0.85f
+        blt     a2, t1, 2f // fewer than 8 samples: scalar loop only
+1:
+        vlseg8e32.v v0, (a1) // v0..v7 = x[0..7], one sample per register
+        addi    a2, a2, -8
+        vfmacc.vf v0, fa0, v8 // y[0] = x[0] + state * 0.85
+        addi    a1, a1, 8 * 4
+        vfmacc.vf v1, ft0, v0 // y[k] = x[k] + 0.85 * y[k - 1], serially for k = 1..7
+        vfmacc.vf v2, ft0, v1
+        vfmacc.vf v3, ft0, v2
+        vfmacc.vf v4, ft0, v3
+        vfmacc.vf v5, ft0, v4
+        vfmacc.vf v6, ft0, v5
+        vfmacc.vf v7, ft0, v6
+        vfmv.f.s fa0, v7 // y[7] becomes the state for the next iteration
+        vsseg8e32.v v0, (a0)
+        addi    a0, a0, 8 * 4
+        bge     a2, t1, 1b
+2:
+        beqz    a2, 4f
+3:
+        flw     fa1, (a1)
+        addi    a2, a2, -1
+        fmadd.s fa0, ft0, fa0, fa1 // state = 0.85 * state + x[i]
+        addi    a1, a1, 4
+        fsw     fa0, (a0)
+        addi    a0, a0, 4
+        bnez    a2, 3b
+4:
+NOHWF   fmv.x.w a0, fa0 // without hard-float ABI the result must be returned in a0; must run on every exit path
+        ret
+endfunc
--
2.42.0
More information about the ffmpeg-devel
mailing list