[FFmpeg-devel] [PATCH 08/10] lavc/vp9dsp: R-V V mc tap v
uk7b at foxmail.com
uk7b at foxmail.com
Sat May 4 18:03:11 EEST 2024
From: sunyuechi <sunyuechi at iscas.ac.cn>
Benchmark results on C908 (C reference vs. RVV implementation):
vp9_avg_8tap_smooth_4v_8bpp_c: 13.7
vp9_avg_8tap_smooth_4v_8bpp_rvv_i64: 5.0
vp9_avg_8tap_smooth_8v_8bpp_c: 49.7
vp9_avg_8tap_smooth_8v_8bpp_rvv_i64: 9.2
vp9_avg_8tap_smooth_16v_8bpp_c: 191.5
vp9_avg_8tap_smooth_16v_8bpp_rvv_i64: 21.2
vp9_avg_8tap_smooth_32v_8bpp_c: 770.5
vp9_avg_8tap_smooth_32v_8bpp_rvv_i64: 66.0
vp9_avg_8tap_smooth_64v_8bpp_c: 3068.0
vp9_avg_8tap_smooth_64v_8bpp_rvv_i64: 262.5
vp9_put_8tap_smooth_4v_8bpp_c: 12.0
vp9_put_8tap_smooth_4v_8bpp_rvv_i64: 4.5
vp9_put_8tap_smooth_8v_8bpp_c: 43.7
vp9_put_8tap_smooth_8v_8bpp_rvv_i64: 8.5
vp9_put_8tap_smooth_16v_8bpp_c: 168.7
vp9_put_8tap_smooth_16v_8bpp_rvv_i64: 20.0
vp9_put_8tap_smooth_32v_8bpp_c: 681.5
vp9_put_8tap_smooth_32v_8bpp_rvv_i64: 63.7
vp9_put_8tap_smooth_64v_8bpp_c: 2692.7
vp9_put_8tap_smooth_64v_8bpp_rvv_i64: 253.5
---
libavcodec/riscv/vp9_mc_rvv.S | 32 +++++++++++++++++++++++++++++++-
libavcodec/riscv/vp9dsp_init.c | 3 ++-
2 files changed, 33 insertions(+), 2 deletions(-)
diff --git a/libavcodec/riscv/vp9_mc_rvv.S b/libavcodec/riscv/vp9_mc_rvv.S
index 58b00889ce..151d7702ec 100644
--- a/libavcodec/riscv/vp9_mc_rvv.S
+++ b/libavcodec/riscv/vp9_mc_rvv.S
@@ -222,7 +222,11 @@ endconst
.macro epel_filter name type regtype
lla \regtype\()2, subpel_filters_\name
li \regtype\()1, 8
+.ifc \type,v
+ mul \regtype\()0, a6, \regtype\()1
+.elseif \type == h
mul \regtype\()0, a5, \regtype\()1
+.endif
add \regtype\()0, \regtype\()0, \regtype\()2
.irp n 1,2,3,4,5,6
lb \regtype\n, \n(\regtype\()0)
@@ -239,6 +243,19 @@ endconst
li a5, 64
.ifc \from_mem, 1
vle8.v v22, (a2)
+.ifc \type,v
+ sub a2, a2, a3
+ vle8.v v20, (a2)
+ add a2, a2, a3
+ add a2, a2, a3
+ vle8.v v24, (a2)
+ add a2, a2, a3
+ vle8.v v26, (a2)
+ add a2, a2, a3
+ vle8.v v28, (a2)
+ add a2, a2, a3
+ vle8.v v30, (a2)
+.elseif \type == h
addi a2, a2, -1
vle8.v v20, (a2)
addi a2, a2, 2
@@ -249,6 +266,7 @@ endconst
vle8.v v28, (a2)
addi a2, a2, 1
vle8.v v30, (a2)
+.endif
.ifc \name,smooth
vwmulu.vx v16, v24, \regtype\()4
@@ -267,11 +285,23 @@ endconst
vwmaccsu.vx v16, s7, v30
.endif
+.ifc \type,v
+ .rept 6
+ sub a2, a2, a3
+ .endr
+ vle8.v v28, (a2)
+ sub a2, a2, a3
+ vle8.v v26, (a2)
+ .rept 3
+ add a2, a2, a3
+ .endr
+.elseif \type == h
addi a2, a2, -6
vle8.v v28, (a2)
addi a2, a2, -1
vle8.v v26, (a2)
addi a2, a2, 3
+.endif
.ifc \name,smooth
vwmaccsu.vx v16, \regtype\()1, v28
@@ -411,7 +441,7 @@ endfunc
.irp name regular sharp smooth
.irp do put avg
- .irp type h
+ .irp type h v
gen_epel \len \do \name \type
.endr
.endr
diff --git a/libavcodec/riscv/vp9dsp_init.c b/libavcodec/riscv/vp9dsp_init.c
index 97f02e601d..ff7d445f6a 100644
--- a/libavcodec/riscv/vp9dsp_init.c
+++ b/libavcodec/riscv/vp9dsp_init.c
@@ -125,7 +125,8 @@ static av_cold void vp9dsp_mc_init_rvv(VP9DSPContext *dsp, int bpp)
init_subpel1(4, idx, idxh, idxv, 4, dir, type)
#define init_subpel3(idx, type) \
- init_subpel2(idx, 1, 0, h, type)
+ init_subpel2(idx, 1, 0, h, type); \
+ init_subpel2(idx, 0, 1, v, type)
init_subpel3(0, put);
init_subpel3(1, avg);
--
2.45.0
More information about the ffmpeg-devel mailing list