[FFmpeg-devel] [PATCH v4 4/5] lavc/vp9dsp: R-V V mc bilin hv

uk7b at foxmail.com uk7b at foxmail.com
Sat May 18 21:15:32 EEST 2024


From: sunyuechi <sunyuechi at iscas.ac.cn>

C908:
vp9_avg_bilin_4hv_8bpp_c: 11.0
vp9_avg_bilin_4hv_8bpp_rvv_i64: 3.7
vp9_avg_bilin_8hv_8bpp_c: 38.7
vp9_avg_bilin_8hv_8bpp_rvv_i64: 7.2
vp9_avg_bilin_16hv_8bpp_c: 147.0
vp9_avg_bilin_16hv_8bpp_rvv_i64: 14.2
vp9_avg_bilin_32hv_8bpp_c: 574.5
vp9_avg_bilin_32hv_8bpp_rvv_i64: 42.7
vp9_avg_bilin_64hv_8bpp_c: 2311.5
vp9_avg_bilin_64hv_8bpp_rvv_i64: 201.7
vp9_put_bilin_4hv_8bpp_c: 10.0
vp9_put_bilin_4hv_8bpp_rvv_i64: 3.2
vp9_put_bilin_8hv_8bpp_c: 35.2
vp9_put_bilin_8hv_8bpp_rvv_i64: 6.5
vp9_put_bilin_16hv_8bpp_c: 133.7
vp9_put_bilin_16hv_8bpp_rvv_i64: 13.0
vp9_put_bilin_32hv_8bpp_c: 538.2
vp9_put_bilin_32hv_8bpp_rvv_i64: 39.7
vp9_put_bilin_64hv_8bpp_c: 2114.0
vp9_put_bilin_64hv_8bpp_rvv_i64: 153.7
---
 libavcodec/riscv/vp9_mc_rvv.S  | 34 ++++++++++++++++++++++++++++++++++
 libavcodec/riscv/vp9dsp_init.c | 10 ++++++++++
 2 files changed, 44 insertions(+)

diff --git a/libavcodec/riscv/vp9_mc_rvv.S b/libavcodec/riscv/vp9_mc_rvv.S
index 9d7caeb005..d2c9393fc9 100644
--- a/libavcodec/riscv/vp9_mc_rvv.S
+++ b/libavcodec/riscv/vp9_mc_rvv.S
@@ -104,6 +104,39 @@ func ff_\op\()_bilin_\len\()\type\()_rvv, zve32x
 endfunc
 .endm
 
+.macro bilin_hv len op
+func ff_\op\()_bilin_\len\()hv_rvv, zve32x
+.ifc \op,avg
+        csrwi           vxrm, 0
+.endif
+        vsetvlstatic8   \len t0 64
+        neg             t1, a5
+        neg             t2, a6
+        li              t4, 8
+        bilin_load      v24, \len, put, h, a5
+        add             a2, a2, a3
+1:
+        addi            a4, a4, -1
+        bilin_load      v4, \len, put, h, a5
+        vwmulu.vx       v16, v4, a6
+        vwmaccsu.vx     v16, t2, v24
+        vwadd.wx        v16, v16, t4
+        vnsra.wi        v16, v16, 4
+        vadd.vv         v0, v16, v24
+.ifc \op,avg
+        vle8.v          v16, (a0)
+        vaaddu.vv       v0, v0, v16
+.endif
+        vse8.v          v0, (a0)
+        vmv.v.v         v24, v4
+        add             a2, a2, a3
+        add             a0, a0, a1
+        bnez            a4, 1b
+
+        ret
+endfunc
+.endm
+
 const subpel_filters_regular
         .byte  0,  0,   0, 128,   0,   0,  0,  0
         .byte  0,  1,  -5, 126,   8,  -3,  1,  0
@@ -334,6 +367,7 @@ endfunc
         .irp op, put, avg
                 bilin_h_v \len \op h a5
                 bilin_h_v \len \op v a6
+                bilin_hv \len \op
                 .irp name, regular, sharp, smooth
                         .irp type, h, v
                                 epel \len \op \name \type 128
diff --git a/libavcodec/riscv/vp9dsp_init.c b/libavcodec/riscv/vp9dsp_init.c
index 931b92928a..09519f4005 100644
--- a/libavcodec/riscv/vp9dsp_init.c
+++ b/libavcodec/riscv/vp9dsp_init.c
@@ -104,6 +104,16 @@ static av_cold void vp9dsp_mc_init_riscv(VP9DSPContext *dsp, int bpp)
     dsp->mc[4][FILTER_BILINEAR ][0][1][0] = ff_put_bilin_4h_rvv;
     dsp->mc[4][FILTER_BILINEAR ][1][0][1] = ff_avg_bilin_4v_rvv;
     dsp->mc[4][FILTER_BILINEAR ][1][1][0] = ff_avg_bilin_4h_rvv;
+    dsp->mc[0][FILTER_BILINEAR ][0][1][1] = ff_put_bilin_64hv_rvv;
+    dsp->mc[0][FILTER_BILINEAR ][1][1][1] = ff_avg_bilin_64hv_rvv;
+    dsp->mc[1][FILTER_BILINEAR ][0][1][1] = ff_put_bilin_32hv_rvv;
+    dsp->mc[1][FILTER_BILINEAR ][1][1][1] = ff_avg_bilin_32hv_rvv;
+    dsp->mc[2][FILTER_BILINEAR ][0][1][1] = ff_put_bilin_16hv_rvv;
+    dsp->mc[2][FILTER_BILINEAR ][1][1][1] = ff_avg_bilin_16hv_rvv;
+    dsp->mc[3][FILTER_BILINEAR ][0][1][1] = ff_put_bilin_8hv_rvv;
+    dsp->mc[3][FILTER_BILINEAR ][1][1][1] = ff_avg_bilin_8hv_rvv;
+    dsp->mc[4][FILTER_BILINEAR ][0][1][1] = ff_put_bilin_4hv_rvv;
+    dsp->mc[4][FILTER_BILINEAR ][1][1][1] = ff_avg_bilin_4hv_rvv;
 
     if (flags & AV_CPU_FLAG_RVB_ADDR) {
         init_subpel2(0, 0, 1, v, put, 128);
-- 
2.45.1



More information about the ffmpeg-devel mailing list