[FFmpeg-devel] [PATCH 4/4] lavc/vp8dsp: R-V V loop_filter

uk7b at foxmail.com uk7b at foxmail.com
Sat Jun 22 18:58:06 EEST 2024


From: sunyuechi <sunyuechi at iscas.ac.cn>

                                                     C908   X60
vp8_loop_filter8uv_v_c                             :   13.7   11.7
vp8_loop_filter8uv_v_rvv_i32                       :    7.7    6.2
vp8_loop_filter16y_h_c                             :   12.2   11.2
vp8_loop_filter16y_h_rvv_i32                       :    9.5    7.2
vp8_loop_filter16y_v_c                             :   13.2   12.0
vp8_loop_filter16y_v_rvv_i32                       :    5.5    3.7
---
 libavcodec/riscv/vp8dsp_init.c |  5 ++-
 libavcodec/riscv/vp8dsp_rvv.S  | 57 ++++++++++++++++++++++++++++++++++
 2 files changed, 61 insertions(+), 1 deletion(-)

diff --git a/libavcodec/riscv/vp8dsp_init.c b/libavcodec/riscv/vp8dsp_init.c
index 94f78cd84b..72191f558b 100644
--- a/libavcodec/riscv/vp8dsp_init.c
+++ b/libavcodec/riscv/vp8dsp_init.c
@@ -157,7 +157,10 @@ av_cold void ff_vp8dsp_init_riscv(VP8DSPContext *c)
     c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter16_simple_rvv##vlen; \
     c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16_inner_rvv##vlen; \
     c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16_inner_rvv##vlen; \
-    c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_rvv##vlen;
+    c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_rvv##vlen; \
+    c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16_rvv##vlen; \
+    c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16_rvv##vlen; \
+    c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_rvv##vlen;
 
     int flags = av_get_cpu_flags();
 
diff --git a/libavcodec/riscv/vp8dsp_rvv.S b/libavcodec/riscv/vp8dsp_rvv.S
index ed789ec4fd..98ff389b9a 100644
--- a/libavcodec/riscv/vp8dsp_rvv.S
+++ b/libavcodec/riscv/vp8dsp_rvv.S
@@ -410,6 +410,33 @@ endfunc
         vsra.vi         v24, v24, 1              // (f1 + 1) >> 1;
         vadd.vv         v8, v18, v24
         vsub.vv         v10, v20, v24
+        .else
+        li              t5, 27
+        li              t3, 9
+        li              a7, 18
+        vwmul.vx        v2, v11, t5
+        vwmul.vx        v6, v11, t3
+        vwmul.vx        v4, v11, a7
+        vsetvlstatic16  \len, \vlen
+        li              a7, 63
+        vzext.vf2       v14, v15                 // p2
+        vzext.vf2       v24, v10                 // q2
+        vadd.vx         v2, v2, a7
+        vadd.vx         v4, v4, a7
+        vadd.vx         v6, v6, a7
+        vsra.vi         v2, v2, 7                // a0
+        vsra.vi         v12, v4, 7               // a1
+        vsra.vi         v6, v6, 7                // a2
+        vadd.vv         v14, v14, v6             // p2 + a2
+        vsub.vv         v22, v24, v6             // q2 - a2
+        vsub.vv         v10, v20, v12            // q1 - a1
+        vadd.vv         v4, v8, v2               // p0 + a0
+        vsub.vv         v6, v16, v2              // q0 - a0
+        vadd.vv         v8, v12, v18             // a1 + p1
+        vmax.vx         v4, v4, zero
+        vmax.vx         v6, v6, zero
+        vmax.vx         v14, v14, zero
+        vmax.vx         v16, v22, zero
         .endif
 
         vmax.vx         v8, v8, zero
@@ -430,6 +457,17 @@ endfunc
         vsse8.v         v6, (a6), \stride, v0.t
         vsse8.v         v7, (t4), \stride, v0.t
         .endif
+        .if !\inner
+        vnclipu.wi      v14, v14, 0
+        vnclipu.wi      v16, v16, 0
+        .ifc \type,v
+        vse8.v          v14, (t0), v0.t
+        vse8.v          v16, (t6), v0.t
+        .else
+        vsse8.v         v14, (t0), \stride, v0.t
+        vsse8.v         v16, (t6), \stride, v0.t
+        .endif
+        .endif
 .endif
 .endm
 
@@ -464,6 +502,25 @@ func ff_vp8_v_loop_filter8uv_inner_rvv\vlen, zve32x
         filter 8, \vlen, v, 1, 1, a1, a2, a3, a4, a5
         ret
 endfunc
+
+func ff_vp8_v_loop_filter16_rvv\vlen, zve32x
+        vsetvlstatic8   16, \vlen                // presumably configures 16 elements of 8 bits; helper macro is defined outside this hunk
+        filter 16, \vlen, v, 1, 0, a0, a1, a2, a3, a4 // type "v" takes the unit-stride vse8 store path; the 0 is presumably \inner, which enables the extra p2/q2 computation and stores
+        ret
+endfunc
+
+func ff_vp8_h_loop_filter16_rvv\vlen, zve32x
+        vsetvlstatic8   16, \vlen                // presumably configures 16 elements of 8 bits; helper macro is defined outside this hunk
+        filter 16, \vlen, h, 1, 0, a0, a1, a2, a3, a4 // type "h" takes the strided vsse8 store path (stride from \stride); the 0 is presumably \inner, enabling the p2/q2 stores
+        ret
+endfunc
+
+func ff_vp8_v_loop_filter8uv_rvv\vlen, zve32x
+        vsetvlstatic8   8, \vlen                 // presumably configures 8 elements of 8 bits; helper macro is defined outside this hunk
+        filter 8, \vlen, v, 1, 0, a0, a2, a3, a4, a5 // first pass on the pointer in a0 (presumably the U plane, going by the "8uv" name - confirm against the C prototype)
+        filter 8, \vlen, v, 1, 0, a1, a2, a3, a4, a5 // second pass on the pointer in a1 with the same a2-a5 arguments (presumably the V plane)
+        ret
+endfunc
 .endr
 
 .macro bilin_load_h dst mn
-- 
2.45.2



More information about the ffmpeg-devel mailing list