[FFmpeg-devel] [PATCH 5/5] lavc/vp8dsp: factor R-V V EPEL functions for all lengths
flow gg
hlefthleft at gmail.com
Sat May 25 21:16:22 EEST 2024
Would it be better to replace the two vsetvlstatic8 and vsetvlstatic16 with
two vsetvl? This would require the previous patch and this one to work
together, increasing the number of lines of code and making the code a bit
harder to read.
Additionally, I have a question about patch 4 'save one R-V GPR' and patch
5. Should they be submitted as a single patch? Because patch 4 looks
similar to what I initially submitted, and you suggested changing it to
save lines of code. If it is only for patch 5, shouldn't they be combined
together?
Rémi Denis-Courmont <remi at remlab.net> 于2024年5月25日周六 23:39写道:
> ---
> libavcodec/riscv/vp8dsp_rvv.S | 56 ++++++++++++++++++++---------------
> 1 file changed, 32 insertions(+), 24 deletions(-)
>
> diff --git a/libavcodec/riscv/vp8dsp_rvv.S b/libavcodec/riscv/vp8dsp_rvv.S
> index a4fcd158a5..002e7f3174 100644
> --- a/libavcodec/riscv/vp8dsp_rvv.S
> +++ b/libavcodec/riscv/vp8dsp_rvv.S
> @@ -32,16 +32,6 @@
> .endif
> .endm
>
> -.macro vsetvlstatic16 len
> -.if \len <= 4
> - vsetivli zero, \len, e16, mf2, ta, ma
> -.elseif \len <= 8
> - vsetivli zero, \len, e16, m1, ta, ma
> -.elseif \len <= 16
> - vsetivli zero, \len, e16, m2, ta, ma
> -.endif
> -.endm
> -
> .macro vp8_idct_dc_add
> vlse32.v v0, (a0), a2
> lh a5, 0(a1)
> @@ -181,13 +171,8 @@ const subpel_filters
> .byte 0, -1, 12, 123, -6, 0
> endconst
>
> -.macro epel len size type
> -func ff_put_vp8_epel\len\()_\type\()\size\()_rvv, zve32x
> -.ifc \type,v
> - addi t0, a6, -1
> -.else
> - addi t0, a5, -1
> -.endif
> +.macro epel_common size, type
> +func ff_put_vp8_epel_\type\()\size\().rvv, zve32x
> lla t2, subpel_filters
> sh1add t0, t0, t0
> sh1add t0, t0, t2
> @@ -198,7 +183,6 @@ func ff_put_vp8_epel\len\()_\type\()\size\()_rvv,
> zve32x
> lb t5, 5(t0)
> lb t0, (t0)
> .endif
> - vsetvlstatic8 \len
> 1:
> addi a4, a4, -1
> .ifc \type,v
> @@ -236,11 +220,11 @@ func ff_put_vp8_epel\len\()_\type\()\size\()_rvv,
> zve32x
> vwmaccsu.vx v16, t1, v22
> vwmaccsu.vx v16, t4, v28
> vwadd.wx v16, v16, t6
> - vsetvlstatic16 \len
> + vsetvl zero, zero, a6 # e16
> vwadd.vv v24, v16, v20
> vnsra.wi v24, v24, 7
> vmax.vx v24, v24, zero
> - vsetvlstatic8 \len
> + vsetvl zero, zero, a5 # e8
> vnclipu.wi v30, v24, 0
> add a2, a2, a3
> vse8.v v30, (a0)
> @@ -251,9 +235,33 @@ func ff_put_vp8_epel\len\()_\type\()\size\()_rvv,
> zve32x
> endfunc
> .endm
>
> +.macro epel len, size, type
> +func ff_put_vp8_epel\len\()_\type\()\size\()_rvv, zve32x
> +.ifc \type,v
> + addi t0, a6, -1
> +.else
> + addi t0, a5, -1
> +.endif
> +.if \len <= 4
> + li a5, 0306 # e8, mf4, ta, ma
> + li a6, 0317 # e16, mf2, ta, ma
> +.elseif \len <= 8
> + li a5, 0307 # e8, mf2, ta, ma
> + li a6, 0310 # e16, m1, ta, ma
> +.else # if len <= 16
> + li a5, 0300 # e8, m1, ta, ma
> + li a6, 0311 # e16, m2, ta, ma
> +.endif
> + vsetvlstatic8 \len
> + j ff_put_vp8_epel_\type\()\size\().rvv
> +endfunc
> +.endm
> +
> +.irp type,h,v
> +.irp size,4,6
> +epel_common \size, \type
> .irp len,16,8,4
> -epel \len 6 h
> -epel \len 4 h
> -epel \len 6 v
> -epel \len 4 v
> +epel \len, \size, \type
> +.endr
> +.endr
> .endr
> --
> 2.45.1
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".
>
More information about the ffmpeg-devel
mailing list