[FFmpeg-devel] [PATCH] lavc/h264chroma: RISC-V V add motion compensation for 4xH and 2xH chroma blocks
Rémi Denis-Courmont
remi at remlab.net
Wed Jun 14 18:57:50 EEST 2023
Le perjantaina 9. kesäkuuta 2023, 10.17.27 EEST Arnie Chang a écrit :
> Optimize the put and avg filtering for 4xH and 2xH blocks
>
> Signed-off-by: Arnie Chang <arnie.chang at sifive.com>
> ---
> checkasm: using random seed 3475799765
> RVVi32:
> - h264chroma.chroma_mc [OK]
> checkasm: all 6 tests passed
> avg_h264_chroma_mc1_8_c: 1821.5
> avg_h264_chroma_mc1_8_rvv_i32: 466.5
> avg_h264_chroma_mc2_8_c: 939.2
> avg_h264_chroma_mc2_8_rvv_i32: 466.5
> avg_h264_chroma_mc4_8_c: 502.2
> avg_h264_chroma_mc4_8_rvv_i32: 466.5
> put_h264_chroma_mc1_8_c: 1436.5
> put_h264_chroma_mc1_8_rvv_i32: 382.5
> put_h264_chroma_mc2_8_c: 824.2
> put_h264_chroma_mc2_8_rvv_i32: 382.5
> put_h264_chroma_mc4_8_c: 431.2
> put_h264_chroma_mc4_8_rvv_i32: 382.5
>
> libavcodec/riscv/h264_chroma_init_riscv.c | 8 +
> libavcodec/riscv/h264_mc_chroma.S | 216 ++++++++++++++--------
> 2 files changed, 144 insertions(+), 80 deletions(-)
>
> diff --git a/libavcodec/riscv/h264_chroma_init_riscv.c b/libavcodec/riscv/h264_chroma_init_riscv.c
> index 7c905edfcd..9f95150ea3 100644
> --- a/libavcodec/riscv/h264_chroma_init_riscv.c
> +++ b/libavcodec/riscv/h264_chroma_init_riscv.c
> @@ -27,6 +27,10 @@
>
> void h264_put_chroma_mc8_rvv(uint8_t *p_dst, const uint8_t *p_src, ptrdiff_t stride, int h, int x, int y);
> void h264_avg_chroma_mc8_rvv(uint8_t *p_dst, const uint8_t *p_src, ptrdiff_t stride, int h, int x, int y);
> +void h264_put_chroma_mc4_rvv(uint8_t *p_dst, const uint8_t *p_src, ptrdiff_t stride, int h, int x, int y);
> +void h264_avg_chroma_mc4_rvv(uint8_t *p_dst, const uint8_t *p_src, ptrdiff_t stride, int h, int x, int y);
> +void h264_put_chroma_mc2_rvv(uint8_t *p_dst, const uint8_t *p_src, ptrdiff_t stride, int h, int x, int y);
> +void h264_avg_chroma_mc2_rvv(uint8_t *p_dst, const uint8_t *p_src, ptrdiff_t stride, int h, int x, int y);
>
> av_cold void ff_h264chroma_init_riscv(H264ChromaContext *c, int bit_depth)
> {
> @@ -36,6 +40,10 @@ av_cold void ff_h264chroma_init_riscv(H264ChromaContext *c, int bit_depth)
> if (bit_depth == 8 && (flags & AV_CPU_FLAG_RVV_I32) && ff_get_rv_vlenb() >= 16) {
> c->put_h264_chroma_pixels_tab[0] = h264_put_chroma_mc8_rvv;
> c->avg_h264_chroma_pixels_tab[0] = h264_avg_chroma_mc8_rvv;
> + c->put_h264_chroma_pixels_tab[1] = h264_put_chroma_mc4_rvv;
> + c->avg_h264_chroma_pixels_tab[1] = h264_avg_chroma_mc4_rvv;
> + c->put_h264_chroma_pixels_tab[2] = h264_put_chroma_mc2_rvv;
> + c->avg_h264_chroma_pixels_tab[2] = h264_avg_chroma_mc2_rvv;
> }
> #endif
> }
> diff --git a/libavcodec/riscv/h264_mc_chroma.S b/libavcodec/riscv/h264_mc_chroma.S
> index 364bc3156e..c97cdbad86 100644
> --- a/libavcodec/riscv/h264_mc_chroma.S
> +++ b/libavcodec/riscv/h264_mc_chroma.S
> @@ -19,8 +19,7 @@
> */
> #include "libavutil/riscv/asm.S"
>
> -.macro h264_chroma_mc8 type
> -func h264_\type\()_chroma_mc8_rvv, zve32x
> +.macro do_chroma_mc type width unroll
It looks like \width is only ever used as the AVL (application vector length).
You could pass it as a run-time argument to a shared internal function instead
of instantiating otherwise identical code three times; that would spare the
instruction cache.
> csrw vxrm, zero
> slli t2, a5, 3
> mul t1, a5, a4
> @@ -30,94 +29,104 @@ func h264_\type\()_chroma_mc8_rvv, zve32x
> sub a7, a4, t1
> addi a6, a5, 64
> sub t0, t2, t1
> - vsetivli t3, 8, e8, m1, ta, mu
> + vsetivli t3, \width, e8, m1, ta, mu
> beqz t1, 2f
> blez a3, 8f
> li t4, 0
> li t2, 0
> li t5, 1
> addi a5, t3, 1
> + .ifc \unroll,1
> slli t3, a2, 2
> + .else
> + slli t3, a2, 1
> + .endif
Note that each of those 5-line conditional shift blocks could be folded into a
single instruction, e.g.:

    slli t3, a2, (1 + \unroll)

Though I wonder if we could leverage the SH*ADD instructions instead of SLLI in
some cases?
(..)
> +.endm
> +
> +.macro h264_chroma_mc type width
> +func h264_\type\()_chroma_mc\width\()_rvv, zve32x
> + .ifc \width,8
> + do_chroma_mc \type 8 1
> + .else
> + li a7, 3
> + blt a3, a7, 11f
> + do_chroma_mc \type \width 1
> +11:
> + do_chroma_mc \type \width 0
> + .endif
--
Rémi Denis-Courmont
http://www.remlab.net/
More information about the ffmpeg-devel mailing list