[FFmpeg-devel] [PATCH v5 1/4] avcodec/loongarch: Add wrapper for __lsx_vldx
Andreas Rheinhardt
andreas.rheinhardt at outlook.com
Thu Aug 4 04:26:50 EEST 2022
Andreas Rheinhardt:
> __lsx_vldx does not accept a pointer to const (in fact,
> no function in lsxintrin.h does so), although it is not allowed
> to modify the pointed-to buffer. Therefore this commit adds a wrapper
> for it in order to constify the HEVC DSP functions in a later commit.
>
> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt at outlook.com>
> ---
> I have now found a toolchain that supports LSX and LASX:
> https://gitee.com/wenux/cross-compiler-la-on-x86
> With this it was easy to find out why constifying adds new warnings:
> None of the intrinsic functions in lsxintrin.h and lasxintrin.h
> use const at all, including the functions used to read from src
> (__lsx_vldx for LSX and __lasx_xvldx for LASX).
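>
> For reference, the prototypes are roughly (paraphrased from the
> headers; inline wrappers and attributes omitted):
>
>     __m128i __lsx_vldx(void *addr, long int offset);    /* LSX  */
>     __m256i __lasx_xvldx(void *addr, long int offset);  /* LASX */
>
> i.e. no const anywhere, so passing a const uint8_t * warns
> (-Wdiscarded-qualifiers) even though the loads never write through
> the pointer.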
>
> Therefore I am adding wrappers for these functions. They are added
> in commits separate from the constifications so that they can easily
> be reverted when they are no longer needed, namely once lsxintrin.h
> and lasxintrin.h are fixed.
> I have resisted the temptation to just #define __lsx_vldx(ptr, stride)
> __lsx_vldx((void*)(ptr), (stride)), because I didn't want to mess
> with reserved identifiers.
>
> Fixing lsxintrin.h and lasxintrin.h would of course be preferable;
> in order to draw Loongson's attention to this issue I have cc'ed
> several of the devs who contributed the LSX and LASX code
> in question.
>
I just received a notification that these mails could not be delivered
(for whatever reason). I have therefore given up hope that this could
simply be fixed by Loongson in their headers and will apply this
patchset tonight with the wrapper macro moved to loongson_intrinsics.h
(unless there are objections, of course).
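
A minimal sketch (assuming the macro keeps the shape used in this
patch; the LASX name below is my guess from the same pattern) of what
would move into loongson_intrinsics.h:

    /* __lsx_vldx()/__lasx_xvldx() do not accept pointers to const;
     * remove these wrappers once lsxintrin.h/lasxintrin.h are fixed. */
    #define LSX_VLDX(cptr, stride)   __lsx_vldx((void*)(cptr), (stride))
    #define LASX_XVLDX(cptr, stride) __lasx_xvldx((void*)(cptr), (stride))
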
- Andreas
> (All of the above presumes that using __lsx_vldx and __lasx_xvldx
> with a pointer to const is safe; if it isn't, then we are in a lot
> of trouble.)
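>
> As a usage sketch (hypothetical helper; it assumes the constified
> prototypes from the later commits in this series plus the LSX_VLDX
> wrapper added below):
>
>     static __m128i load_next_row(const uint8_t *src, ptrdiff_t stride)
>     {
>         /* without the wrapper this would warn: __lsx_vldx() wants void * */
>         return LSX_VLDX(src, stride);
>     }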
>
> libavcodec/loongarch/hevc_lpf_sao_lsx.c | 56 +++---
> libavcodec/loongarch/hevc_mc_bi_lsx.c | 144 +++++++--------
> libavcodec/loongarch/hevc_mc_uni_lsx.c | 80 +++++----
> libavcodec/loongarch/hevc_mc_uniw_lsx.c | 12 +-
> libavcodec/loongarch/hevcdsp_lsx.c | 222 ++++++++++++------------
> 5 files changed, 267 insertions(+), 247 deletions(-)
>
> diff --git a/libavcodec/loongarch/hevc_lpf_sao_lsx.c b/libavcodec/loongarch/hevc_lpf_sao_lsx.c
> index fc10e8eda8..d90eaa745e 100644
> --- a/libavcodec/loongarch/hevc_lpf_sao_lsx.c
> +++ b/libavcodec/loongarch/hevc_lpf_sao_lsx.c
> @@ -23,6 +23,10 @@
> #include "libavutil/loongarch/loongson_intrinsics.h"
> #include "hevcdsp_lsx.h"
>
> +/* __lsx_vldx() from lsxintrin.h does not accept a const void*;
> + * remove the following once it does. */
> +#define LSX_VLDX(cptr, stride) __lsx_vldx((void*)(cptr), (stride))
> +
> void ff_hevc_loop_filter_luma_h_8_lsx(uint8_t *src, ptrdiff_t stride,
> int32_t beta, int32_t *tc,
> uint8_t *p_is_pcm, uint8_t *q_is_pcm)
> @@ -1201,17 +1205,17 @@ static void hevc_sao_edge_filter_0degree_16multiple_lsx(uint8_t *dst,
> for (; height; height -= 4) {
> src_minus1 = src - 1;
> src_minus10 = __lsx_vld(src_minus1, 0);
> - DUP2_ARG2(__lsx_vldx, src_minus1, src_stride, src_minus1,
> + DUP2_ARG2(LSX_VLDX, src_minus1, src_stride, src_minus1,
> src_stride_2x, src_minus11, src_minus12);
> - src_minus13 = __lsx_vldx(src_minus1, src_stride_3x);
> + src_minus13 = LSX_VLDX(src_minus1, src_stride_3x);
>
> for (v_cnt = 0; v_cnt < width; v_cnt += 16) {
> src_minus1 += 16;
> dst_ptr = dst + v_cnt;
> src10 = __lsx_vld(src_minus1, 0);
> - DUP2_ARG2(__lsx_vldx, src_minus1, src_stride, src_minus1,
> + DUP2_ARG2(LSX_VLDX, src_minus1, src_stride, src_minus1,
> src_stride_2x, src11, src12);
> - src13 = __lsx_vldx(src_minus1, src_stride_3x);
> + src13 = LSX_VLDX(src_minus1, src_stride_3x);
> DUP4_ARG3(__lsx_vshuf_b, src10, src_minus10, shuf1, src11,
> src_minus11, shuf1, src12, src_minus12, shuf1, src13,
> src_minus13, shuf1, src_zero0, src_zero1,
> @@ -1358,7 +1362,7 @@ static void hevc_sao_edge_filter_90degree_4width_lsx(uint8_t *dst,
> src_minus11 = src11;
>
> /* load in advance */
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x,
> src10, src11);
>
> __lsx_vstelm_w(dst0, dst, 0, 0);
> @@ -1417,7 +1421,7 @@ static void hevc_sao_edge_filter_90degree_8width_lsx(uint8_t *dst,
>
> /* load in advance */
> DUP2_ARG2(__lsx_vld, src - src_stride, 0, src, 0, src_minus10, src_minus11);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src10, src11);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src10, src11);
>
> for (height -= 2; height; height -= 2) {
> src += src_stride_2x;
> @@ -1451,7 +1455,7 @@ static void hevc_sao_edge_filter_90degree_8width_lsx(uint8_t *dst,
> src_minus11 = src11;
>
> /* load in advance */
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x,
> src10, src11);
>
> __lsx_vstelm_d(dst0, dst, 0, 0);
> @@ -1528,7 +1532,7 @@ static void hevc_sao_edge_filter_90degree_16multiple_lsx(uint8_t *dst,
> src_minus10, src_minus11);
>
> for (h_cnt = (height >> 2); h_cnt--;) {
> - DUP4_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x,
> + DUP4_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x,
> src, src_stride_3x, src, src_stride_4x,
> src10, src11, src12, src13);
> DUP4_ARG2(__lsx_vseq_b, src_minus11, src_minus10, src_minus11,
> @@ -1635,7 +1639,7 @@ static void hevc_sao_edge_filter_45degree_4width_lsx(uint8_t *dst,
> /* load in advance */
> DUP2_ARG2(__lsx_vld, src_orig - src_stride, 0, src_orig, 0,
> src_minus10, src_minus11);
> - DUP2_ARG2(__lsx_vldx, src_orig, src_stride, src_orig, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src_orig, src_stride, src_orig, src_stride_2x,
> src10, src11);
>
> for (height -= 2; height; height -= 2) {
> @@ -1677,7 +1681,7 @@ static void hevc_sao_edge_filter_45degree_4width_lsx(uint8_t *dst,
> src_minus11 = src11;
>
> /* load in advance */
> - DUP2_ARG2(__lsx_vldx, src_orig, src_stride, src_orig, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src_orig, src_stride, src_orig, src_stride_2x,
> src10, src11);
>
> __lsx_vstelm_w(dst0, dst, 0, 0);
> @@ -1748,7 +1752,7 @@ static void hevc_sao_edge_filter_45degree_8width_lsx(uint8_t *dst,
> /* load in advance */
> DUP2_ARG2(__lsx_vld, src_orig - src_stride, 0, src_orig, 0, src_minus10,
> src_minus11);
> - DUP2_ARG2(__lsx_vldx, src_orig, src_stride, src_orig, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src_orig, src_stride, src_orig, src_stride_2x,
> src10, src11);
>
> for (height -= 2; height; height -= 2) {
> @@ -1790,7 +1794,7 @@ static void hevc_sao_edge_filter_45degree_8width_lsx(uint8_t *dst,
> src_minus11 = src11;
>
> /* load in advance */
> - DUP2_ARG2(__lsx_vldx, src_orig, src_stride, src_orig, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src_orig, src_stride, src_orig, src_stride_2x,
> src10, src11)
> __lsx_vstelm_d(dst0, dst, 0, 0);
> __lsx_vstelm_d(dst0, dst + dst_stride, 0, 1);
> @@ -1833,7 +1837,7 @@ static void hevc_sao_edge_filter_45degree_8width_lsx(uint8_t *dst,
> src_minus11 = src11;
>
> /* load in advance */
> - DUP2_ARG2(__lsx_vldx, src_orig, src_stride, src_orig, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src_orig, src_stride, src_orig, src_stride_2x,
> src10, src11);
>
> __lsx_vstelm_d(dst0, dst, 0, 0);
> @@ -1880,17 +1884,17 @@ static void hevc_sao_edge_filter_45degree_16multiple_lsx(uint8_t *dst,
> src_orig = src - 1;
> dst_orig = dst;
> src_minus11 = __lsx_vld(src_orig, 0);
> - DUP2_ARG2(__lsx_vldx, src_orig, src_stride, src_orig, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src_orig, src_stride, src_orig, src_stride_2x,
> src_minus12, src_minus13);
> - src_minus14 = __lsx_vldx(src_orig, src_stride_3x);
> + src_minus14 = LSX_VLDX(src_orig, src_stride_3x);
>
> for (v_cnt = 0; v_cnt < width; v_cnt += 16) {
> src_minus10 = __lsx_vld(src_orig - src_stride, 0);
> src_orig += 16;
> src10 = __lsx_vld(src_orig, 0);
> - DUP2_ARG2(__lsx_vldx, src_orig, src_stride, src_orig,
> + DUP2_ARG2(LSX_VLDX, src_orig, src_stride, src_orig,
> src_stride_2x, src11, src12);
> - src13 = __lsx_vldx(src_orig, src_stride_3x);
> + src13 = LSX_VLDX(src_orig, src_stride_3x);
> src_plus13 = __lsx_vld(src + v_cnt + src_stride_4x, 1);
>
> DUP4_ARG3(__lsx_vshuf_b, src10, src_minus11, shuf1, src11,
> @@ -2016,7 +2020,7 @@ static void hevc_sao_edge_filter_135degree_4width_lsx(uint8_t *dst,
> /* load in advance */
> DUP2_ARG2(__lsx_vld, src_orig - src_stride, 0, src_orig, 0,
> src_minus10, src_minus11);
> - DUP2_ARG2(__lsx_vldx, src_orig, src_stride, src_orig, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src_orig, src_stride, src_orig, src_stride_2x,
> src10, src11);
>
> for (height -= 2; height; height -= 2) {
> @@ -2058,7 +2062,7 @@ static void hevc_sao_edge_filter_135degree_4width_lsx(uint8_t *dst,
> src_minus11 = src11;
>
> /* load in advance */
> - DUP2_ARG2(__lsx_vldx, src_orig, src_stride, src_orig, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src_orig, src_stride, src_orig, src_stride_2x,
> src10, src11);
>
> __lsx_vstelm_w(dst0, dst, 0, 0);
> @@ -2131,7 +2135,7 @@ static void hevc_sao_edge_filter_135degree_8width_lsx(uint8_t *dst,
> /* load in advance */
> DUP2_ARG2(__lsx_vld, src_orig - src_stride, 0, src_orig, 0,
> src_minus10, src_minus11);
> - DUP2_ARG2(__lsx_vldx, src_orig, src_stride, src_orig, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src_orig, src_stride, src_orig, src_stride_2x,
> src10, src11);
>
> for (height -= 2; height; height -= 2) {
> @@ -2173,7 +2177,7 @@ static void hevc_sao_edge_filter_135degree_8width_lsx(uint8_t *dst,
> src_minus11 = src11;
>
> /* load in advance */
> - DUP2_ARG2(__lsx_vldx, src_orig, src_stride, src_orig, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src_orig, src_stride, src_orig, src_stride_2x,
> src10, src11);
>
> __lsx_vstelm_d(dst0, dst, 0, 0);
> @@ -2255,18 +2259,18 @@ static void hevc_sao_edge_filter_135degree_16multiple_lsx(uint8_t *dst,
> dst_orig = dst;
>
> src_minus11 = __lsx_vld(src_orig, 0);
> - DUP2_ARG2(__lsx_vldx, src_orig, src_stride, src_orig, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src_orig, src_stride, src_orig, src_stride_2x,
> src_plus10, src_plus11);
> - src_plus12 = __lsx_vldx(src_orig, src_stride_3x);
> + src_plus12 = LSX_VLDX(src_orig, src_stride_3x);
>
> for (v_cnt = 0; v_cnt < width; v_cnt += 16) {
> src_minus10 = __lsx_vld(src_orig - src_stride, 2);
> - src_plus13 = __lsx_vldx(src_orig, src_stride_4x);
> + src_plus13 = LSX_VLDX(src_orig, src_stride_4x);
> src_orig += 16;
> src10 = __lsx_vld(src_orig, 0);
> - DUP2_ARG2(__lsx_vldx, src_orig, src_stride, src_orig, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src_orig, src_stride, src_orig, src_stride_2x,
> src11, src12);
> - src13 =__lsx_vldx(src_orig, src_stride_3x);
> +            src13 = LSX_VLDX(src_orig, src_stride_3x);
>
> DUP4_ARG3(__lsx_vshuf_b, src10, src_minus11, shuf1, src11,
> src_plus10, shuf1, src12, src_plus11, shuf1, src13,
> diff --git a/libavcodec/loongarch/hevc_mc_bi_lsx.c b/libavcodec/loongarch/hevc_mc_bi_lsx.c
> index 9092fdccb2..7a789ed92c 100644
> --- a/libavcodec/loongarch/hevc_mc_bi_lsx.c
> +++ b/libavcodec/loongarch/hevc_mc_bi_lsx.c
> @@ -23,6 +23,10 @@
> #include "libavutil/loongarch/loongson_intrinsics.h"
> #include "hevcdsp_lsx.h"
>
> +/* __lsx_vldx() from lsxintrin.h does not accept a const void*;
> + * remove the following once it does. */
> +#define LSX_VLDX(cptr, stride) __lsx_vldx((void*)(cptr), (stride))
> +
> static const uint8_t ff_hevc_mask_arr[16 * 2] __attribute__((aligned(0x40))) = {
> /* 8 width cases */
> 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8,
> @@ -163,14 +167,14 @@ void hevc_bi_copy_6w_lsx(uint8_t *src0_ptr, int32_t src_stride,
> DUP2_ARG2(__lsx_vilvl_d, reg1, reg0, reg3, reg2, src2, src3);
> src0_ptr += src_stride_4x;
> in0 = __lsx_vld(src1_ptr, 0);
> - DUP2_ARG2(__lsx_vldx, src1_ptr, src2_stride_x, src1_ptr,
> + DUP2_ARG2(LSX_VLDX, src1_ptr, src2_stride_x, src1_ptr,
> src2_stride_2x, in1, in2);
> - in3 = __lsx_vldx(src1_ptr, src2_stride_3x);
> + in3 = LSX_VLDX(src1_ptr, src2_stride_3x);
> src1_ptr += src2_stride_2x;
> in4 = __lsx_vld(src1_ptr, 0);
> - DUP2_ARG2(__lsx_vldx, src1_ptr, src2_stride_x, src1_ptr,
> + DUP2_ARG2(LSX_VLDX, src1_ptr, src2_stride_x, src1_ptr,
> src2_stride_2x, in5, in6);
> - in7 = __lsx_vldx(src1_ptr, src2_stride_3x);
> + in7 = LSX_VLDX(src1_ptr, src2_stride_3x);
> src1_ptr += src2_stride_2x;
> DUP4_ARG2(__lsx_vsllwil_hu_bu, src0, 6, src1, 6, src2, 6, src3, 6,
> dst0, dst2, dst4, dst6);
> @@ -207,7 +211,7 @@ void hevc_bi_copy_6w_lsx(uint8_t *src0_ptr, int32_t src_stride,
> src0 = __lsx_vilvl_d(reg1, reg0);
> src0_ptr += src_stride_2x;
> in0 = __lsx_vld(src1_ptr, 0);
> - in1 = __lsx_vldx(src1_ptr, src2_stride_x);
> + in1 = LSX_VLDX(src1_ptr, src2_stride_x);
> src1_ptr += src2_stride_x;
> dst0 = __lsx_vsllwil_hu_bu(src0, 6);
> dst1 = __lsx_vilvh_b(zero, src0);
> @@ -265,14 +269,14 @@ void hevc_bi_copy_8w_lsx(uint8_t *src0_ptr, int32_t src_stride,
> DUP4_ARG2(__lsx_vslli_h, dst1, 6, dst3, 6, dst5, 6, dst7, 6, dst1,
> dst3, dst5, dst7);
> in0 = __lsx_vld(src1_ptr, 0);
> - DUP2_ARG2(__lsx_vldx, src1_ptr, src2_stride_x, src1_ptr,
> + DUP2_ARG2(LSX_VLDX, src1_ptr, src2_stride_x, src1_ptr,
> src2_stride_2x, in1, in2);
> - in3 = __lsx_vldx(src1_ptr, src2_stride_3x);
> + in3 = LSX_VLDX(src1_ptr, src2_stride_3x);
> src1_ptr += src2_stride_2x;
> in4 = __lsx_vld(src1_ptr, 0);
> - DUP2_ARG2(__lsx_vldx, src1_ptr, src2_stride_x, src1_ptr,
> + DUP2_ARG2(LSX_VLDX, src1_ptr, src2_stride_x, src1_ptr,
> src2_stride_2x, in5, in6);
> - in7 = __lsx_vldx(src1_ptr, src2_stride_3x);
> + in7 = LSX_VLDX(src1_ptr, src2_stride_3x);
> src1_ptr += src2_stride_2x;
> out0 = hevc_bi_rnd_clip(in0, dst0, in1, dst1);
> out1 = hevc_bi_rnd_clip(in2, dst2, in3, dst3);
> @@ -294,7 +298,7 @@ void hevc_bi_copy_8w_lsx(uint8_t *src0_ptr, int32_t src_stride,
> reg1 = __lsx_vldrepl_d(src0_ptr + src_stride, 0);
> src0 = __lsx_vilvl_d(reg1, reg0);
> in0 = __lsx_vld(src1_ptr, 0);
> - in1 = __lsx_vldx(src1_ptr, src2_stride_x);
> + in1 = LSX_VLDX(src1_ptr, src2_stride_x);
> dst0 = __lsx_vsllwil_hu_bu(src0, 6);
> dst1 = __lsx_vilvh_b(zero, src0);
> dst1 = __lsx_vslli_h(dst1, 6);
> @@ -330,19 +334,19 @@ void hevc_bi_copy_12w_lsx(uint8_t *src0_ptr, int32_t src_stride,
>
> for (loop_cnt = 4; loop_cnt--;) {
> src0 = __lsx_vld(src0_ptr, 0);
> - DUP2_ARG2(__lsx_vldx, src0_ptr, src_stride, src0_ptr, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src0_ptr, src_stride, src0_ptr, src_stride_2x,
> src1, src2);
> - src3 = __lsx_vldx(src0_ptr, src_stride_3x);
> + src3 = LSX_VLDX(src0_ptr, src_stride_3x);
> src0_ptr += src_stride_4x;
> in0 = __lsx_vld(src1_ptr, 0);
> - DUP2_ARG2(__lsx_vldx, src1_ptr, src2_stride_x, src1_ptr,
> + DUP2_ARG2(LSX_VLDX, src1_ptr, src2_stride_x, src1_ptr,
> src2_stride_2x, in1, in2);
> - in3 = __lsx_vldx(src1_ptr, src2_stride_3x);
> + in3 = LSX_VLDX(src1_ptr, src2_stride_3x);
> src1_ptr += src2_stride_2x;
> in4 = __lsx_vld(_src1, 0);
> - DUP2_ARG2(__lsx_vldx, _src1, src2_stride_x, _src1, src2_stride_2x,
> + DUP2_ARG2(LSX_VLDX, _src1, src2_stride_x, _src1, src2_stride_2x,
> in5, in6);
> - in7 = __lsx_vldx(_src1, src2_stride_3x);
> + in7 = LSX_VLDX(_src1, src2_stride_3x);
> _src1 += src2_stride_2x;
>
> DUP2_ARG2(__lsx_vilvl_d, in5, in4, in7, in6, in4, in5);
> @@ -389,19 +393,19 @@ void hevc_bi_copy_16w_lsx(uint8_t *src0_ptr, int32_t src_stride,
>
> for (loop_cnt = (height >> 2); loop_cnt--;) {
> src0 = __lsx_vld(src0_ptr, 0);
> - DUP2_ARG2(__lsx_vldx, src0_ptr, src_stride, src0_ptr, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src0_ptr, src_stride, src0_ptr, src_stride_2x,
> src1, src2);
> - src3 = __lsx_vldx(src0_ptr, src_stride_3x);
> + src3 = LSX_VLDX(src0_ptr, src_stride_3x);
> src0_ptr += src_stride_4x;
> in0 = __lsx_vld(src1_ptr, 0);
> - DUP2_ARG2(__lsx_vldx, src1_ptr, src2_stride_x, src1_ptr,
> + DUP2_ARG2(LSX_VLDX, src1_ptr, src2_stride_x, src1_ptr,
> src2_stride_2x, in1, in2);
> - in3 = __lsx_vldx(src1_ptr, src2_stride_3x);
> + in3 = LSX_VLDX(src1_ptr, src2_stride_3x);
> src1_ptr += src2_stride_2x;
> in4 = __lsx_vld(_src1, 0);
> - DUP2_ARG2(__lsx_vldx, _src1, src2_stride_x, _src1, src2_stride_2x,
> + DUP2_ARG2(LSX_VLDX, _src1, src2_stride_x, _src1, src2_stride_2x,
> in5, in6);
> - in7 = __lsx_vldx(_src1, src2_stride_3x);
> + in7 = LSX_VLDX(_src1, src2_stride_3x);
> _src1 += src2_stride_2x;
> DUP4_ARG2(__lsx_vsllwil_hu_bu, src0, 6, src1, 6, src2, 6, src3, 6,
> dst0_r, dst1_r, dst2_r, dst3_r)
> @@ -647,12 +651,12 @@ void hevc_vt_8t_8w_lsx(uint8_t *src0_ptr, int32_t src_stride, int16_t *src1_ptr,
> filt0, filt1, filt2, filt3);
>
> src0 = __lsx_vld(src0_ptr, 0);
> - DUP2_ARG2(__lsx_vldx, src0_ptr, src_stride, src0_ptr, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src0_ptr, src_stride, src0_ptr, src_stride_2x,
> src1, src2);
> - src3 = __lsx_vldx(src0_ptr, src_stride_3x);
> + src3 = LSX_VLDX(src0_ptr, src_stride_3x);
> src0_ptr += src_stride_4x;
> src4 = __lsx_vld(src0_ptr, 0);
> - DUP2_ARG2(__lsx_vldx, src0_ptr, src_stride, src0_ptr, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src0_ptr, src_stride, src0_ptr, src_stride_2x,
> src5, src6);
> src0_ptr += src_stride_3x;
> DUP4_ARG2(__lsx_vilvl_b, src1, src0, src3, src2, src5, src4, src2, src1,
> @@ -661,14 +665,14 @@ void hevc_vt_8t_8w_lsx(uint8_t *src0_ptr, int32_t src_stride, int16_t *src1_ptr,
>
> for (loop_cnt = (height >> 2); loop_cnt--;) {
> src7 = __lsx_vld(src0_ptr, 0);
> - DUP2_ARG2(__lsx_vldx, src0_ptr, src_stride, src0_ptr, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src0_ptr, src_stride, src0_ptr, src_stride_2x,
> src8, src9);
> - src10 = __lsx_vldx(src0_ptr, src_stride_3x);
> + src10 = LSX_VLDX(src0_ptr, src_stride_3x);
> src0_ptr += src_stride_4x;
> in0 = __lsx_vld(src1_ptr, 0);
> - DUP2_ARG2(__lsx_vldx, src1_ptr, src2_stride_x, src1_ptr, src2_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src1_ptr, src2_stride_x, src1_ptr, src2_stride_2x,
> in1, in2);
> - in3 = __lsx_vldx(src1_ptr, src2_stride_3x);
> + in3 = LSX_VLDX(src1_ptr, src2_stride_3x);
> src1_ptr += src2_stride_2x;
> DUP4_ARG2(__lsx_vilvl_b, src7, src6, src8, src7, src9, src8, src10, src9,
> src76_r, src87_r, src98_r, src109_r);
> @@ -741,12 +745,12 @@ void hevc_vt_8t_16multx2mult_lsx(uint8_t *src0_ptr, int32_t src_stride,
> dst_tmp = dst;
>
> src0 = __lsx_vld(src0_ptr_tmp, 0);
> - DUP2_ARG2(__lsx_vldx, src0_ptr_tmp, src_stride, src0_ptr_tmp,
> + DUP2_ARG2(LSX_VLDX, src0_ptr_tmp, src_stride, src0_ptr_tmp,
> src_stride_2x, src1, src2);
> - src3 = __lsx_vldx(src0_ptr_tmp, src_stride_3x);
> + src3 = LSX_VLDX(src0_ptr_tmp, src_stride_3x);
> src0_ptr_tmp += src_stride_4x;
> src4 = __lsx_vld(src0_ptr_tmp, 0);
> - DUP2_ARG2(__lsx_vldx, src0_ptr_tmp, src_stride, src0_ptr_tmp,
> + DUP2_ARG2(LSX_VLDX, src0_ptr_tmp, src_stride, src0_ptr_tmp,
> src_stride_2x, src5, src6);
> src0_ptr_tmp += src_stride_3x;
>
> @@ -759,7 +763,7 @@ void hevc_vt_8t_16multx2mult_lsx(uint8_t *src0_ptr, int32_t src_stride,
>
> for (loop_cnt = (height >> 1); loop_cnt--;) {
> src7 = __lsx_vld(src0_ptr_tmp, 0);
> - src8 = __lsx_vldx(src0_ptr_tmp, src_stride);
> + src8 = LSX_VLDX(src0_ptr_tmp, src_stride);
> src0_ptr_tmp += src_stride_2x;
> DUP2_ARG2(__lsx_vld, src1_ptr_tmp, 0, src1_ptr_tmp, 16, in0, in2);
> src1_ptr_tmp += src2_stride;
> @@ -903,12 +907,12 @@ void hevc_hv_8t_8multx1mult_lsx(uint8_t *src0_ptr, int32_t src_stride,
> src1_ptr_tmp = src1_ptr;
>
> src0 = __lsx_vld(src0_ptr_tmp, 0);
> - DUP2_ARG2(__lsx_vldx, src0_ptr_tmp, src_stride, src0_ptr_tmp,
> + DUP2_ARG2(LSX_VLDX, src0_ptr_tmp, src_stride, src0_ptr_tmp,
> src_stride_2x, src1, src2);
> - src3 = __lsx_vldx(src0_ptr_tmp, src_stride_3x);
> + src3 = LSX_VLDX(src0_ptr_tmp, src_stride_3x);
> src0_ptr_tmp += src_stride_4x;
> src4 = __lsx_vld(src0_ptr_tmp, 0);
> - DUP2_ARG2(__lsx_vldx, src0_ptr_tmp, src_stride, src0_ptr_tmp,
> + DUP2_ARG2(LSX_VLDX, src0_ptr_tmp, src_stride, src0_ptr_tmp,
> src_stride_2x, src5, src6);
> src0_ptr_tmp += src_stride_3x;
>
> @@ -1134,9 +1138,9 @@ static void hevc_hz_4t_24w_lsx(uint8_t *src0_ptr, int32_t src_stride,
> dst += dst_stride_4x;
>
> in0 = __lsx_vld(src1_ptr_tmp, 0);
> - DUP2_ARG2(__lsx_vldx, src1_ptr_tmp, src2_stride_x, src1_ptr_tmp,
> + DUP2_ARG2(LSX_VLDX, src1_ptr_tmp, src2_stride_x, src1_ptr_tmp,
> src2_stride_2x, in1, in2);
> - in3 = __lsx_vldx(src1_ptr_tmp, src2_stride_3x);
> + in3 = LSX_VLDX(src1_ptr_tmp, src2_stride_3x);
> src1_ptr_tmp += src2_stride_2x;
>
> DUP4_ARG3(__lsx_vshuf_b, src1, src1, mask0, src3, src3, mask0, src5,
> @@ -1229,7 +1233,7 @@ static void hevc_vt_4t_12w_lsx(uint8_t *src0_ptr, int32_t src_stride,
> DUP2_ARG2(__lsx_vldrepl_h, filter, 0, filter, 2, filt0, filt1);
>
> src0 = __lsx_vld(src0_ptr, 0);
> - DUP2_ARG2(__lsx_vldx, src0_ptr, src_stride, src0_ptr, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src0_ptr, src_stride, src0_ptr, src_stride_2x,
> src1, src2);
> src0_ptr += src_stride_3x;
> DUP2_ARG2(__lsx_vilvl_b, src1, src0, src2, src1, src10_r, src21_r);
> @@ -1238,19 +1242,19 @@ static void hevc_vt_4t_12w_lsx(uint8_t *src0_ptr, int32_t src_stride,
>
> for (loop_cnt = (height >> 2); loop_cnt--;) {
> src3 = __lsx_vld(src0_ptr, 0);
> - DUP2_ARG2(__lsx_vldx, src0_ptr, src_stride, src0_ptr, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src0_ptr, src_stride, src0_ptr, src_stride_2x,
> src4, src5);
> - src6 = __lsx_vldx(src0_ptr, src_stride_3x);
> + src6 = LSX_VLDX(src0_ptr, src_stride_3x);
> src0_ptr += src_stride_4x;
> in0 = __lsx_vld(src1_ptr, 0);
> - DUP2_ARG2(__lsx_vldx, src1_ptr, src2_stride_x, src1_ptr,
> + DUP2_ARG2(LSX_VLDX, src1_ptr, src2_stride_x, src1_ptr,
> src2_stride_2x, in1, in2);
> - in3 = __lsx_vldx(src1_ptr, src2_stride_3x);
> + in3 = LSX_VLDX(src1_ptr, src2_stride_3x);
> src1_ptr += src2_stride_2x;
> in4 = __lsx_vld(_src1, 0);
> - DUP2_ARG2(__lsx_vldx, _src1, src2_stride_x, _src1, src2_stride_2x,
> + DUP2_ARG2(LSX_VLDX, _src1, src2_stride_x, _src1, src2_stride_2x,
> in5, in6);
> - in7 = __lsx_vldx(_src1, src2_stride_3x);
> + in7 = LSX_VLDX(_src1, src2_stride_3x);
> _src1 += src2_stride_2x;
> DUP2_ARG2(__lsx_vilvl_d, in5, in4, in7, in6, in4, in5);
>
> @@ -1310,7 +1314,7 @@ static void hevc_vt_4t_16w_lsx(uint8_t *src0_ptr, int32_t src_stride,
> DUP2_ARG2(__lsx_vldrepl_h, filter, 0, filter, 2, filt0, filt1);
>
> src0 = __lsx_vld(src0_ptr, 0);
> - DUP2_ARG2(__lsx_vldx, src0_ptr, src_stride, src0_ptr, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src0_ptr, src_stride, src0_ptr, src_stride_2x,
> src1, src2);
> src0_ptr += src_stride_3x;
> DUP2_ARG2(__lsx_vilvl_b, src1, src0, src2, src1, src10_r, src21_r);
> @@ -1318,7 +1322,7 @@ static void hevc_vt_4t_16w_lsx(uint8_t *src0_ptr, int32_t src_stride,
>
> for (loop_cnt = (height >> 2); loop_cnt--;) {
> src3 = __lsx_vld(src0_ptr, 0);
> - src4 = __lsx_vldx(src0_ptr, src_stride);
> + src4 = LSX_VLDX(src0_ptr, src_stride);
> src0_ptr += src_stride_2x;
> DUP2_ARG2(__lsx_vld, src1_ptr, 0, src1_ptr, 16, in0, in2);
> src1_ptr += src2_stride;
> @@ -1340,7 +1344,7 @@ static void hevc_vt_4t_16w_lsx(uint8_t *src0_ptr, int32_t src_stride,
> dst += dst_stride_2x;
>
> src5 = __lsx_vld(src0_ptr, 0);
> - src2 = __lsx_vldx(src0_ptr, src_stride);
> + src2 = LSX_VLDX(src0_ptr, src_stride);
> src0_ptr += src_stride_2x;
> DUP2_ARG2(__lsx_vld, src1_ptr, 0, src1_ptr, 16, in0, in2);
> src1_ptr += src2_stride;
> @@ -1517,7 +1521,7 @@ static void hevc_hv_4t_6w_lsx(uint8_t *src0_ptr, int32_t src_stride,
> mask1 = __lsx_vaddi_bu(mask0, 2);
>
> src0 = __lsx_vld(src0_ptr, 0);
> - DUP2_ARG2(__lsx_vldx, src0_ptr, src_stride, src0_ptr, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src0_ptr, src_stride, src0_ptr, src_stride_2x,
> src1, src2);
> src0_ptr += src_stride_3x;
>
> @@ -1535,9 +1539,9 @@ static void hevc_hv_4t_6w_lsx(uint8_t *src0_ptr, int32_t src_stride,
> DUP2_ARG2(__lsx_vilvh_h, dsth1, dsth0, dsth2, dsth1, tmp1, tmp3);
>
> src3 = __lsx_vld(src0_ptr, 0);
> - DUP2_ARG2(__lsx_vldx, src0_ptr, src_stride, src0_ptr, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src0_ptr, src_stride, src0_ptr, src_stride_2x,
> src4, src5);
> - src6 = __lsx_vldx(src0_ptr, src_stride_3x);
> + src6 = LSX_VLDX(src0_ptr, src_stride_3x);
> src0_ptr += src_stride_4x;
> DUP2_ARG3(__lsx_vshuf_b, src3, src3, mask0, src3, src3, mask1, vec0, vec1);
> DUP2_ARG3(__lsx_vshuf_b, src4, src4, mask0, src4, src4, mask1, vec2, vec3);
> @@ -1550,9 +1554,9 @@ static void hevc_hv_4t_6w_lsx(uint8_t *src0_ptr, int32_t src_stride,
> vec5, filt1, dsth6, vec7, filt1, dsth3, dsth4, dsth5, dsth6);
>
> src3 = __lsx_vld(src0_ptr, 0);
> - DUP2_ARG2(__lsx_vldx, src0_ptr, src_stride, src0_ptr, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src0_ptr, src_stride, src0_ptr, src_stride_2x,
> src4, src5);
> - src6 = __lsx_vldx(src0_ptr, src_stride_3x);
> + src6 = LSX_VLDX(src0_ptr, src_stride_3x);
>
> DUP2_ARG3(__lsx_vshuf_b, src3, src3, mask0, src3, src3, mask1, vec0, vec1);
> DUP2_ARG3(__lsx_vshuf_b, src4, src4, mask0, src4, src4, mask1, vec2, vec3);
> @@ -1700,7 +1704,7 @@ void hevc_hv_4t_8x2_lsx(uint8_t *src0_ptr, int32_t src_stride, int16_t *src1_ptr
> mask1 = __lsx_vaddi_bu(mask0, 2);
>
> src0 = __lsx_vld(src0_ptr, 0);
> - DUP4_ARG2(__lsx_vldx, src0_ptr, src_stride, src0_ptr, src_stride_2x,
> + DUP4_ARG2(LSX_VLDX, src0_ptr, src_stride, src0_ptr, src_stride_2x,
> src0_ptr, src_stride_3x, src0_ptr, src_stride_4x,
> src1, src2, src3, src4);
>
> @@ -1777,19 +1781,19 @@ void hevc_hv_4t_8multx4_lsx(uint8_t *src0_ptr, int32_t src_stride,
>
> for (cnt = width8mult; cnt--;) {
> src0 = __lsx_vld(src0_ptr, 0);
> - DUP2_ARG2(__lsx_vldx, src0_ptr, src_stride, src0_ptr, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src0_ptr, src_stride, src0_ptr, src_stride_2x,
> src1, src2);
> - src3 = __lsx_vldx(src0_ptr, src_stride_3x);
> + src3 = LSX_VLDX(src0_ptr, src_stride_3x);
> src0_ptr += src_stride_4x;
> src4 = __lsx_vld(src0_ptr, 0);
> - DUP2_ARG2(__lsx_vldx, src0_ptr, src_stride, src0_ptr, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src0_ptr, src_stride, src0_ptr, src_stride_2x,
> src5, src6);
> src0_ptr += (8 - src_stride_4x);
>
> in0 = __lsx_vld(src1_ptr, 0);
> - DUP2_ARG2(__lsx_vldx, src1_ptr, src2_stride_x, src1_ptr,
> + DUP2_ARG2(LSX_VLDX, src1_ptr, src2_stride_x, src1_ptr,
> src2_stride_2x, in1, in2);
> - in3 = __lsx_vldx(src1_ptr, src2_stride_3x);
> + in3 = LSX_VLDX(src1_ptr, src2_stride_3x);
> src1_ptr += 8;
>
> DUP2_ARG3(__lsx_vshuf_b, src0, src0, mask0, src0, src0, mask1,
> @@ -1900,22 +1904,22 @@ void hevc_hv_4t_8x6_lsx(uint8_t *src0_ptr, int32_t src_stride, int16_t *src1_ptr
> mask1 = __lsx_vaddi_bu(mask0, 2);
>
> src0 = __lsx_vld(src0_ptr, 0);
> - DUP2_ARG2(__lsx_vldx, src0_ptr, src_stride, src0_ptr, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src0_ptr, src_stride, src0_ptr, src_stride_2x,
> src1, src2);
> - src3 = __lsx_vldx(src0_ptr, src_stride_3x);
> + src3 = LSX_VLDX(src0_ptr, src_stride_3x);
> src0_ptr += src_stride_4x;
> src4 = __lsx_vld(src0_ptr, 0);
> - DUP4_ARG2(__lsx_vldx, src0_ptr, src_stride, src0_ptr, src_stride_2x,
> + DUP4_ARG2(LSX_VLDX, src0_ptr, src_stride, src0_ptr, src_stride_2x,
> src0_ptr, src_stride_3x, src0_ptr, src_stride_4x,
> src5, src6, src7, src8);
>
> in0 = __lsx_vld(src1_ptr, 0);
> - DUP2_ARG2(__lsx_vldx, src1_ptr, src2_stride_x, src1_ptr, src2_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src1_ptr, src2_stride_x, src1_ptr, src2_stride_2x,
> in1, in2);
> - in3 = __lsx_vldx(src1_ptr, src2_stride_3x);
> + in3 = LSX_VLDX(src1_ptr, src2_stride_3x);
> src1_ptr += src2_stride_2x;
> in4 = __lsx_vld(src1_ptr, 0);
> - in5 = __lsx_vldx(src1_ptr, src2_stride_x);
> + in5 = LSX_VLDX(src1_ptr, src2_stride_x);
>
> DUP2_ARG3(__lsx_vshuf_b, src0, src0, mask0, src0, src0, mask1, vec0, vec1);
> DUP2_ARG3(__lsx_vshuf_b, src1, src1, mask0, src1, src1, mask1, vec2, vec3);
> @@ -2041,7 +2045,7 @@ void hevc_hv_4t_8multx4mult_lsx(uint8_t *src0_ptr, int32_t src_stride,
> src1_ptr_tmp = src1_ptr;
>
> src0 = __lsx_vld(src0_ptr_tmp, 0);
> - DUP2_ARG2(__lsx_vldx, src0_ptr_tmp, src_stride, src0_ptr_tmp,
> + DUP2_ARG2(LSX_VLDX, src0_ptr_tmp, src_stride, src0_ptr_tmp,
> src_stride_2x, src1, src2);
> src0_ptr_tmp += src_stride_3x;
>
> @@ -2063,14 +2067,14 @@ void hevc_hv_4t_8multx4mult_lsx(uint8_t *src0_ptr, int32_t src_stride,
>
> for (loop_cnt = height >> 2; loop_cnt--;) {
> src3 = __lsx_vld(src0_ptr_tmp, 0);
> - DUP2_ARG2(__lsx_vldx, src0_ptr_tmp, src_stride, src0_ptr_tmp,
> + DUP2_ARG2(LSX_VLDX, src0_ptr_tmp, src_stride, src0_ptr_tmp,
> src_stride_2x, src4, src5);
> - src6 = __lsx_vldx(src0_ptr_tmp, src_stride_3x);
> + src6 = LSX_VLDX(src0_ptr_tmp, src_stride_3x);
> src0_ptr_tmp += src_stride_4x;
> in0 = __lsx_vld(src1_ptr_tmp, 0);
> - DUP2_ARG2(__lsx_vldx, src1_ptr_tmp, src2_stride_x, src1_ptr_tmp,
> + DUP2_ARG2(LSX_VLDX, src1_ptr_tmp, src2_stride_x, src1_ptr_tmp,
> src2_stride_2x, in1, in2);
> - in3 = __lsx_vldx(src1_ptr_tmp, src2_stride_3x);
> + in3 = LSX_VLDX(src1_ptr_tmp, src2_stride_3x);
> src1_ptr_tmp += src2_stride_2x;
>
> DUP4_ARG3(__lsx_vshuf_b, src3, src3, mask0, src3, src3, mask1, src4,
> diff --git a/libavcodec/loongarch/hevc_mc_uni_lsx.c b/libavcodec/loongarch/hevc_mc_uni_lsx.c
> index a15c86268f..8997ba4868 100644
> --- a/libavcodec/loongarch/hevc_mc_uni_lsx.c
> +++ b/libavcodec/loongarch/hevc_mc_uni_lsx.c
> @@ -23,6 +23,10 @@
> #include "libavutil/loongarch/loongson_intrinsics.h"
> #include "hevcdsp_lsx.h"
>
> +/* __lsx_vldx() from lsxintrin.h does not accept a const void*;
> + * remove the following once it does. */
> +#define LSX_VLDX(cptr, stride) __lsx_vldx((void*)(cptr), (stride))
> +
> static const uint8_t ff_hevc_mask_arr[16 * 3] __attribute__((aligned(0x40))) = {
> /* 8 width cases */
> 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8,
> @@ -148,11 +152,11 @@ void common_vt_8t_8w_lsx(uint8_t *src, int32_t src_stride,
> filt0, filt1, filt2, filt3);
>
> src0 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src1, src2);
> - src3 = __lsx_vldx(src, src_stride_3x);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src1, src2);
> + src3 = LSX_VLDX(src, src_stride_3x);
> src += src_stride_4x;
> src4 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src5, src6);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src5, src6);
> src += src_stride_3x;
> DUP4_ARG2(__lsx_vilvl_b, src1, src0, src3, src2, src5, src4, src2, src1,
> src10_r, src32_r, src54_r, src21_r);
> @@ -160,8 +164,8 @@ void common_vt_8t_8w_lsx(uint8_t *src, int32_t src_stride,
>
> for (loop_cnt = (height >> 2); loop_cnt--;) {
> src7 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src8, src9);
> - src10 = __lsx_vldx(src, src_stride_3x);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src8, src9);
> + src10 = LSX_VLDX(src, src_stride_3x);
> src += src_stride_4x;
>
> DUP4_ARG2(__lsx_vilvl_b, src7, src6, src8, src7, src9, src8, src10,
> @@ -228,12 +232,12 @@ void common_vt_8t_16w_lsx(uint8_t *src, int32_t src_stride, uint8_t *dst,
> dst_tmp = dst;
>
> src0 = __lsx_vld(src_tmp, 0);
> - DUP2_ARG2(__lsx_vldx, src_tmp, src_stride, src_tmp, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src_tmp, src_stride, src_tmp, src_stride_2x,
> src1, src2);
> - src3 = __lsx_vldx(src_tmp, src_stride_3x);
> + src3 = LSX_VLDX(src_tmp, src_stride_3x);
> src_tmp += src_stride_4x;
> src4 = __lsx_vld(src_tmp, 0);
> - DUP2_ARG2(__lsx_vldx, src_tmp, src_stride, src_tmp, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src_tmp, src_stride, src_tmp, src_stride_2x,
> src5, src6);
> src_tmp += src_stride_3x;
> DUP4_ARG2(__lsx_vilvl_b, src1, src0, src3, src2, src5, src4, src2, src1,
> @@ -245,9 +249,9 @@ void common_vt_8t_16w_lsx(uint8_t *src, int32_t src_stride, uint8_t *dst,
>
> for (loop_cnt = (height >> 2); loop_cnt--;) {
> src7 = __lsx_vld(src_tmp, 0);
> - DUP2_ARG2(__lsx_vldx, src_tmp, src_stride, src_tmp, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src_tmp, src_stride, src_tmp, src_stride_2x,
> src8, src9);
> - src10 = __lsx_vldx(src_tmp, src_stride_3x);
> + src10 = LSX_VLDX(src_tmp, src_stride_3x);
> src_tmp += src_stride_4x;
> DUP4_ARG2(__lsx_vilvl_b, src7, src6, src8, src7, src9, src8, src10,
> src9, src76_r, src87_r, src98_r, src109_r);
> @@ -380,12 +384,12 @@ void hevc_hv_8t_8x2_lsx(uint8_t *src, int32_t src_stride, uint8_t *dst,
> dst_tmp = dst;
>
> src0 = __lsx_vld(src_tmp, 0);
> - DUP2_ARG2(__lsx_vldx, src_tmp, src_stride, src_tmp, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src_tmp, src_stride, src_tmp, src_stride_2x,
> src1, src2);
> - src3 = __lsx_vldx(src_tmp, src_stride_3x);
> + src3 = LSX_VLDX(src_tmp, src_stride_3x);
> src_tmp += src_stride_4x;
> src4 = __lsx_vld(src_tmp, 0);
> - DUP2_ARG2(__lsx_vldx, src_tmp, src_stride, src_tmp, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src_tmp, src_stride, src_tmp, src_stride_2x,
> src5, src6);
> src_tmp += src_stride_3x;
>
> @@ -429,7 +433,7 @@ void hevc_hv_8t_8x2_lsx(uint8_t *src, int32_t src_stride, uint8_t *dst,
>
> for (loop_cnt = height >> 1; loop_cnt--;) {
> src7 = __lsx_vld(src_tmp, 0);
> - src8 = __lsx_vldx(src_tmp, src_stride);
> + src8 = LSX_VLDX(src_tmp, src_stride);
> src_tmp += src_stride_2x;
>
> DUP4_ARG3(__lsx_vshuf_b, src7, src7, mask0, src7, src7, mask1, src7,
> @@ -567,13 +571,13 @@ void common_vt_4t_24w_lsx(uint8_t *src, int32_t src_stride,
>
> /* 16 width */
> src0 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src1, src2);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src1, src2);
> DUP2_ARG2(__lsx_vilvl_b, src1, src0, src2, src1, src10_r, src21_r);
> DUP2_ARG2(__lsx_vilvh_b, src1, src0, src2, src1, src10_l, src21_l);
>
> /* 8 width */
> src6 = __lsx_vld(_src, 0);
> - DUP2_ARG2(__lsx_vldx, _src, src_stride, _src, src_stride_2x, src7, src8);
> + DUP2_ARG2(LSX_VLDX, _src, src_stride, _src, src_stride_2x, src7, src8);
> src += src_stride_3x;
> _src += src_stride_3x;
> DUP2_ARG2(__lsx_vilvl_b, src7, src6, src8, src7, src76_r, src87_r);
> @@ -581,7 +585,7 @@ void common_vt_4t_24w_lsx(uint8_t *src, int32_t src_stride,
> for (loop_cnt = 8; loop_cnt--;) {
> /* 16 width */
> DUP2_ARG2(__lsx_vld, src, 0, _src, 0, src3, src9);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, _src, src_stride, src4, src10);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, _src, src_stride, src4, src10);
> DUP2_ARG2(__lsx_vilvl_b, src3, src2, src4, src3, src32_r, src43_r);
> DUP2_ARG2(__lsx_vilvh_b, src3, src2, src4, src3, src32_l, src43_l);
>
> @@ -615,7 +619,7 @@ void common_vt_4t_24w_lsx(uint8_t *src, int32_t src_stride,
>
> /* 16 width */
> DUP2_ARG2(__lsx_vld, src, 0, _src, 0, src5, src11);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, _src, src_stride, src2, src8);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, _src, src_stride, src2, src8);
> DUP2_ARG2(__lsx_vilvl_b, src5, src4, src2, src5, src10_r, src21_r);
> DUP2_ARG2(__lsx_vilvh_b, src5, src4, src2, src5, src10_l, src21_l);
>
> @@ -676,14 +680,14 @@ void common_vt_4t_32w_lsx(uint8_t *src, int32_t src_stride,
>
> /* 16 width */
> src0 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src1, src2);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src1, src2);
>
> DUP2_ARG2(__lsx_vilvl_b, src1, src0, src2, src1, src10_r, src21_r);
> DUP2_ARG2(__lsx_vilvh_b, src1, src0, src2, src1, src10_l, src21_l);
>
> /* next 16 width */
> src6 = __lsx_vld(_src, 0);
> - DUP2_ARG2(__lsx_vldx, _src, src_stride, _src, src_stride_2x, src7, src8);
> + DUP2_ARG2(LSX_VLDX, _src, src_stride, _src, src_stride_2x, src7, src8);
> src += src_stride_3x;
> _src += src_stride_3x;
>
> @@ -693,7 +697,7 @@ void common_vt_4t_32w_lsx(uint8_t *src, int32_t src_stride,
> for (loop_cnt = (height >> 1); loop_cnt--;) {
> /* 16 width */
> DUP2_ARG2(__lsx_vld, src, 0, _src, 0, src3, src9);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, _src, src_stride, src4, src10);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, _src, src_stride, src4, src10);
> DUP2_ARG2(__lsx_vilvl_b, src3, src2, src4, src3, src32_r, src43_r);
> DUP2_ARG2(__lsx_vilvh_b, src3, src2, src4, src3, src32_l, src43_l);
>
> @@ -774,7 +778,7 @@ void hevc_hv_4t_8x2_lsx(uint8_t *src, int32_t src_stride, uint8_t *dst,
>
> mask1 = __lsx_vaddi_bu(mask0, 2);
> src0 = __lsx_vld(src, 0);
> - DUP4_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src,
> + DUP4_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src,
> src_stride_3x, src, src_stride_4x, src1, src2, src3, src4);
>
> DUP4_ARG3(__lsx_vshuf_b, src0, src0, mask0, src0, src0, mask1, src1, src1,
> @@ -838,11 +842,11 @@ void hevc_hv_4t_8multx4_lsx(uint8_t *src, int32_t src_stride, uint8_t *dst,
>
> for (cnt = width8mult; cnt--;) {
> src0 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src1, src2);
> - src3 = __lsx_vldx(src, src_stride_3x);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src1, src2);
> + src3 = LSX_VLDX(src, src_stride_3x);
> src += src_stride_4x;
> src4 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src5, src6);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src5, src6);
> src += (8 - src_stride_4x);
> DUP2_ARG3(__lsx_vshuf_b, src0, src0, mask0, src0, src0, mask1,
> vec0, vec1);
> @@ -939,10 +943,10 @@ void hevc_hv_4t_8x6_lsx(uint8_t *src, int32_t src_stride, uint8_t *dst,
> mask1 = __lsx_vaddi_bu(mask0, 2);
>
> src0 = __lsx_vld(src, 0);
> - DUP4_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x,src,
> + DUP4_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x,src,
> src_stride_3x, src, src_stride_4x, src1, src2, src3, src4);
> src += src_stride_4x;
> - DUP4_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x,src,
> + DUP4_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x,src,
> src_stride_3x, src, src_stride_4x, src5, src6, src7, src8);
>
> DUP4_ARG3(__lsx_vshuf_b, src0, src0, mask0, src0, src0, mask1, src1, src1,
> @@ -1051,7 +1055,7 @@ void hevc_hv_4t_8multx4mult_lsx(uint8_t *src, int32_t src_stride, uint8_t *dst,
> dst_tmp = dst;
>
> src0 = __lsx_vld(src_tmp, 0);
> - DUP2_ARG2(__lsx_vldx, src_tmp, src_stride, src_tmp, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src_tmp, src_stride, src_tmp, src_stride_2x,
> src1, src2);
> src_tmp += src_stride_3x;
>
> @@ -1073,9 +1077,9 @@ void hevc_hv_4t_8multx4mult_lsx(uint8_t *src, int32_t src_stride, uint8_t *dst,
>
> for (loop_cnt = (height >> 2); loop_cnt--;) {
> src3 = __lsx_vld(src_tmp, 0);
> - DUP2_ARG2(__lsx_vldx, src_tmp, src_stride, src_tmp, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src_tmp, src_stride, src_tmp, src_stride_2x,
> src4, src5);
> - src6 = __lsx_vldx(src_tmp, src_stride_3x);
> + src6 = LSX_VLDX(src_tmp, src_stride_3x);
> src_tmp += src_stride_4x;
>
> DUP4_ARG3(__lsx_vshuf_b, src3, src3, mask0, src3, src3, mask1, src4,
> @@ -1185,7 +1189,7 @@ void hevc_hv_4t_12w_lsx(uint8_t *src, int32_t src_stride, uint8_t *dst,
> dst_tmp = dst;
>
> src0 = __lsx_vld(src_tmp, 0);
> - DUP2_ARG2(__lsx_vldx, src_tmp, src_stride, src_tmp, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src_tmp, src_stride, src_tmp, src_stride_2x,
> src1, src2);
> src_tmp += src_stride_3x;
>
> @@ -1204,9 +1208,9 @@ void hevc_hv_4t_12w_lsx(uint8_t *src, int32_t src_stride, uint8_t *dst,
>
> for (loop_cnt = 4; loop_cnt--;) {
> src3 = __lsx_vld(src_tmp, 0);
> - DUP2_ARG2(__lsx_vldx, src_tmp, src_stride, src_tmp, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src_tmp, src_stride, src_tmp, src_stride_2x,
> src4, src5);
> - src6 = __lsx_vldx(src_tmp, src_stride_3x);
> + src6 = LSX_VLDX(src_tmp, src_stride_3x);
> src_tmp += src_stride_4x;
>
> DUP4_ARG3(__lsx_vshuf_b, src3, src3, mask0, src3, src3, mask1, src4,
> @@ -1260,7 +1264,7 @@ void hevc_hv_4t_12w_lsx(uint8_t *src, int32_t src_stride, uint8_t *dst,
> mask3 = __lsx_vaddi_bu(mask2, 2);
>
> src0 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src1, src2);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src1, src2);
> src += src_stride_3x;
> DUP2_ARG3(__lsx_vshuf_b, src1, src0, mask2, src1, src0, mask3, vec0, vec1);
> DUP2_ARG3(__lsx_vshuf_b, src2, src1, mask2, src2, src1, mask3, vec2, vec3);
> @@ -1275,12 +1279,12 @@ void hevc_hv_4t_12w_lsx(uint8_t *src, int32_t src_stride, uint8_t *dst,
>
> for (loop_cnt = 2; loop_cnt--;) {
> src3 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src4, src5);
> - src6 = __lsx_vldx(src, src_stride_3x);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src4, src5);
> + src6 = LSX_VLDX(src, src_stride_3x);
> src += src_stride_4x;
> src7 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src8, src9);
> - src10 = __lsx_vldx(src, src_stride_3x);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src8, src9);
> + src10 = LSX_VLDX(src, src_stride_3x);
> src += src_stride_4x;
> DUP4_ARG3(__lsx_vshuf_b, src7, src3, mask2, src7, src3, mask3, src8,
> src4, mask2, src8, src4, mask3, vec0, vec1, vec2, vec3);
> diff --git a/libavcodec/loongarch/hevc_mc_uniw_lsx.c b/libavcodec/loongarch/hevc_mc_uniw_lsx.c
> index 118f5b820e..30ee341e54 100644
> --- a/libavcodec/loongarch/hevc_mc_uniw_lsx.c
> +++ b/libavcodec/loongarch/hevc_mc_uniw_lsx.c
> @@ -23,6 +23,10 @@
> #include "libavutil/loongarch/loongson_intrinsics.h"
> #include "hevcdsp_lsx.h"
>
> +/* __lsx_vldx() from lsxintrin.h does not accept a const void*;
> + * remove the following once it does. */
> +#define LSX_VLDX(cptr, stride) __lsx_vldx((void*)(cptr), (stride))
> +
> static const uint8_t ff_hevc_mask_arr[16 * 2] __attribute__((aligned(0x40))) = {
> /* 8 width cases */
> 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8,
> @@ -79,12 +83,12 @@ void hevc_hv_8t_8x2_lsx(uint8_t *src, int32_t src_stride, uint8_t *dst,
> dst_tmp = dst;
>
> src0 = __lsx_vld(src_tmp, 0);
> - DUP2_ARG2(__lsx_vldx, src_tmp, src_stride, src_tmp, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src_tmp, src_stride, src_tmp, src_stride_2x,
> src1, src2);
> - src3 = __lsx_vldx(src_tmp, src_stride_3x);
> + src3 = LSX_VLDX(src_tmp, src_stride_3x);
> src_tmp += src_stride_4x;
> src4 = __lsx_vld(src_tmp, 0);
> - DUP2_ARG2(__lsx_vldx, src_tmp, src_stride, src_tmp, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src_tmp, src_stride, src_tmp, src_stride_2x,
> src5, src6);
> src_tmp += src_stride_3x;
>
> @@ -127,7 +131,7 @@ void hevc_hv_8t_8x2_lsx(uint8_t *src, int32_t src_stride, uint8_t *dst,
>
> for (loop_cnt = height >> 1; loop_cnt--;) {
> src7 = __lsx_vld(src_tmp, 0);
> - src8 = __lsx_vldx(src_tmp, src_stride);
> + src8 = LSX_VLDX(src_tmp, src_stride);
> src_tmp += src_stride_2x;
> DUP4_ARG3(__lsx_vshuf_b, src7, src7, mask0, src7, src7, mask1, src7,
> src7, mask2, src7, src7, mask3, vec0, vec1, vec2, vec3);
> diff --git a/libavcodec/loongarch/hevcdsp_lsx.c b/libavcodec/loongarch/hevcdsp_lsx.c
> index a520f02bd1..c88d1087fc 100644
> --- a/libavcodec/loongarch/hevcdsp_lsx.c
> +++ b/libavcodec/loongarch/hevcdsp_lsx.c
> @@ -23,6 +23,10 @@
> #include "libavutil/loongarch/loongson_intrinsics.h"
> #include "hevcdsp_lsx.h"
>
> +/* __lsx_vldx() from lsxintrin.h does not accept a const void*;
> + * remove the following once it does. */
> +#define LSX_VLDX(cptr, stride) __lsx_vldx((void*)(cptr), (stride))
> +
> static const uint8_t ff_hevc_mask_arr[16 * 2] __attribute__((aligned(0x40))) = {
> /* 8 width cases */
> 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8,
> @@ -48,14 +52,14 @@ static void hevc_copy_4w_lsx(uint8_t *src, int32_t src_stride,
> __m128i in0, in1, in2, in3;
> for (; loop_cnt--;) {
> src0 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x,
> src1, src2);
> - src3 = __lsx_vldx(src, src_stride_3x);
> + src3 = LSX_VLDX(src, src_stride_3x);
> src += src_stride_4x;
> src4 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x,
> src5, src6);
> - src7 = __lsx_vldx(src, src_stride_3x);
> + src7 = LSX_VLDX(src, src_stride_3x);
> src += src_stride_4x;
>
> DUP4_ARG2(__lsx_vilvl_w, src1, src0, src3, src2, src5, src4, src7, src6,
> @@ -98,12 +102,12 @@ static void hevc_copy_6w_lsx(uint8_t *src, int32_t src_stride,
>
> for (loop_cnt = (height >> 3); loop_cnt--;) {
> src0 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src1, src2);
> - src3 = __lsx_vldx(src, src_stride_3x);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src1, src2);
> + src3 = LSX_VLDX(src, src_stride_3x);
> src += src_stride_4x;
> src4 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src5, src6);
> - src7 = __lsx_vldx(src, src_stride_3x);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src5, src6);
> + src7 = LSX_VLDX(src, src_stride_3x);
> src += src_stride_4x;
>
> DUP4_ARG2(__lsx_vsllwil_hu_bu, src0, 6, src1, 6, src2, 6, src3, 6,
> @@ -163,14 +167,14 @@ static void hevc_copy_8w_lsx(uint8_t *src, int32_t src_stride,
>
> for (loop_cnt = (height >> 3); loop_cnt--;) {
> src0 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x,
> src1, src2);
> - src3 = __lsx_vldx(src, src_stride_3x);
> + src3 = LSX_VLDX(src, src_stride_3x);
> src += src_stride_4x;
> src4 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x,
> src5, src6);
> - src7 = __lsx_vldx(src, src_stride_3x);
> + src7 = LSX_VLDX(src, src_stride_3x);
> src += src_stride_4x;
>
> DUP4_ARG2(__lsx_vsllwil_hu_bu, src0, 6, src1, 6, src2, 6, src3, 6,
> @@ -215,12 +219,12 @@ static void hevc_copy_12w_lsx(uint8_t *src, int32_t src_stride,
>
> for (loop_cnt = (height >> 3); loop_cnt--;) {
> src0 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src1, src2);
> - src3 = __lsx_vldx(src, src_stride_3x);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src1, src2);
> + src3 = LSX_VLDX(src, src_stride_3x);
> src += src_stride_4x;
> src4 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src5, src6);
> - src7 = __lsx_vldx(src, src_stride_3x);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src5, src6);
> + src7 = LSX_VLDX(src, src_stride_3x);
> src += src_stride_4x;
>
> DUP4_ARG2(__lsx_vsllwil_hu_bu, src0, 6, src1, 6, src2, 6, src3, 6,
> @@ -288,14 +292,14 @@ static void hevc_copy_16w_lsx(uint8_t *src, int32_t src_stride,
>
> for (loop_cnt = (height >> 3); loop_cnt--;) {
> src0 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x,
> src1, src2);
> - src3 = __lsx_vldx(src, src_stride_3x);
> + src3 = LSX_VLDX(src, src_stride_3x);
> src += src_stride_4x;
> src4 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x,
> src5, src6);
> - src7 = __lsx_vldx(src, src_stride_3x);
> + src7 = LSX_VLDX(src, src_stride_3x);
> src += src_stride_4x;
> DUP4_ARG2(__lsx_vilvh_b, zero, src0, zero, src1, zero, src2, zero, src3,
> in0_l, in1_l, in2_l, in3_l);
> @@ -333,8 +337,8 @@ static void hevc_copy_16w_lsx(uint8_t *src, int32_t src_stride,
> }
> if (res) {
> src0 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src1, src2);
> - src3 = __lsx_vldx(src, src_stride_3x);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src1, src2);
> + src3 = LSX_VLDX(src, src_stride_3x);
>
> DUP4_ARG2(__lsx_vilvh_b, zero, src0, zero, src1, zero, src2, zero, src3,
> in0_l, in1_l, in2_l, in3_l);
> @@ -373,13 +377,13 @@ static void hevc_copy_24w_lsx(uint8_t *src, int32_t src_stride,
>
> for (loop_cnt = (height >> 2); loop_cnt--;) {
> src0 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src1, src2);
> - src3 = __lsx_vldx(src, src_stride_3x);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src1, src2);
> + src3 = LSX_VLDX(src, src_stride_3x);
> src += src_stride_4x;
> src4 = __lsx_vld(_src, 0);
> - DUP2_ARG2(__lsx_vldx, _src, src_stride, _src, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, _src, src_stride, _src, src_stride_2x,
> src5, src6);
> - src7 = __lsx_vldx(_src, src_stride_3x);
> + src7 = LSX_VLDX(_src, src_stride_3x);
> _src += src_stride_4x;
>
> DUP4_ARG2(__lsx_vilvh_b, zero, src0, zero, src1, zero, src2, zero,
> @@ -423,13 +427,13 @@ static void hevc_copy_32w_lsx(uint8_t *src, int32_t src_stride,
>
> for (loop_cnt = (height >> 2); loop_cnt--;) {
> src0 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src2, src4);
> - src6 = __lsx_vldx(src, src_stride_3x);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src2, src4);
> + src6 = LSX_VLDX(src, src_stride_3x);
> src += src_stride_4x;
> src1 = __lsx_vld(_src, 0);
> - DUP2_ARG2(__lsx_vldx, _src, src_stride, _src, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, _src, src_stride, _src, src_stride_2x,
> src3, src5);
> - src7 = __lsx_vldx(_src, src_stride_3x);
> + src7 = LSX_VLDX(_src, src_stride_3x);
> _src += src_stride_4x;
>
> DUP4_ARG2(__lsx_vilvh_b, zero, src0, zero, src1, zero, src2, zero,
> @@ -623,12 +627,12 @@ static void hevc_hz_8t_4w_lsx(uint8_t *src, int32_t src_stride,
>
> for (;loop_cnt--;) {
> src0 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src1, src2);
> - src3 = __lsx_vldx(src, src_stride_3x);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src1, src2);
> + src3 = LSX_VLDX(src, src_stride_3x);
> src += src_stride_4x;
> src4 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src5, src6);
> - src7 = __lsx_vldx(src, src_stride_3x);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src5, src6);
> + src7 = LSX_VLDX(src, src_stride_3x);
> src += src_stride_4x;
> DUP4_ARG3(__lsx_vshuf_b, src1, src0, mask0, src1, src0, mask1, src1,
> src0, mask2, src1, src0, mask3, vec0, vec1, vec2, vec3);
> @@ -668,7 +672,7 @@ static void hevc_hz_8t_4w_lsx(uint8_t *src, int32_t src_stride,
> }
> for (;res--;) {
> src0 = __lsx_vld(src, 0);
> - src1 = __lsx_vldx(src, src_stride);
> + src1 = LSX_VLDX(src, src_stride);
> DUP4_ARG3(__lsx_vshuf_b, src1, src0, mask0, src1, src0, mask1, src1,
> src0, mask2, src1, src0, mask3, vec0, vec1, vec2, vec3);
> dst0 = __lsx_vdp2_h_bu_b(vec0, filt0);
> @@ -709,8 +713,8 @@ static void hevc_hz_8t_8w_lsx(uint8_t *src, int32_t src_stride,
>
> for (loop_cnt = (height >> 2); loop_cnt--;) {
> src0 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src1, src2);
> - src3 = __lsx_vldx(src, src_stride_3x);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src1, src2);
> + src3 = LSX_VLDX(src, src_stride_3x);
> src += src_stride_4x;
>
> DUP4_ARG3(__lsx_vshuf_b, src0, src0, mask0, src0, src0, mask1, src0,
> @@ -774,12 +778,12 @@ static void hevc_hz_8t_12w_lsx(uint8_t *src, int32_t src_stride,
>
> for (loop_cnt = 4; loop_cnt--;) {
> src0 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src1, src2);
> - src3 = __lsx_vldx(src, src_stride_3x);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src1, src2);
> + src3 = LSX_VLDX(src, src_stride_3x);
> src4 = __lsx_vld(_src, 0);
> - DUP2_ARG2(__lsx_vldx, _src, src_stride, _src, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, _src, src_stride, _src, src_stride_2x,
> src5, src6);
> - src7 = __lsx_vldx(_src, src_stride_3x);
> + src7 = LSX_VLDX(_src, src_stride_3x);
> src += src_stride_4x;
> _src += src_stride_4x;
>
> @@ -1216,11 +1220,11 @@ static void hevc_vt_8t_4w_lsx(uint8_t *src, int32_t src_stride,
> filt0, filt1, filt2, filt3);
>
> src0 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src1, src2);
> - src3 = __lsx_vldx(src, src_stride_3x);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src1, src2);
> + src3 = LSX_VLDX(src, src_stride_3x);
> src += src_stride_4x;
> src4 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src5, src6);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src5, src6);
> src += src_stride_3x;
> DUP4_ARG2(__lsx_vilvl_b, src1, src0, src3, src2, src5, src4, src2, src1,
> src10_r, src32_r, src54_r, src21_r);
> @@ -1231,13 +1235,13 @@ static void hevc_vt_8t_4w_lsx(uint8_t *src, int32_t src_stride,
>
> for (loop_cnt = (height >> 3); loop_cnt--;) {
> src7 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src8, src9);
> - src10 = __lsx_vldx(src, src_stride_3x);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src8, src9);
> + src10 = LSX_VLDX(src, src_stride_3x);
> src += src_stride_4x;
> src11 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x,
> src12, src13);
> - src14 = __lsx_vldx(src, src_stride_3x);
> + src14 = LSX_VLDX(src, src_stride_3x);
> src += src_stride_4x;
>
> DUP4_ARG2(__lsx_vilvl_b, src7, src6, src8, src7, src9, src8, src10, src9,
> @@ -1289,7 +1293,7 @@ static void hevc_vt_8t_4w_lsx(uint8_t *src, int32_t src_stride,
> }
> for (;res--;) {
> src7 = __lsx_vld(src, 0);
> - src8 = __lsx_vldx(src, src_stride);
> + src8 = LSX_VLDX(src, src_stride);
> DUP2_ARG2(__lsx_vilvl_b, src7, src6, src8, src7, src76_r, src87_r);
> src += src_stride_2x;
> src8776 = __lsx_vilvl_d(src87_r, src76_r);
> @@ -1334,11 +1338,11 @@ static void hevc_vt_8t_8w_lsx(uint8_t *src, int32_t src_stride,
> filt0, filt1, filt2, filt3);
>
> src0 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src1, src2);
> - src3 = __lsx_vldx(src, src_stride_3x);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src1, src2);
> + src3 = LSX_VLDX(src, src_stride_3x);
> src += src_stride_4x;
> src4 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src5, src6);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src5, src6);
> src += src_stride_3x;
> DUP4_ARG2(__lsx_vilvl_b, src1, src0, src3, src2, src5, src4, src2, src1,
> src10_r, src32_r, src54_r, src21_r);
> @@ -1346,8 +1350,8 @@ static void hevc_vt_8t_8w_lsx(uint8_t *src, int32_t src_stride,
>
> for (loop_cnt = (height >> 2); loop_cnt--;) {
> src7 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src8, src9);
> - src10 = __lsx_vldx(src, src_stride_3x);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src8, src9);
> + src10 = LSX_VLDX(src, src_stride_3x);
> src += src_stride_4x;
> DUP4_ARG2(__lsx_vilvl_b, src7, src6, src8, src7, src9, src8, src10,
> src9, src76_r, src87_r, src98_r, src109_r);
> @@ -1408,11 +1412,11 @@ static void hevc_vt_8t_12w_lsx(uint8_t *src, int32_t src_stride,
> DUP4_ARG2(__lsx_vldrepl_h, filter, 0, filter, 2, filter, 4, filter, 6,
> filt0, filt1, filt2, filt3);
> src0 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src1, src2);
> - src3 = __lsx_vldx(src, src_stride_3x);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src1, src2);
> + src3 = LSX_VLDX(src, src_stride_3x);
> src += src_stride_4x;
> src4 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src5, src6);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src5, src6);
> src += src_stride_3x;
> DUP4_ARG2(__lsx_vilvl_b, src1, src0, src3, src2, src5, src4, src2, src1,
> src10_r, src32_r, src54_r, src21_r);
> @@ -1426,8 +1430,8 @@ static void hevc_vt_8t_12w_lsx(uint8_t *src, int32_t src_stride,
>
> for (loop_cnt = (height >> 2); loop_cnt--;) {
> src7 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src8, src9);
> - src10 = __lsx_vldx(src, src_stride_3x);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src8, src9);
> + src10 = LSX_VLDX(src, src_stride_3x);
> src += src_stride_4x;
> DUP4_ARG2(__lsx_vilvl_b, src7, src6, src8, src7, src9, src8, src10,
> src9, src76_r, src87_r, src98_r, src109_r);
> @@ -1520,12 +1524,12 @@ static void hevc_vt_8t_16multx4mult_lsx(uint8_t *src,
> dst_tmp = dst;
>
> src0 = __lsx_vld(src_tmp, 0);
> - DUP2_ARG2(__lsx_vldx, src_tmp, src_stride, src_tmp, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src_tmp, src_stride, src_tmp, src_stride_2x,
> src1, src2);
> - src3 = __lsx_vldx(src_tmp, src_stride_3x);
> + src3 = LSX_VLDX(src_tmp, src_stride_3x);
> src_tmp += src_stride_4x;
> src4 = __lsx_vld(src_tmp, 0);
> - DUP2_ARG2(__lsx_vldx, src_tmp, src_stride, src_tmp, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src_tmp, src_stride, src_tmp, src_stride_2x,
> src5, src6);
> src_tmp += src_stride_3x;
> DUP4_ARG2(__lsx_vilvl_b, src1, src0, src3, src2, src5, src4, src2, src1,
> @@ -1537,9 +1541,9 @@ static void hevc_vt_8t_16multx4mult_lsx(uint8_t *src,
>
> for (loop_cnt = (height >> 2); loop_cnt--;) {
> src7 = __lsx_vld(src_tmp, 0);
> - DUP2_ARG2(__lsx_vldx, src_tmp, src_stride, src_tmp, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src_tmp, src_stride, src_tmp, src_stride_2x,
> src8, src9);
> - src10 = __lsx_vldx(src_tmp, src_stride_3x);
> + src10 = LSX_VLDX(src_tmp, src_stride_3x);
> src_tmp += src_stride_4x;
> DUP4_ARG2(__lsx_vilvl_b, src7, src6, src8, src7, src9, src8,
> src10, src9, src76_r, src87_r, src98_r, src109_r);
> @@ -1689,11 +1693,11 @@ static void hevc_hv_8t_4w_lsx(uint8_t *src, int32_t src_stride,
> mask3 = __lsx_vaddi_bu(mask0, 6);
>
> src0 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src1, src2);
> - src3 = __lsx_vldx(src, src_stride_3x);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src1, src2);
> + src3 = LSX_VLDX(src, src_stride_3x);
> src += src_stride_4x;
> src4 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src5, src6);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src5, src6);
> src += src_stride_3x;
>
> DUP4_ARG3(__lsx_vshuf_b, src3, src0, mask0, src3, src0, mask1, src3, src0,
> @@ -1729,8 +1733,8 @@ static void hevc_hv_8t_4w_lsx(uint8_t *src, int32_t src_stride,
>
> for (loop_cnt = height >> 2; loop_cnt--;) {
> src7 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src8, src9);
> - src10 = __lsx_vldx(src, src_stride_3x);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src8, src9);
> + src10 = LSX_VLDX(src, src_stride_3x);
> src += src_stride_4x;
>
> DUP4_ARG3(__lsx_vshuf_b, src9, src7, mask0, src9, src7, mask1, src9, src7,
> @@ -1830,12 +1834,12 @@ static void hevc_hv_8t_8multx1mult_lsx(uint8_t *src,
> src_tmp = src;
> dst_tmp = dst;
> src0 = __lsx_vld(src_tmp, 0);
> - DUP2_ARG2(__lsx_vldx, src_tmp, src_stride, src_tmp, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src_tmp, src_stride, src_tmp, src_stride_2x,
> src1, src2);
> - src3 = __lsx_vldx(src_tmp, src_stride_3x);
> + src3 = LSX_VLDX(src_tmp, src_stride_3x);
> src_tmp += src_stride_4x;
> src4 = __lsx_vld(src_tmp, 0);
> - DUP2_ARG2(__lsx_vldx, src_tmp, src_stride, src_tmp, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src_tmp, src_stride, src_tmp, src_stride_2x,
> src5, src6);
> src_tmp += src_stride_3x;
>
> @@ -1978,12 +1982,12 @@ static void hevc_hv_8t_12w_lsx(uint8_t *src, int32_t src_stride,
> dst_tmp = dst;
>
> src0 = __lsx_vld(src_tmp, 0);
> - DUP2_ARG2(__lsx_vldx, src_tmp, src_stride, src_tmp, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src_tmp, src_stride, src_tmp, src_stride_2x,
> src1, src2);
> - src3 = __lsx_vldx(src_tmp, src_stride_3x);
> + src3 = LSX_VLDX(src_tmp, src_stride_3x);
> src_tmp += src_stride_4x;
> src4 = __lsx_vld(src_tmp, 0);
> - DUP2_ARG2(__lsx_vldx, src_tmp, src_stride, src_tmp, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src_tmp, src_stride, src_tmp, src_stride_2x,
> src5, src6);
> src_tmp += src_stride_3x;
>
> @@ -2077,11 +2081,11 @@ static void hevc_hv_8t_12w_lsx(uint8_t *src, int32_t src_stride,
> mask7 = __lsx_vaddi_bu(mask4, 6);
>
> src0 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src1, src2);
> - src3 = __lsx_vldx(src, src_stride_3x);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src1, src2);
> + src3 = LSX_VLDX(src, src_stride_3x);
> src += src_stride_4x;
> src4 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src5, src6);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src5, src6);
> src += src_stride_3x;
>
> DUP4_ARG3(__lsx_vshuf_b, src3, src0, mask4, src3, src0, mask5, src3, src0,
> @@ -2118,8 +2122,8 @@ static void hevc_hv_8t_12w_lsx(uint8_t *src, int32_t src_stride,
>
> for (loop_cnt = height >> 2; loop_cnt--;) {
> src7 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src8, src9);
> - src10 = __lsx_vldx(src, src_stride_3x);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src8, src9);
> + src10 = LSX_VLDX(src, src_stride_3x);
> src += src_stride_4x;
>
> DUP4_ARG3(__lsx_vshuf_b, src9, src7, mask4, src9, src7, mask5, src9,
> @@ -2285,14 +2289,14 @@ static void hevc_vt_4t_16w_lsx(uint8_t *src,
> DUP2_ARG2(__lsx_vldrepl_h, filter, 0, filter, 2, filt0, filt1);
>
> src0 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src1, src2);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src1, src2);
> src += src_stride_3x;
> DUP2_ARG2(__lsx_vilvl_b, src1, src0, src2, src1, src10_r, src21_r);
> DUP2_ARG2(__lsx_vilvh_b, src1, src0, src2, src1, src10_l, src21_l);
>
> for (loop_cnt = (height >> 2); loop_cnt--;) {
> src3 = __lsx_vld(src, 0);
> - src4 = __lsx_vldx(src, src_stride);
> + src4 = LSX_VLDX(src, src_stride);
> src += src_stride_2x;
> DUP2_ARG2(__lsx_vilvl_b, src3, src2, src4, src3, src32_r, src43_r);
> DUP2_ARG2(__lsx_vilvh_b, src3, src2, src4, src3, src32_l, src43_l);
> @@ -2309,7 +2313,7 @@ static void hevc_vt_4t_16w_lsx(uint8_t *src,
> dst += dst_stride;
>
> src5 = __lsx_vld(src, 0);
> - src2 = __lsx_vldx(src, src_stride);
> + src2 = LSX_VLDX(src, src_stride);
> src += src_stride_2x;
> DUP2_ARG2(__lsx_vilvl_b, src5, src4, src2, src5, src10_r, src21_r);
> DUP2_ARG2(__lsx_vilvh_b, src5, src4, src2, src5, src10_l, src21_l);
> @@ -2353,19 +2357,19 @@ static void hevc_vt_4t_24w_lsx(uint8_t *src,
> DUP2_ARG2(__lsx_vldrepl_h, filter, 0, filter, 2, filt0, filt1);
>
> src0 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src1, src2);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src1, src2);
> DUP2_ARG2(__lsx_vilvl_b, src1, src0, src2, src1, src10_r, src21_r);
> DUP2_ARG2(__lsx_vilvh_b, src1, src0, src2, src1, src10_l, src21_l);
>
> src6 = __lsx_vld(_src, 0);
> - DUP2_ARG2(__lsx_vldx, _src, src_stride, _src, src_stride_2x, src7, src8);
> + DUP2_ARG2(LSX_VLDX, _src, src_stride, _src, src_stride_2x, src7, src8);
> src += src_stride_3x;
> _src += src_stride_3x;
> DUP2_ARG2(__lsx_vilvl_b, src7, src6, src8, src7, src76_r, src87_r);
>
> for (loop_cnt = (height >> 2); loop_cnt--;) {
> DUP2_ARG2(__lsx_vld, src, 0, _src, 0, src3, src9);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, _src, src_stride, src4, src10);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, _src, src_stride, src4, src10);
> src += src_stride_2x;
> _src += src_stride_2x;
> DUP2_ARG2(__lsx_vilvl_b, src3, src2, src4, src3, src32_r, src43_r);
> @@ -2392,7 +2396,7 @@ static void hevc_vt_4t_24w_lsx(uint8_t *src,
> dst += dst_stride;
>
> DUP2_ARG2(__lsx_vld, src, 0, _src, 0, src5, src11);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, _src, src_stride, src2, src8);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, _src, src_stride, src2, src8);
> src += src_stride_2x;
> _src += src_stride_2x;
> DUP2_ARG2(__lsx_vilvl_b, src5, src4, src2, src5, src10_r, src21_r);
> @@ -2448,12 +2452,12 @@ static void hevc_vt_4t_32w_lsx(uint8_t *src,
> DUP2_ARG2(__lsx_vldrepl_h, filter, 0, filter, 2, filt0, filt1);
>
> src0 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src1, src2);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src1, src2);
> DUP2_ARG2(__lsx_vilvl_b, src1, src0, src2, src1, src10_r, src21_r);
> DUP2_ARG2(__lsx_vilvh_b, src1, src0, src2, src1, src10_l, src21_l);
>
> src6 = __lsx_vld(_src, 0);
> - DUP2_ARG2(__lsx_vldx, _src, src_stride, _src, src_stride_2x, src7, src8);
> + DUP2_ARG2(LSX_VLDX, _src, src_stride, _src, src_stride_2x, src7, src8);
> src += src_stride_3x;
> _src += src_stride_3x;
> DUP2_ARG2(__lsx_vilvl_b, src7, src6, src8, src7, src76_r, src87_r);
> @@ -2461,7 +2465,7 @@ static void hevc_vt_4t_32w_lsx(uint8_t *src,
>
> for (loop_cnt = (height >> 2); loop_cnt--;) {
> DUP2_ARG2(__lsx_vld, src, 0, _src, 0, src3, src9);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, _src, src_stride, src4, src10);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, _src, src_stride, src4, src10);
> src += src_stride_2x;
> _src += src_stride_2x;
> DUP2_ARG2(__lsx_vilvl_b, src3, src2, src4, src3, src32_r, src43_r);
> @@ -2493,7 +2497,7 @@ static void hevc_vt_4t_32w_lsx(uint8_t *src,
> dst += dst_stride;
>
> DUP2_ARG2(__lsx_vld, src, 0, _src, 0, src5, src11);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, _src, src_stride, src2, src8);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, _src, src_stride, src2, src8);
> src += src_stride_2x;
> _src += src_stride_2x;
> DUP2_ARG2(__lsx_vilvl_b, src5, src4, src2, src5, src10_r, src21_r);
> @@ -2560,9 +2564,9 @@ static void hevc_hv_4t_8x2_lsx(uint8_t *src,
> mask1 = __lsx_vaddi_bu(mask0, 2);
>
> src0 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src1, src2);
> - src3 = __lsx_vldx(src, src_stride_3x);
> - src4 = __lsx_vldx(src, src_stride_4x);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src1, src2);
> + src3 = LSX_VLDX(src, src_stride_3x);
> + src4 = LSX_VLDX(src, src_stride_4x);
>
> DUP2_ARG3(__lsx_vshuf_b, src0, src0, mask0, src0, src0, mask1, vec0, vec1);
> DUP2_ARG3(__lsx_vshuf_b, src1, src1, mask0, src1, src1, mask1, vec2, vec3);
> @@ -2627,10 +2631,10 @@ static void hevc_hv_4t_8multx4_lsx(uint8_t *src, int32_t src_stride,
>
> for (cnt = width8mult; cnt--;) {
> src0 = __lsx_vld(src, 0);
> - DUP4_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src,
> + DUP4_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src,
> src_stride_3x, src, src_stride_4x, src1, src2, src3, src4);
> src += src_stride_4x;
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src5, src6);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src5, src6);
> src += (8 - src_stride_4x);
>
> DUP2_ARG3(__lsx_vshuf_b, src0, src0, mask0, src0, src0, mask1,
> @@ -2730,10 +2734,10 @@ static void hevc_hv_4t_8x6_lsx(uint8_t *src,
> mask1 = __lsx_vaddi_bu(mask0, 2);
>
> src0 = __lsx_vld(src, 0);
> - DUP4_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src,
> + DUP4_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src,
> src_stride_3x, src, src_stride_4x, src1, src2, src3, src4);
> src += src_stride_4x;
> - DUP4_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src,
> + DUP4_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src,
> src_stride_3x, src, src_stride_4x, src5, src6, src7, src8);
>
> DUP4_ARG3(__lsx_vshuf_b, src0, src0, mask0, src0, src0, mask1, src1, src1,
> @@ -2847,7 +2851,7 @@ static void hevc_hv_4t_8multx4mult_lsx(uint8_t *src,
> dst_tmp = dst;
>
> src0 = __lsx_vld(src_tmp, 0);
> - DUP2_ARG2(__lsx_vldx, src_tmp, src_stride, src_tmp, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src_tmp, src_stride, src_tmp, src_stride_2x,
> src1, src2);
> src_tmp += src_stride_3x;
>
> @@ -2869,9 +2873,9 @@ static void hevc_hv_4t_8multx4mult_lsx(uint8_t *src,
>
> for (loop_cnt = height >> 2; loop_cnt--;) {
> src3 = __lsx_vld(src_tmp, 0);
> - DUP2_ARG2(__lsx_vldx, src_tmp, src_stride, src_tmp, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src_tmp, src_stride, src_tmp, src_stride_2x,
> src4, src5);
> - src6 = __lsx_vldx(src_tmp, src_stride_3x);
> + src6 = LSX_VLDX(src_tmp, src_stride_3x);
> src_tmp += src_stride_4x;
>
> DUP2_ARG3(__lsx_vshuf_b, src3, src3, mask0, src3, src3, mask1,
> @@ -2997,7 +3001,7 @@ static void hevc_hv_4t_12w_lsx(uint8_t *src,
> dst_tmp = dst;
>
> src0 = __lsx_vld(src_tmp, 0);
> - DUP2_ARG2(__lsx_vldx, src_tmp, src_stride, src_tmp, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src_tmp, src_stride, src_tmp, src_stride_2x,
> src1, src2);
> src_tmp += src_stride_3x;
>
> @@ -3016,9 +3020,9 @@ static void hevc_hv_4t_12w_lsx(uint8_t *src,
>
> for (loop_cnt = 4; loop_cnt--;) {
> src3 = __lsx_vld(src_tmp, 0);
> - DUP2_ARG2(__lsx_vldx, src_tmp, src_stride, src_tmp, src_stride_2x,
> + DUP2_ARG2(LSX_VLDX, src_tmp, src_stride, src_tmp, src_stride_2x,
> src4, src5);
> - src6 = __lsx_vldx(src_tmp, src_stride_3x);
> + src6 = LSX_VLDX(src_tmp, src_stride_3x);
> src_tmp += src_stride_4x;
>
> DUP2_ARG3(__lsx_vshuf_b, src3, src3, mask0, src3, src3, mask1,
> @@ -3077,7 +3081,7 @@ static void hevc_hv_4t_12w_lsx(uint8_t *src,
> mask3 = __lsx_vaddi_bu(mask2, 2);
>
> src0 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src1, src2);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src1, src2);
> src += src_stride_3x;
> DUP2_ARG3(__lsx_vshuf_b, src1, src0, mask2, src1, src0, mask3, vec0, vec1);
> DUP2_ARG3(__lsx_vshuf_b, src2, src1, mask2, src2, src1, mask3, vec2, vec3);
> @@ -3090,12 +3094,12 @@ static void hevc_hv_4t_12w_lsx(uint8_t *src,
>
> for (loop_cnt = 2; loop_cnt--;) {
> src3 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src4, src5);
> - src6 = __lsx_vldx(src, src_stride_3x);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src4, src5);
> + src6 = LSX_VLDX(src, src_stride_3x);
> src += src_stride_4x;
> src7 = __lsx_vld(src, 0);
> - DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride_2x, src8, src9);
> - src10 = __lsx_vldx(src, src_stride_3x);
> + DUP2_ARG2(LSX_VLDX, src, src_stride, src, src_stride_2x, src8, src9);
> + src10 = LSX_VLDX(src, src_stride_3x);
> src += src_stride_4x;
> DUP2_ARG3(__lsx_vshuf_b, src7, src3, mask2, src7, src3, mask3,
> vec0, vec1);
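
For anyone skimming the hunks above without the macro definition in front
of them: every change is mechanical, replacing the intrinsic with the
wrapper macro at each load site, so the generated loads are unchanged.
As a minimal, self-contained sketch of the const-qualification problem
this pattern works around (loadx/LOADX/read_at below are illustrative
stand-ins, not the real intrinsic or its signature):

    #include <string.h>

    /* Hypothetical stand-in for an intrinsic whose pointer parameter
     * lacks const qualification, although it only reads the buffer. */
    static inline int loadx(void *ptr, long offset)
    {
        int v;
        memcpy(&v, (const char *)ptr + offset, sizeof(v));
        return v;
    }

    /* Wrapper that casts away const at the call boundary, so that
     * const-correct callers compile without discarded-qualifier
     * warnings; the pointed-to buffer is still only read. */
    #define LOADX(cptr, offset) loadx((void *)(cptr), (offset))

    static int read_at(const unsigned char *src, long offset)
    {
        /* loadx(src, offset) would warn: const void * passed as void * */
        return LOADX(src, offset);
    }

The cast is confined to one macro, which keeps the workaround trivial to
revert once the header gains proper const qualifiers.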