[FFmpeg-devel] [PATCH] avutil/mips: refactor msa SLDI_Bn_0 and SLDI_Bn macros.

Shiyou Yin yinshiyou-hf at loongson.cn
Tue Aug 6 12:09:28 EEST 2019


>-----Original Message-----
>From: ffmpeg-devel-bounces at ffmpeg.org [mailto:ffmpeg-devel-bounces at ffmpeg.org] On Behalf Of gxw
>Sent: Tuesday, August 6, 2019 11:38 AM
>To: ffmpeg-devel at ffmpeg.org
>Subject: [FFmpeg-devel] [PATCH] avutil/mips: refactor msa SLDI_Bn_0 and SLDI_Bn macros.
>
>Changing details as following:
>1. Modified the parameters order of SLDI_Bn. The previous order of
>   parameters is difficult to understand.
>2. Remove the redundant macro SLDI_Bn_0 and use SLDI_Bn instead.

It would be better to explain the new macro parameters, or the rule they follow, in the commit message.


……
>diff --git a/libavutil/mips/generic_macros_msa.h b/libavutil/mips/generic_macros_msa.h
>index 9ac0583..a5f8bba 100644
>--- a/libavutil/mips/generic_macros_msa.h
>+++ b/libavutil/mips/generic_macros_msa.h
>@@ -602,67 +602,48 @@
> }
> #define AVER_UB4_UB(...) AVER_UB4(v16u8, __VA_ARGS__)
>
>-/* Description : Immediate number of columns to slide with zero
>-   Arguments   : Inputs  - in0, in1, slide_val
>-                 Outputs - out0, out1
>+/* Description : Immediate number of columns to slide
>+   Arguments   : Inputs  - s, d, slide_val
>+                 Outputs - out
>                  Return Type - as per RTYPE
>-   Details     : Byte elements from 'zero_m' vector are slide into 'in0' by
>+   Details     : Byte elements from 'd' vector are slide into 's' by
>                  number of elements specified by 'slide_val'
> */
>-#define SLDI_B2_0(RTYPE, in0, in1, out0, out1, slide_val)                 \
>-{                                                                         \
>-    v16i8 zero_m = { 0 };                                                 \
>-    out0 = (RTYPE) __msa_sldi_b((v16i8) zero_m, (v16i8) in0, slide_val);  \
>-    out1 = (RTYPE) __msa_sldi_b((v16i8) zero_m, (v16i8) in1, slide_val);  \
>-}
>-#define SLDI_B2_0_UB(...) SLDI_B2_0(v16u8, __VA_ARGS__)
>-#define SLDI_B2_0_SB(...) SLDI_B2_0(v16i8, __VA_ARGS__)
>-#define SLDI_B2_0_SW(...) SLDI_B2_0(v4i32, __VA_ARGS__)
>-
>-#define SLDI_B3_0(RTYPE, in0, in1, in2, out0, out1, out2,  slide_val)     \
>-{                                                                         \
>-    v16i8 zero_m = { 0 };                                                 \
>-    SLDI_B2_0(RTYPE, in0, in1, out0, out1, slide_val);                    \
>-    out2 = (RTYPE) __msa_sldi_b((v16i8) zero_m, (v16i8) in2, slide_val);  \
>-}
>-#define SLDI_B3_0_UB(...) SLDI_B3_0(v16u8, __VA_ARGS__)
>-#define SLDI_B3_0_SB(...) SLDI_B3_0(v16i8, __VA_ARGS__)
>-
>-#define SLDI_B4_0(RTYPE, in0, in1, in2, in3,            \
>-                  out0, out1, out2, out3, slide_val)    \
>-{                                                       \
>-    SLDI_B2_0(RTYPE, in0, in1, out0, out1, slide_val);  \
>-    SLDI_B2_0(RTYPE, in2, in3, out2, out3, slide_val);  \
>+#define SLDI_B1(RTYPE, d, s, slide_val, out)                      \
>+{                                                                 \
>+    out = (RTYPE) __msa_sldi_b((v16i8) d, (v16i8) s, slide_val);  \
> }
>-#define SLDI_B4_0_UB(...) SLDI_B4_0(v16u8, __VA_ARGS__)
>-#define SLDI_B4_0_SB(...) SLDI_B4_0(v16i8, __VA_ARGS__)
>-#define SLDI_B4_0_SH(...) SLDI_B4_0(v8i16, __VA_ARGS__)
>
>-/* Description : Immediate number of columns to slide
>-   Arguments   : Inputs  - in0_0, in0_1, in1_0, in1_1, slide_val
>-                 Outputs - out0, out1
>-                 Return Type - as per RTYPE
>-   Details     : Byte elements from 'in0_0' vector are slide into 'in1_0' by
>-                 number of elements specified by 'slide_val'
>-*/
>-#define SLDI_B2(RTYPE, in0_0, in0_1, in1_0, in1_1, out0, out1, slide_val)  \
>-{                                                                          \
>-    out0 = (RTYPE) __msa_sldi_b((v16i8) in0_0, (v16i8) in1_0, slide_val);  \
>-    out1 = (RTYPE) __msa_sldi_b((v16i8) in0_1, (v16i8) in1_1, slide_val);  \
>+#define SLDI_B2(RTYPE, d0, s0, d1, s1, slide_val, out0, out1)  \
>+{                                                              \
>+    SLDI_B1(RTYPE, d0, s0, slide_val, out0)                    \
>+    SLDI_B1(RTYPE, d1, s1, slide_val, out1)                    \
> }
> #define SLDI_B2_UB(...) SLDI_B2(v16u8, __VA_ARGS__)
> #define SLDI_B2_SB(...) SLDI_B2(v16i8, __VA_ARGS__)
> #define SLDI_B2_SH(...) SLDI_B2(v8i16, __VA_ARGS__)
>+#define SLDI_B2_SW(...) SLDI_B2(v4i32, __VA_ARGS__)
>
>-#define SLDI_B3(RTYPE, in0_0, in0_1, in0_2, in1_0, in1_1, in1_2,           \
>-                out0, out1, out2, slide_val)                               \
>-{                                                                          \
>-    SLDI_B2(RTYPE, in0_0, in0_1, in1_0, in1_1, out0, out1, slide_val)      \
>-    out2 = (RTYPE) __msa_sldi_b((v16i8) in0_2, (v16i8) in1_2, slide_val);  \
>+#define SLDI_B3(RTYPE, d0, s0, d1, s1, d2, s2, slide_val,  \
>+                out0, out1, out2)                          \
>+{                                                          \
>+    SLDI_B2(RTYPE, d0, s0, d1, s1, slide_val, out0, out1)  \
>+    SLDI_B1(RTYPE, d2, s2, slide_val, out2)                \
> }
>+#define SLDI_B3_UB(...) SLDI_B3(v16u8, __VA_ARGS__)
> #define SLDI_B3_SB(...) SLDI_B3(v16i8, __VA_ARGS__)
> #define SLDI_B3_UH(...) SLDI_B3(v8u16, __VA_ARGS__)
>
>+#define SLDI_B4(RTYPE, d0, s0, d1, s1, d2, s2, d3, s3,     \
>+                slide_val, out0, out1, out2, out3)         \
>+{                                                          \
>+    SLDI_B2(RTYPE, d0, s0, d1, s1, slide_val, out0, out1)  \
>+    SLDI_B2(RTYPE, d2, s2, d3, s3, slide_val, out2, out3)  \
>+}
>+#define SLDI_B4_UB(...) SLDI_B4(v16u8, __VA_ARGS__)
>+#define SLDI_B4_SB(...) SLDI_B4(v16i8, __VA_ARGS__)
>+#define SLDI_B4_SH(...) SLDI_B4(v8i16, __VA_ARGS__)
>+
> /* Description : Shuffle byte vector elements as per mask vector
>    Arguments   : Inputs  - in0, in1, in2, in3, mask0, mask1
>                  Outputs - out0, out1
>@@ -2433,6 +2414,7 @@
> {                                                                        \
>     v16i8 tmp0_m, tmp1_m, tmp2_m, tmp3_m;                                \
>     v16i8 tmp4_m, tmp5_m, tmp6_m, tmp7_m;                                \
>+    v16i8 zeros = { 0 };                                                 \
>                                                                          \
>     ILVR_B4_SB(in2, in0, in3, in1, in6, in4, in7, in5,                   \
>                tmp0_m, tmp1_m, tmp2_m, tmp3_m);                          \
>@@ -2440,8 +2422,8 @@
>     ILVRL_B2_SB(tmp3_m, tmp2_m, tmp6_m, tmp7_m);                         \
>     ILVRL_W2(RTYPE, tmp6_m, tmp4_m, out0, out2);                         \
>     ILVRL_W2(RTYPE, tmp7_m, tmp5_m, out4, out6);                         \
>-    SLDI_B2_0(RTYPE, out0, out2, out1, out3, 8);                         \
>-    SLDI_B2_0(RTYPE, out4, out6, out5, out7, 8);                         \
>+    SLDI_B4(RTYPE, zeros, out0, zeros, out2, zeros, out4, zeros, out6,   \
>+            8, out1, out3, out5, out7);                                  \
> }
> #define TRANSPOSE8x8_UB_UB(...) TRANSPOSE8x8_UB(v16u8, __VA_ARGS__)
> #define TRANSPOSE8x8_UB_UH(...) TRANSPOSE8x8_UB(v8u16, __VA_ARGS__)
>--
>2.1.0

Let's omit the suffix '1' in 'SLDI_B1'; other macros will follow this rule too in the future.





More information about the ffmpeg-devel mailing list