[FFmpeg-devel] [PATCH] avcodec/mips: simplified code in vp3dsp_idct_msa.c.

gxw guxiwei-hf at loongson.cn
Sun Sep 15 13:35:57 EEST 2019


Use the ADD8 macro to replace runs of eight consecutive vector additions.
---
 libavcodec/mips/vp3dsp_idct_msa.c   | 80 ++++++++-----------------------------
 libavutil/mips/generic_macros_msa.h |  6 +++
 2 files changed, 22 insertions(+), 64 deletions(-)

diff --git a/libavcodec/mips/vp3dsp_idct_msa.c b/libavcodec/mips/vp3dsp_idct_msa.c
index 90c578f..e4cd377 100644
--- a/libavcodec/mips/vp3dsp_idct_msa.c
+++ b/libavcodec/mips/vp3dsp_idct_msa.c
@@ -178,14 +178,8 @@ static void idct_msa(uint8_t *dst, int stride, int16_t *input, int type)
                    c0, c1, c2, c3);
         ILVR_H4_SW(zero, f4, zero, f5, zero, f6, zero, f7,
                    c4, c5, c6, c7);
-        A += c0;
-        B += c7;
-        C += c1;
-        D += c2;
-        E += c3;
-        F += c4;
-        G += c5;
-        H += c6;
+        ADD8(A, c0, B, c7, C, c1, D, c2, E, c3, F, c4, G, c5, H, c6,
+             A, B, C, D, E, F, G, H);
     }
     CLIP_SW8_0_255(A, B, C, D, E, F, G, H);
     sign_l = __msa_or_v((v16u8)r1_r, (v16u8)r2_r);
@@ -208,14 +202,8 @@ static void idct_msa(uint8_t *dst, int stride, int16_t *input, int type)
         Gd = Bdd;
         Hd = Bdd;
     } else {
-        Ad = Add + c0;
-        Bd = Add + c1;
-        Cd = Add + c2;
-        Dd = Add + c3;
-        Ed = Add + c4;
-        Fd = Add + c5;
-        Gd = Add + c6;
-        Hd = Add + c7;
+        ADD8(Add, c0, Add, c1, Add, c2, Add, c3, Add, c4, Add, c5, Add, c6,
+             Add, c7, Ad, Bd, Cd, Dd, Ed, Fd, Gd, Hd);
         CLIP_SW8_0_255(Ad, Bd, Cd, Dd, Ed, Fd, Gd, Hd);
     }
     Ad = (v4i32)__msa_and_v((v16u8)Ad, (v16u8)sign_t);
@@ -235,14 +223,8 @@ static void idct_msa(uint8_t *dst, int stride, int16_t *input, int type)
     F = (v4i32)__msa_and_v((v16u8)F, (v16u8)sign_t);
     G = (v4i32)__msa_and_v((v16u8)G, (v16u8)sign_t);
     H = (v4i32)__msa_and_v((v16u8)H, (v16u8)sign_t);
-    r0_r = Ad + A;
-    r1_r = Bd + C;
-    r2_r = Cd + D;
-    r3_r = Dd + E;
-    r0_l = Ed + F;
-    r1_l = Fd + G;
-    r2_l = Gd + H;
-    r3_l = Hd + B;
+    ADD8(Ad, A, Bd, C, Cd, D, Dd, E, Ed, F, Fd, G, Gd, H, Hd, B,
+         r0_r, r1_r, r2_r, r3_r, r0_l, r1_l, r2_l, r3_l);
 
     /* Row 4 to 7 */
     TRANSPOSE4x4_SW_SW(r4_r, r5_r, r6_r, r7_r,
@@ -286,14 +268,8 @@ static void idct_msa(uint8_t *dst, int stride, int16_t *input, int type)
                    c0, c1, c2, c3);
         ILVL_H4_SW(zero, f4, zero, f5, zero, f6, zero, f7,
                    c4, c5, c6, c7);
-        A += c0;
-        B += c7;
-        C += c1;
-        D += c2;
-        E += c3;
-        F += c4;
-        G += c5;
-        H += c6;
+        ADD8(A, c0, B, c7, C, c1, D, c2, E, c3, F, c4, G, c5, H, c6,
+             A, B, C, D, E, F, G, H);
     }
     CLIP_SW8_0_255(A, B, C, D, E, F, G, H);
     sign_l = __msa_or_v((v16u8)r5_r, (v16u8)r6_r);
@@ -316,14 +292,8 @@ static void idct_msa(uint8_t *dst, int stride, int16_t *input, int type)
         Gd = Bdd;
         Hd = Bdd;
     } else {
-        Ad = Add + c0;
-        Bd = Add + c1;
-        Cd = Add + c2;
-        Dd = Add + c3;
-        Ed = Add + c4;
-        Fd = Add + c5;
-        Gd = Add + c6;
-        Hd = Add + c7;
+        ADD8(Add, c0, Add, c1, Add, c2, Add, c3, Add, c4, Add, c5, Add, c6,
+             Add, c7, Ad, Bd, Cd, Dd, Ed, Fd, Gd, Hd);
         CLIP_SW8_0_255(Ad, Bd, Cd, Dd, Ed, Fd, Gd, Hd);
     }
     Ad = (v4i32)__msa_and_v((v16u8)Ad, (v16u8)sign_t);
@@ -343,14 +313,8 @@ static void idct_msa(uint8_t *dst, int stride, int16_t *input, int type)
     F = (v4i32)__msa_and_v((v16u8)F, (v16u8)sign_t);
     G = (v4i32)__msa_and_v((v16u8)G, (v16u8)sign_t);
     H = (v4i32)__msa_and_v((v16u8)H, (v16u8)sign_t);
-    r4_r = Ad + A;
-    r5_r = Bd + C;
-    r6_r = Cd + D;
-    r7_r = Dd + E;
-    r4_l = Ed + F;
-    r5_l = Fd + G;
-    r6_l = Gd + H;
-    r7_l = Hd + B;
+    ADD8(Ad, A, Bd, C, Cd, D, Dd, E, Ed, F, Fd, G, Gd, H, Hd, B,
+         r4_r, r5_r, r6_r, r7_r, r4_l, r5_l, r6_l, r7_l);
     VSHF_B2_SB(r0_r, r4_r, r1_r, r5_r, mask, mask, d0, d1);
     VSHF_B2_SB(r2_r, r6_r, r3_r, r7_r, mask, mask, d2, d3);
     VSHF_B2_SB(r0_l, r4_l, r1_l, r5_l, mask, mask, d4, d5);
@@ -400,14 +364,8 @@ void ff_vp3_idct_dc_add_msa(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
                e0, e1, e2, e3);
     ILVR_H4_SW(zero, c4, zero, c5, zero, c6, zero, c7,
                e4, e5, e6, e7);
-    e0 += dc;
-    e1 += dc;
-    e2 += dc;
-    e3 += dc;
-    e4 += dc;
-    e5 += dc;
-    e6 += dc;
-    e7 += dc;
+    ADD8(e0, dc, e1, dc, e2, dc, e3, dc, e4, dc, e5, dc, e6, dc, e7, dc,
+         e0, e1, e2, e3, e4, e5, e6, e7);
     CLIP_SW8_0_255(e0, e1, e2, e3, e4, e5, e6, e7);
 
     /* Left part */
@@ -415,14 +373,8 @@ void ff_vp3_idct_dc_add_msa(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
                r0, r1, r2, r3);
     ILVL_H4_SW(zero, c4, zero, c5, zero, c6, zero, c7,
                r4, r5, r6, r7);
-    r0 += dc;
-    r1 += dc;
-    r2 += dc;
-    r3 += dc;
-    r4 += dc;
-    r5 += dc;
-    r6 += dc;
-    r7 += dc;
+    ADD8(r0, dc, r1, dc, r2, dc, r3, dc, r4, dc, r5, dc, r6, dc, r7, dc,
+         r0, r1, r2, r3, r4, r5, r6, r7);
     CLIP_SW8_0_255(r0, r1, r2, r3, r4, r5, r6, r7);
     VSHF_B2_SB(e0, r0, e1, r1, mask, mask, d0, d1);
     VSHF_B2_SB(e2, r2, e3, r3, mask, mask, d2, d3);
diff --git a/libavutil/mips/generic_macros_msa.h b/libavutil/mips/generic_macros_msa.h
index c085d58..3d892ce 100644
--- a/libavutil/mips/generic_macros_msa.h
+++ b/libavutil/mips/generic_macros_msa.h
@@ -2153,6 +2153,12 @@
     ADD2(in0, in1, in2, in3, out0, out1);                                     \
     ADD2(in4, in5, in6, in7, out2, out3);                                     \
 }
+#define ADD8(in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, in10, in11, in12, \
+             in13, in14, in15, out0, out1, out2, out3, out4, out5, out6, out7)   \
+{                                                                                \
+    ADD4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3);        \
+    ADD4(in8, in9, in10, in11, in12, in13, in14, in15, out4, out5, out6, out7);  \
+}
 
 /* Description : Subtraction of 2 pairs of vectors
    Arguments   : Inputs  - in0, in1, in2, in3
-- 
2.1.0




More information about the ffmpeg-devel mailing list