[FFmpeg-devel] [PATCH 4/9] avcodec/mips: [loongson] optimize memset in h264dsp.
Shiyou Yin
yinshiyou-hf at loongson.cn
Wed Jul 11 12:45:43 EEST 2018
Optimize memset with MMI in the following functions:
1. ff_h264_add_pixels4_8_mmi.
2. ff_h264_idct_add_8_mmi.
3. ff_h264_idct8_add_8_mmi.
Change-Id: If2656d2c945ce7996c0d5398f8fbae94540f1d83
Signed-off-by: Shiyou Yin <yinshiyou-hf at loongson.cn>
---
libavcodec/mips/h264dsp_mmi.c | 36 +++++++++++++++++++++++++++++++++---
1 file changed, 33 insertions(+), 3 deletions(-)
diff --git a/libavcodec/mips/h264dsp_mmi.c b/libavcodec/mips/h264dsp_mmi.c
index ac6fa99..8cc632c 100644
--- a/libavcodec/mips/h264dsp_mmi.c
+++ b/libavcodec/mips/h264dsp_mmi.c
@@ -59,6 +59,17 @@ void ff_h264_add_pixels4_8_mmi(uint8_t *dst, int16_t *src, int stride)
MMI_SWC1(%[ftmp2], %[dst1], 0x00)
MMI_SWC1(%[ftmp3], %[dst2], 0x00)
MMI_SWC1(%[ftmp4], %[dst3], 0x00)
+
+ /* memset(src, 0, 32); */
+ "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
+ "gssdlc1 %[ftmp0], 0x07(%[src]) \n\t"
+ "gssdrc1 %[ftmp0], 0x00(%[src]) \n\t"
+ "gssdlc1 %[ftmp0], 0x0f(%[src]) \n\t"
+ "gssdrc1 %[ftmp0], 0x08(%[src]) \n\t"
+ "gssdlc1 %[ftmp0], 0x17(%[src]) \n\t"
+ "gssdrc1 %[ftmp0], 0x10(%[src]) \n\t"
+ "gssdlc1 %[ftmp0], 0x1f(%[src]) \n\t"
+ "gssdrc1 %[ftmp0], 0x18(%[src]) \n\t"
: [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
[ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
[ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
@@ -72,7 +83,6 @@ void ff_h264_add_pixels4_8_mmi(uint8_t *dst, int16_t *src, int stride)
: "memory"
);
- memset(src, 0, 32);
}
void ff_h264_idct_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
@@ -152,6 +162,17 @@ void ff_h264_idct_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
MMI_SWC1(%[ftmp2], %[dst], 0x00)
"packushb %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
+
+ /* memset(block, 0, 32) */
+ "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
+ "gssdlc1 %[ftmp0], 0x07(%[block]) \n\t"
+ "gssdrc1 %[ftmp0], 0x00(%[block]) \n\t"
+ "gssdlc1 %[ftmp0], 0x0f(%[block]) \n\t"
+ "gssdrc1 %[ftmp0], 0x08(%[block]) \n\t"
+ "gssdlc1 %[ftmp0], 0x17(%[block]) \n\t"
+ "gssdrc1 %[ftmp0], 0x10(%[block]) \n\t"
+ "gssdlc1 %[ftmp0], 0x1f(%[block]) \n\t"
+ "gssdrc1 %[ftmp0], 0x18(%[block]) \n\t"
: [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
[ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
[ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
@@ -167,7 +188,6 @@ void ff_h264_idct_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
: "memory"
);
- memset(block, 0, 32);
}
void ff_h264_idct8_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
@@ -617,6 +637,17 @@ void ff_h264_idct8_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
MMI_SWC1(%[ftmp6], %[addr0], 0x00)
MMI_SWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
PTR_ADDIU "$29, $29, 0x20 \n\t"
+
+ /* memset(block, 0, 32) */
+ "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
+ "gssdlc1 %[ftmp0], 0x07(%[block]) \n\t"
+ "gssdrc1 %[ftmp0], 0x00(%[block]) \n\t"
+ "gssdlc1 %[ftmp0], 0x0f(%[block]) \n\t"
+ "gssdrc1 %[ftmp0], 0x08(%[block]) \n\t"
+ "gssdlc1 %[ftmp0], 0x17(%[block]) \n\t"
+ "gssdrc1 %[ftmp0], 0x10(%[block]) \n\t"
+ "gssdlc1 %[ftmp0], 0x1f(%[block]) \n\t"
+ "gssdrc1 %[ftmp0], 0x18(%[block]) \n\t"
: [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
[ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
[ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
@@ -638,7 +669,6 @@ void ff_h264_idct8_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
: "$29","memory"
);
- memset(block, 0, 128);
}
void ff_h264_idct_dc_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
--
2.1.0
More information about the ffmpeg-devel mailing list