[FFmpeg-devel] [PATCH] x86/hevc_deblock: use existing x86util transpose macro in chroma_{10, 12}
James Almer
jamrial at gmail.com
Tue Jul 29 23:07:27 CEST 2014
Cosmetic change. No measurable difference in speed.
Signed-off-by: James Almer <jamrial at gmail.com>
---
libavcodec/x86/hevc_deblock.asm | 25 ++++++++-----------------
1 file changed, 8 insertions(+), 17 deletions(-)
diff --git a/libavcodec/x86/hevc_deblock.asm b/libavcodec/x86/hevc_deblock.asm
index 2d4353a..5951e86 100644
--- a/libavcodec/x86/hevc_deblock.asm
+++ b/libavcodec/x86/hevc_deblock.asm
@@ -138,31 +138,22 @@ INIT_XMM sse2
; in: 4 rows of 8 words in m0..m3
; out: 8 rows of 4 words in %1..%8
%macro TRANSPOSE8x4W_STORE 9
+ TRANSPOSE4x4W 0, 1, 2, 3, 4
+
pxor m5, m5; zeros reg
CLIPW m0, m5, %9
CLIPW m1, m5, %9
CLIPW m2, m5, %9
CLIPW m3, m5, %9
- punpckhwd m4, m0, m1
- punpcklwd m0, m1
- punpckhwd m5, m2, m3
- punpcklwd m2, m3
- punpckhdq m6, m0, m2
- punpckldq m0, m2
-
movq %1, m0
movhps %2, m0
- movq %3, m6
- movhps %4, m6
-
- punpckhdq m6, m4, m5
- punpckldq m4, m5
-
- movq %5, m4
- movhps %6, m4
- movq %7, m6
- movhps %8, m6
+ movq %3, m1
+ movhps %4, m1
+ movq %5, m2
+ movhps %6, m2
+ movq %7, m3
+ movhps %8, m3
%endmacro
; in: 8 rows of 8 bytes in %1..%8
--
1.8.5.5
More information about the ffmpeg-devel mailing list