[FFmpeg-devel] [PATCH] x86/yadif: improve usage of ABS macros
James Almer
jamrial at gmail.com
Tue Jul 8 06:42:26 CEST 2014
Using ABS2 for the paired absolute-value computations might help on processors
where out-of-order execution isn't very good. Also remove the duplicate PABS
macro from yadif-10.asm and use the x86util ABS1/ABS2 macros instead, which
are optimized for mmxext/sse2.
Signed-off-by: James Almer <jamrial at gmail.com>
---
libavfilter/x86/vf_yadif.asm | 6 ++----
libavfilter/x86/yadif-10.asm | 21 ++++-----------------
libavfilter/x86/yadif-16.asm | 22 ++++++++++++++++++----
3 files changed, 24 insertions(+), 25 deletions(-)
diff --git a/libavfilter/x86/vf_yadif.asm b/libavfilter/x86/vf_yadif.asm
index a29620c..d2e7d9b 100644
--- a/libavfilter/x86/vf_yadif.asm
+++ b/libavfilter/x86/vf_yadif.asm
@@ -104,8 +104,7 @@ SECTION .text
LOAD m4, [prevq+t0]
psubw m3, m0
psubw m4, m1
- ABS1 m3, m5
- ABS1 m4, m5
+ ABS2 m3, m4, m5, m6
paddw m3, m4
psrlw m2, 1
psrlw m3, 1
@@ -114,8 +113,7 @@ SECTION .text
LOAD m4, [nextq+t0]
psubw m3, m0
psubw m4, m1
- ABS1 m3, m5
- ABS1 m4, m5
+ ABS2 m3, m4, m5, m6
paddw m3, m4
psrlw m3, 1
pmaxsw m2, m3
diff --git a/libavfilter/x86/yadif-10.asm b/libavfilter/x86/yadif-10.asm
index 1b01709..8853e0d 100644
--- a/libavfilter/x86/yadif-10.asm
+++ b/libavfilter/x86/yadif-10.asm
@@ -30,17 +30,6 @@ pw_1: times 8 dw 1
SECTION .text
-%macro PABS 2
-%if cpuflag(ssse3)
- pabsw %1, %1
-%else
- pxor %2, %2
- pcmpgtw %2, %1
- pxor %1, %2
- psubw %1, %2
-%endif
-%endmacro
-
%macro PMAXUW 2
%if cpuflag(sse4)
pmaxuw %1, %2
@@ -131,13 +120,12 @@ SECTION .text
mova [rsp+16], m3
mova [rsp+32], m1
psubw m2, m4
- PABS m2, m4
+ ABS1 m2, m4
LOAD m3, [prevq+t1]
LOAD m4, [prevq+t0]
psubw m3, m0
psubw m4, m1
- PABS m3, m5
- PABS m4, m5
+ ABS2 m3, m4, m5, m6
paddw m3, m4
psrlw m2, 1
psrlw m3, 1
@@ -146,8 +134,7 @@ SECTION .text
LOAD m4, [nextq+t0]
psubw m3, m0
psubw m4, m1
- PABS m3, m5
- PABS m4, m5
+ ABS2 m3, m4, m5, m6
paddw m3, m4
psrlw m3, 1
pmaxsw m2, m3
@@ -157,7 +144,7 @@ SECTION .text
paddw m0, m0
psubw m0, m1
psrlw m1, 1
- PABS m0, m2
+ ABS1 m0, m2
movu m2, [curq+t1-1*2]
movu m3, [curq+t0-1*2]
diff --git a/libavfilter/x86/yadif-16.asm b/libavfilter/x86/yadif-16.asm
index a65da89..0bd7f84 100644
--- a/libavfilter/x86/yadif-16.asm
+++ b/libavfilter/x86/yadif-16.asm
@@ -44,6 +44,22 @@ SECTION .text
%endif
%endmacro
+%macro PABS2 4
+%if cpuflag(ssse3)
+ pabsd %1, %1
+ pabsd %2, %2
+%else
+ pxor %3, %3
+ pxor %4, %4
+ pcmpgtd %3, %1
+ pcmpgtd %4, %2
+ pxor %1, %3
+ pxor %2, %4
+ psubd %1, %3
+ psubd %2, %4
+%endif
+%endmacro
+
%macro PACK 1
%if cpuflag(sse4)
packusdw %1, %1
@@ -180,8 +196,7 @@ SECTION .text
LOAD m4, [prevq+t0]
psubd m3, m0
psubd m4, m1
- PABS m3, m5
- PABS m4, m5
+ PABS2 m3, m4, m5, m6
paddd m3, m4
psrld m2, 1
psrld m3, 1
@@ -190,8 +205,7 @@ SECTION .text
LOAD m4, [nextq+t0]
psubd m3, m0
psubd m4, m1
- PABS m3, m5
- PABS m4, m5
+ PABS2 m3, m4, m5, m6
paddd m3, m4
psrld m3, 1
PMAXSD m2, m3, m6
--
1.8.5.5
More information about the ffmpeg-devel
mailing list