[FFmpeg-devel] [PATCH] x86/yadif: improve usage of ABS macros

James Almer jamrial at gmail.com
Tue Jul 8 06:42:26 CEST 2014


ABS2 might help on processors where out-of-order execution isn't very good.
Also remove the duplicate PABS macro from yadif-10.asm and use the x86util
ABS1/ABS2 macros instead, which are optimized for mmxext/sse2.

Signed-off-by: James Almer <jamrial at gmail.com>
---
 libavfilter/x86/vf_yadif.asm |  6 ++----
 libavfilter/x86/yadif-10.asm | 21 ++++-----------------
 libavfilter/x86/yadif-16.asm | 22 ++++++++++++++++++----
 3 files changed, 24 insertions(+), 25 deletions(-)

diff --git a/libavfilter/x86/vf_yadif.asm b/libavfilter/x86/vf_yadif.asm
index a29620c..d2e7d9b 100644
--- a/libavfilter/x86/vf_yadif.asm
+++ b/libavfilter/x86/vf_yadif.asm
@@ -104,8 +104,7 @@ SECTION .text
     LOAD         m4, [prevq+t0]
     psubw        m3, m0
     psubw        m4, m1
-    ABS1         m3, m5
-    ABS1         m4, m5
+    ABS2         m3, m4, m5, m6
     paddw        m3, m4
     psrlw        m2, 1
     psrlw        m3, 1
@@ -114,8 +113,7 @@ SECTION .text
     LOAD         m4, [nextq+t0]
     psubw        m3, m0
     psubw        m4, m1
-    ABS1         m3, m5
-    ABS1         m4, m5
+    ABS2         m3, m4, m5, m6
     paddw        m3, m4
     psrlw        m3, 1
     pmaxsw       m2, m3
diff --git a/libavfilter/x86/yadif-10.asm b/libavfilter/x86/yadif-10.asm
index 1b01709..8853e0d 100644
--- a/libavfilter/x86/yadif-10.asm
+++ b/libavfilter/x86/yadif-10.asm
@@ -30,17 +30,6 @@ pw_1: times 8 dw 1
 
 SECTION .text
 
-%macro PABS 2
-%if cpuflag(ssse3)
-    pabsw %1, %1
-%else
-    pxor    %2, %2
-    pcmpgtw %2, %1
-    pxor    %1, %2
-    psubw   %1, %2
-%endif
-%endmacro
-
 %macro PMAXUW 2
 %if cpuflag(sse4)
     pmaxuw %1, %2
@@ -131,13 +120,12 @@ SECTION .text
     mova   [rsp+16], m3
     mova   [rsp+32], m1
     psubw        m2, m4
-    PABS         m2, m4
+    ABS1         m2, m4
     LOAD         m3, [prevq+t1]
     LOAD         m4, [prevq+t0]
     psubw        m3, m0
     psubw        m4, m1
-    PABS         m3, m5
-    PABS         m4, m5
+    ABS2         m3, m4, m5, m6
     paddw        m3, m4
     psrlw        m2, 1
     psrlw        m3, 1
@@ -146,8 +134,7 @@ SECTION .text
     LOAD         m4, [nextq+t0]
     psubw        m3, m0
     psubw        m4, m1
-    PABS         m3, m5
-    PABS         m4, m5
+    ABS2         m3, m4, m5, m6
     paddw        m3, m4
     psrlw        m3, 1
     pmaxsw       m2, m3
@@ -157,7 +144,7 @@ SECTION .text
     paddw        m0, m0
     psubw        m0, m1
     psrlw        m1, 1
-    PABS         m0, m2
+    ABS1         m0, m2
 
     movu         m2, [curq+t1-1*2]
     movu         m3, [curq+t0-1*2]
diff --git a/libavfilter/x86/yadif-16.asm b/libavfilter/x86/yadif-16.asm
index a65da89..0bd7f84 100644
--- a/libavfilter/x86/yadif-16.asm
+++ b/libavfilter/x86/yadif-16.asm
@@ -44,6 +44,22 @@ SECTION .text
 %endif
 %endmacro
 
+%macro PABS2 4
+%if cpuflag(ssse3)
+    pabsd %1, %1
+    pabsd %2, %2
+%else
+    pxor    %3, %3
+    pxor    %4, %4
+    pcmpgtd %3, %1
+    pcmpgtd %4, %2
+    pxor    %1, %3
+    pxor    %2, %4
+    psubd   %1, %3
+    psubd   %2, %4
+%endif
+%endmacro
+
 %macro PACK 1
 %if cpuflag(sse4)
     packusdw %1, %1
@@ -180,8 +196,7 @@ SECTION .text
     LOAD         m4, [prevq+t0]
     psubd        m3, m0
     psubd        m4, m1
-    PABS         m3, m5
-    PABS         m4, m5
+    PABS2        m3, m4, m5, m6
     paddd        m3, m4
     psrld        m2, 1
     psrld        m3, 1
@@ -190,8 +205,7 @@ SECTION .text
     LOAD         m4, [nextq+t0]
     psubd        m3, m0
     psubd        m4, m1
-    PABS         m3, m5
-    PABS         m4, m5
+    PABS2        m3, m4, m5, m6
     paddd        m3, m4
     psrld        m3, 1
     PMAXSD       m2, m3, m6
-- 
1.8.5.5



More information about the ffmpeg-devel mailing list