[FFmpeg-cvslog] x86/vf_blend: add sse2 versions of blend_difference and blend_negation

James Almer git at videolan.org
Thu Dec 24 17:06:22 CET 2015


ffmpeg | branch: master | James Almer <jamrial at gmail.com> | Wed Dec 23 23:55:56 2015 -0300 | [8dba3fb8fdcf06a51d27ae92321d713060e3c781] | committer: James Almer

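x86/vf_blend: add sse2 versions of blend_difference and blend_negation

The bodies of both functions are wrapped in a new BLEND_ABS macro, with the
direct pabsw instructions replaced by the ABS1 macro, and BLEND_ABS is then
instantiated once for sse2 and once for ssse3. The init code declares the new
sse2 functions and selects them in the SSE2 branch of ff_blend_init_x86.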

Reviewed-by: Paul B Mahol <onemda at gmail.com>
Signed-off-by: James Almer <jamrial at gmail.com>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=8dba3fb8fdcf06a51d27ae92321d713060e3c781
---

 libavfilter/x86/vf_blend.asm    |   12 +++++++++---
 libavfilter/x86/vf_blend_init.c |    4 ++++
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/libavfilter/x86/vf_blend.asm b/libavfilter/x86/vf_blend.asm
index 5f0271a..730be77 100644
--- a/libavfilter/x86/vf_blend.asm
+++ b/libavfilter/x86/vf_blend.asm
@@ -176,7 +176,7 @@ BLEND_INIT phoenix, 4
     jl .loop
 BLEND_END
 
-INIT_XMM ssse3
+%macro BLEND_ABS 0
 BLEND_INIT difference, 3
     pxor       m2, m2
 .nextrow:
@@ -188,7 +188,7 @@ BLEND_INIT difference, 3
         punpcklbw       m0, m2
         punpcklbw       m1, m2
         psubw           m0, m1
-        pabsw           m0, m0
+        ABS1            m0, m1
         packuswb        m0, m0
         movh   [dstq + xq], m0
         add             xq, mmsize / 2
@@ -209,7 +209,7 @@ BLEND_INIT negation, 5
         mova            m3, m4
         psubw           m3, m0
         psubw           m3, m1
-        pabsw           m3, m3
+        ABS1            m3, m1
         mova            m0, m4
         psubw           m0, m3
         packuswb        m0, m0
@@ -217,3 +217,9 @@ BLEND_INIT negation, 5
         add             xq, mmsize / 2
     jl .loop
 BLEND_END
+%endmacro
+
+INIT_XMM sse2
+BLEND_ABS
+INIT_XMM ssse3
+BLEND_ABS
diff --git a/libavfilter/x86/vf_blend_init.c b/libavfilter/x86/vf_blend_init.c
index b7d234f..dc29547 100644
--- a/libavfilter/x86/vf_blend_init.c
+++ b/libavfilter/x86/vf_blend_init.c
@@ -42,7 +42,9 @@ BLEND_FUNC(or, sse2)
 BLEND_FUNC(phoenix, sse2)
 BLEND_FUNC(subtract, sse2)
 BLEND_FUNC(xor, sse2)
+BLEND_FUNC(difference, sse2)
 BLEND_FUNC(difference, ssse3)
+BLEND_FUNC(negation, sse2)
 BLEND_FUNC(negation, ssse3)
 
 av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit)
@@ -63,6 +65,8 @@ av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit)
         case BLEND_PHOENIX:  param->blend = ff_blend_phoenix_sse2;  break;
         case BLEND_SUBTRACT: param->blend = ff_blend_subtract_sse2; break;
         case BLEND_XOR:      param->blend = ff_blend_xor_sse2;      break;
+        case BLEND_DIFFERENCE: param->blend = ff_blend_difference_sse2; break;
+        case BLEND_NEGATION:   param->blend = ff_blend_negation_sse2;   break;
         }
     }
     if (EXTERNAL_SSSE3(cpu_flags) && param->opacity == 1 && !is_16bit) {



More information about the ffmpeg-cvslog mailing list