[FFmpeg-cvslog] libswscale/x86/yuv2yuvX: Removes unrolling for mmx and mmxext

Alan Kelly git at videolan.org
Thu Apr 1 21:56:53 EEST 2021


ffmpeg | branch: master | Alan Kelly <alankelly at google.com> | Thu Apr  1 12:00:15 2021 +0200| [3ce8d092448827842c451807f03010ad5129fd8f] | committer: Michael Niedermayer

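libswscale/x86/yuv2yuvX: Removes unrolling for mmx and mmxext

[Annotation, not part of the original commit message: as the diff below shows, the second pair of accumulators (m6, m1) and the second store are now assembled only when cpuflag(sse3) is set, and the per-iteration advance of the offset becomes mmsize * unroll, where unroll is 1 without sse3 and 2 with it.]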

Signed-off-by: Michael Niedermayer <michael at niedermayer.cc>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=3ce8d092448827842c451807f03010ad5129fd8f
---

 libswscale/x86/yuv2yuvX.asm | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/libswscale/x86/yuv2yuvX.asm b/libswscale/x86/yuv2yuvX.asm
index 521880dabe..b6294cb919 100644
--- a/libswscale/x86/yuv2yuvX.asm
+++ b/libswscale/x86/yuv2yuvX.asm
@@ -37,8 +37,10 @@ SECTION .text
 cglobal yuv2yuvX, 7, 7, 8, filter, filterSize, src, dest, dstW, dither, offset
 %if notcpuflag(sse3)
 %define movr mova
+%define unroll 1
 %else
 %define movr movdqu
+%define unroll 2
 %endif
     movsxdifnidn         dstWq, dstWd
     movsxdifnidn         offsetq, offsetd
@@ -70,8 +72,10 @@ cglobal yuv2yuvX, 7, 7, 8, filter, filterSize, src, dest, dstW, dither, offset
 .outerloop:
     mova                 m4, m7
     mova                 m3, m7
+%if cpuflag(sse3)
     mova                 m6, m7
     mova                 m1, m7
+%endif
 .loop:
 %if cpuflag(avx2)
     vpbroadcastq         m0, [filterSizeq + 8]
@@ -84,28 +88,36 @@ cglobal yuv2yuvX, 7, 7, 8, filter, filterSize, src, dest, dstW, dither, offset
     pmulhw               m5, m0, [srcq + offsetq * 2 + mmsize]
     paddw                m3, m3, m2
     paddw                m4, m4, m5
+%if cpuflag(sse3)
     pmulhw               m2, m0, [srcq + offsetq * 2 + 2 * mmsize]
     pmulhw               m5, m0, [srcq + offsetq * 2 + 3 * mmsize]
     paddw                m6, m6, m2
     paddw                m1, m1, m5
+%endif
     add                  filterSizeq, $10
     mov                  srcq, [filterSizeq]
     test                 srcq, srcq
     jnz                  .loop
     psraw                m3, m3, 3
     psraw                m4, m4, 3
+%if cpuflag(sse3)
     psraw                m6, m6, 3
     psraw                m1, m1, 3
+%endif
     packuswb             m3, m3, m4
+%if cpuflag(sse3)
     packuswb             m6, m6, m1
+%endif
     mov                  srcq, [filterq]
 %if cpuflag(avx2)
     vpermq               m3, m3, 216
     vpermq               m6, m6, 216
 %endif
     movr                 [destq + offsetq], m3
+%if cpuflag(sse3)
     movr                 [destq + offsetq + mmsize], m6
-    add                  offsetq, mmsize * 2
+%endif
+    add                  offsetq, mmsize * unroll
     mov                  filterSizeq, filterq
     cmp                  offsetq, dstWq
     jb                  .outerloop



More information about the ffmpeg-cvslog mailing list