[FFmpeg-cvslog] swscale/x86/range_convert: reduce amount of xmm regs clobbered in luma functions

James Almer git at videolan.org
Sun Jun 16 03:03:31 EEST 2024


ffmpeg | branch: master | James Almer <jamrial at gmail.com> | Sat Jun 15 21:00:17 2024 -0300| [8a4c9d6bd31f56f588f74f3f06fc78769392fbdd] | committer: James Almer

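swscale/x86/range_convert: reduce amount of xmm regs clobbered in luma functions

The two broadcast constants and the zero register move from m4-m6 down to
m2-m4, so the xmm register count passed to cglobal drops from 7 to 5. The
luma functions therefore no longer touch xmm6, which is callee-saved under
the Microsoft x64 calling convention and would otherwise have to be saved
and restored on Win64.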

Signed-off-by: James Almer <jamrial at gmail.com>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=8a4c9d6bd31f56f588f74f3f06fc78769392fbdd
---

 libswscale/x86/range_convert.asm | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/libswscale/x86/range_convert.asm b/libswscale/x86/range_convert.asm
index ae51e9d573..572364df50 100644
--- a/libswscale/x86/range_convert.asm
+++ b/libswscale/x86/range_convert.asm
@@ -52,21 +52,21 @@ SECTION .text
 ;-----------------------------------------------------------------------------
 
 %macro LUMCONVERTRANGE 4
-cglobal %1, 2, 2, 7, dst, width
+cglobal %1, 2, 2, 5, dst, width
     shl          widthd, 1
-    VBROADCASTI128   m4, [%2]
-    VBROADCASTI128   m5, [%3]
-    pxor             m6, m6
+    VBROADCASTI128   m2, [%2]
+    VBROADCASTI128   m3, [%3]
+    pxor             m4, m4
     add            dstq, widthq
     neg          widthq
 .loop:
     movu             m0, [dstq+widthq]
-    punpckhwd        m1, m0, m6
-    punpcklwd        m0, m6
-    pmaddwd          m0, m4
-    pmaddwd          m1, m4
-    paddd            m0, m5
-    paddd            m1, m5
+    punpckhwd        m1, m0, m4
+    punpcklwd        m0, m4
+    pmaddwd          m0, m2
+    pmaddwd          m1, m2
+    paddd            m0, m3
+    paddd            m1, m3
     psrad            m0, %4
     psrad            m1, %4
     packssdw         m0, m1



More information about the ffmpeg-cvslog mailing list