[FFmpeg-cvslog] swscale/x86/range_convert: reduce amount of xmm regs clobbered in luma functions
James Almer
git at videolan.org
Sun Jun 16 03:03:31 EEST 2024
ffmpeg | branch: master | James Almer <jamrial at gmail.com> | Sat Jun 15 21:00:17 2024 -0300| [8a4c9d6bd31f56f588f74f3f06fc78769392fbdd] | committer: James Almer
swscale/x86/range_convert: reduce amount of xmm regs clobbered in luma functions
Signed-off-by: James Almer <jamrial at gmail.com>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=8a4c9d6bd31f56f588f74f3f06fc78769392fbdd
---
libswscale/x86/range_convert.asm | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/libswscale/x86/range_convert.asm b/libswscale/x86/range_convert.asm
index ae51e9d573..572364df50 100644
--- a/libswscale/x86/range_convert.asm
+++ b/libswscale/x86/range_convert.asm
@@ -52,21 +52,21 @@ SECTION .text
;-----------------------------------------------------------------------------
%macro LUMCONVERTRANGE 4
-cglobal %1, 2, 2, 7, dst, width
+cglobal %1, 2, 2, 5, dst, width
shl widthd, 1
- VBROADCASTI128 m4, [%2]
- VBROADCASTI128 m5, [%3]
- pxor m6, m6
+ VBROADCASTI128 m2, [%2]
+ VBROADCASTI128 m3, [%3]
+ pxor m4, m4
add dstq, widthq
neg widthq
.loop:
movu m0, [dstq+widthq]
- punpckhwd m1, m0, m6
- punpcklwd m0, m6
- pmaddwd m0, m4
- pmaddwd m1, m4
- paddd m0, m5
- paddd m1, m5
+ punpckhwd m1, m0, m4
+ punpcklwd m0, m4
+ pmaddwd m0, m2
+ pmaddwd m1, m2
+ paddd m0, m3
+ paddd m1, m3
psrad m0, %4
psrad m1, %4
packssdw m0, m1
More information about the ffmpeg-cvslog mailing list