[FFmpeg-cvslog] swscale/x86/output: Fix yuv2planeX_16* with unaligned destination
Michael Niedermayer
git at videolan.org
Sat Mar 26 22:36:03 CET 2016
ffmpeg | branch: release/3.0 | Michael Niedermayer <michael at niedermayer.cc> | Wed Feb 17 00:14:56 2016 +0100| [be5acd6cb1f3437377512bf5683c4998267395e6] | committer: Michael Niedermayer
swscale/x86/output: Fix yuv2planeX_16* with unaligned destination
Reviewed-by: BBB
Signed-off-by: Michael Niedermayer <michael at niedermayer.cc>
(cherry picked from commit f6492a2ea8df80be0ed9591aee4019cef0e36e99)
Signed-off-by: Michael Niedermayer <michael at niedermayer.cc>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=be5acd6cb1f3437377512bf5683c4998267395e6
---
libswscale/x86/output.asm | 23 ++++++++++++++++-------
1 file changed, 16 insertions(+), 7 deletions(-)
diff --git a/libswscale/x86/output.asm b/libswscale/x86/output.asm
index 9570969..133817c 100644
--- a/libswscale/x86/output.asm
+++ b/libswscale/x86/output.asm
@@ -54,8 +54,8 @@ SECTION .text
; int32_t if $output_size is 16. $filter is 12-bits. $filterSize is a multiple
; of 2. $offset is either 0 or 3. $dither holds 8 values.
;-----------------------------------------------------------------------------
-%macro yuv2planeX_mainloop 1
-.pixelloop:
+%macro yuv2planeX_mainloop 2
+.pixelloop_%2:
%assign %%i 0
; the rep here is for the 8bit output mmx case, where dither covers
; 8 pixels but we can only handle 2 pixels per register, and thus 4
@@ -82,7 +82,7 @@ SECTION .text
mova m2, m1
%endif ; %1 == 8/9/10/16
movsx cntr_reg, fltsizem
-.filterloop_ %+ %%i:
+.filterloop_%2_ %+ %%i:
; input pixels
mov r6, [srcq+gprsize*cntr_reg-2*gprsize]
%if %1 == 16
@@ -129,7 +129,7 @@ SECTION .text
%endif ; %1 == 8/9/10/16
sub cntr_reg, 2
- jg .filterloop_ %+ %%i
+ jg .filterloop_%2_ %+ %%i
%if %1 == 16
psrad m2, 31 - %1
@@ -156,7 +156,7 @@ SECTION .text
%endif ; mmxext/sse2/sse4/avx
pminsw m2, [yuv2yuvX_%1_upper]
%endif ; %1 == 9/10/16
- mova [dstq+r5*2], m2
+ mov%2 [dstq+r5*2], m2
%endif ; %1 == 8/9/10/16
add r5, mmsize/2
@@ -164,7 +164,7 @@ SECTION .text
%assign %%i %%i+2
%endrep
- jg .pixelloop
+ jg .pixelloop_%2
%endmacro
%macro yuv2planeX_fn 3
@@ -235,7 +235,16 @@ cglobal yuv2planeX_%1, %3, 8, %2, filter, fltsize, src, dst, w, dither, offset
xor r5, r5
-yuv2planeX_mainloop %1
+%if mmsize == 8 || %1 == 8
+ yuv2planeX_mainloop %1, a
+%else ; mmsize == 16
+ test dstq, 15
+ jnz .unaligned
+ yuv2planeX_mainloop %1, a
+ REP_RET
+.unaligned:
+ yuv2planeX_mainloop %1, u
+%endif ; mmsize == 8/16
%if %1 == 8
%if ARCH_X86_32
More information about the ffmpeg-cvslog mailing list