[FFmpeg-cvslog] audiodsp/x86: fix ff_vector_clip_int32_sse2
Anton Khirnov
git at videolan.org
Mon Mar 20 19:43:15 EET 2017
ffmpeg | branch: master | Anton Khirnov <anton at khirnov.net> | Tue Aug 9 14:17:15 2016 +0200| [1d6c76e11febb58738c9647c47079d02b5e10094] | committer: Anton Khirnov
audiodsp/x86: fix ff_vector_clip_int32_sse2
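This version, which is the only one doing two processing cycles per loop
iteration, computes the load/store indices incorrectly for the second
cycle: each offset was multiplied by the cycle counter instead of being
added to a per-cycle base, so the second cycle accessed vectors 0, 2, 4
and 6 rather than 4 to 7.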
CC: libav-stable at libav.org
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=1d6c76e11febb58738c9647c47079d02b5e10094
---
libavcodec/x86/audiodsp.asm | 36 ++++++++++++++++++------------------
1 file changed, 18 insertions(+), 18 deletions(-)
diff --git a/libavcodec/x86/audiodsp.asm b/libavcodec/x86/audiodsp.asm
index 696a73b..dc38ada 100644
--- a/libavcodec/x86/audiodsp.asm
+++ b/libavcodec/x86/audiodsp.asm
@@ -80,17 +80,17 @@ cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len
SPLATD m4
SPLATD m5
.loop:
-%assign %%i 1
+%assign %%i 0
%rep %2
- mova m0, [srcq+mmsize*0*%%i]
- mova m1, [srcq+mmsize*1*%%i]
- mova m2, [srcq+mmsize*2*%%i]
- mova m3, [srcq+mmsize*3*%%i]
+ mova m0, [srcq + mmsize * (0 + %%i)]
+ mova m1, [srcq + mmsize * (1 + %%i)]
+ mova m2, [srcq + mmsize * (2 + %%i)]
+ mova m3, [srcq + mmsize * (3 + %%i)]
%if %3
- mova m7, [srcq+mmsize*4*%%i]
- mova m8, [srcq+mmsize*5*%%i]
- mova m9, [srcq+mmsize*6*%%i]
- mova m10, [srcq+mmsize*7*%%i]
+ mova m7, [srcq + mmsize * (4 + %%i)]
+ mova m8, [srcq + mmsize * (5 + %%i)]
+ mova m9, [srcq + mmsize * (6 + %%i)]
+ mova m10, [srcq + mmsize * (7 + %%i)]
%endif
CLIPD m0, m4, m5, m6
CLIPD m1, m4, m5, m6
@@ -102,17 +102,17 @@ cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len
CLIPD m9, m4, m5, m6
CLIPD m10, m4, m5, m6
%endif
- mova [dstq+mmsize*0*%%i], m0
- mova [dstq+mmsize*1*%%i], m1
- mova [dstq+mmsize*2*%%i], m2
- mova [dstq+mmsize*3*%%i], m3
+ mova [dstq + mmsize * (0 + %%i)], m0
+ mova [dstq + mmsize * (1 + %%i)], m1
+ mova [dstq + mmsize * (2 + %%i)], m2
+ mova [dstq + mmsize * (3 + %%i)], m3
%if %3
- mova [dstq+mmsize*4*%%i], m7
- mova [dstq+mmsize*5*%%i], m8
- mova [dstq+mmsize*6*%%i], m9
- mova [dstq+mmsize*7*%%i], m10
+ mova [dstq + mmsize * (4 + %%i)], m7
+ mova [dstq + mmsize * (5 + %%i)], m8
+ mova [dstq + mmsize * (6 + %%i)], m9
+ mova [dstq + mmsize * (7 + %%i)], m10
%endif
-%assign %%i %%i+1
+%assign %%i (%%i + 4 * (1 + %3))
%endrep
add srcq, mmsize*4*(%2+%3)
add dstq, mmsize*4*(%2+%3)
More information about the ffmpeg-cvslog
mailing list