[FFmpeg-cvslog] x86/lossless_videodsp: use common macro for add and diff int16 loop.
Clément Bœsch
git at videolan.org
Sat Jan 25 16:26:54 CET 2014
ffmpeg | branch: master | Clément Bœsch <u at pkh.me> | Sat Jan 25 11:59:39 2014 +0100| [5267e850563d8c4cbb417fc5f98e140e758a51cf] | committer: Clément Bœsch
x86/lossless_videodsp: use common macro for add and diff int16 loop.
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=5267e850563d8c4cbb417fc5f98e140e758a51cf
---
libavcodec/x86/lossless_videodsp.asm | 72 ++++++++++++----------------------
1 file changed, 26 insertions(+), 46 deletions(-)
diff --git a/libavcodec/x86/lossless_videodsp.asm b/libavcodec/x86/lossless_videodsp.asm
index 59ca733..0266728 100644
--- a/libavcodec/x86/lossless_videodsp.asm
+++ b/libavcodec/x86/lossless_videodsp.asm
@@ -31,7 +31,7 @@ pb_zzzzzzzz67676767: db -1,-1,-1,-1,-1,-1,-1,-1, 6, 7, 6, 7, 6, 7, 6, 7
SECTION_TEXT
-%macro ADD_INT16_LOOP 1 ; %1 = a/u (aligned/unaligned)
+%macro INT16_LOOP 2 ; %1 = a/u (aligned/unaligned), %2 = add/sub
movd m4, maskd
SPLATW m4, m4
add wq, wq
@@ -39,24 +39,41 @@ SECTION_TEXT
jz %%.tomainloop
%%.wordloop:
sub wq, 2
+%ifidn %2, add
mov ax, [srcq+wq]
add ax, [dstq+wq]
+%else
+ mov ax, [src1q+wq]
+ sub ax, [src2q+wq]
+%endif
and ax, maskw
mov [dstq+wq], ax
test wq, 2*mmsize - 1
jnz %%.wordloop
%%.tomainloop:
+%ifidn %2, add
add srcq, wq
+%else
+ add src1q, wq
+ add src2q, wq
+%endif
add dstq, wq
neg wq
jz %%.end
%%.loop:
+%ifidn %2, add
mov%1 m0, [srcq+wq]
mov%1 m1, [dstq+wq]
mov%1 m2, [srcq+wq+mmsize]
mov%1 m3, [dstq+wq+mmsize]
- paddw m0, m1
- paddw m2, m3
+%else
+ mov%1 m0, [src1q+wq]
+ mov%1 m1, [src2q+wq]
+ mov%1 m2, [src1q+wq+mmsize]
+ mov%1 m3, [src2q+wq+mmsize]
+%endif
+ p%2w m0, m1
+ p%2w m2, m3
pand m0, m4
pand m2, m4
mov%1 [dstq+wq] , m0
@@ -69,7 +86,7 @@ SECTION_TEXT
INIT_MMX mmx
cglobal add_int16, 4,4,5, dst, src, mask, w
- ADD_INT16_LOOP a
+ INT16_LOOP a, add
INIT_XMM sse2
cglobal add_int16, 4,4,5, dst, src, mask, w
@@ -77,50 +94,13 @@ cglobal add_int16, 4,4,5, dst, src, mask, w
jnz .unaligned
test dstq, mmsize-1
jnz .unaligned
- ADD_INT16_LOOP a
+ INT16_LOOP a, add
.unaligned:
- ADD_INT16_LOOP u
-
-%macro DIFF_INT16_LOOP 1 ; %1 = a/u (aligned/unaligned)
- movd m4, maskd
- SPLATW m4, m4
- add wq, wq
- test wq, 2*mmsize - 1
- jz %%.tomainloop
-%%.wordloop:
- sub wq, 2
- mov ax, [src1q+wq]
- sub ax, [src2q+wq]
- and ax, maskw
- mov [dstq+wq], ax
- test wq, 2*mmsize - 1
- jnz %%.wordloop
-%%.tomainloop:
- add src1q, wq
- add src2q, wq
- add dstq, wq
- neg wq
- jz %%.end
-%%.loop:
- mov%1 m0, [src1q+wq]
- mov%1 m1, [src2q+wq]
- mov%1 m2, [src1q+wq+mmsize]
- mov%1 m3, [src2q+wq+mmsize]
- psubw m0, m1
- psubw m2, m3
- pand m0, m4
- pand m2, m4
- mov%1 [dstq+wq] , m0
- mov%1 [dstq+wq+mmsize], m2
- add wq, 2*mmsize
- jl %%.loop
-%%.end:
- RET
-%endmacro
+ INT16_LOOP u, add
INIT_MMX mmx
cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w
- DIFF_INT16_LOOP a
+ INT16_LOOP a, sub
INIT_XMM sse2
cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w
@@ -130,9 +110,9 @@ cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w
jnz .unaligned
test dstq, mmsize-1
jnz .unaligned
- DIFF_INT16_LOOP a
+ INT16_LOOP a, sub
.unaligned:
- DIFF_INT16_LOOP u
+ INT16_LOOP u, sub
%macro ADD_HFYU_LEFT_LOOP_INT16 2 ; %1 = dst alignment (a/u), %2 = src alignment (a/u)
More information about the ffmpeg-cvslog mailing list