[FFmpeg-cvslog] x86: hpeldsp: better factorization
Christophe Gisquet
git at videolan.org
Thu May 29 21:53:36 CEST 2014
ffmpeg | branch: master | Christophe Gisquet <christophe.gisquet at gmail.com> | Mon May 26 21:59:14 2014 +0200| [226700398105075d27d07b652a0b67705aa06a1e] | committer: Michael Niedermayer
x86: hpeldsp: better factorization
Signed-off-by: Michael Niedermayer <michaelni at gmx.at>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=226700398105075d27d07b652a0b67705aa06a1e
---
libavcodec/x86/hpeldsp.asm | 46 +++++++++-----------------------------------
libavutil/x86/x86util.asm | 10 +++++++++-
2 files changed, 18 insertions(+), 38 deletions(-)
diff --git a/libavcodec/x86/hpeldsp.asm b/libavcodec/x86/hpeldsp.asm
index 76e4632..a702b8b 100644
--- a/libavcodec/x86/hpeldsp.asm
+++ b/libavcodec/x86/hpeldsp.asm
@@ -372,16 +372,6 @@ AVG_PIXELS8
; void ff_avg_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
-%macro PAVGB_MMX 4
- movu %3, %1
- por %3, %2
- pxor %2, %1
- pand %2, %4
- psrlq %2, 1
- psubb %3, %2
- SWAP %2, %3
-%endmacro
-
%macro AVG_PIXELS8_X2 0
%if cpuflag(sse2)
cglobal avg_pixels16_x2, 4,5,4
@@ -396,53 +386,35 @@ cglobal avg_pixels8_x2, 4,5
.loop:
movu m0, [r1]
movu m2, [r1+r2]
-%if notcpuflag(mmxext)
- PAVGB_MMX [r1+1], m0, m3, m5
- PAVGB_MMX [r1+r2+1], m2, m4, m5
- PAVGB_MMX [r0], m0, m3, m5
- PAVGB_MMX [r0+r2], m2, m4, m5
-%else
%if cpuflag(sse2)
movu m1, [r1+1]
movu m3, [r1+r2+1]
pavgb m0, m1
pavgb m2, m3
%else
- PAVGB m0, [r1+1]
- PAVGB m2, [r1+r2+1]
-%endif
- PAVGB m0, [r0]
- PAVGB m2, [r0+r2]
+ PAVGB m0, [r1+1], m3, m5
+ PAVGB m2, [r1+r2+1], m4, m5
%endif
+ PAVGB m0, [r0], m3, m5
+ PAVGB m2, [r0+r2], m4, m5
add r1, r4
mova [r0], m0
mova [r0+r2], m2
movu m0, [r1]
movu m2, [r1+r2]
-%if notcpuflag(mmxext)
- PAVGB_MMX [r1+1], m0, m3, m5
- PAVGB_MMX [r1+r2+1], m2, m4, m5
-%elif cpuflag(sse2)
+%if cpuflag(sse2)
movu m1, [r1+1]
movu m3, [r1+r2+1]
pavgb m0, m1
pavgb m2, m3
%else
- PAVGB m0, [r1+1]
- PAVGB m2, [r1+r2+1]
+ PAVGB m0, [r1+1], m3, m5
+ PAVGB m2, [r1+r2+1], m4, m5
%endif
add r0, r4
add r1, r4
-%if notcpuflag(mmxext)
- PAVGB_MMX [r0], m0, m3, m5
- PAVGB_MMX [r0+r2], m2, m4, m5
-%elif cpuflag(sse2)
- pavgb m0, [r0]
- pavgb m2, [r0+r2]
-%else
- PAVGB m0, [r0]
- PAVGB m2, [r0+r2]
-%endif
+ PAVGB m0, [r0], m3, m5
+ PAVGB m2, [r0+r2], m4, m5
mova [r0], m0
mova [r0+r2], m2
add r0, r4
diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm
index 807e87e..1064e9a 100644
--- a/libavutil/x86/x86util.asm
+++ b/libavutil/x86/x86util.asm
@@ -340,11 +340,19 @@
%endif
%endmacro
-%macro PAVGB 2
+%macro PAVGB 2-4
%if cpuflag(mmxext)
pavgb %1, %2
%elif cpuflag(3dnow)
pavgusb %1, %2
+%elif cpuflag(mmx)
+ movu %3, %2
+ por %3, %1
+ pxor %1, %2
+ pand %1, %4
+ psrlq %1, 1
+ psubb %3, %1
+ SWAP %1, %3
%endif
%endmacro
More information about the ffmpeg-cvslog mailing list