[FFmpeg-devel] [PATCH] fix for roundup issue 2127
Michael Niedermayer
michaelni
Sun Jan 2 23:48:56 CET 2011
On Sat, Jan 01, 2011 at 11:47:42PM -0500, Daniel Kang wrote:
> On Sat, Jan 1, 2011 at 11:33 PM, Michael Niedermayer <michaelni at gmx.at> wrote:
> >
> > something like this:
> > "movd (%3), %%mm0 \n\t"
> > "add %1, %3 \n\t"
> > "movd (%3), %%mm1 \n\t"
> > "movd (%3,%1), %%mm2 \n\t"
> > "movd (%3,%1,2), %%mm3 \n\t"
> >
> > This would replace the lea with an add, which is faster on some CPUs.
>
>
> I have removed the leas.
> dsputil_mmx.h | 37 +++++++++++++++++++------------------
> 1 file changed, 19 insertions(+), 18 deletions(-)
> 9d380b677fd5b19ae4c26b9dda9a5e1ba4a3e233 fix.diff
> From 1a1e5a4c664afdf1511d42544b0856f78548500d Mon Sep 17 00:00:00 2001
> From: Daniel Kang <daniel.d.kang at gmail.com>
> Date: Wed, 29 Dec 2010 22:06:38 -0500
> Subject: [PATCH] 2127 fix
>
> ---
> libavcodec/x86/dsputil_mmx.h | 37 +++++++++++++++++++------------------
> 1 files changed, 19 insertions(+), 18 deletions(-)
>
> diff --git a/libavcodec/x86/dsputil_mmx.h b/libavcodec/x86/dsputil_mmx.h
> index d9c2f44..2d53032 100644
> --- a/libavcodec/x86/dsputil_mmx.h
> +++ b/libavcodec/x86/dsputil_mmx.h
> @@ -24,6 +24,7 @@
>
> #include <stdint.h>
> #include "libavcodec/dsputil.h"
> +#include "libavutil/x86_cpu.h"
>
> typedef struct { uint64_t a, b; } xmm_reg;
>
> @@ -94,32 +95,32 @@ extern const double ff_pd_2[2];
> SBUTTERFLY(a,c,d,dq,q) /* a=aeim d=bfjn */\
> SBUTTERFLY(t,b,c,dq,q) /* t=cgko c=dhlp */
>
> -static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){
> +static inline void transpose4x4(uint8_t *dst, uint8_t *src, x86_reg dst_stride, x86_reg src_stride){
> __asm__ volatile( //FIXME could save 1 instruction if done as 8x4 ...
> - "movd %4, %%mm0 \n\t"
> - "movd %5, %%mm1 \n\t"
> - "movd %6, %%mm2 \n\t"
> - "movd %7, %%mm3 \n\t"
> + "movd (%3), %%mm0 \n\t"
> + "add %1, %3 \n\t"
> + "movd (%3), %%mm1 \n\t"
> + "movd (%3,%1,1), %%mm2 \n\t"
> + "movd (%3,%1,2), %%mm3 \n\t"
> "punpcklbw %%mm1, %%mm0 \n\t"
> "punpcklbw %%mm3, %%mm2 \n\t"
> "movq %%mm0, %%mm1 \n\t"
> "punpcklwd %%mm2, %%mm0 \n\t"
> "punpckhwd %%mm2, %%mm1 \n\t"
> - "movd %%mm0, %0 \n\t"
> + "movd %%mm0, (%2) \n\t"
> + "add %0, %2 \n\t"
> "punpckhdq %%mm0, %%mm0 \n\t"
> - "movd %%mm0, %1 \n\t"
> - "movd %%mm1, %2 \n\t"
> + "movd %%mm0, (%2) \n\t"
> + "movd %%mm1, (%2,%0,1) \n\t"
> "punpckhdq %%mm1, %%mm1 \n\t"
> - "movd %%mm1, %3 \n\t"
> -
> - : "=m" (*(uint32_t*)(dst + 0*dst_stride)),
> - "=m" (*(uint32_t*)(dst + 1*dst_stride)),
> - "=m" (*(uint32_t*)(dst + 2*dst_stride)),
> - "=m" (*(uint32_t*)(dst + 3*dst_stride))
> - : "m" (*(uint32_t*)(src + 0*src_stride)),
> - "m" (*(uint32_t*)(src + 1*src_stride)),
> - "m" (*(uint32_t*)(src + 2*src_stride)),
> - "m" (*(uint32_t*)(src + 3*src_stride))
> + "movd %%mm1, (%2,%0,2) \n\t"
> +
> + : "+&r" (dst_stride),
> + "+&r" (src_stride),
> + "+&r" (dst),
> + "+&r" (src)
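Only 2 of these operands are written to (dst and src, which are advanced by the
"add" instructions); thus only those 2 need a "+&". The two strides are only
read and can be plain input operands.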
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
Those who are too smart to engage in politics are punished by being
governed by those who are dumber. -- Plato
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 198 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20110102/a5f4e629/attachment.pgp>
More information about the ffmpeg-devel mailing list