[Ffmpeg-devel] [PATCH] Intel Mac MMX/SSE2 support
Michael Niedermayer
michaelni
Fri Jun 30 11:23:51 CEST 2006
Hi
On Fri, Jun 30, 2006 at 12:06:37PM +0700, Roine Gustafsson wrote:
[...]
> - ".balign 16 \n\t"
> + BALIGN_16
please use the same macro names as mplayer does
[...]
> #ifdef CONFIG_SNOW_ENCODER
> - if(mm_flags & MM_SSE2){
> +#if defined(__APPLE__)
> + if (0) // alignment issues with SSE2 code with Apple GCC
> +#else
> + if(mm_flags & MM_SSE2)
> +#endif
> + {
the #if should wrap the whole block of code instead of replacing if(...) with if(0)
[...]
> diff -NaurbB --exclude=.svn ../ffmpeg/libavcodec/i386/fdct_mmx.c ./libavcodec/i386/fdct_mmx.c
> --- ../ffmpeg/libavcodec/i386/fdct_mmx.c 2006-06-30 10:41:13.000000000 +0700
> +++ ./libavcodec/i386/fdct_mmx.c 2006-06-30 09:35:07.000000000 +0700
> @@ -350,6 +350,67 @@
>
> static always_inline void fdct_row_sse2(const int16_t *in, int16_t *out)
> {
> +#if defined(__APPLE__)
> + // Apple 'as' has a different macro syntax than FSF GCC 'as'
> + asm volatile(
> + ".macro FDCT_ROW_SSE2_H1 \n\t"
> + "movq $0(%0), %%xmm2 \n\t"
> + "movq $0+8(%0), %%xmm0 \n\t"
> + "movdqa $1+32(%1), %%xmm3 \n\t"
> + "movdqa $1+48(%1), %%xmm7 \n\t"
> + "movdqa $1(%1), %%xmm4 \n\t"
> + "movdqa $1+16(%1), %%xmm5 \n\t"
> + ".endmacro \n\t"
> + ".macro FDCT_ROW_SSE2_H2 \n\t"
> + "movq $0(%0), %%xmm2 \n\t"
> + "movq $0+8(%0), %%xmm0 \n\t"
> + "movdqa $1+32(%1), %%xmm3 \n\t"
> + "movdqa $1+48(%1), %%xmm7 \n\t"
> + ".endmacro \n\t"
> + ".macro FDCT_ROW_SSE2 \n\t"
> + "movq %%xmm2, %%xmm1 \n\t"
> + "pshuflw $$0x27, %%xmm0, %%xmm0 \n\t"
> + "paddsw %%xmm0, %%xmm1 \n\t"
> + "psubsw %%xmm0, %%xmm2 \n\t"
> + "punpckldq %%xmm2, %%xmm1 \n\t"
> + "pshufd $$0x78, %%xmm1, %%xmm2 \n\t"
> + "pmaddwd %%xmm2, %%xmm3 \n\t"
> + "pmaddwd %%xmm1, %%xmm7 \n\t"
> + "pmaddwd %%xmm5, %%xmm2 \n\t"
> + "pmaddwd %%xmm4, %%xmm1 \n\t"
> + "paddd %%xmm7, %%xmm3 \n\t"
> + "paddd %%xmm2, %%xmm1 \n\t"
> + "paddd %%xmm6, %%xmm3 \n\t"
> + "paddd %%xmm6, %%xmm1 \n\t"
> + "psrad $%3, %%xmm3 \n\t"
> + "psrad $%3, %%xmm1 \n\t"
> + "packssdw %%xmm3, %%xmm1 \n\t"
> + "movdqa %%xmm1, $0(%4) \n\t"
> + ".endmacro \n\t"
> + "movdqa (%2), %%xmm6 \n\t"
> + "FDCT_ROW_SSE2_H1 0,0 \n\t"
> + "FDCT_ROW_SSE2 0 \n\t"
> + "FDCT_ROW_SSE2_H2 64,0 \n\t"
> + "FDCT_ROW_SSE2 64 \n\t"
> +
> + "FDCT_ROW_SSE2_H1 16,64 \n\t"
> + "FDCT_ROW_SSE2 16 \n\t"
> + "FDCT_ROW_SSE2_H2 112,64 \n\t"
> + "FDCT_ROW_SSE2 112 \n\t"
> +
> + "FDCT_ROW_SSE2_H1 32,128 \n\t"
> + "FDCT_ROW_SSE2 32 \n\t"
> + "FDCT_ROW_SSE2_H2 96,128 \n\t"
> + "FDCT_ROW_SSE2 96 \n\t"
> +
> + "FDCT_ROW_SSE2_H1 48,192 \n\t"
> + "FDCT_ROW_SSE2 48 \n\t"
> + "FDCT_ROW_SSE2_H2 80,192 \n\t"
> + "FDCT_ROW_SSE2 80 \n\t"
> + :
> + : "r" (in), "r" (tab_frw_01234567_sse2.tab_frw_01234567_sse2), "r" (fdct_r_row_sse2.fdct_r_row_sse2), "i" (SHIFT_FRW_ROW), "r" (out)
> + );
> +#else
> asm volatile(
> ".macro FDCT_ROW_SSE2_H1 i t \n\t"
> "movq \\i(%0), %%xmm2 \n\t"
> @@ -408,6 +469,7 @@
> :
> : "r" (in), "r" (tab_frw_01234567_sse2.tab_frw_01234567_sse2), "r" (fdct_r_row_sse2.fdct_r_row_sse2), "i" (SHIFT_FRW_ROW), "r" (out)
> );
> +#endif
code duplication is not acceptable
[...]
> diff -NaurbB --exclude=.svn ../ffmpeg/libavcodec/i386/mmx.h ./libavcodec/i386/mmx.h
> --- ../ffmpeg/libavcodec/i386/mmx.h 2006-06-30 10:41:13.000000000 +0700
> +++ ./libavcodec/i386/mmx.h 2006-06-30 09:48:38.000000000 +0700
> @@ -23,6 +23,14 @@
> # define PTR_SIZE "4"
> #endif
>
> +#if defined(__APPLE__)
> +# define BALIGN_8 ".align 3 \n\t"
> +# define BALIGN_16 ".align 4 \n\t"
> +#else
> +# define BALIGN_8 ".balign 8 \n\t"
> +# define BALIGN_16 ".balign 16 \n\t"
> +#endif
completely unacceptable: this is not specific to mmx, so it doesn't belong in
mmx.h; it's not even specific to x86
btw, how hard is it for mac-intel users to download and use non-apple gcc?
I mean, does unmodified gcc work on mac-intel?
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
In the past you could go to a library and read, borrow or copy any book
Today you'd get arrested for merely telling someone where the library is
More information about the ffmpeg-devel
mailing list