[Ffmpeg-devel] [PATCH] Intel Mac MMX/SSE2 support

Fri Jun 30 11:23:51 CEST 2006

Hi

On Fri, Jun 30, 2006 at 12:06:37PM +0700, Roine Gustafsson wrote:
[...]

> -        ".balign 16                     \n\t"
> +        BALIGN_16

please use the same macro names as mplayer does

[...]

>  #ifdef CONFIG_SNOW_ENCODER
> -        if(mm_flags & MM_SSE2){
> +#if defined(__APPLE__)
> +        if (0)  // alignment issues with SSE2 code with Apple GCC
> +#else
> +        if(mm_flags & MM_SSE2)
> +#endif
> +        {

the #if should be around the whole code, instead of replacing if(...) by if(0)

[...]
> diff -NaurbB --exclude=.svn ../ffmpeg/libavcodec/i386/fdct_mmx.c ./libavcodec/i386/fdct_mmx.c
> --- ../ffmpeg/libavcodec/i386/fdct_mmx.c	2006-06-30 10:41:13.000000000 +0700
> +++ ./libavcodec/i386/fdct_mmx.c	2006-06-30 09:35:07.000000000 +0700
> @@ -350,6 +350,67 @@
>  
>  static always_inline void fdct_row_sse2(const int16_t *in, int16_t *out)
>  {
> +#if defined(__APPLE__)
> +    // Apple 'as' has a different macro syntax than FSF GCC 'as'
> +    asm volatile(
> +        ".macro FDCT_ROW_SSE2_H1        \n\t"
> +        "movq      $0(%0), %%xmm2       \n\t"
> +        "movq      $0+8(%0), %%xmm0     \n\t"
> +        "movdqa    $1+32(%1), %%xmm3    \n\t"
> +        "movdqa    $1+48(%1), %%xmm7    \n\t"
> +        "movdqa    $1(%1), %%xmm4       \n\t"
> +        "movdqa    $1+16(%1), %%xmm5    \n\t"
> +        ".endmacro                      \n\t"
> +        ".macro FDCT_ROW_SSE2_H2        \n\t"
> +        "movq      $0(%0), %%xmm2       \n\t"
> +        "movq      $0+8(%0), %%xmm0     \n\t"
> +        "movdqa    $1+32(%1), %%xmm3    \n\t"
> +        "movdqa    $1+48(%1), %%xmm7    \n\t"
> +        ".endmacro                      \n\t"
> +        ".macro FDCT_ROW_SSE2           \n\t"
> +        "movq      %%xmm2, %%xmm1       \n\t"
> +        "pshuflw   $$0x27, %%xmm0, %%xmm0  \n\t"
> +        "paddsw    %%xmm0, %%xmm1       \n\t"
> +        "psubsw    %%xmm0, %%xmm2       \n\t"
> +        "punpckldq %%xmm2, %%xmm1       \n\t"
> +        "pshufd    $$0x78, %%xmm1, %%xmm2  \n\t"
> +        "pmaddwd   %%xmm2, %%xmm3       \n\t"
> +        "pmaddwd   %%xmm1, %%xmm7       \n\t"
> +        "pmaddwd   %%xmm5, %%xmm2       \n\t"
> +        "pmaddwd   %%xmm4, %%xmm1       \n\t"
> +        "paddd     %%xmm7, %%xmm3       \n\t"
> +        "paddd     %%xmm2, %%xmm1       \n\t"
> +        "paddd     %%xmm6, %%xmm3       \n\t"
> +        "paddd     %%xmm6, %%xmm1       \n\t"
> +        "psrad     $%3, %%xmm3          \n\t"
> +        "psrad     $%3, %%xmm1          \n\t"
> +        "packssdw  %%xmm3, %%xmm1       \n\t"
> +        "movdqa    %%xmm1, $0(%4)       \n\t"
> +        ".endmacro                      \n\t"
> +        "movdqa    (%2), %%xmm6         \n\t"
> +        "FDCT_ROW_SSE2_H1 0,0           \n\t"
> +        "FDCT_ROW_SSE2 0                \n\t"
> +        "FDCT_ROW_SSE2_H2 64,0          \n\t"
> +        "FDCT_ROW_SSE2 64               \n\t"
> +
> +        "FDCT_ROW_SSE2_H1 16,64         \n\t"
> +        "FDCT_ROW_SSE2 16               \n\t"
> +        "FDCT_ROW_SSE2_H2 112,64        \n\t"
> +        "FDCT_ROW_SSE2 112              \n\t"
> +
> +        "FDCT_ROW_SSE2_H1 32,128        \n\t"
> +        "FDCT_ROW_SSE2 32               \n\t"
> +        "FDCT_ROW_SSE2_H2 96,128        \n\t"
> +        "FDCT_ROW_SSE2 96               \n\t"
> +
> +        "FDCT_ROW_SSE2_H1 48,192        \n\t"
> +        "FDCT_ROW_SSE2 48               \n\t"
> +        "FDCT_ROW_SSE2_H2 80,192        \n\t"
> +        "FDCT_ROW_SSE2 80               \n\t"
> +        :
> +        : "r" (in), "r" (tab_frw_01234567_sse2.tab_frw_01234567_sse2), "r" (fdct_r_row_sse2.fdct_r_row_sse2), "i" (SHIFT_FRW_ROW), "r" (out)
> +    );
> +#else
>      asm volatile(
>          ".macro FDCT_ROW_SSE2_H1 i t    \n\t"
>          "movq      \\i(%0), %%xmm2      \n\t"
> @@ -408,6 +469,7 @@
>          :
>          : "r" (in), "r" (tab_frw_01234567_sse2.tab_frw_01234567_sse2), "r" (fdct_r_row_sse2.fdct_r_row_sse2), "i" (SHIFT_FRW_ROW), "r" (out)
>      );
> +#endif

code duplication is not acceptable

[...]
> diff -NaurbB --exclude=.svn ../ffmpeg/libavcodec/i386/mmx.h ./libavcodec/i386/mmx.h
> --- ../ffmpeg/libavcodec/i386/mmx.h	2006-06-30 10:41:13.000000000 +0700
> +++ ./libavcodec/i386/mmx.h	2006-06-30 09:48:38.000000000 +0700
> @@ -23,6 +23,14 @@
>  #  define PTR_SIZE "4"
>  #endif
>  
> +#if defined(__APPLE__)
> +#  define BALIGN_8  ".align 3 \n\t"
> +#  define BALIGN_16 ".align 4 \n\t"
> +#else
> +#  define BALIGN_8  ".balign 8 \n\t"
> +#  define BALIGN_16 ".balign 16 \n\t"
> +#endif

completely unacceptable; this is not specific to mmx, so it doesn't belong in
mmx.h, it's not even specific to x86

btw, how hard is it for mac-intel users to download and use non-apple gcc?
I mean, does unmodified gcc work on mac-intel?

[...]

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

In the past you could go to a library and read, borrow or copy any book
Today you'd get arrested for mere telling someone where the library is