[MPlayer-dev-eng] [PATCH] compiling with Intel C

Guillaume POIRIER guillaume-bzh.poirier at laposte.net
Thu Jan 25 00:39:19 CET 2007


Hi Dmitry

Dmitry Antipov a écrit :
> Hello all,
> 
> an attached patch proposes the bits needed for the first successful :-) 
> compilation
> of svn-22004 with Intel C 9.1.046. It compiles and plays a few movies 
> found on my hard drive, but it's highly experimental.

You did it!!! Weeeh!

Were you able to measure any speed-up?
the option '-benchmark' is your friend.


[...]

A few things about your patch that look very much like cosmetics...

> 
> 
> ------------------------------------------------------------------------
> 
> diff -ur MPlayer-1.0svn22004/configure MPlayer-1.0svn22004-devel/configure
> --- MPlayer-1.0svn22004/configure	2007-01-24 18:08:55.000000000 +0300
> +++ MPlayer-1.0svn22004-devel/configure	2007-01-24 18:18:17.000000000 +0300
> @@ -1597,7 +1597,7 @@
>    _stripbinaries=no
>  elif test -z "$CFLAGS" ; then
>    if test "$cc_vendor" = "intel" ; then
> -    CFLAGS="-O2 $_march $_mcpu $_pipe -fomit-frame-pointer"
> +    CFLAGS="-w0 -O3 $_march $_mcpu $_pipe -fomit-frame-pointer -funroll-loops -fno-math-errno"
>    elif test "$cc_vendor" != "gnu" ; then
>      CFLAGS="-O2 $_march $_mcpu $_pipe"
>    else
> @@ -1609,8 +1609,6 @@
>  if test -n "$LDFLAGS" ; then
>    _ld_extra="$_ld_extra $LDFLAGS"
>    _warn_CFLAGS=yes
> -elif test "$cc_vendor" = "intel" ; then
> -  _ld_extra="$_ld_extra -i-static"
>  fi
>  if test -n "$CPPFLAGS" ; then
>    _inc_extra="$_inc_extra $CPPFLAGS"
> @@ -7236,7 +7234,7 @@
>    echores "no"
>  fi
>  
> -if cc_check -Wdeclaration-after-statement ; then
> +if test "$cc_vendor" != "intel" && cc_check -Wdeclaration-after-statement ; then
>    CFLAGS="-Wdeclaration-after-statement $CFLAGS"
>  fi
>  
> @@ -7721,7 +7719,11 @@
>  #endif
>  
>  /* attribute(used) as needed by some compilers */
> -#if (__GNUC__ * 100 + __GNUC_MINOR__ >= 300)
> +#if __INTEL_COMPILER
> +/* Since Intel C (as of 9.1.046) doesn't understand 'used' attribute, here is a hack
> +   to force it to emit variable which is referenced only in inline assembly code */
> +# define attribute_used __attribute__((section (".data")))
> +#elif (__GNUC__ * 100 + __GNUC_MINOR__ >= 300)
>  # define attribute_used __attribute__((used))
>  #else
>  # define attribute_used
> diff -ur MPlayer-1.0svn22004/libavcodec/h264.c MPlayer-1.0svn22004-devel/libavcodec/h264.c
> --- MPlayer-1.0svn22004/libavcodec/h264.c	2007-01-24 18:08:55.000000000 +0300
> +++ MPlayer-1.0svn22004-devel/libavcodec/h264.c	2007-01-24 17:20:49.000000000 +0300
> @@ -5982,7 +5982,7 @@
>      return ctx + 4 * cat;
>  }
>  
> -static const __attribute((used)) uint8_t last_coeff_flag_offset_8x8[63] = {
> +static const attribute_used uint8_t last_coeff_flag_offset_8x8[63] = {
>      0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
>      2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
>      3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
> diff -ur MPlayer-1.0svn22004/libavcodec/i386/dsputil_mmx.c MPlayer-1.0svn22004-devel/libavcodec/i386/dsputil_mmx.c
> --- MPlayer-1.0svn22004/libavcodec/i386/dsputil_mmx.c	2007-01-24 18:08:55.000000000 +0300
> +++ MPlayer-1.0svn22004-devel/libavcodec/i386/dsputil_mmx.c	2007-01-24 17:45:20.000000000 +0300
> @@ -633,6 +633,32 @@
>  }
>  
>  static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){
> +#ifdef __INTEL_COMPILER
> +  /* Ugly, but uses a minimal number of operands just to fit into Intel C limits */
> +  asm volatile("movd (%1), %%mm0               \n\t"
> +	       "mov %3, %%eax                  \n\t"
> +	       "movd (%1, %%eax), %%mm1        \n\t"
> +	       "add %3, %%eax                  \n\t"
> +	       "movd (%1, %%eax), %%mm2        \n\t"
> +	       "add %3, %%eax                  \n\t"
> +	       "movd (%1, %%eax), %%mm3        \n\t"
> +	       "punpcklbw %%mm1, %%mm0         \n\t"
> +	       "punpcklbw %%mm3, %%mm2         \n\t"
> +	       "movq %%mm0, %%mm1              \n\t"
> +	       "punpcklwd %%mm2, %%mm0         \n\t"
> +	       "punpckhwd %%mm2, %%mm1         \n\t"
> +	       "movd %%mm0, (%0)               \n\t"
> +	       "punpckhdq %%mm0, %%mm0         \n\t"
> +	       "mov %2, %%eax                  \n\t"
> +	       "movd %%mm0, (%0, %%eax)        \n\t"
> +	       "add %2, %%eax                  \n\t"
> +	       "movd %%mm1, (%0, %%eax)        \n\t"
> +	       "punpckhdq %%mm1, %%mm1         \n\t"
> +	       "add %2, %%eax                  \n\t"
> +	       "movd %%mm1, (%0, %%eax)        \n\t"
> +	       :: "r" (dst), "r" (src), "r"(dst_stride), "r"(src_stride)
> +	       : "eax");
> +#else

Is there a reason why this special version should only be used by ICC? 
Can GCC compile it too? Is it supposed to be slower? By how much?


>  
>  static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){
> diff -ur MPlayer-1.0svn22004/libmpcodecs/vf_fspp.c MPlayer-1.0svn22004-devel/libmpcodecs/vf_fspp.c
> --- MPlayer-1.0svn22004/libmpcodecs/vf_fspp.c	2007-01-24 18:08:55.000000000 +0300
> +++ MPlayer-1.0svn22004-devel/libmpcodecs/vf_fspp.c	2007-01-24 17:21:20.000000000 +0300
> @@ -717,7 +717,7 @@
>  
>  #ifdef HAVE_MMX
>  
> -static uint64_t attribute_used __attribute__((aligned(8))) temps[4];//!!
> +static uint64_t attribute_used __attribute__((aligned(8))) temps[4] = {0ULL};//!!

why 0ULL in particular?


>  static uint64_t attribute_used __attribute__((aligned(8))) MM_FIX_0_382683433=FIX64(0.382683433, 14); 
>  static uint64_t attribute_used __attribute__((aligned(8))) MM_FIX_0_541196100=FIX64(0.541196100, 14); 
> diff -ur MPlayer-1.0svn22004/loader/dmo/DMO_AudioDecoder.c MPlayer-1.0svn22004-devel/loader/dmo/DMO_AudioDecoder.c
> --- MPlayer-1.0svn22004/loader/dmo/DMO_AudioDecoder.c	2007-01-24 18:08:55.000000000 +0300
> +++ MPlayer-1.0svn22004-devel/loader/dmo/DMO_AudioDecoder.c	2007-01-24 17:18:31.000000000 +0300
> @@ -35,8 +35,6 @@
>  
>  #include "../../mp_msg.h"
>  
> -#define __MODULE__ "DirectShow audio decoder"
> -

Why ?


> diff -ur MPlayer-1.0svn22004/loader/dmo/DMO_VideoDecoder.c MPlayer-1.0svn22004-devel/loader/dmo/DMO_VideoDecoder.c
> --- MPlayer-1.0svn22004/loader/dmo/DMO_VideoDecoder.c	2007-01-24 18:08:55.000000000 +0300
> +++ MPlayer-1.0svn22004-devel/loader/dmo/DMO_VideoDecoder.c	2007-01-24 17:18:34.000000000 +0300
> @@ -58,8 +58,6 @@
>  // strcmp((const char*)info.dll,...)  is used instead of  (... == ...)
>  // so Arpi could use char* pointer in his simplified DMO_VideoDecoder class
>  
> -#define __MODULE__ "DirectShow_VideoDecoder"
> -

same here


> diff -ur MPlayer-1.0svn22004/loader/dshow/DS_AudioDecoder.c MPlayer-1.0svn22004-devel/loader/dshow/DS_AudioDecoder.c
> --- MPlayer-1.0svn22004/loader/dshow/DS_AudioDecoder.c	2007-01-24 18:08:55.000000000 +0300
> +++ MPlayer-1.0svn22004-devel/loader/dshow/DS_AudioDecoder.c	2007-01-24 17:18:39.000000000 +0300
> @@ -33,8 +33,6 @@
>  #include <stdio.h>
>  #include <stdlib.h>
>  
> -#define __MODULE__ "DirectShow audio decoder"

here too.


> diff -ur MPlayer-1.0svn22004/loader/dshow/DS_VideoDecoder.c MPlayer-1.0svn22004-devel/loader/dshow/DS_VideoDecoder.c
> --- MPlayer-1.0svn22004/loader/dshow/DS_VideoDecoder.c	2007-01-24 18:08:55.000000000 +0300
> +++ MPlayer-1.0svn22004-devel/loader/dshow/DS_VideoDecoder.c	2007-01-24 17:18:41.000000000 +0300
> @@ -58,8 +58,6 @@
>  // strcmp((const char*)info.dll,...)  is used instead of  (... == ...)
>  // so Arpi could use char* pointer in his simplified DS_VideoDecoder class
>  
> -#define __MODULE__ "DirectShow_VideoDecoder"

here also


> diff -ur MPlayer-1.0svn22004/mp3lib/dct64_sse.c MPlayer-1.0svn22004-devel/mp3lib/dct64_sse.c
> --- MPlayer-1.0svn22004/mp3lib/dct64_sse.c	2007-01-24 18:08:55.000000000 +0300
> +++ MPlayer-1.0svn22004-devel/mp3lib/dct64_sse.c	2007-01-24 18:14:00.000000000 +0300
> @@ -70,30 +70,27 @@
>  
>          for (i = 0; i < 0x20; i += 0x10)
>          {
> -            asm(
> -                "movaps    %4, %%xmm1\n\t"
> -                "movaps    %5, %%xmm3\n\t"
> -                "movaps    %6, %%xmm4\n\t"
> -                "movaps    %7, %%xmm6\n\t"
> -                "movaps    %%xmm1, %%xmm7\n\t"
> -                "shufps    $27, %%xmm7, %%xmm7\n\t"
> -                "movaps    %%xmm3, %%xmm5\n\t"
> -                "shufps    $27, %%xmm5, %%xmm5\n\t"
> -                "movaps    %%xmm4, %%xmm2\n\t"
> -                "shufps    $27, %%xmm2, %%xmm2\n\t"
> -                "movaps    %%xmm6, %%xmm0\n\t"
> -                "shufps    $27, %%xmm0, %%xmm0\n\t"
> -                "addps     %%xmm0, %%xmm1\n\t"
> -                "movaps    %%xmm1, %0\n\t"
> -                "addps     %%xmm2, %%xmm3\n\t"
> -                "movaps    %%xmm3, %1\n\t"
> -                "subps     %%xmm4, %%xmm5\n\t"
> -                "movaps    %%xmm5, %2\n\t"
> -                "subps     %%xmm6, %%xmm7\n\t"
> -                "movaps    %%xmm7, %3\n\t"
> -                :"=m"(*(b2 + i)), "=m"(*(b2 + i + 4)), "=m"(*(b2 + i + 8)), "=m"(*(b2 + i + 12))
> -                :"m"(*(b1 + i)), "m"(*(b1 + i + 4)), "m"(*(b1 + i + 8)), "m"(*(b1 + i + 12))
> -                );
> +	    asm("movaps    (%1), %%xmm1\n\t"
> +		"movaps    16(%1), %%xmm3\n\t"
> +		"movaps    32(%1), %%xmm4\n\t"
> +		"movaps    48(%1), %%xmm6\n\t"
> +		"movaps    %%xmm1, %%xmm7\n\t"
> +		"shufps    $27, %%xmm7, %%xmm7\n\t"
> +		"movaps    %%xmm3, %%xmm5\n\t"
> +		"shufps    $27, %%xmm5, %%xmm5\n\t"
> +		"movaps    %%xmm4, %%xmm2\n\t"
> +		"shufps    $27, %%xmm2, %%xmm2\n\t"
> +		"movaps    %%xmm6, %%xmm0\n\t"
> +		"shufps    $27, %%xmm0, %%xmm0\n\t"
> +		"addps     %%xmm0, %%xmm1\n\t"
> +		"movaps    %%xmm1, (%0)\n\t"
> +		"addps     %%xmm2, %%xmm3\n\t"
> +		"movaps    %%xmm3, 16(%0)\n\t"
> +		"subps     %%xmm4, %%xmm5\n\t"
> +		"movaps    %%xmm5, 32(%0)\n\t"
> +		"subps     %%xmm6, %%xmm7\n\t"
> +		"movaps    %%xmm7, 48(%0)\n\t"
> +		: : "r"(b2 + i), "r"(b1 + i));
>          }
>      }
>  

Having this re-indenting the code doesn't help tracking down changes. 
Please split cosmetic and functional changes.

I don't know what others may think, but let me say that it's really a 
huge achievement you've done here!
Thanks!

Guillaume



More information about the MPlayer-dev-eng mailing list