[MPlayer-dev-eng] [PATCH] compiling with Intel C
Guillaume POIRIER
guillaume-bzh.poirier at laposte.net
Thu Jan 25 00:39:19 CET 2007
Hi Dmitry
Dmitry Antipov a écrit :
> Hello all,
>
> an attached patch proposes the bits needed for the first successful :-)
> compilation
> of svn-22004 with Intel C 9.1.046. It compiles and plays a few movies
> found on my hard drive, but it's highly experimental.
You did it!!! Weeeh!
Were you able to measure any speed-up?
the option '-benchmark' is your friend.
[...]
A few things about your patch that look very much like cosmetics...
>
>
> ------------------------------------------------------------------------
>
> diff -ur MPlayer-1.0svn22004/configure MPlayer-1.0svn22004-devel/configure
> --- MPlayer-1.0svn22004/configure 2007-01-24 18:08:55.000000000 +0300
> +++ MPlayer-1.0svn22004-devel/configure 2007-01-24 18:18:17.000000000 +0300
> @@ -1597,7 +1597,7 @@
> _stripbinaries=no
> elif test -z "$CFLAGS" ; then
> if test "$cc_vendor" = "intel" ; then
> - CFLAGS="-O2 $_march $_mcpu $_pipe -fomit-frame-pointer"
> + CFLAGS="-w0 -O3 $_march $_mcpu $_pipe -fomit-frame-pointer -funroll-loops -fno-math-errno"
> elif test "$cc_vendor" != "gnu" ; then
> CFLAGS="-O2 $_march $_mcpu $_pipe"
> else
> @@ -1609,8 +1609,6 @@
> if test -n "$LDFLAGS" ; then
> _ld_extra="$_ld_extra $LDFLAGS"
> _warn_CFLAGS=yes
> -elif test "$cc_vendor" = "intel" ; then
> - _ld_extra="$_ld_extra -i-static"
> fi
> if test -n "$CPPFLAGS" ; then
> _inc_extra="$_inc_extra $CPPFLAGS"
> @@ -7236,7 +7234,7 @@
> echores "no"
> fi
>
> -if cc_check -Wdeclaration-after-statement ; then
> +if test "$cc_vendor" != "intel" && cc_check -Wdeclaration-after-statement ; then
> CFLAGS="-Wdeclaration-after-statement $CFLAGS"
> fi
>
> @@ -7721,7 +7719,11 @@
> #endif
>
> /* attribute(used) as needed by some compilers */
> -#if (__GNUC__ * 100 + __GNUC_MINOR__ >= 300)
> +#if __INTEL_COMPILER
> +/* Since Intel C (as of 9.1.046) doesn't understand 'used' attribute, here is a hack
> + to force it to emit variable which is referenced only in inline assembly code */
> +# define attribute_used __attribute__((section (".data")))
> +#elif (__GNUC__ * 100 + __GNUC_MINOR__ >= 300)
> # define attribute_used __attribute__((used))
> #else
> # define attribute_used
> diff -ur MPlayer-1.0svn22004/libavcodec/h264.c MPlayer-1.0svn22004-devel/libavcodec/h264.c
> --- MPlayer-1.0svn22004/libavcodec/h264.c 2007-01-24 18:08:55.000000000 +0300
> +++ MPlayer-1.0svn22004-devel/libavcodec/h264.c 2007-01-24 17:20:49.000000000 +0300
> @@ -5982,7 +5982,7 @@
> return ctx + 4 * cat;
> }
>
> -static const __attribute((used)) uint8_t last_coeff_flag_offset_8x8[63] = {
> +static const attribute_used uint8_t last_coeff_flag_offset_8x8[63] = {
> 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
> 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
> diff -ur MPlayer-1.0svn22004/libavcodec/i386/dsputil_mmx.c MPlayer-1.0svn22004-devel/libavcodec/i386/dsputil_mmx.c
> --- MPlayer-1.0svn22004/libavcodec/i386/dsputil_mmx.c 2007-01-24 18:08:55.000000000 +0300
> +++ MPlayer-1.0svn22004-devel/libavcodec/i386/dsputil_mmx.c 2007-01-24 17:45:20.000000000 +0300
> @@ -633,6 +633,32 @@
> }
>
> static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){
> +#ifdef __INTEL_COMPILER
> + /* Ugly, but uses a minimal number of operands just to fit into Intel C limits */
> + asm volatile("movd (%1), %%mm0 \n\t"
> + "mov %3, %%eax \n\t"
> + "movd (%1, %%eax), %%mm1 \n\t"
> + "add %3, %%eax \n\t"
> + "movd (%1, %%eax), %%mm2 \n\t"
> + "add %3, %%eax \n\t"
> + "movd (%1, %%eax), %%mm3 \n\t"
> + "punpcklbw %%mm1, %%mm0 \n\t"
> + "punpcklbw %%mm3, %%mm2 \n\t"
> + "movq %%mm0, %%mm1 \n\t"
> + "punpcklwd %%mm2, %%mm0 \n\t"
> + "punpckhwd %%mm2, %%mm1 \n\t"
> + "movd %%mm0, (%0) \n\t"
> + "punpckhdq %%mm0, %%mm0 \n\t"
> + "mov %2, %%eax \n\t"
> + "movd %%mm0, (%0, %%eax) \n\t"
> + "add %2, %%eax \n\t"
> + "movd %%mm1, (%0, %%eax) \n\t"
> + "punpckhdq %%mm1, %%mm1 \n\t"
> + "add %2, %%eax \n\t"
> + "movd %%mm1, (%0, %%eax) \n\t"
> + :: "r" (dst), "r" (src), "r"(dst_stride), "r"(src_stride)
> + : "eax");
> +#else
Is there a reason why this special version should only be used by ICC?
Can GCC compile it too? Is it supposed to be slower? By how much?
>
> static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){
> diff -ur MPlayer-1.0svn22004/libmpcodecs/vf_fspp.c MPlayer-1.0svn22004-devel/libmpcodecs/vf_fspp.c
> --- MPlayer-1.0svn22004/libmpcodecs/vf_fspp.c 2007-01-24 18:08:55.000000000 +0300
> +++ MPlayer-1.0svn22004-devel/libmpcodecs/vf_fspp.c 2007-01-24 17:21:20.000000000 +0300
> @@ -717,7 +717,7 @@
>
> #ifdef HAVE_MMX
>
> -static uint64_t attribute_used __attribute__((aligned(8))) temps[4];//!!
> +static uint64_t attribute_used __attribute__((aligned(8))) temps[4] = {0ULL};//!!
why 0ULL in particular?
> static uint64_t attribute_used __attribute__((aligned(8))) MM_FIX_0_382683433=FIX64(0.382683433, 14);
> static uint64_t attribute_used __attribute__((aligned(8))) MM_FIX_0_541196100=FIX64(0.541196100, 14);
> diff -ur MPlayer-1.0svn22004/loader/dmo/DMO_AudioDecoder.c MPlayer-1.0svn22004-devel/loader/dmo/DMO_AudioDecoder.c
> --- MPlayer-1.0svn22004/loader/dmo/DMO_AudioDecoder.c 2007-01-24 18:08:55.000000000 +0300
> +++ MPlayer-1.0svn22004-devel/loader/dmo/DMO_AudioDecoder.c 2007-01-24 17:18:31.000000000 +0300
> @@ -35,8 +35,6 @@
>
> #include "../../mp_msg.h"
>
> -#define __MODULE__ "DirectShow audio decoder"
> -
Why ?
> diff -ur MPlayer-1.0svn22004/loader/dmo/DMO_VideoDecoder.c MPlayer-1.0svn22004-devel/loader/dmo/DMO_VideoDecoder.c
> --- MPlayer-1.0svn22004/loader/dmo/DMO_VideoDecoder.c 2007-01-24 18:08:55.000000000 +0300
> +++ MPlayer-1.0svn22004-devel/loader/dmo/DMO_VideoDecoder.c 2007-01-24 17:18:34.000000000 +0300
> @@ -58,8 +58,6 @@
> // strcmp((const char*)info.dll,...) is used instead of (... == ...)
> // so Arpi could use char* pointer in his simplified DMO_VideoDecoder class
>
> -#define __MODULE__ "DirectShow_VideoDecoder"
> -
same here
> diff -ur MPlayer-1.0svn22004/loader/dshow/DS_AudioDecoder.c MPlayer-1.0svn22004-devel/loader/dshow/DS_AudioDecoder.c
> --- MPlayer-1.0svn22004/loader/dshow/DS_AudioDecoder.c 2007-01-24 18:08:55.000000000 +0300
> +++ MPlayer-1.0svn22004-devel/loader/dshow/DS_AudioDecoder.c 2007-01-24 17:18:39.000000000 +0300
> @@ -33,8 +33,6 @@
> #include <stdio.h>
> #include <stdlib.h>
>
> -#define __MODULE__ "DirectShow audio decoder"
here too.
> diff -ur MPlayer-1.0svn22004/loader/dshow/DS_VideoDecoder.c MPlayer-1.0svn22004-devel/loader/dshow/DS_VideoDecoder.c
> --- MPlayer-1.0svn22004/loader/dshow/DS_VideoDecoder.c 2007-01-24 18:08:55.000000000 +0300
> +++ MPlayer-1.0svn22004-devel/loader/dshow/DS_VideoDecoder.c 2007-01-24 17:18:41.000000000 +0300
> @@ -58,8 +58,6 @@
> // strcmp((const char*)info.dll,...) is used instead of (... == ...)
> // so Arpi could use char* pointer in his simplified DS_VideoDecoder class
>
> -#define __MODULE__ "DirectShow_VideoDecoder"
here also
> diff -ur MPlayer-1.0svn22004/mp3lib/dct64_sse.c MPlayer-1.0svn22004-devel/mp3lib/dct64_sse.c
> --- MPlayer-1.0svn22004/mp3lib/dct64_sse.c 2007-01-24 18:08:55.000000000 +0300
> +++ MPlayer-1.0svn22004-devel/mp3lib/dct64_sse.c 2007-01-24 18:14:00.000000000 +0300
> @@ -70,30 +70,27 @@
>
> for (i = 0; i < 0x20; i += 0x10)
> {
> - asm(
> - "movaps %4, %%xmm1\n\t"
> - "movaps %5, %%xmm3\n\t"
> - "movaps %6, %%xmm4\n\t"
> - "movaps %7, %%xmm6\n\t"
> - "movaps %%xmm1, %%xmm7\n\t"
> - "shufps $27, %%xmm7, %%xmm7\n\t"
> - "movaps %%xmm3, %%xmm5\n\t"
> - "shufps $27, %%xmm5, %%xmm5\n\t"
> - "movaps %%xmm4, %%xmm2\n\t"
> - "shufps $27, %%xmm2, %%xmm2\n\t"
> - "movaps %%xmm6, %%xmm0\n\t"
> - "shufps $27, %%xmm0, %%xmm0\n\t"
> - "addps %%xmm0, %%xmm1\n\t"
> - "movaps %%xmm1, %0\n\t"
> - "addps %%xmm2, %%xmm3\n\t"
> - "movaps %%xmm3, %1\n\t"
> - "subps %%xmm4, %%xmm5\n\t"
> - "movaps %%xmm5, %2\n\t"
> - "subps %%xmm6, %%xmm7\n\t"
> - "movaps %%xmm7, %3\n\t"
> - :"=m"(*(b2 + i)), "=m"(*(b2 + i + 4)), "=m"(*(b2 + i + 8)), "=m"(*(b2 + i + 12))
> - :"m"(*(b1 + i)), "m"(*(b1 + i + 4)), "m"(*(b1 + i + 8)), "m"(*(b1 + i + 12))
> - );
> + asm("movaps (%1), %%xmm1\n\t"
> + "movaps 16(%1), %%xmm3\n\t"
> + "movaps 32(%1), %%xmm4\n\t"
> + "movaps 48(%1), %%xmm6\n\t"
> + "movaps %%xmm1, %%xmm7\n\t"
> + "shufps $27, %%xmm7, %%xmm7\n\t"
> + "movaps %%xmm3, %%xmm5\n\t"
> + "shufps $27, %%xmm5, %%xmm5\n\t"
> + "movaps %%xmm4, %%xmm2\n\t"
> + "shufps $27, %%xmm2, %%xmm2\n\t"
> + "movaps %%xmm6, %%xmm0\n\t"
> + "shufps $27, %%xmm0, %%xmm0\n\t"
> + "addps %%xmm0, %%xmm1\n\t"
> + "movaps %%xmm1, (%0)\n\t"
> + "addps %%xmm2, %%xmm3\n\t"
> + "movaps %%xmm3, 16(%0)\n\t"
> + "subps %%xmm4, %%xmm5\n\t"
> + "movaps %%xmm5, 32(%0)\n\t"
> + "subps %%xmm6, %%xmm7\n\t"
> + "movaps %%xmm7, 48(%0)\n\t"
> + : : "r"(b2 + i), "r"(b1 + i));
> }
> }
>
Having this re-indenting the code doesn't help tracking down changes.
Please split cosmetic and functional changes.
I don't know what others may think, but let me say that it's really a
huge achievement you've done here!
Thanks!
Guillaume
More information about the MPlayer-dev-eng
mailing list