[MPlayer-dev-eng] [PATCH] x86_64 mmx/sse/3dnow optimisation support

Reimar Döffinger Reimar.Doeffinger at stud.uni-karlsruhe.de
Tue Oct 12 12:18:42 CEST 2004


Hi,

> -#ifdef ARCH_X86
> +#if defined(ARCH_X86) || defined(ARCH_X86_64)

There are lots of these. From what I know, I find it incorrect to define 
X86_64 as a separate architecture, those are "only" 64bit extensions 
after all...
Think you would need to change less if you put the x86_64 case to the 
x86 case, and just adding something like:

if ("$host_arch" == x86_64); then
_def_arch += "#define ARCH_X86_64 1\n$_def_arch"
fi

so that both ARCH_X86 and ARCH_X86_64 are defined then.

> -       unsigned char *u, unsigned char *v, int w, int us, int vs)
> +       unsigned char *u, unsigned char *v, int w, long us, long vs)
>  {
>         asm volatile (""
> -               "pushl %%ebp \n\t"
> -               "movl 4(%%edx), %%ebp \n\t"
> -               "movl (%%edx), %%edx \n\t"
> +               "push %%"REG_BP" \n\t"
> +#ifdef ARCH_X86_64
> +               "mov %6, %%"REG_BP" \n\t"
> +#else
> +               "movl 4(%%"REG_d"), %%"REG_BP" \n\t"
> +               "movl (%%"REG_d"), %%"REG_d" \n\t"
> +#endif
[...]
> -               : "S" (y), "D" (dst), "a" (u), "b" (v), "d" (&us), "c" (w/16)
> +               : "S" (y), "D" (dst), "a" (u), "b" (v), "c" (w/16),
> +#ifdef ARCH_X86_64
> +               "d" (us), "r" (vs)
> +#else
> +               "d" (&us)
> +#endif

Weird. Why was this done this way? Because your X86_64 version works on 
my AMD Athlon, too... I think this difference shouldn't be there...

> @@ -48,18 +48,18 @@
>  #undef HAVE_MMX
>  #undef HAVE_MMX2
>  #undef HAVE_3DNOW
> -#undef ARCH_X86
> +
> +#ifndef CAN_COMPILE_X86_ASM
> 
>  #ifdef COMPILE_C
>  #undef HAVE_MMX
>  #undef HAVE_MMX2
>  #undef HAVE_3DNOW
> -#undef ARCH_X86
>  #define RENAME(a) a ## _C
>  #include "osd_template.c"
>  #endif
> 
> -#ifdef CAN_COMPILE_X86_ASM
> +#else
> 
>  //X86 noMMX versions
>  #ifdef COMPILE_C
> @@ -67,7 +67,6 @@
>  #undef HAVE_MMX
>  #undef HAVE_MMX2
>  #undef HAVE_3DNOW
> -#define ARCH_X86
>  #define RENAME(a) a ## _X86
>  #include "osd_template.c"
>  #endif
> @@ -78,7 +77,6 @@
>  #define HAVE_MMX
>  #undef HAVE_MMX2
>  #undef HAVE_3DNOW
> -#define ARCH_X86
>  #define RENAME(a) a ## _MMX
>  #include "osd_template.c"
>  #endif

I don't think this is correct. Without #undef ARCH_X86 you won't get the 
pure C versions of some functions AFAIK... You should add #undef 
ARCH_X86_64 instead. Also, have you checked that
+#ifndef CAN_COMPILE_X86_ASM doesn't break runtime cpudetection?

What are the changes in libvo/osd_template.c for?

> @@ -2582,7 +2581,8 @@
>                         int srcStride1, int srcStride2,
>                         int srcStride3, int dstStride)
>  {
> -    unsigned y,x,w,h;
> +    unsigned y,w,h;
> +    unsigned long x;

Huh? I'd say either change them all or modify only
> : "+r" (x)

> +#undef REAL_MOVNTQ
>  #undef MOVNTQ
>  #undef PAVGB
>  #undef PREFETCH
> @@ -54,29 +55,30 @@
>  #endif
> 
>  #ifdef HAVE_MMX2
> -#define MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
> +#define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
>  #else
> -#define MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
> +#define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
>  #endif
> +#define MOVNTQ(a,b)  REAL_MOVNTQ(a,b)

You added a lot of such REAL_* defines to postproc/swscale_template.c, 
what are they there for? They don't seem to be used anywhere...

Reducing the number of changed code line as far as possible and maybe 
also adding comments on the non-obvious part/changes will greatly 
increase the probability of getting this applied.

Btw: I'd expect there was some reason why "addl"/"popl" etc. were used 
in the original code and not "add"/"pop" etc. Can somebody explain?

Greetings,
Reimar Döffinger




More information about the MPlayer-dev-eng mailing list