[Ffmpeg-devel] Still struggling with -fPIC and MMX asm-code

Thu Jul 14 22:20:29 CEST 2005

Hi!

I'm still struggling with the asm blocks, trying to make the code
compile with -fPIC. I need to split these asm blocks so that each one
uses fewer registers for its operands. But I think I'm doing something
completely wrong; maybe someone here can help me?

This is the code from mpegvideo_mmx_template.c:

/*
 * Quantise one block of coefficients with MMX, four 16-bit words per
 * iteration (quoted from mpegvideo_mmx_template.c).
 *
 * REG_a is both the in/out operand last_non_zero_p1 and the loop index:
 * it runs from -128 up to 0 in steps of 8 bytes, which is why every
 * pointer operand is passed pre-advanced by 64 elements.
 * (assumes 16-bit elements, i.e. 64 elements == 128 bytes -- TODO confirm)
 *
 * Register roles inside the loop:
 *   mm7  constant zero
 *   mm4  bitwise OR of every quantised magnitude (not read again in this
 *        statement; presumably consumed by the code that follows)
 *   mm3  running maximum, seeded from last_non_zero_p1
 *        (assuming PMAXW(a, b) leaves the word-wise max(a, b) in b)
 *
 * The statement needs six general-purpose registers at once: REG_a plus
 * the five "r" operands.  It loads and stores through those pointers,
 * hence the "memory" clobber.
 */
asm volatile(
     "movd %%"REG_a", %%mm3        \n\t" // mm3 = last_non_zero_p1
     SPREADW(%%mm3)                      // presumably copies that word into every lane
     "pxor %%mm7, %%mm7            \n\t" // mm7 = 0
     "pxor %%mm4, %%mm4            \n\t" // mm4 = 0
     "mov $-128, %%"REG_a"         \n\t" // loop index: -128, -120, ..., -8
     ".balign 16                   \n\t"
     "1:                           \n\t"
     "pxor %%mm1, %%mm1            \n\t" // 0
     "movq (%1, %%"REG_a"), %%mm0  \n\t" // block[i]
     "pcmpgtw %%mm0, %%mm1         \n\t" // sign mask: block[i] < 0 ? 0xFFFF : 0
     "pxor %%mm1, %%mm0            \n\t"
     "psubw %%mm1, %%mm0           \n\t" // ABS(block[i])
     "movq (%3, %%"REG_a"), %%mm6  \n\t" // bias[i]
     "paddusw %%mm6, %%mm0         \n\t" // ABS(block[i]) + bias[i] (unsigned saturating)
     "movq (%2, %%"REG_a"), %%mm5  \n\t" // qmat[i]
     // level = ((ABS(block[i]) + bias[i]) * qmat[i]) >> 16
     "pmulhw %%mm5, %%mm0          \n\t"
     "por %%mm0, %%mm4             \n\t" // accumulate magnitudes
     "pxor %%mm1, %%mm0            \n\t"
     // out = level with the sign of block[i] restored
     "psubw %%mm1, %%mm0           \n\t"
     "movq %%mm0, (%5, %%"REG_a")  \n\t" // temp_block[i] = out
     "pcmpeqw %%mm7, %%mm0         \n\t" // out == 0 ? 0xFFFF : 0
     "movq (%4, %%"REG_a"), %%mm1  \n\t" // inv_zigzag_direct16[i]
     "movq %%mm7, (%1, %%"REG_a")  \n\t" // block[i] = 0
     "pandn %%mm1, %%mm0           \n\t" // keep the table entry only where out != 0
     PMAXW(%%mm0, %%mm3)
     "add $8, %%"REG_a"            \n\t"
     " js 1b                       \n\t" // loop while the index is still negative
     // Horizontal reduction: fold the four words of mm3 into the lowest one.
     "movq %%mm3, %%mm0            \n\t"
     "psrlq $32, %%mm3             \n\t"
     PMAXW(%%mm0, %%mm3)
     "movq %%mm3, %%mm0            \n\t"
     "psrlq $16, %%mm3             \n\t"
     PMAXW(%%mm0, %%mm3)
     "movd %%mm3, %%"REG_a"        \n\t"
     "movzb %%al, %%"REG_a"        \n\t" // last_non_zero_p1 = low byte of the result
     : "+a" (last_non_zero_p1)
     : "r" (block+64), "r" (qmat+64), "r" (bias+64),
       "r" (inv_zigzag_direct16+64), "r" (temp_block+64)
     : "memory"
);

And this is how I thought it could be split into two asm blocks. But it
simply doesn't work anymore with this change. What am I doing wrong?

asm volatile(
    "movd %%"REG_a", %%mm3        \n\t" // last_non_zero_p1
    SPREADW(%%mm3)
    "pxor %%mm7, %%mm7            \n\t" // 0
    "pxor %%mm4, %%mm4            \n\t" // 0
    "mov $-128, %%"REG_a"         \n\t"
    ".balign 16                   \n\t"
    "1:                           \n\t"
    "pxor %%mm1, %%mm1            \n\t" // 0
    "movq (%0, %%"REG_a"), %%mm0  \n\t" // block[i]
    "pcmpgtw %%mm0, %%mm1         \n\t" // block[i] <= 0 ? 0xFF : 0x00
    "pxor %%mm1, %%mm0            \n\t"
    "psubw %%mm1, %%mm0           \n\t" // ABS(block[i])
    "movq (%1, %%"REG_a"), %%mm6  \n\t" // bias[0]
    "paddusw %%mm6, %%mm0         \n\t" // ABS(block[i]) + bias[0]
    :: "r" (block+64), "r" (bias+64)
);
asm volatile(
    "movq (%2, %%"REG_a"), %%mm5  \n\t" // qmat[i]
    "pmulhw %%mm5, %%mm0          \n\t" // (ABS(block[i])*qmat[0] +
bias[0]*qmat[0])>>16
    "por %%mm0, %%mm4             \n\t"
    "pxor %%mm1, %%mm0            \n\t"
    "psubw %%mm1, %%mm0           \n\t" // out=((ABS(block[i])*qmat[0] -
bias[0]*qmat[0])>>16)*sign(block[i])
    "movq %%mm0, (%4, %%"REG_a")  \n\t"
    "pcmpeqw %%mm7, %%mm0         \n\t" // out==0 ? 0xFF : 0x00
    "movq (%3, %%"REG_a"), %%mm1  \n\t"
    "movq %%mm7, (%1, %%"REG_a")  \n\t" // 0
    "pandn %%mm1, %%mm0           \n\t"
    PMAXW(%%mm0, %%mm3)
    "add $8, %%"REG_a"            \n\t"
    " js 1b                       \n\t"
    "movq %%mm3, %%mm0            \n\t"
    "psrlq $32, %%mm3             \n\t"
    PMAXW(%%mm0, %%mm3)
    "movq %%mm3, %%mm0            \n\t"
    "psrlq $16, %%mm3             \n\t"
    PMAXW(%%mm0, %%mm3)
    "movd %%mm3, %%"REG_a"        \n\t"
    "movzb %%al, %%"REG_a"        \n\t" // last_non_zero_p1
    : "+a" (last_non_zero_p1)
    : "r" (block+64), "r" (qmat+64),
      "r" (inv_zigzag_direct16+64), "r" (temp_block+64)
);

bye,

Tobias