[FFmpeg-devel] [PATCH] Optimization for add_8x8basis

Guillaume Poirier gpoirier
Tue May 15 14:11:31 CEST 2007


Hi,

Zuxy Meng wrote:

> Who's the author of the original add_8x8basis_mmx and try_8x8basis_mmx
> so I can put his/her name in the copyright header?
> 

svn annotate says that it's Michael's baby :-)

  2750    michael static int try_8x8basis_mmx(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
  3578    michael     long i=0;
  4749      diego
  6666      diego     assert(FFABS(scale) < 256);
  2750    michael     scale<<= 16 + 1 - BASIS_SHIFT + RECON_SHIFT;
  2750    michael
  2750    michael     asm volatile(
  4764      diego         "pcmpeqw %%mm6, %%mm6           \n\t" // -1w
  4764      diego         "psrlw $15, %%mm6               \n\t" //  1w
  4764      diego         "pxor %%mm7, %%mm7              \n\t"
  4764      diego         "movd  %4, %%mm5                \n\t"
  4764      diego         "punpcklwd %%mm5, %%mm5         \n\t"
  4764      diego         "punpcklwd %%mm5, %%mm5         \n\t"
  4764      diego         "1:                             \n\t"
  4764      diego         "movq  (%1, %0), %%mm0          \n\t"
  4764      diego         "movq  8(%1, %0), %%mm1         \n\t"
  4764      diego         "pmulhw %%mm5, %%mm0            \n\t"
  4764      diego         "pmulhw %%mm5, %%mm1            \n\t"
  4764      diego         "paddw %%mm6, %%mm0             \n\t"
  4764      diego         "paddw %%mm6, %%mm1             \n\t"
  4764      diego         "psraw $1, %%mm0                \n\t"
  4764      diego         "psraw $1, %%mm1                \n\t"
  4764      diego         "paddw (%2, %0), %%mm0          \n\t"
  4764      diego         "paddw 8(%2, %0), %%mm1         \n\t"
  4764      diego         "psraw $6, %%mm0                \n\t"
  4764      diego         "psraw $6, %%mm1                \n\t"
  4764      diego         "pmullw (%3, %0), %%mm0         \n\t"
  4764      diego         "pmullw 8(%3, %0), %%mm1        \n\t"
  4764      diego         "pmaddwd %%mm0, %%mm0           \n\t"
  4764      diego         "pmaddwd %%mm1, %%mm1           \n\t"
  4764      diego         "paddd %%mm1, %%mm0             \n\t"
  4764      diego         "psrld $4, %%mm0                \n\t"
  4764      diego         "paddd %%mm0, %%mm7             \n\t"
  4764      diego         "add $16, %0                    \n\t"
  4764      diego         "cmp $128, %0                   \n\t"
//FIXME optimize & bench
  4764      diego         " jb 1b                         \n\t"
  4764      diego         "movq %%mm7, %%mm6              \n\t"
  4764      diego         "psrlq $32, %%mm7               \n\t"
  4764      diego         "paddd %%mm6, %%mm7             \n\t"
  4764      diego         "psrld $2, %%mm7                \n\t"
  4764      diego         "movd %%mm7, %0                 \n\t"
  4749      diego
  2750    michael         : "+r" (i)
  2750    michael         : "r"(basis), "r"(rem), "r"(weight), "g"(scale)
  2750    michael     );
  2750    michael     return i;
  2750    michael }


  2750    michael static void add_8x8basis_mmx(int16_t rem[64], int16_t basis[64], int scale){
  3578    michael     long i=0;
  4749      diego
  6666      diego     if(FFABS(scale) < 256){
  2750    michael         scale<<= 16 + 1 - BASIS_SHIFT + RECON_SHIFT;
  2750    michael         asm volatile(
  4764      diego                 "pcmpeqw %%mm6, %%mm6   \n\t" // -1w
  4764      diego                 "psrlw $15, %%mm6       \n\t" //  1w
  4764      diego                 "movd  %3, %%mm5        \n\t"
  4764      diego                 "punpcklwd %%mm5, %%mm5 \n\t"
  4764      diego                 "punpcklwd %%mm5, %%mm5 \n\t"
  4764      diego                 "1:                     \n\t"
  4764      diego                 "movq  (%1, %0), %%mm0  \n\t"
  4764      diego                 "movq  8(%1, %0), %%mm1 \n\t"
  4764      diego                 "pmulhw %%mm5, %%mm0    \n\t"
  4764      diego                 "pmulhw %%mm5, %%mm1    \n\t"
  4764      diego                 "paddw %%mm6, %%mm0     \n\t"
  4764      diego                 "paddw %%mm6, %%mm1     \n\t"
  4764      diego                 "psraw $1, %%mm0        \n\t"
  4764      diego                 "psraw $1, %%mm1        \n\t"
  4764      diego                 "paddw (%2, %0), %%mm0  \n\t"
  4764      diego                 "paddw 8(%2, %0), %%mm1 \n\t"
  4764      diego                 "movq %%mm0, (%2, %0)   \n\t"
  4764      diego                 "movq %%mm1, 8(%2, %0)  \n\t"
  4764      diego                 "add $16, %0            \n\t"
  4764      diego                 "cmp $128, %0           \n\t"
//FIXME optimize & bench
  4764      diego                 " jb 1b                 \n\t"
  4749      diego
  2750    michael                 : "+r" (i)
  2750    michael                 : "r"(basis), "r"(rem), "g"(scale)
  2750    michael         );
  2750    michael     }else{
  2750    michael         for(i=0; i<8*8; i++){
  2750    michael             rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
  4749      diego         }
  2750    michael     }
  2750    michael }




More information about the ffmpeg-devel mailing list