[FFmpeg-devel] [PATCH] Optimization for add_8x8basis
Guillaume Poirier
gpoirier
Tue May 15 14:11:31 CEST 2007
Hi,
Zuxy Meng wrote:
> Who's the author of the original add_8x8basis_mmx and try_8x8basis_mmx
> so I can put his/her name in the copyright header?
>
svn annotate says that it's Michael's baby :-)
// Computes, with MMX, an accumulated sum of squares over the 64 int16_t
// entries of rem/basis/weight and returns it as an int.
// Per element the asm below evaluates, in wrapping 16-bit arithmetic:
//   t = ((high16(basis[i] * scale_word) + 1) >> 1) + rem[i]
//   v = low16((t >> 6) * weight[i])
// and accumulates v*v (pairwise sums, each shifted right by 4); the grand
// total is shifted right by 2 before being returned.
// scale must satisfy FFABS(scale) < 256 (asserted below).
2750 michael static int try_8x8basis_mmx(int16_t rem[64], int16_t
weight[64], int16_t basis[64], int scale){
// i doubles as the byte offset into the arrays (asm operand %0) and, after
// the loop, as the carrier of the result.
3578 michael long i=0;
4749 diego
6666 diego assert(FFABS(scale) < 256);
// Pre-shift scale so that pmulhw (which keeps the high 16 bits of the
// product) followed by the +1 / >>1 step below yields a rounded result.
// NOTE(review): BASIS_SHIFT / RECON_SHIFT are defined outside this excerpt.
2750 michael scale<<= 16 + 1 - BASIS_SHIFT + RECON_SHIFT;
2750 michael
// Operands: %0 = i, %1 = basis, %2 = rem, %3 = weight, %4 = scale.
2750 michael asm volatile(
4764 diego "pcmpeqw %%mm6, %%mm6 \n\t" // -1w
4764 diego "psrlw $15, %%mm6 \n\t" // 1w
// mm7 = 0: running accumulator (two 32-bit lanes).
4764 diego "pxor %%mm7, %%mm7 \n\t"
// mm5 = low 16-bit word of scale replicated into all four words.
4764 diego "movd %4, %%mm5 \n\t"
4764 diego "punpcklwd %%mm5, %%mm5 \n\t"
4764 diego "punpcklwd %%mm5, %%mm5 \n\t"
// Loop body: processes 16 bytes (8 int16_t) of each array per iteration.
4764 diego "1: \n\t"
4764 diego "movq (%1, %0), %%mm0 \n\t"
4764 diego "movq 8(%1, %0), %%mm1 \n\t"
// high 16 bits of basis * scale_word
4764 diego "pmulhw %%mm5, %%mm0 \n\t"
4764 diego "pmulhw %%mm5, %%mm1 \n\t"
// round: add 1, then arithmetic shift right by 1
4764 diego "paddw %%mm6, %%mm0 \n\t"
4764 diego "paddw %%mm6, %%mm1 \n\t"
4764 diego "psraw $1, %%mm0 \n\t"
4764 diego "psraw $1, %%mm1 \n\t"
// add the corresponding rem words (rem itself is not written here)
4764 diego "paddw (%2, %0), %%mm0 \n\t"
4764 diego "paddw 8(%2, %0), %%mm1 \n\t"
// NOTE(review): the literal 6 presumably corresponds to RECON_SHIFT -- confirm.
4764 diego "psraw $6, %%mm0 \n\t"
4764 diego "psraw $6, %%mm1 \n\t"
// multiply by weight, keeping the low 16 bits of each product
4764 diego "pmullw (%3, %0), %%mm0 \n\t"
4764 diego "pmullw 8(%3, %0), %%mm1 \n\t"
// square each word and add adjacent pairs into 32-bit lanes
4764 diego "pmaddwd %%mm0, %%mm0 \n\t"
4764 diego "pmaddwd %%mm1, %%mm1 \n\t"
4764 diego "paddd %%mm1, %%mm0 \n\t"
// scale the partial sums down by 4 bits before accumulating into mm7
4764 diego "psrld $4, %%mm0 \n\t"
4764 diego "paddd %%mm0, %%mm7 \n\t"
// advance 16 bytes; stop at 128 bytes = 64 int16_t elements
4764 diego "add $16, %0 \n\t"
4764 diego "cmp $128, %0 \n\t"
//FIXME optimize & bench
4764 diego " jb 1b \n\t"
// Horizontal reduction: add the high 32-bit lane of mm7 onto the low lane,
// shift the sum right by 2 and move it into i.
4764 diego "movq %%mm7, %%mm6 \n\t"
4764 diego "psrlq $32, %%mm7 \n\t"
4764 diego "paddd %%mm6, %%mm7 \n\t"
4764 diego "psrld $2, %%mm7 \n\t"
4764 diego "movd %%mm7, %0 \n\t"
4749 diego
2750 michael : "+r" (i)
2750 michael : "r"(basis), "r"(rem), "r"(weight), "g"(scale)
2750 michael );
// The long result is converted to the int return type.
2750 michael return i;
2750 michael }
// Adds a scaled, rounded copy of basis[] to rem[] in place, for all 64
// int16_t entries. Uses an MMX loop when FFABS(scale) < 256 and a plain C
// loop otherwise.
2750 michael static void add_8x8basis_mmx(int16_t rem[64],
int16_t basis[64], int scale){
// In the asm path i is a byte offset (operand %0); in the C path it is an
// element index.
3578 michael long i=0;
4749 diego
6666 diego if(FFABS(scale) < 256){
// Pre-shift scale so that pmulhw (high 16 bits of the product) followed by
// the +1 / >>1 step below yields a rounded result.
// NOTE(review): BASIS_SHIFT / RECON_SHIFT are defined outside this excerpt.
2750 michael scale<<= 16 + 1 - BASIS_SHIFT + RECON_SHIFT;
// Operands: %0 = i, %1 = basis, %2 = rem, %3 = scale.
// NOTE(review): the asm stores to rem through %2 but declares no clobber
// list (no "memory", no MMX registers) -- confirm this is intended.
2750 michael asm volatile(
4764 diego "pcmpeqw %%mm6, %%mm6 \n\t" // -1w
4764 diego "psrlw $15, %%mm6 \n\t" // 1w
// mm5 = low 16-bit word of scale replicated into all four words.
4764 diego "movd %3, %%mm5 \n\t"
4764 diego "punpcklwd %%mm5, %%mm5 \n\t"
4764 diego "punpcklwd %%mm5, %%mm5 \n\t"
// Loop body: processes 16 bytes (8 int16_t) of basis/rem per iteration.
4764 diego "1: \n\t"
4764 diego "movq (%1, %0), %%mm0 \n\t"
4764 diego "movq 8(%1, %0), %%mm1 \n\t"
// high 16 bits of basis * scale_word
4764 diego "pmulhw %%mm5, %%mm0 \n\t"
4764 diego "pmulhw %%mm5, %%mm1 \n\t"
// round: add 1, then arithmetic shift right by 1
4764 diego "paddw %%mm6, %%mm0 \n\t"
4764 diego "paddw %%mm6, %%mm1 \n\t"
4764 diego "psraw $1, %%mm0 \n\t"
4764 diego "psraw $1, %%mm1 \n\t"
// add the current rem words (wrapping 16-bit add) and store back to rem
4764 diego "paddw (%2, %0), %%mm0 \n\t"
4764 diego "paddw 8(%2, %0), %%mm1 \n\t"
4764 diego "movq %%mm0, (%2, %0) \n\t"
4764 diego "movq %%mm1, 8(%2, %0) \n\t"
// advance 16 bytes; stop at 128 bytes = 64 int16_t elements
4764 diego "add $16, %0 \n\t"
4764 diego "cmp $128, %0 \n\t"
//FIXME optimize & bench
4764 diego " jb 1b \n\t"
4749 diego
2750 michael : "+r" (i)
2750 michael : "r"(basis), "r"(rem), "g"(scale)
2750 michael );
2750 michael }else{
// C fallback for FFABS(scale) >= 256: scale is used unshifted here; each
// product gets half of the divisor added before the right shift by
// (BASIS_SHIFT - RECON_SHIFT), i.e. round-to-nearest.
2750 michael for(i=0; i<8*8; i++){
2750 michael rem[i] += (basis[i]*scale +
(1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
4749 diego }
2750 michael }
2750 michael }
More information about the ffmpeg-devel
mailing list