CVS: main/libac3/downmix downmix_3dnow.S,NONE,1.1
Update of /cvsroot/mplayer/main/libac3/downmix In directory usw-pr-cvs1:/tmp/cvs-serv25710/main/libac3/downmix Added Files: downmix_3dnow.S Log Message: libac3 3DNow! optimizations. They reduce the CPU load on my Duron from 4.5 to 3.5 percent. --- NEW FILE --- /* * downmix_3dnow.S * * Replacement of downmix_kni.S with AMD's 3DNow! SIMD operations support * * Modified by Nick Kurshev <nickols_k@mail.ru> * * Copyright (C) Yuqing Deng <Yuqing_Deng@brown.edu> - April 2000 * * downmix_3dnow.S is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2, or (at your option) * any later version. * * downmix_3dnow.S is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with GNU Make; see the file COPYING. If not, write to * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
* */

/*
 * Syntax : GNU as, AT&T operand order (source, destination).
 * Target : 32-bit x86 with MMX and AMD 3DNow!.
 * ABI    : every argument is read from the stack relative to %ebp
 *          (8(%ebp), 12(%ebp), ...), i.e. the functions look like plain
 *          cdecl C functions -- TODO confirm against the C prototypes.
 *
 * Data layout as used by the code below:
 *   - "samples" is addressed as consecutive planes of PLANE_BYTES bytes;
 *     each loop walks one plane in 8-byte (two packed float) steps,
 *     PAIRS_PER_PLANE times, which is exactly PLANE_BYTES bytes.
 *   - "dm_par" is read at byte offsets 0 (unit), 4 (clev) and 8 (slev).
 *   - Every down-mix routine stores its left result in plane 0 and its
 *     right result in plane 1 of the same buffer (in place).
 *
 * Register contract (kept identical to the original code): %ebx is saved
 * and restored, and so are %eax and %ecx (plus %edx where it is used).
 * femms is executed on entry and before returning.
 */

	.equ	PLANE_BYTES,     1024		/* byte distance between two planes */
	.equ	PLANE1,          1 * PLANE_BYTES
	.equ	PLANE2,          2 * PLANE_BYTES
	.equ	PLANE3,          3 * PLANE_BYTES
	.equ	PLANE4,          4 * PLANE_BYTES
	.equ	PAIRS_PER_PLANE, 128		/* 8-byte steps needed for one plane */

	.equ	DM_UNIT, 0			/* dm_par byte offsets */
	.equ	DM_CLEV, 4
	.equ	DM_SLEV, 8

/* Load the float at \off(%ebx) and copy it into both halves of \mmreg. */
	.macro	SPLAT_COEF off, mmreg
	movd	\off(%ebx), \mmreg
	punpckldq \mmreg, \mmreg
	.endm

/*
 * Common entry sequence of the down-mix routines.
 * Afterwards: %eax = samples, %ebx = dm_par, %ecx = loop counter.
 */
	.macro	DOWNMIX_ENTER
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%eax
	pushl	%ebx
	pushl	%ecx
	femms
	movl	8(%ebp), %eax			/* samples[] */
	movl	12(%ebp), %ebx			/* &dm_par   */
	movl	$PAIRS_PER_PLANE, %ecx
	.endm

/* Common exit sequence of the down-mix routines (includes the ret). */
	.macro	DOWNMIX_LEAVE
	popl	%ecx
	popl	%ebx
	popl	%eax
	femms
	leave
	ret
	.endm

	.section .rodata
	.align 4
/* Scale factor applied to mono samples; the value is 1/sqrt(2). */
sqrt2:	.float 0f0.7071068
	.p2align 5, 0

	.section .text
	.align 4

/*
 * downmix_3f_2r_to_2ch(samples, dm_par)
 * Planes read: 0 = left, 1 = center, 2 = right, 3 = left surround,
 *              4 = right surround.
 *   left'  = unit*left  + clev*center + slev*left_sur
 *   right' = unit*right + clev*center + slev*right_sur
 */
	.global downmix_3f_2r_to_2ch
	.type	downmix_3f_2r_to_2ch, @function
downmix_3f_2r_to_2ch:
	DOWNMIX_ENTER
	SPLAT_COEF DM_UNIT, %mm5		/* unit | unit */
	SPLAT_COEF DM_CLEV, %mm6		/* clev | clev */
	SPLAT_COEF DM_SLEV, %mm7		/* slev | slev */
.Ldm_3f_2r_loop:
	movq	(%eax), %mm0			/* left      */
	movq	PLANE2(%eax), %mm1		/* right     */
	movq	PLANE1(%eax), %mm2		/* center    */
	pfmul	%mm5, %mm0
	pfmul	%mm5, %mm1
	pfmul	%mm6, %mm2
	movq	PLANE3(%eax), %mm3		/* left sur  */
	movq	PLANE4(%eax), %mm4		/* right sur */
	pfadd	%mm2, %mm0
	pfadd	%mm2, %mm1
	pfmul	%mm7, %mm3
	pfmul	%mm7, %mm4
	pfadd	%mm3, %mm0
	pfadd	%mm4, %mm1
	movq	%mm0, (%eax)
	/* Overwrites the center pair that was already consumed above. */
	movq	%mm1, PLANE1(%eax)
	addl	$8, %eax
	decl	%ecx
	jnz	.Ldm_3f_2r_loop
	DOWNMIX_LEAVE
	.size	downmix_3f_2r_to_2ch, .-downmix_3f_2r_to_2ch

	.p2align 4,,7
/*
 * downmix_2f_2r_to_2ch(samples, dm_par)
 * Planes read: 0 = left, 1 = right, 2 = left surround, 3 = right surround.
 *   left'  = unit*left  + slev*left_sur
 *   right' = unit*right + slev*right_sur
 */
	.global downmix_2f_2r_to_2ch
	.type	downmix_2f_2r_to_2ch, @function
downmix_2f_2r_to_2ch:
	DOWNMIX_ENTER
	SPLAT_COEF DM_UNIT, %mm5		/* unit | unit */
	SPLAT_COEF DM_SLEV, %mm7		/* slev | slev */
.Ldm_2f_2r_loop:
	movq	(%eax), %mm0			/* left      */
	movq	PLANE1(%eax), %mm1		/* right     */
	movq	PLANE2(%eax), %mm3		/* left sur  */
	pfmul	%mm5, %mm0
	pfmul	%mm5, %mm1
	movq	PLANE3(%eax), %mm4		/* right sur */
	pfmul	%mm7, %mm3
	pfmul	%mm7, %mm4
	pfadd	%mm3, %mm0
	pfadd	%mm4, %mm1
	movq	%mm0, (%eax)
	movq	%mm1, PLANE1(%eax)
	addl	$8, %eax
	decl	%ecx
	jnz	.Ldm_2f_2r_loop
	DOWNMIX_LEAVE
	.size	downmix_2f_2r_to_2ch, .-downmix_2f_2r_to_2ch

	.p2align 4,,7
/*
 * downmix_3f_1r_to_2ch(samples, dm_par)
 * Planes read: 0 = left, 1 = center, 2 = right, 3 = surround.
 *   left'  = unit*left  + clev*center - slev*sur
 *   right' = unit*right + clev*center + slev*sur
 */
	.global downmix_3f_1r_to_2ch
	.type	downmix_3f_1r_to_2ch, @function
downmix_3f_1r_to_2ch:
	DOWNMIX_ENTER
	SPLAT_COEF DM_UNIT, %mm5		/* unit | unit */
	SPLAT_COEF DM_CLEV, %mm6		/* clev | clev */
	SPLAT_COEF DM_SLEV, %mm7		/* slev | slev */
.Ldm_3f_1r_loop:
	movq	(%eax), %mm0			/* left     */
	movq	PLANE2(%eax), %mm1		/* right    */
	movq	PLANE1(%eax), %mm2		/* center   */
	pfmul	%mm5, %mm0
	pfmul	%mm5, %mm1
	pfmul	%mm6, %mm2
	movq	PLANE3(%eax), %mm3		/* surround */
	pfadd	%mm2, %mm0
	pfmul	%mm7, %mm3
	pfadd	%mm2, %mm1
	pfsub	%mm3, %mm0			/* surround is subtracted on the left ... */
	pfadd	%mm3, %mm1			/* ... and added on the right             */
	movq	%mm0, (%eax)
	movq	%mm1, PLANE1(%eax)
	addl	$8, %eax
	decl	%ecx
	jnz	.Ldm_3f_1r_loop
	DOWNMIX_LEAVE
	.size	downmix_3f_1r_to_2ch, .-downmix_3f_1r_to_2ch

	.p2align 4,,7
/*
 * downmix_2f_1r_to_2ch(samples, dm_par)
 * Planes read: 0 = left, 1 = right, 2 = surround.
 *   left'  = unit*left  - slev*sur
 *   right' = unit*right + slev*sur
 */
	.global downmix_2f_1r_to_2ch
	.type	downmix_2f_1r_to_2ch, @function
downmix_2f_1r_to_2ch:
	DOWNMIX_ENTER
	SPLAT_COEF DM_UNIT, %mm5		/* unit | unit */
	SPLAT_COEF DM_SLEV, %mm7		/* slev | slev */
.Ldm_2f_1r_loop:
	movq	(%eax), %mm0			/* left     */
	movq	PLANE1(%eax), %mm1		/* right    */
	pfmul	%mm5, %mm0
	pfmul	%mm5, %mm1
	movq	PLANE2(%eax), %mm3		/* surround */
	pfmul	%mm7, %mm3
	pfsub	%mm3, %mm0
	pfadd	%mm3, %mm1
	movq	%mm0, (%eax)
	movq	%mm1, PLANE1(%eax)
	addl	$8, %eax
	decl	%ecx
	jnz	.Ldm_2f_1r_loop
	DOWNMIX_LEAVE
	.size	downmix_2f_1r_to_2ch, .-downmix_2f_1r_to_2ch

	.p2align 4,,7
/*
 * downmix_3f_0r_to_2ch(samples, dm_par)
 * Planes read: 0 = left, 1 = center, 2 = right.
 *   left'  = unit*left  + clev*center
 *   right' = unit*right + clev*center
 */
	.global downmix_3f_0r_to_2ch
	.type	downmix_3f_0r_to_2ch, @function
downmix_3f_0r_to_2ch:
	DOWNMIX_ENTER
	SPLAT_COEF DM_UNIT, %mm5		/* unit | unit */
	SPLAT_COEF DM_CLEV, %mm6		/* clev | clev */
.Ldm_3f_0r_loop:
	movq	(%eax), %mm0			/* left   */
	movq	PLANE2(%eax), %mm1		/* right  */
	movq	PLANE1(%eax), %mm2		/* center */
	pfmul	%mm5, %mm0
	pfmul	%mm5, %mm1
	pfmul	%mm6, %mm2
	pfadd	%mm2, %mm0
	pfadd	%mm2, %mm1
	movq	%mm0, (%eax)
	movq	%mm1, PLANE1(%eax)
	addl	$8, %eax
	decl	%ecx
	jnz	.Ldm_3f_0r_loop
	DOWNMIX_LEAVE
	.size	downmix_3f_0r_to_2ch, .-downmix_3f_0r_to_2ch

	.p2align 4,,7
/*
 * stream_sample_2ch_to_s16(s16_samples, left, right)
 * Interleaves two float planes into signed 16-bit output in the order
 * l0 r0 l1 r1 ...  Each iteration consumes two floats from each input
 * and stores four 16-bit values (8 bytes).
 * Saves/restores %eax, %ebx, %edx and %ecx.
 */
	.global stream_sample_2ch_to_s16
	.type	stream_sample_2ch_to_s16, @function
stream_sample_2ch_to_s16:
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%eax
	pushl	%ebx
	pushl	%edx
	pushl	%ecx
	femms
	movl	8(%ebp), %eax			/* s16_samples */
	movl	12(%ebp), %ebx			/* left        */
	movl	16(%ebp), %edx			/* right       */
	movl	$PAIRS_PER_PLANE, %ecx
.Ls16_2ch_loop:
	movq	(%ebx), %mm0			/* l1 | l0 */
	movq	(%edx), %mm1			/* r1 | r0 */
	movq	%mm0, %mm2			/* l1 | l0 */
	punpckldq %mm1, %mm0			/* r0 | l0 */
	punpckhdq %mm1, %mm2			/* r1 | l1 */
	pf2id	%mm0, %mm0			/* r0 | l0 --> int_32 */
	pf2id	%mm2, %mm2			/* r1 | l1 --> int_32 */
	packssdw %mm2, %mm0			/* r1 l1 r0 l0 --> int_16 */
	movq	%mm0, (%eax)
	addl	$8, %eax
	addl	$8, %ebx
	addl	$8, %edx
	decl	%ecx
	jnz	.Ls16_2ch_loop
	popl	%ecx
	popl	%edx
	popl	%ebx
	popl	%eax
	femms
	leave
	ret
	.size	stream_sample_2ch_to_s16, .-stream_sample_2ch_to_s16

	.p2align 4,,7
/*
 * stream_sample_1ch_to_s16(s16_samples, center)
 * Scales a single float plane by the sqrt2 constant, converts it to
 * signed 16-bit and writes every sample twice in a row (c0 c0 c1 c1 ...),
 * so both slots of each output pair carry the same value.
 * Saves/restores %eax, %ebx and %ecx.
 */
	.global stream_sample_1ch_to_s16
	.type	stream_sample_1ch_to_s16, @function
stream_sample_1ch_to_s16:
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%eax
	pushl	%ebx
	pushl	%ecx
	femms
	movd	sqrt2, %mm7
	punpckldq %mm7, %mm7			/* sqrt2 | sqrt2 */
	movl	8(%ebp), %eax			/* s16_samples */
	movl	12(%ebp), %ebx			/* center      */
	movl	$PAIRS_PER_PLANE, %ecx
.Ls16_1ch_loop:
	movq	(%ebx), %mm0			/* c1 | c0 */
	pfmul	%mm7, %mm0
	pf2id	%mm0, %mm0			/* c1 | c0 --> int_32 */
	packssdw %mm0, %mm0			/* c1 c0 c1 c0 --> int_16 */
	/*
	 * packssdw alone leaves the words alternating (c0,c1,c0,c1 from
	 * the low word up).  Interleaving the low two words with
	 * themselves yields the intended duplication c0,c0,c1,c1.
	 */
	punpcklwd %mm0, %mm0			/* c1 c1 c0 c0 */
	movq	%mm0, (%eax)
	addl	$8, %eax
	addl	$8, %ebx
	decl	%ecx
	jnz	.Ls16_1ch_loop
	popl	%ecx
	popl	%ebx
	popl	%eax
	femms
	leave
	ret
	.size	stream_sample_1ch_to_s16, .-stream_sample_1ch_to_s16

_______________________________________________
Mplayer-cvslog mailing list
Mplayer-cvslog@lists.sourceforge.net
http://lists.sourceforge.net/lists/listinfo/mplayer-cvslog
participants (1)
-
Nick Kurshev