[MPlayer-dev-eng] [PATCH] Saturation & PSWAPD bugfix in mp3lib/dct64_3dnow.c & mp3lib/dct64_k7.c
Zuxy Meng
zuxy.meng at gmail.com
Wed Jun 21 14:50:37 CEST 2006
Hello,
The 3DNow! version of DCT64 in mp3lib uses PF2ID to convert float to int,
then MOVW to truncate the int to a short, which produces wrong results when
the value overflows 16 bits. If you listen carefully you'll notice the
difference in the played sound.
This patch replaces every PF2ID with PF2IW.
Original behavior under 3DNow! (erroneous):
1. Floating point DCT
2. Saturate to 32-bit int (PF2ID)
3. Truncate to 16-bit short (MOVW)
Now (same results as dct64_MMX):
1. Floating point DCT
2. Saturate to 16-bit short (PF2IW)
In addition, a false dependency on MMX2 in dct64_k7.c is fixed
(by replacing pshufw $78 with pswapd), so it can be used on a K6-2/3+.
--
Zuxy
Beauty is truth,
While truth is beauty.
PGP KeyID: E8555ED6
-------------- next part --------------
--- mplayer/mp3lib/dct64_3dnow.c 2006-06-07 12:53:34.000000000 +0800
+++ mplayer.new/mp3lib/dct64_3dnow.c 2006-06-21 20:36:20.000000000 +0800
@@ -745,7 +745,7 @@
" pxor %%mm7, %%mm1\n\t"
" pfacc %%mm1, %%mm0\n\t"
" pfmul %%mm6, %%mm0\n\t"
-" pf2id %%mm0, %%mm0\n\t"
+" pf2iw %%mm0, %%mm0\n\t"
" movd %%mm0, %%eax\n\t"
" movw %%ax, 512(%%esi)\n\t"
" psrlq $32, %%mm0\n\t"
@@ -755,12 +755,12 @@
" movd 12(%%ecx), %%mm0\n\t"
" pfsub 8(%%ecx), %%mm0\n\t"
" pfmul 120(%%ebx), %%mm0\n\t"
-" pf2id %%mm0, %%mm7\n\t"
+" pf2iw %%mm0, %%mm7\n\t"
" movd %%mm7, %%eax\n\t"
" movw %%ax, 256(%%edi)\n\t"
" pfadd 12(%%ecx), %%mm0\n\t"
" pfadd 8(%%ecx), %%mm0\n\t"
-" pf2id %%mm0, %%mm0\n\t"
+" pf2iw %%mm0, %%mm0\n\t"
" movd %%mm0, %%eax\n\t"
" movw %%ax, 256(%%esi)\n\t"
@@ -774,7 +774,7 @@
" pfmul 120(%%ebx), %%mm2\n\t"
" movq %%mm2, %%mm1\n\t"
-" pf2id %%mm2, %%mm7\n\t"
+" pf2iw %%mm2, %%mm7\n\t"
" movd %%mm7, %%eax\n\t"
" movw %%ax, 384(%%edi)\n\t"
@@ -784,15 +784,15 @@
" pfadd 16(%%ecx), %%mm0\n\t"
" pfadd 20(%%ecx), %%mm0\n\t"
-" pf2id %%mm0, %%mm0\n\t"
+" pf2iw %%mm0, %%mm0\n\t"
" movd %%mm0, %%eax\n\t"
" movw %%ax, 384(%%esi)\n\t"
" pfadd %%mm3, %%mm1\n\t"
-" pf2id %%mm1, %%mm1\n\t"
+" pf2iw %%mm1, %%mm1\n\t"
" movd %%mm1, %%eax\n\t"
" movw %%ax, 128(%%esi)\n\t"
" pfadd %%mm3, %%mm2\n\t"
-" pf2id %%mm2, %%mm2\n\t"
+" pf2iw %%mm2, %%mm2\n\t"
" movd %%mm2, %%eax\n\t"
" movw %%ax, 128(%%edi)\n\t"
@@ -802,8 +802,8 @@
" movq 48(%%edx), %%mm1\n\t"
" pfadd 48(%%edx), %%mm0\n\t"
" pfadd 40(%%edx), %%mm1\n\t"
-" pf2id %%mm0, %%mm0\n\t"
-" pf2id %%mm1, %%mm1\n\t"
+" pf2iw %%mm0, %%mm0\n\t"
+" pf2iw %%mm1, %%mm1\n\t"
" movd %%mm0, %%eax\n\t"
" movd %%mm1, %%ecx\n\t"
" movw %%ax, 448(%%esi)\n\t"
@@ -827,14 +827,14 @@
" punpckldq 92(%%edx), %%mm2\n\t"
" movq %%mm5, %%mm6\n\t"
" pfadd %%mm4, %%mm3\n\t"
-" pf2id %%mm0, %%mm1\n\t"
-" pf2id %%mm3, %%mm3\n\t"
+" pf2iw %%mm0, %%mm1\n\t"
+" pf2iw %%mm3, %%mm3\n\t"
" pfadd 88(%%edx), %%mm5\n\t"
" movd %%mm1, %%eax\n\t"
" movd %%mm3, %%ecx\n\t"
" movw %%ax, 448(%%edi)\n\t"
" movw %%cx, 192(%%esi)\n\t"
-" pf2id %%mm5, %%mm5\n\t"
+" pf2iw %%mm5, %%mm5\n\t"
" psrlq $32, %%mm1\n\t"
" psrlq $32, %%mm3\n\t"
" movd %%mm5, %%ebx\n\t"
@@ -844,12 +844,12 @@
" movw %%ax, 480(%%edi)\n\t"
" movw %%cx, 64(%%esi)\n\t"
" pfadd %%mm2, %%mm0\n\t"
-" pf2id %%mm0, %%mm0\n\t"
+" pf2iw %%mm0, %%mm0\n\t"
" movd %%mm0, %%eax\n\t"
" pfadd 68(%%edx), %%mm6\n\t"
" movw %%ax, 320(%%edi)\n\t"
" psrlq $32, %%mm0\n\t"
-" pf2id %%mm6, %%mm6\n\t"
+" pf2iw %%mm6, %%mm6\n\t"
" movd %%mm0, %%eax\n\t"
" movd %%mm6, %%ebx\n\t"
" movw %%ax, 416(%%edi)\n\t"
@@ -867,9 +867,9 @@
" pfadd 64(%%edx), %%mm0\n\t"
" pfadd 80(%%edx), %%mm2\n\t"
" pfadd 72(%%edx), %%mm4\n\t"
-" pf2id %%mm0, %%mm0\n\t"
-" pf2id %%mm2, %%mm2\n\t"
-" pf2id %%mm4, %%mm4\n\t"
+" pf2iw %%mm0, %%mm0\n\t"
+" pf2iw %%mm2, %%mm2\n\t"
+" pf2iw %%mm4, %%mm4\n\t"
" movd %%mm0, %%eax\n\t"
" movd %%mm2, %%ecx\n\t"
" movd %%mm4, %%ebx\n\t"
@@ -888,9 +888,9 @@
" pfadd 80(%%edx), %%mm1\n\t"
" pfadd 72(%%edx), %%mm3\n\t"
" pfadd 88(%%edx), %%mm5\n\t"
-" pf2id %%mm1, %%mm1\n\t"
-" pf2id %%mm3, %%mm3\n\t"
-" pf2id %%mm5, %%mm5\n\t"
+" pf2iw %%mm1, %%mm1\n\t"
+" pf2iw %%mm3, %%mm3\n\t"
+" pf2iw %%mm5, %%mm5\n\t"
" movd %%mm1, %%eax\n\t"
" movd %%mm3, %%ecx\n\t"
" movd %%mm5, %%ebx\n\t"
--- mplayer/mp3lib/dct64_k7.c 2006-06-07 12:53:34.000000000 +0800
+++ mplayer.new/mp3lib/dct64_k7.c 2006-06-21 20:39:34.000000000 +0800
@@ -32,8 +32,8 @@
" movq 8(%%eax), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
-" pshufw $78, 120(%%eax), %%mm1\n\t"
-" pshufw $78, 112(%%eax), %%mm5\n\t"
+" pswapd 120(%%eax), %%mm1\n\t"
+" pswapd 112(%%eax), %%mm5\n\t"
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, (%%edx)\n\t"
@@ -51,8 +51,8 @@
" movq 24(%%eax), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
-" pshufw $78, 104(%%eax), %%mm1\n\t"
-" pshufw $78, 96(%%eax), %%mm5\n\t"
+" pswapd 104(%%eax), %%mm1\n\t"
+" pswapd 96(%%eax), %%mm5\n\t"
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, 16(%%edx)\n\t"
@@ -70,8 +70,8 @@
" movq 40(%%eax), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
-" pshufw $78, 88(%%eax), %%mm1\n\t"
-" pshufw $78, 80(%%eax), %%mm5\n\t"
+" pswapd 88(%%eax), %%mm1\n\t"
+" pswapd 80(%%eax), %%mm5\n\t"
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, 32(%%edx)\n\t"
@@ -89,8 +89,8 @@
" movq 56(%%eax), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
-" pshufw $78, 72(%%eax), %%mm1\n\t"
-" pshufw $78, 64(%%eax), %%mm5\n\t"
+" pswapd 72(%%eax), %%mm1\n\t"
+" pswapd 64(%%eax), %%mm5\n\t"
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, 48(%%edx)\n\t"
@@ -110,8 +110,8 @@
" movq 8(%%edx), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
-" pshufw $78, 56(%%edx), %%mm1\n\t"
-" pshufw $78, 48(%%edx), %%mm5\n\t"
+" pswapd 56(%%edx), %%mm1\n\t"
+" pswapd 48(%%edx), %%mm5\n\t"
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, (%%ecx)\n\t"
@@ -129,8 +129,8 @@
" movq 24(%%edx), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
-" pshufw $78, 40(%%edx), %%mm1\n\t"
-" pshufw $78, 32(%%edx), %%mm5\n\t"
+" pswapd 40(%%edx), %%mm1\n\t"
+" pswapd 32(%%edx), %%mm5\n\t"
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, 16(%%ecx)\n\t"
@@ -150,8 +150,8 @@
" movq 72(%%edx), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
-" pshufw $78, 120(%%edx), %%mm1\n\t"
-" pshufw $78, 112(%%edx), %%mm5\n\t"
+" pswapd 120(%%edx), %%mm1\n\t"
+" pswapd 112(%%edx), %%mm5\n\t"
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, 64(%%ecx)\n\t"
@@ -169,8 +169,8 @@
" movq 88(%%edx), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
-" pshufw $78, 104(%%edx), %%mm1\n\t"
-" pshufw $78, 96(%%edx), %%mm5\n\t"
+" pswapd 104(%%edx), %%mm1\n\t"
+" pswapd 96(%%edx), %%mm5\n\t"
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, 80(%%ecx)\n\t"
@@ -193,8 +193,8 @@
" movq 8(%%ecx), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
-" pshufw $78, 24(%%ecx), %%mm1\n\t"
-" pshufw $78, 16(%%ecx), %%mm5\n\t"
+" pswapd 24(%%ecx), %%mm1\n\t"
+" pswapd 16(%%ecx), %%mm5\n\t"
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, (%%edx)\n\t"
@@ -212,8 +212,8 @@
" movq 40(%%ecx), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
-" pshufw $78, 56(%%ecx), %%mm1\n\t"
-" pshufw $78, 48(%%ecx), %%mm5\n\t"
+" pswapd 56(%%ecx), %%mm1\n\t"
+" pswapd 48(%%ecx), %%mm5\n\t"
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, 32(%%edx)\n\t"
@@ -231,8 +231,8 @@
" movq 72(%%ecx), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
-" pshufw $78, 88(%%ecx), %%mm1\n\t"
-" pshufw $78, 80(%%ecx), %%mm5\n\t"
+" pswapd 88(%%ecx), %%mm1\n\t"
+" pswapd 80(%%ecx), %%mm5\n\t"
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, 64(%%edx)\n\t"
@@ -250,8 +250,8 @@
" movq 104(%%ecx), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
-" pshufw $78, 120(%%ecx), %%mm1\n\t"
-" pshufw $78, 112(%%ecx), %%mm5\n\t"
+" pswapd 120(%%ecx), %%mm1\n\t"
+" pswapd 112(%%ecx), %%mm5\n\t"
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, 96(%%edx)\n\t"
@@ -273,8 +273,8 @@
" movq 16(%%edx), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
-" pshufw $78, 8(%%edx), %%mm1\n\t"
-" pshufw $78, 24(%%edx), %%mm5\n\t"
+" pswapd 8(%%edx), %%mm1\n\t"
+" pswapd 24(%%edx), %%mm5\n\t"
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, (%%ecx)\n\t"
@@ -292,8 +292,8 @@
" movq 48(%%edx), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
-" pshufw $78, 40(%%edx), %%mm1\n\t"
-" pshufw $78, 56(%%edx), %%mm5\n\t"
+" pswapd 40(%%edx), %%mm1\n\t"
+" pswapd 56(%%edx), %%mm5\n\t"
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, 32(%%ecx)\n\t"
@@ -311,8 +311,8 @@
" movq 80(%%edx), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
-" pshufw $78, 72(%%edx), %%mm1\n\t"
-" pshufw $78, 88(%%edx), %%mm5\n\t"
+" pswapd 72(%%edx), %%mm1\n\t"
+" pswapd 88(%%edx), %%mm5\n\t"
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, 64(%%ecx)\n\t"
@@ -330,8 +330,8 @@
" movq 112(%%edx), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
-" pshufw $78, 104(%%edx), %%mm1\n\t"
-" pshufw $78, 120(%%edx), %%mm5\n\t"
+" pswapd 104(%%edx), %%mm1\n\t"
+" pswapd 120(%%edx), %%mm5\n\t"
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, 96(%%ecx)\n\t"
@@ -591,7 +591,7 @@
" pxor %%mm7, %%mm1\n\t"
" pfacc %%mm1, %%mm0\n\t"
" pfmul %%mm6, %%mm0\n\t"
-" pf2id %%mm0, %%mm0\n\t"
+" pf2iw %%mm0, %%mm0\n\t"
" movd %%mm0, %%eax\n\t"
" movw %%ax, 512(%%esi)\n\t"
" psrlq $32, %%mm0\n\t"
@@ -601,12 +601,12 @@
" movd 12(%%ecx), %%mm0\n\t"
" pfsub 8(%%ecx), %%mm0\n\t"
" pfmul 120(%%ebx), %%mm0\n\t"
-" pf2id %%mm0, %%mm7\n\t"
+" pf2iw %%mm0, %%mm7\n\t"
" movd %%mm7, %%eax\n\t"
" movw %%ax, 256(%%edi)\n\t"
" pfadd 12(%%ecx), %%mm0\n\t"
" pfadd 8(%%ecx), %%mm0\n\t"
-" pf2id %%mm0, %%mm0\n\t"
+" pf2iw %%mm0, %%mm0\n\t"
" movd %%mm0, %%eax\n\t"
" movw %%ax, 256(%%esi)\n\t"
@@ -620,7 +620,7 @@
" pfmul 120(%%ebx), %%mm2\n\t"
" movq %%mm2, %%mm1\n\t"
-" pf2id %%mm2, %%mm7\n\t"
+" pf2iw %%mm2, %%mm7\n\t"
" movd %%mm7, %%eax\n\t"
" movw %%ax, 384(%%edi)\n\t"
@@ -630,15 +630,15 @@
" pfadd 16(%%ecx), %%mm0\n\t"
" pfadd 20(%%ecx), %%mm0\n\t"
-" pf2id %%mm0, %%mm0\n\t"
+" pf2iw %%mm0, %%mm0\n\t"
" movd %%mm0, %%eax\n\t"
" movw %%ax, 384(%%esi)\n\t"
" pfadd %%mm3, %%mm1\n\t"
-" pf2id %%mm1, %%mm1\n\t"
+" pf2iw %%mm1, %%mm1\n\t"
" movd %%mm1, %%eax\n\t"
" movw %%ax, 128(%%esi)\n\t"
" pfadd %%mm3, %%mm2\n\t"
-" pf2id %%mm2, %%mm2\n\t"
+" pf2iw %%mm2, %%mm2\n\t"
" movd %%mm2, %%eax\n\t"
" movw %%ax, 128(%%edi)\n\t"
@@ -648,8 +648,8 @@
" movq 48(%%edx), %%mm1\n\t"
" pfadd 48(%%edx), %%mm0\n\t"
" pfadd 40(%%edx), %%mm1\n\t"
-" pf2id %%mm0, %%mm0\n\t"
-" pf2id %%mm1, %%mm1\n\t"
+" pf2iw %%mm0, %%mm0\n\t"
+" pf2iw %%mm1, %%mm1\n\t"
" movd %%mm0, %%eax\n\t"
" movd %%mm1, %%ecx\n\t"
" movw %%ax, 448(%%esi)\n\t"
@@ -673,14 +673,14 @@
" punpckldq 92(%%edx), %%mm2\n\t"
" movq %%mm5, %%mm6\n\t"
" pfadd %%mm4, %%mm3\n\t"
-" pf2id %%mm0, %%mm1\n\t"
-" pf2id %%mm3, %%mm3\n\t"
+" pf2iw %%mm0, %%mm1\n\t"
+" pf2iw %%mm3, %%mm3\n\t"
" pfadd 88(%%edx), %%mm5\n\t"
" movd %%mm1, %%eax\n\t"
" movd %%mm3, %%ecx\n\t"
" movw %%ax, 448(%%edi)\n\t"
" movw %%cx, 192(%%esi)\n\t"
-" pf2id %%mm5, %%mm5\n\t"
+" pf2iw %%mm5, %%mm5\n\t"
" psrlq $32, %%mm1\n\t"
" psrlq $32, %%mm3\n\t"
" movd %%mm5, %%ebx\n\t"
@@ -690,12 +690,12 @@
" movw %%ax, 480(%%edi)\n\t"
" movw %%cx, 64(%%esi)\n\t"
" pfadd %%mm2, %%mm0\n\t"
-" pf2id %%mm0, %%mm0\n\t"
+" pf2iw %%mm0, %%mm0\n\t"
" movd %%mm0, %%eax\n\t"
" pfadd 68(%%edx), %%mm6\n\t"
" movw %%ax, 320(%%edi)\n\t"
" psrlq $32, %%mm0\n\t"
-" pf2id %%mm6, %%mm6\n\t"
+" pf2iw %%mm6, %%mm6\n\t"
" movd %%mm0, %%eax\n\t"
" movd %%mm6, %%ebx\n\t"
" movw %%ax, 416(%%edi)\n\t"
@@ -713,9 +713,9 @@
" pfadd 64(%%edx), %%mm0\n\t"
" pfadd 80(%%edx), %%mm2\n\t"
" pfadd 72(%%edx), %%mm4\n\t"
-" pf2id %%mm0, %%mm0\n\t"
-" pf2id %%mm2, %%mm2\n\t"
-" pf2id %%mm4, %%mm4\n\t"
+" pf2iw %%mm0, %%mm0\n\t"
+" pf2iw %%mm2, %%mm2\n\t"
+" pf2iw %%mm4, %%mm4\n\t"
" movd %%mm0, %%eax\n\t"
" movd %%mm2, %%ecx\n\t"
" movd %%mm4, %%ebx\n\t"
@@ -734,9 +734,9 @@
" pfadd 80(%%edx), %%mm1\n\t"
" pfadd 72(%%edx), %%mm3\n\t"
" pfadd 88(%%edx), %%mm5\n\t"
-" pf2id %%mm1, %%mm1\n\t"
-" pf2id %%mm3, %%mm3\n\t"
-" pf2id %%mm5, %%mm5\n\t"
+" pf2iw %%mm1, %%mm1\n\t"
+" pf2iw %%mm3, %%mm3\n\t"
+" pf2iw %%mm5, %%mm5\n\t"
" movd %%mm1, %%eax\n\t"
" movd %%mm3, %%ecx\n\t"
" movd %%mm5, %%ebx\n\t"
More information about the MPlayer-dev-eng
mailing list