[MN-dev] [mndiff]: r59 - in trunk/2010: 2010.cpp 2_all_a.asm 2_cryp_a.asm 2_cryp_a.h 2_crypt.cpp 2_glob_a.asm 2_hw.cpp 2_hw_a.asm 2_hw_a.h 2_hw_mem.cpp 2_nag.cpp 2_nag_a.asm 2_vc.cpp 2_vc_a.asm makefile
michael
subversion at mplayerhq.hu
Wed Jul 4 10:39:35 CEST 2007
Author: michael
Date: Wed Jul 4 10:39:35 2007
New Revision: 59
Log:
next version from unknown date
Added:
trunk/2010/2_hw_a.asm
trunk/2010/2_hw_a.h
Modified:
trunk/2010/2010.cpp
trunk/2010/2_all_a.asm
trunk/2010/2_cryp_a.asm
trunk/2010/2_cryp_a.h
trunk/2010/2_crypt.cpp
trunk/2010/2_glob_a.asm
trunk/2010/2_hw.cpp
trunk/2010/2_hw_mem.cpp
trunk/2010/2_nag.cpp
trunk/2010/2_nag_a.asm
trunk/2010/2_vc.cpp
trunk/2010/2_vc_a.asm
trunk/2010/makefile
Modified: trunk/2010/2010.cpp
==============================================================================
--- trunk/2010/2010.cpp (original)
+++ trunk/2010/2010.cpp Wed Jul 4 10:39:35 2007
@@ -1,3 +1,4 @@
+
//2010 0.1 Copyright (c) 1998 Michael Niedermayer
#include <stdlib.h>
@@ -55,7 +56,7 @@ int _crt0_startup_flags = _CRT0_FLAG_NEA
byte font[4096];
int vid=0;
int bpp;
- int *yuv2RgbLut;
+ unsigned short *yuv2RgbLut;
int edge_lut[256];
MENULEVEL menuLevel=mLNorm;
int infoPosX;
@@ -174,7 +175,8 @@ int main(int argc, char **argv){
NoFlip=1;
}
else if(stricmp(argv[i],"-NORGB")==0){
- yuvMode=2;
+ yuvMode=
+ grabYuvMode=2;
}
else if(stricmp(argv[i],"-maxBuf")==0){
i++;
@@ -228,35 +230,23 @@ int main(int argc, char **argv){
}
printf("Building YUV to RGB LookUp Tables!\n");
- yuv2RgbLut=new int[128*4*2];
- for(int i=0; i<128; i++){
- int j;
- if(i>=64) j=i-128;
- else j=i;
- double uv= double(j);
-
- int vr= int( uv * 1.375 ) & 0x1FF;
- int vg= int(-uv * 0.703125 * 2) & 0x3FF;
- int ug= int(-uv * 0.34375 * 2) & 0x3FF;
- int ub= int( uv * 1.734375 ) & 0x1FF;
- int yn= i;
- int yg= i<<1;
-
-// 565565
-// 655655
+ yuv2RgbLut=new unsigned short[256*128];
+ for(int u=-16; u<=15; u++)
+ {
+ for(int v=-16; v<=15; v++)
+ {
+ for(int y=0; y<=31; y++)
+ {
+ int r= minmax(0, int( y + double(v) * 1.375 ), 31);
+ int g= minmax(0, int((y - double(v) * 0.703 - double(u) * 0.343)*2), 63);
+ int b= minmax(0, int( y + double(u) * 1.734 ), 31);
-// B R G
-// G B R
+ const int xLut= y + ((u&0x1F)<<5) + ((v&0x1F)<<10);
+ yuv2RgbLut[ xLut ]= b + (g<<5) + (r<<11);
- yuv2RgbLut[(i<<1) ]= ub +(ug<<21) ;
- yuv2RgbLut[(i<<1) + 1 ]= ug +(ub<<11) ;
- yuv2RgbLut[(i<<1) + 256 ]= yn +(yn<<11) ;
- yuv2RgbLut[(i<<1) + 257 ]= yg ;
- yuv2RgbLut[(i<<1) + 512 ]= (vr<<11) +(vg<<21) ;
- yuv2RgbLut[(i<<1) + 513 ]= vg +(vr<<22);
- yuv2RgbLut[(i<<1) + 768 ]= +(yg<<21) ;
- yuv2RgbLut[(i<<1) + 769 ]= +(yn<<11) +(yn<<22);
- }
+ }
+ }
+ }
if((vib=get_vesa_info())==0) error(NoVESA);
if(vib->ver < 0x200) error(VESAVer);
@@ -294,11 +284,6 @@ int main(int argc, char **argv){
vgax=mib->Xres;
vgay=mib->Yres;
-/* if(yuvMode!=0) mc=1;
- else mc=2;
- if(yuvMode==2) mc2=1;
- else mc2=2;
-*/
mc=mc2=1;
xresc=vgax<<mc;
bpp=mib->bpp;
@@ -436,7 +421,7 @@ int main(int argc, char **argv){
case 'I' : iState= !iState;
break;
case 'v' :
- case 'V' : iVSmooth++; iVSmooth%=3;
+ case 'V' : iVSmooth++; iVSmooth%=3;
break;
case 'h' :
case 'H' : helpState= !helpState;
@@ -531,7 +516,7 @@ int main(int argc, char **argv){
cryptStd= vc;
if(yuvMode==0) yuvMode=1;
grabYuvMode=yuvMode;
- iVSmooth=0;
+// iVSmooth=2;
setStdScale();
contGrab();
break;
@@ -539,7 +524,7 @@ int main(int argc, char **argv){
cryptStd= nag;
if(yuvMode==0) yuvMode=1;
grabYuvMode=yuvMode;
- iVSmooth=0;
+// iVSmooth=2;
setStdScale();
contGrab();
break;
Modified: trunk/2010/2_all_a.asm
==============================================================================
--- trunk/2010/2_all_a.asm (original)
+++ trunk/2010/2_all_a.asm Wed Jul 4 10:39:35 2007
@@ -17,7 +17,38 @@
;%define int3 db 0xCC
+%define lt(a,b) ( ((a)-(b)) >>31)
+%define gt(a,b) ( ((b)-(a)) >>31)
+%define le(a,b) ( ((a)-(b)-1) >>31)
+%define ge(a,b) ( ((b)-(a)-1) >>31)
+%define eq(a,b) (ge(a,b) & le(a,b))
+; The five defines above are branch-free comparison helpers for assemble-time expressions: each takes bit 31 of a difference, and eq is the AND of ge and le.
+; NOTE(review): they presumably yield exactly 1 or 0 only when the assembler evaluates in 32 bits with a logical right shift - confirm for the NASM version in use.
%define align2 times ($$-$) & 1 nop
-%define align4 times ($$-$) & 3 nop
-%define align8 times ($$-$) & 7 nop
-%define align16 times ($$-$) & 15 nop
+;%define align4 times ($$-$) & 3 nop
+;%define align8 times ($$-$) & 7 nop
+;%define align16 times ($$-$) & 15 nop
+
+%macro align4 0
+ times ge(($$-$)&3, 3) cmp ebx, byte 0 ; align4: pad to a 4-byte boundary; ($$-$)&3 is the number of bytes still missing. 3 missing -> one 3-byte cmp (imm8 form)
+ times ge(($$-$)&3, 2) cmp ebx, ebx ; 2 missing -> one 2-byte cmp; note that the cmp fillers modify EFLAGS when executed, unlike nop
+ times ge(($$-$)&3, 1) nop ; 1 missing -> a single nop; each test is re-evaluated at the current position, so at most one line emits
+%endmacro
+
+%macro align8 0
+ times ge(($$-$)&7, 6) cmp ebx, 0 ; align8: pad to an 8-byte boundary with as few filler instructions as possible. NOTE(review): presumably relies on this being emitted with a 32-bit immediate (6 bytes) - an assembler that shrinks it to imm8 would break the byte counts; confirm
+ times ge(($$-$)&7, 5) cmp eax, 0 ; presumably the 5-byte accumulator form with a 32-bit immediate - same caveat as above
+ times ge(($$-$)&7, 3) cmp ebx, byte 0 ; 3-byte filler (explicit imm8 form)
+ times ge(($$-$)&7, 2) cmp ebx, ebx ; 2-byte filler
+ times ge(($$-$)&7, 1) nop ; 1-byte filler; the cmp fillers above modify EFLAGS if control falls through them
+%endmacro
+
+%macro align16 0
+ times eq(($$-$)&15, 10) cmp eax, 0 ; align16: pad to a 16-byte boundary. Special case: exactly 10 missing -> emit a 5-byte filler here, the 5-byte line below then supplies the rest
+ times ge(($$-$)&15, 6) cmp ebx, 0 ; 6 or more missing -> 6-byte filler. NOTE(review): the 5/6-byte sizes presumably assume 32-bit immediates are not shortened by the assembler - confirm
+ times ge(($$-$)&15, 6) cmp ebx, 0 ; same test repeated on purpose, presumably: $ has advanced, so this sees the remaining gap and emits a second 6-byte filler when needed
+ times ge(($$-$)&15, 5) cmp eax, 0 ; 5-byte filler
+ times ge(($$-$)&15, 3) cmp ebx, byte 0 ; 3-byte filler (explicit imm8 form)
+ times ge(($$-$)&15, 2) cmp ebx, ebx ; 2-byte filler
+ times ge(($$-$)&15, 1) nop ; 1-byte filler; the cmp fillers above modify EFLAGS if control falls through them
+%endmacro
Modified: trunk/2010/2_cryp_a.asm
==============================================================================
--- trunk/2010/2_cryp_a.asm (original)
+++ trunk/2010/2_cryp_a.asm Wed Jul 4 10:39:35 2007
@@ -1,11 +1,13 @@
-;2010 0.1 Copyright (C) Michael Niedermayer 1998
+;2010 0.1 Copyright (C) Michael Niedermayer 1999
%include "2_all_a.asm"
-segment .data
+segment .data
global _VSmooth1RGB1555__Fiii
global _VSmooth2RGB1555__Fiii
+global _Decomb1__Fiii
+global _Decomb2__Fiii
%define pBuf1 par1
%define iVgaX21 par2
@@ -15,6 +17,109 @@ global _VSmooth2RGB1555__Fiii
%define iVgaX22 par3
%define nOp2 par4
+%define pYuv3 par1
+%define nOp3 par2
+%define pBuf3 par3
+
+_Decomb1__Fiii: ; Decomb1(pYuv3, nOp3, pBuf3): updates the 16-bit word buffer at pBuf3 from the data at pYuv3 (nOp3 bytes); pYuv3 is only read
+ push eax ; eax, ebx and edx are saved/restored although this routine never writes them
+ push ebx
+; pYuv3/nOp3/pBuf3 map to par1..par3, whose stack offsets are defined outside this excerpt
+ push ecx
+ push edx
+
+ push esi
+ push edi
+
+ mov esi, [pYuv3] ; esi = source base
+ mov edi, [pBuf3] ; edi = buffer base
+ mov ecx, [nOp3] ; ecx = byte count
+ add esi, ecx ; move both bases to the end of their ranges ...
+ add edi, ecx
+ xor ecx, byte -1 ; ... and negate the count (not + inc), so [base + ecx] walks forward as ecx climbs towards 0
+ inc ecx
+
+ movq mm7, [decombInc] ; mm7 = 0x0100 in each of its four words
+align16
+Decomb1Loop:
+ movq mm0, [esi + ecx] ; four 16-bit source words
+ psllw mm0, 8 ; each word: low byte moved into the high byte, low byte cleared
+ movq mm2, [edi + ecx] ; four buffered words
+ psubsw mm0, mm2 ; mm0 = (src << 8) - buf, signed saturating
+ pcmpgtw mm2, mm0 ; mm2 = 0xFFFF where buf > that difference (signed compare), else 0
+ pand mm2, mm7 ; reduce the mask to 0x0100 or 0
+ paddsw mm0, mm2 ; add the conditional 0x0100, signed saturating
+ movq [edi + ecx], mm0 ; store the result back into the buffer
+ add ecx, byte 8 ; advance 8 bytes; carry is set once ecx wraps from negative to >= 0
+ jnc Decomb1Loop ; always runs at least once and works in whole 8-byte groups. NOTE(review): nOp3 == 0 would not produce a carry on the first pass - callers presumably never pass 0
+
+ pop edi ; no emms is executed in this routine; the MMX state is left as is on return
+ pop esi
+
+ pop edx
+ pop ecx
+
+ pop ebx
+ pop eax
+
+ret
+
+_Decomb2__Fiii: ; Decomb2(pYuv3, nOp3, pBuf3): same buffer update as Decomb1, but additionally rewrites the source words at pYuv3 in place
+ push eax ; eax, ebx and edx are saved/restored although this routine never writes them
+ push ebx
+; pYuv3/nOp3/pBuf3 map to par1..par3, whose stack offsets are defined outside this excerpt
+ push ecx
+ push edx
+
+ push esi
+ push edi
+
+ mov esi, [pYuv3] ; esi = source base (read and written)
+ mov edi, [pBuf3] ; edi = buffer base
+ mov ecx, [nOp3] ; ecx = byte count
+ add esi, ecx ; move both bases to the end of their ranges ...
+ add edi, ecx
+ xor ecx, byte -1 ; ... and negate the count (not + inc), so [base + ecx] walks forward as ecx climbs towards 0
+ inc ecx
+; Per word in the loop: buf' = sat((src << 8) - buf) + (buf > that ? 0x0100 : 0); src' = (src & 0xFF00) | (buf' >> 7)
+ movq mm6, [decombYMask] ; mm6 = 0xFF00 in each word: keeps the high byte of every source word
+ movq mm7, [decombInc] ; mm7 = 0x0100 in each word
+align16
+Decomb2Loop:
+ movq mm0, [esi + ecx] ;0p2 wMM0rESIECX 1
+ movq mm1, mm0 ; p01wMM1rMM0 0 test mov mem
+ pand mm1, mm6 ; p01wMM1rMM6MM1 1
+ psllw mm0, 8 ;0p1 wMM0rMM0 0
+ movq mm2, [edi + ecx] ; p2 wMM2rEDIECX 1
+ psubsw mm0, mm2 ; p01wMM0rMM2MM0 0
+ pcmpgtw mm2, mm0 ;0p01wMM2rMM0MM2 0
+ pand mm2, mm7 ; p01wMM2rMM7MM2 1
+ paddsw mm0, mm2 ; p01wMM0rMM2MM0 0
+ movq [edi + ecx], mm0 ;0p3 rEDIECX 2 p4 rMM0 0
+ psrlw mm0, 7 ; p1 wMM0rMM0 0
+ por mm1, mm0 ; p01wMM1rMM0MM1 ?0
+ movq [esi + ecx], mm1 ;0p3 rESIECX 2 p4 rMM1 0
+ add ecx, byte 8 ; p01wECXrECX (1)
+ jnc Decomb2Loop ; p1 rFLAG 0
+
+ ; 8 0 3 2 2 2 -> exec 6 cyc
+ ; deco 5 cyc
+; The loop always runs at least once and works in whole 8-byte groups; it ends when "add ecx, 8" carries (ecx wraps to >= 0).
+; NOTE(review): nOp3 == 0 would not carry on the first pass - callers presumably never pass 0.
+; The logical shift right by 7 leaves a 9-bit value (bits 0..8), so bit 8 of the stored source word is OR-ed with the result's top bit.
+; No emms is executed in this routine; the MMX state is left as is on return.
+ pop edi
+ pop esi
+
+ pop edx
+ pop ecx
+
+ pop ebx
+ pop eax
+
+ret
+
+
_VSmooth1RGB1555__Fiii:
push eax
push ebx
@@ -26,12 +131,12 @@ _VSmooth1RGB1555__Fiii:
push edi
mov esi, [pBuf1]
- add esi, 8
+ add esi, byte 8
mov ecx, [nOp1]
add esi, ecx
- xor ecx, -1
+ xor ecx, byte -1
mov ebx, [iVgaX21]
inc ecx
@@ -51,6 +156,7 @@ _VSmooth1RGB1555__Fiii:
; more opt possible (+2reg -1psrlw)
+align16
MMXLoop1:
movq mm5, mm0
movq mm4, mm1
@@ -88,7 +194,7 @@ MMXLoop1:
movq mm1, [ebx+ecx+8]
por mm3, mm5
- add ecx, 8
+ add ecx, byte 8
jnc MMXLoop1
pop edi
@@ -100,8 +206,6 @@ MMXLoop1:
pop ebx
pop eax
- emms
-
ret
_VSmooth2RGB1555__Fiii:
@@ -118,10 +222,10 @@ _VSmooth2RGB1555__Fiii:
mov esi, [pBuf2]
- add esi, 8
+ add esi, byte 8
mov ebp, [nOp2]
- xor ebp, -1
+ xor ebp, byte -1
mov ebx, [iVgaX22]
add esi, ebx
@@ -139,7 +243,7 @@ _VSmooth2RGB1555__Fiii:
mov ecx, eax
add edi, esi
- xor ecx, -1
+ xor ecx, byte -1
inc ecx
@@ -148,6 +252,7 @@ _VSmooth2RGB1555__Fiii:
movq mm7, [mask2]
; lots of optimize possible
+align16
MMXLoop2:
;AGI
movq mm0, [esi+ecx]
@@ -199,12 +304,14 @@ MMXLoop2:
pop ebx
pop eax
- emms
-
ret
-xyz times 8 dd 0
+xyz times 8 dd 0
align8
mask1 times 2 dd 0x7C1F03E0
mask2 times 2 dd 0x03E07C1F
+decombInc times 2 dd 0x01000100
+decombYMask times 2 dd 0xFF00FF00
+
+align16
Modified: trunk/2010/2_cryp_a.h
==============================================================================
--- trunk/2010/2_cryp_a.h (original)
+++ trunk/2010/2_cryp_a.h Wed Jul 4 10:39:35 2007
@@ -5,4 +5,7 @@
void VSmooth1RGB1555(int, int, int);
void VSmooth2RGB1555(int, int, int);
+void Decomb1(int, int, int); // (source address, byte count, word-buffer address); updates the buffer only
+void Decomb2(int, int, int); // same arguments; also rewrites the source data in place
+
#endif
Modified: trunk/2010/2_crypt.cpp
==============================================================================
--- trunk/2010/2_crypt.cpp (original)
+++ trunk/2010/2_crypt.cpp Wed Jul 4 10:39:35 2007
@@ -5,6 +5,7 @@
#include <time.h>
#include "2_all.h"
#include "2_crypt.h"
+#include "2_hw_mem.h"
#include "2_cryp_a.h"
#include "2_vc.h"
#include "2_nag.h"
@@ -30,7 +31,7 @@ extern int some;
static void VSmooth(void);
static void Decomb(int q);
-int iVSmooth=1;
+int iVSmooth=2;
bool showPoints=false;
asm("__esp: .long 0\n\t");
@@ -82,149 +83,35 @@ void decrypt(void){
static void Decomb(int q){
- int temp[max_x];
+ int *temp= (int*)newAlign(max_x * sizeof(int), 8);
const int outy1= outy>>1;
const int stride=oneField ? (vgax<<1) : (vgax<<2);
const int o=oneField ? 0 : 1;
const int end=oneField ? outy : outy1;
- for(int i=0; i<wndx; i++) temp[i]=128<<8;
+// for(int i=0; i<wndx; i++) temp[i]=128<<8;
+ for(int i=0; i<wndx; i++) temp[i]=0;
if(q==1){
int linep= stride*256;
if(o) linep+=vgax<<1;
for(int line=256; line>=0; line--){
- asm(//"int $3 \n\t"
- "pushl %%ebp \n\t" //U
- "movl %%eax, %%ebp \n\t" // V 1
- "addl %%ebp, %%edi \n\t" //U
- "addl %%ebp, %%esi \n\t" // V 1
- "addl %%ebp, %%esi \n\t" //U 1
- "negl %%ebp \n\t" //UV 1
- "1: \n\t"
- "xorl %%eax, %%eax \n\t" //U
- "xorl %%ebx, %%ebx \n\t" // V 1
- "movb (%%edi, %%ebp ), %%al \n\t" //U
- "movb 2(%%edi, %%ebp ), %%bl \n\t" // V 1
- "addb $128, %%al \n\t" //U
- "addb $128, %%bl \n\t" // V 1
- "shll $9, %%eax \n\t" //U
- "movl (%%esi, %%ebp, 2), %%ecx \n\t" // V 1
- "shll $9, %%ebx \n\t" //U
- "subl %%ecx, %%eax \n\t" // V 1
- "movl 4(%%esi, %%ebp, 2), %%edx \n\t" //U
- "cmpl %%ecx, %%eax \n\t" // V 1
- "sbbl %%ecx, %%ecx \n\t" //U
- "subl %%edx, %%ebx \n\t" // V 1
- "andl %0, %%ecx \n\t" //U
- "cmpl %%edx, %%ebx \n\t" // V 1
- "sbbl %%edx, %%edx \n\t" //U
- "addl %%ecx, %%eax \n\t" // V 1
- "andl %0, %%edx \n\t" //U
- "movl %%eax, (%%esi, %%ebp, 2) \n\t" // V 1
- "addl %%edx, %%ebx \n\t" //U 1
- "movl %%ebx, 4(%%esi, %%ebp, 2) \n\t" //U
-
- "addl $4, %%ebp \n\t" // V 1
- " jnc 1b \n\t" //U 1
- "popl %%ebp \n\t" //U
- :
- : "i" (COMB_FIX), "a" ((wndx<<1) - 2),
- "D" (actVid2MemBufp->b + linep + 2), "S" (int(temp) + 4)
- : "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi");
- linep-=stride;
+ Decomb1(int(actVid2MemBufp->b + linep), (wndx<<1), int(temp));
+ linep-=stride;
}
}
int linep= 0;
if(o) linep+=vgax<<1;
for(int line=0; line<end; line++){
- asm(//"int $3 \n\t"
- "pushl %%ebp \n\t" //U
- "movl %%eax, %%ebp \n\t" // V 1
- "addl %%ebp, %%edi \n\t" //U
- "addl %%ebp, %%esi \n\t" // V 1
- "addl %%ebp, %%esi \n\t" //U 1
- "negl %%ebp \n\t" //UV 1
- "1: \n\t"
- "xorl %%eax, %%eax \n\t" //U
- "xorl %%ebx, %%ebx \n\t" // V 1
- "movb (%%edi, %%ebp ), %%al \n\t" //U
- "movb 2(%%edi, %%ebp ), %%bl \n\t" // V 1
- "addb $128, %%al \n\t" //U
- "addb $128, %%bl \n\t" // V 1
- "shll $9, %%eax \n\t" //U
- "movl (%%esi, %%ebp, 2), %%ecx \n\t" // V 1
- "shll $9, %%ebx \n\t" //U
- "subl %%ecx, %%eax \n\t" // V 1
- "movl 4(%%esi, %%ebp, 2), %%edx \n\t" //U
- "cmpl %%ecx, %%eax \n\t" // V 1
- "sbbl %%ecx, %%ecx \n\t" //U
- "subl %%edx, %%ebx \n\t" // V 1
- "andl %0, %%ecx \n\t" //U
- "cmpl %%edx, %%ebx \n\t" // V 1
- "sbbl %%edx, %%edx \n\t" //U
- "addl %%ecx, %%eax \n\t" // V 1
- "andl %0, %%edx \n\t" //U
- "movl %%eax, (%%esi, %%ebp, 2) \n\t" // V 1
- "addl %%edx, %%ebx \n\t" //U
- "subb $128, %%ah \n\t" // V 1
- "movl %%ebx, 4(%%esi, %%ebp, 2) \n\t" //U
- "subb $128, %%bh \n\t" // V 1
- "movb %%ah, (%%edi, %%ebp ) \n\t" //U
- "movb %%bh, 2(%%edi, %%ebp ) \n\t" // V 1
-
- "addl $4, %%ebp \n\t" //U
- " jnc 1b \n\t" // V 1
- "popl %%ebp \n\t" //U
- :
- : "i" (COMB_FIX), "a" ((wndx<<1) - 2),
- "D" (actVid2MemBufp->b + linep + 2), "S" (int(temp) + 4)
- : "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi");
+ Decomb2(int(actVid2MemBufp->b + linep), (wndx<<1), int(temp));
linep+=stride;
}
+ asm("emms\n\t");
-
-
-/*
- asm(//"int $3 \n\t"
- "1: \n\t"
- "movl (%%esi ), %%eax \n\t" //U LINE-1
- "movl (%%esi,%%edi), %%ebx \n\t" // V 1 LINE
- "movl %%ebx, %%ecx \n\t" //U
- "andl $0x00FF00FF, %%eax \n\t" // V 1
- "andl $0x00FF00FF, %%ebx \n\t" //U
- "addl $0x00800080, %%eax \n\t" // V 1
- "xorl $0x00800080, %%eax \n\t" //U
- "addl %%ebx, %%ebx \n\t" // V 1
- "orl $0x02000200, %%ebx \n\t" //U
- "andl $0xFF00FF00, %%ecx \n\t" // V 1
- "subl %%eax, %%ebx \n\t" //U
- "addl $4, %%esi \n\t" // V 1
- "movl %%ebx, %%eax \n\t" //U
- "andl $0x01F801F8, %%ebx \n\t" // V 1
- "shrl $3, %%ebx \n\t" //U
- "nop \n\t" // V 1 FIX ME
- "addl $0x00600060, %%ebx \n\t" //U
- "nop \n\t" // V 1 FIX ME
- "xorl $0x00600060, %%ebx \n\t" //U
- "nop \n\t" // V 1 FIX ME
- "subl %%ebx, %%eax \n\t" //U
- "nop \n\t" // V 1 FIX ME
- "andl $0x00FF00FF, %%eax \n\t" //U
- "nop \n\t" // V 1 FIX ME
- "orl %%ecx, %%eax \n\t" //U
- "decl %%edx \n\t" // V 1
- "movl %%eax, -4(%%esi, %%edi)\n\t" //U
- " jnz 1b \n\t" // V 1
-
- :
- : "D" (oneField ? (vgax<<1) : (vgax<<2)), "S" (actVid2MemBufp->b),
- "d" (vgax * wndy>>1)
- : "%eax", "%ebx", "%ecx", "%edx", "%esi");
- */
+ deleteAlign(temp);
}
static void VSmooth(void){
@@ -236,40 +123,5 @@ static void VSmooth(void){
else
VSmooth2RGB1555(int(actVid2MemBufp->b), vgax<<1, vgax*(wndy-3)<<1);
}
-/*
- else{
- asm(//"int $3 \n\t"
- "1: \n\t"
- "movl (%%esi ), %%eax \n\t" //U LINE
- "movl (%%esi,%%edi,2), %%ebx \n\t" // V 1 LINE + 2
- "shrl $2, %%eax \n\t" //U
- "andl $0xFCFCFCFC, %%ebx \n\t" // V 1
- "shrl $2, %%ebx \n\t" //U
- "andl $0x3F3F3F3F, %%eax \n\t" // V 1
- "addl $0x00200020, %%eax \n\t" //U
- "addl $0x00200020, %%ebx \n\t" // V 1
- "xorl $0x00200020, %%eax \n\t" //U
- "xorl $0x00200020, %%ebx \n\t" // V 1
- "addl %%eax, %%ebx \n\t" //U
- "movl (%%esi,%%edi ), %%eax \n\t" // V 1 LINE + 1
- "shrl $1, %%eax \n\t" //U
- "andl $0x7F7F7F7F, %%ebx \n\t" // V 1
- "andl $0x7F7F7F7F, %%eax \n\t" //U
- "addl $0x00400040, %%ebx \n\t" // V 1
- "addl $0x00400040, %%eax \n\t" //U
- "xorl $0x00400040, %%ebx \n\t" // V 1
- "xorl $0x00400040, %%eax \n\t" //U
- "addl $4, %%esi \n\t" // V 1
- "addl %%eax, %%ebx \n\t" //U
- "decl %%ecx \n\t" // V 1
- "movl %%ebx, -4(%%esi ) \n\t" //U LINE
- " jnz 1b \n\t" // V 1
- :
- : "D" (vgax<<1), "S" (actVid2MemBufp->b),
- "c" (vgax * (wndy-2) >> 1)
- : "%eax", "%ebx", "%ecx", "%esi");
-
-
- }
- */
+ asm("emms\n\t");
}
Modified: trunk/2010/2_glob_a.asm
==============================================================================
--- trunk/2010/2_glob_a.asm (original)
+++ trunk/2010/2_glob_a.asm Wed Jul 4 10:39:35 2007
@@ -12,7 +12,7 @@ global _asm_temp4
segment .bss
-align4
+;align4
esp_save resd 1
_asm_temp resd 1
Modified: trunk/2010/2_hw.cpp
==============================================================================
--- trunk/2010/2_hw.cpp (original)
+++ trunk/2010/2_hw.cpp Wed Jul 4 10:39:35 2007
@@ -11,6 +11,7 @@
#include <time.h>
#include "2_all.h"
#include "2_hw.h"
+#include "2_hw_a.h"
#include "2_hw_asm.h"
#include "2_hw_mem.h"
#include "2_71x6.h"
@@ -26,13 +27,13 @@
extern volatile AllocList *alloclist;
extern volatile VID2MEMBUF *vid2MemBuf, *actVid2MemBufp;
extern volatile int grabVid2MemBuf, actVid2MemBuf;
- extern volatile int one_field;
+ extern volatile bool oneField;
extern volatile int active_meteor;
extern volatile meteor meteors[8];
extern volatile int fields;
extern byte *vidbuf;
- extern int vgax, vgay, xresc, xresvb, vb, mc, wndy, wndx;
- extern int *yuv2RgbLut;
+ extern int vgax, vgay, xresc, xresvb, vb, mc, wndy, wndx, outy;
+ extern unsigned short *yuv2RgbLut;
extern int single;
extern int in_int;
extern volatile int frames;
@@ -40,6 +41,7 @@
extern bool iState;
extern int infoPosX, infoPosY;
extern TVSTD TVStd;
+ extern int iVSmooth;
bool drop=false;
bool allowDrop=true;
@@ -51,133 +53,13 @@
int VID2MEMBUF::num;
-void mem2vid(byte *to, byte *from, int num, int bpp){
+void mem2vid(byte *to, byte *from, int num, int bpp, int y){
if(yuvMode!=0){
- asm(//"int $3 \n\t"
- "pushl %%ebp \n\t"
- "movl %%eax, %%ebp \n\t"
- "movl %%esp, (__esp) \n\t"
-
- "addl %%ecx, %%esi \n\t"
- "addl %%ecx, %%edi \n\t"
- "negl %%ecx \n\t"
- "movl (%%esi, %%ecx), %%eax \n\t"
- "movl %%esi, (__temp1) \n\t"
-
- "1: \n\t"
- "movl %%eax, %%edx \n\t"
- "andl $0xFE, %%eax \n\t"
-
- "movl %%edx, %%ebp \n\t"
- "andl $0xFE00, %%edx \n\t"
-
- "shrl $8, %%edx \n\t"
- "movl (%%ebx, %%eax, 4), %%esp \n\t"
-
- "shrl $16, %%ebp \n\t"
- "movl 4(%%ebx, %%eax, 4), %%eax \n\t"
-
- "movl 1024(%%ebx, %%edx, 4), %%esi \n\t"
- "movl 1028(%%ebx, %%edx, 4), %%edx \n\t"
-
- "addl %%esi, %%esp \n\t"
- "addl %%edx, %%eax \n\t"
-
- "movl %%ebp, %%edx \n\t"
- "andl $0xFE, %%ebp \n\t"
-
- "shrl $8, %%edx \n\t"
- "andl $0x7FEFF9FF, %%esp \n\t"
-
- "andl $0xFE, %%edx \n\t"
- "movl 2048(%%ebx, %%ebp, 4), %%esi \n\t"
-
- "andl $0x7FCFFBFF, %%eax \n\t"
- "movl 2052(%%ebx, %%ebp, 4), %%ebp \n\t"
-
- "addl %%esi, %%esp \n\t"
- "addl %%ebp, %%eax \n\t"
-
- "andl $0x7FEFF9FF, %%esp \n\t"
- "movl 3072(%%ebx, %%edx, 4), %%esi \n\t"
-
- "andl $0x7FCFFBFF, %%eax \n\t"
- "movl 3076(%%ebx, %%edx, 4), %%edx \n\t"
-
- "addl %%esi, %%esp \n\t"
- "addl %%edx, %%eax \n\t"
-
- "test $0x600C0180, %%esp \n\t"
- " jz 2f \n\t"
-
- "movl %%esp, %%esi \n\t"
- "movl %%esp, %%edx \n\t"
-
- "shrl $7, %%esi \n\t"
- "andl $0x40080100, %%edx \n\t"
-
- "shrl $8, %%edx \n\t"
- "andl $0x00400801, %%esi \n\t"
-
- "addl $0xEFDFFBFF, %%edx \n\t"
- "addl $0xEFDFFBFF, %%esi \n\t"
-
- "xorl $0x10200400, %%edx \n\t"
- "xorl $0xEFDFFBFF, %%esi \n\t"
-
- "orl %%esi, %%esp \n\t"
-
- "andl %%edx, %%esp \n\t"
-
- "2: \n\t"
- "test $0x600C0300, %%eax \n\t"
- " jz 3f \n\t"
-
- "movl %%eax, %%esi \n\t"
- "movl %%eax, %%edx \n\t"
-
- "shrl $8, %%esi \n\t"
- "andl $0x40080200, %%edx \n\t"
-
- "shrl $9, %%edx \n\t"
- "andl $0x00200401, %%esi \n\t"
-
- "addl $0xEFEFFDFF, %%edx \n\t"
- "addl $0xEFEFFDFF, %%esi \n\t"
-
- "xorl $0x10100200, %%edx \n\t"
- "xorl $0xEFEFFDFF, %%esi \n\t"
-
- "orl %%esi, %%eax \n\t"
-
- "andl %%edx, %%eax \n\t"
-
- "3: \n\t"
-
- "shrl $2, %%esp \n\t"
- "movl (__temp1), %%esi \n\t"
-
- "shll $3, %%eax \n\t"
- "andl $0x07E0F81F, %%esp \n\t"
-
- "andl $0xF81F07E0, %%eax \n\t"
-
- "orl %%eax, %%esp \n\t"
-
- "movl %%esp, (%%edi, %%ecx) \n\t"
-
- "movl 4(%%esi, %%ecx), %%eax \n\t"
- "addl $4, %%ecx \n\t"
-
- "jnc 1b \n\t"
-
- "movl (__esp), %%esp \n\t"
- "popl %%ebp \n\t"
- :
- : "b" (yuv2RgbLut), "S" (from), "D" (to), "c" (num<<1)
- : "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi");
-
+ if((y&1) && !oneField && wndy==outy)
+ CopyYuv2Rgb2(int(from), int(to), num<<1, int(yuv2RgbLut));
+ else
+ CopyYuv2Rgb1(int(from), int(to), num<<1, int(yuv2RgbLut));
}else{
if(strangeRgb16){
asm("addl %%ecx, %%esi \n\t"
@@ -316,13 +198,13 @@ void copy_vidbuffer(void){
if(page_flip==0){
int p=0;
for(int y=0; y<goody; y++){
- mem2vid(vidbuf+p, actVid2MemBufp->b+p, goodx, bpp);
+ mem2vid(vidbuf+p, actVid2MemBufp->b+p, goodx, bpp, y);
p+=vgax<<1;
}
}else if(page_flip==2){
int p=0;
for(int y=0; y<goody; y++){
- mem2vid(vidbuf+xresvb*vgay+p, actVid2MemBufp->b+p, goodx, bpp);
+ mem2vid(vidbuf+xresvb*vgay+p, actVid2MemBufp->b+p, goodx, bpp, y);
p+=vgax<<1;
}
page_flip=1;
@@ -330,7 +212,7 @@ void copy_vidbuffer(void){
}else{
int p=0;
for(int y=0; y<goody; y++){
- mem2vid(vidbuf+p, actVid2MemBufp->b+p, goodx, bpp);
+ mem2vid(vidbuf+p, actVid2MemBufp->b+p, goodx, bpp, y);
p+=vgax<<1;
}
page_flip=2;
@@ -339,6 +221,8 @@ void copy_vidbuffer(void){
}
drop=false;
+ asm("emms\n\t");
+
if(iState){
T2=uclock();
}
Added: trunk/2010/2_hw_a.asm
==============================================================================
--- (empty file)
+++ trunk/2010/2_hw_a.asm Wed Jul 4 10:39:35 2007
@@ -0,0 +1,231 @@
+;2010 0.1 Copyright (C) Michael Niedermayer 1999
+
+%include "2_all_a.asm"
+
+segment .data
+
+extern esp_save
+
+global _CopyYuv2Rgb1__Fiiii
+global _CopyYuv2Rgb2__Fiiii
+
+%define pYuv par2
+%define pRgb par3
+%define nOp par4
+%define pLut par5
+%define nbX par6
+
+_CopyYuv2Rgb1__Fiiii: ; CopyYuv2Rgb1(pYuv, pRgb, nOp, pLut): converts nOp bytes at pYuv to 16-bit pixels at pRgb via the word table at pLut
+ push eax ; seven registers are saved, one more than in the six-push routines elsewhere in this commit
+ push ebx
+ push ecx
+ push edx
+ push esi
+ push edi
+ push ebp
+; ESP is about to be reused as the table base, so the real stack pointer is parked in the global esp_save
+ mov [esp_save], esp
+; NOTE(review): esp_save is a single global and there is no usable stack until ESP is restored - presumably not reentrant or interrupt-safe on this stack; confirm
+ mov esi, [pYuv] ; arguments are still fetched relative to the real ESP here
+ mov edi, [pRgb]
+ mov ecx, [nOp]
+ mov esp, [pLut] ; last ESP-relative load: from here on ESP = LUT base
+ add esi, ecx ; move both bases to the end of their ranges ...
+ add edi, ecx
+ xor ecx, byte -1 ; ... and negate the count (not + inc), so ecx climbs towards 0
+ inc ecx
+; mm6/mm7 select the top five bits of byte 0 and of byte 2 of every dword
+ movq mm6, [uMask]
+ movq mm7, [vMask]
+align16
+CopyLoop1:
+ movq mm0, [esi+ecx] ;4p2 wMM0rESIECX 1 0 3
+ movq mm1, [esi+ecx] ;4p2 wMM1rESIECX 10 1 4
+ movq mm2, [esi+ecx] ;4p2 wMM2rESIECX 10 2 5
+; the same 8 source bytes are loaded three times, once per field extracted below
+ psrlw mm0, 11 ;4p1 wMM0rMM0 0 1
+ ;iFetch
+ pand mm1, mm6 ;3p01wMM1rMM6MM1 1 2
+ pand mm2, mm7 ;3p01wMM2rMM7MM2 1 3
+ pslld mm1, 2 ;4p1 wMM1rMM1 0 3
+
+ psrld mm2, 9 ;4p1 wMM2rMM2 0 4
+ ;iFetch
+ por mm1, mm2 ;3p01wMM1rMM2MM1 0 5
+ movq mm2, mm1 ;3p01wMM2rMM1 0
+ pslld mm1, 16 ;4p1 wMM1rMM1 0
+
+ por mm0, mm2 ;3p01wMM0rMM2MM0 0
+ por mm0, mm1 ;3p01wMM0rMM1MM0 ?0
+ ;iFetch
+ movd eax, mm0 ;3p01wEAXrMM0 0
+ movzx ebx, ax ;3p01wEBXrEAX 0
+ shr eax, 16 ;3p0 wEAXrEAX 0
+
+ movzx eax, word [esp + eax*2] ;4p2 wEAXrESPEAX 10
+ shl eax, 16 ;3p0 wEAXrEAX 0
+ ;iFetch
+ movzx ebx, word [esp + ebx*2] ;4p2 wEBXrESPEBX 1
+ psrlq mm0, 32 ;4p1 wMM0rMM0 0
+ or eax, ebx ;2p01wEAXrEBXEAX 0
+
+ mov [edi+ecx], eax ;3p3 rEDIECX 2 0 p4 rEAX 0
+ movd eax, mm0 ;3p01wMM0rMM1MM0 ?00
+ ;iFetch
+ movzx ebx, ax ;3p01wEBXrEAX 0
+ shr eax, 16 ;3p0 wEAXrEAX 0
+ movzx ebx, word [esp + ebx*2] ;4p2 wEBXrESPEBX 1
+
+ movzx eax, word [esp + eax*2] ;4p2 wEAXrESPEAX 10
+ ;iFetch
+ shl eax, 16 ;3p0 wEAXrEAX 0
+ or eax, ebx ;2p01wEAXrEBXEAX 0
+
+ mov [edi+ecx+4], eax ;3p3 rEDIECX 2 1 p4 rEAX 0
+ add ecx, byte 8 ;3p01wECXrECX 110 0
+ jnc CopyLoop1 ;2p1 rFLAG 0
+ ; 13 4 6 7 2 2 12+ cyc
+ ; fetch/decode 14 cyc
+; Loop summary: each source word contributes bits 11..15 as index bits 0..4; byte 0 of the dword (top 5 bits) becomes index bits 5..9
+; and byte 2 (top 5 bits) becomes index bits 10..14, shared by both words of that dword. The four 15-bit indices are then looked up
+; one by one as words at [esp + index*2] and written out as two dwords (second word of each pair in the upper half).
+; The loop always runs at least once and works in whole 8-byte groups. The commented-out block below is an alternative scalar loop kept for reference.
+; movzx eax, byte [esi+ecx ] ;p2 wEAXrESIECX
+; movzx ebp, byte [esi+ecx+1] ;p2 wEBPrESIECX
+; movzx edx, byte [esi+ecx+2] ;p2 wEDXrESIECX
+; movzx ebx, byte [esi+ecx+3] ;p2 wEBXrESIECX
+; and edx, 0xF8 ;p01wEDXrEDX
+; and eax, 0xF8 ;p01wEAXrEAX
+; shr ebp, 3 ;p0 wEBPrEBP
+; shr ebx, 3 ;p0 wEBXrEBX
+; shl edx, 7 ;p0 wEDXrEDX
+; lea eax, [edx + eax*4] ;p0 wEAXrEDXEAX
+; or ebp, eax ;p01wEBPrEAXEBP
+; or ebx, eax ;p01wEBXrEAXEBX
+; movzx ebp, word [esp + ebp*2] ;p2 wEBPrESPEBP
+; movzx ebx, word [esp + ebx*2] ;p2 wEBXrESPEBX
+; shl ebx, 16 ;p0 wEBXrEBX
+; or ebp, ebx ;p01wEBPrEBXEBP
+; mov [edi+ecx], ebp ;p3 rEDIECX p4 rEBP
+; add ecx, byte 4 ;p01wECXrECX
+; jnc NEAR CopyLoop1 ;p1 rFLAG
+ ; 6 5 1 6 1 1 7+ cyc asy
+
+ mov esp, [esp_save] ; restore the real stack pointer before any pop
+
+ pop ebp
+ pop edi
+ pop esi
+ pop edx
+ pop ecx
+ pop ebx
+ pop eax
+ret
+
+_CopyYuv2Rgb2__Fiiii: ; CopyYuv2Rgb2(pYuv, pRgb, nOp, pLut): like CopyYuv2Rgb1, but converts the byte-wise average of the data 1600 bytes before and after pYuv
+ push eax
+ push ebx
+ push ecx
+ push edx
+ push esi
+ push edi
+ push ebp
+; ESP is about to be reused as the table base, so the real stack pointer is parked in the global esp_save
+ mov [esp_save], esp
+; NOTE(review): esp_save is a single global and there is no usable stack until ESP is restored - presumably not reentrant or interrupt-safe on this stack; confirm
+ mov esi, [pYuv] ; arguments are still fetched relative to the real ESP here
+ mov edi, [pRgb]
+ mov ecx, [nOp]
+ mov esp, [pLut] ; last ESP-relative load: from here on ESP = LUT base
+ add esi, ecx ; move both bases to the end of their ranges ...
+ add edi, ecx
+ xor ecx, byte -1 ; ... and negate the count (not + inc), so ecx climbs towards 0
+ inc ecx
+; constants: mm7 = 0x80 in bytes 0 and 2 of each dword, mm6 = 0xFE per byte, mm5 = 0xF8 per byte
+ movq mm7, [antiSigned]
+ movq mm6, [bit0Mask]
+ movq mm5, [bit20Mask]
+; NOTE(review): the +-1600 offsets are hard-coded (presumably one buffer line); the nbX define above is never used - confirm the intended stride
+align16
+CopyLoop2:
+ movq mm0, [esi+ecx-1600] ; 8 bytes located 1600 bytes before the current position
+ movq mm1, [esi+ecx+1600] ; 8 bytes located 1600 bytes after it
+ paddb mm0, mm7 ; add 0x80 to bytes 0 and 2 of each dword (flips their top bit)
+ paddb mm1, mm7
+ pand mm0, mm6 ; clear bit 0 of every byte so the word shift below cannot leak across bytes
+ pand mm1, mm6
+ psrlw mm0, 1 ; halve every byte
+ psrlw mm1, 1
+ paddb mm0, mm1 ; sum of the halves = per-byte average (low bits dropped)
+ psubb mm0, mm7 ; undo the 0x80 offset on bytes 0 and 2
+ pand mm0, mm5 ; keep the top five bits of each byte
+ movd ebx, mm0 ; low dword = first pixel pair
+ psrlq mm0, 32 ; keep the high dword for the second pair
+; split the dword into its four bytes: eax = byte 0, ebp = byte 1, edx = byte 2, ebx = byte 3
+ movzx eax, bl
+ movzx ebp, bh
+ shr ebx, 16
+ movzx edx, bl
+ movzx ebx, bh
+; build two 15-bit indices: (byte1 or byte3) >> 3 in bits 0..4, byte 0 in bits 5..9, byte 2 in bits 10..14
+ shr ebp, 3
+ shr ebx, 3
+ shl edx, 7
+ lea eax, [edx + eax*4]
+ or ebp, eax
+ or ebx, eax
+; table lookups use ESP as base; the second pixel goes into the upper 16 bits
+ movzx ebp, word [esp + ebp*2]
+ movzx ebx, word [esp + ebx*2]
+ shl ebx, 16
+ or ebp, ebx
+
+ mov [edi+ecx], ebp ; store the first two output pixels
+; second pixel pair: identical sequence on the upper dword
+
+ movd ebx, mm0
+
+ movzx eax, bl
+ movzx ebp, bh
+ shr ebx, 16
+ movzx edx, bl
+ movzx ebx, bh
+
+ shr ebp, 3
+ shr ebx, 3
+ shl edx, 7
+ lea eax, [edx + eax*4]
+ or ebp, eax
+ or ebx, eax
+
+ movzx ebp, word [esp + ebp*2]
+ movzx ebx, word [esp + ebx*2]
+ shl ebx, 16
+ or ebp, ebx
+
+ mov [edi+ecx+4], ebp ; store output pixels three and four
+
+ add ecx, byte 8 ; advance 8 bytes; carry is set once ecx wraps to >= 0
+ jnc NEAR CopyLoop2 ; the loop always runs at least once and works in whole 8-byte groups
+
+ mov esp, [esp_save] ; restore the real stack pointer before any pop
+
+ pop ebp
+ pop edi
+ pop esi
+ pop edx
+ pop ecx
+ pop ebx
+ pop eax
+ret
+
+xyz times 8 dd 0
+
+align8
+antiSigned times 2 dd 0x00800080
+bit0Mask times 2 dd 0xFEFEFEFE
+bit20Mask times 2 dd 0xF8F8F8F8
+uMask times 2 dd 0x000000F8
+vMask times 2 dd 0x00F80000
+
+align16
Added: trunk/2010/2_hw_a.h
==============================================================================
--- (empty file)
+++ trunk/2010/2_hw_a.h Wed Jul 4 10:39:35 2007
@@ -0,0 +1,8 @@
+//2010 0.1 Copyright (C) Michael Niedermayer 1998
+#ifndef n2_hw_a_h
+#define n2_hw_a_h
+
+void CopyYuv2Rgb1(int, int, int, int); // (source address, destination address, byte count, lookup-table address)
+void CopyYuv2Rgb2(int, int, int, int); // same arguments; converts the average of the data 1600 bytes before/after the source position
+
+#endif
Modified: trunk/2010/2_hw_mem.cpp
==============================================================================
--- trunk/2010/2_hw_mem.cpp (original)
+++ trunk/2010/2_hw_mem.cpp Wed Jul 4 10:39:35 2007
@@ -25,14 +25,18 @@ bool checkAlign(void *p, int align){
void *newAlign(int size, int align){ // returns a block of `size` bytes whose address is a multiple of `align`; release with deleteAlign
int v= int(new char[size + align + 3]); // over-allocate: room for rounding up plus a 4-byte slot for the raw pointer; NOTE(review): pointer kept in an int - assumes 32-bit pointers
+#ifdef debug
printf("anew %d\n", v);
+#endif
int v2= ( (v+align-1+4) & ~(align-1) ); // first aligned address at least 4 bytes past v; the mask only works for a power-of-two align
((int*)v2)[-1]= v; // remember the raw allocation address just below the aligned block
return (void*)v2;
}
}
void deleteAlign(void *v){ // frees a block obtained from newAlign
+#ifdef debug
printf("adel %d\n", int(((char**)v)[-1]) );
+#endif
delete [] ((char**)v)[-1]; // the raw pointer was stored in the slot just below v; NOTE(review): read as char* but written as int in newAlign - same width presumably assumed
}
@@ -360,7 +364,6 @@ void vds_free(dds *vds_info){
vd=vd->next;
if(vd==vdslist && vd->vds_info.phys==vds_info->phys ){
- printf("HALLO1\n");
vdslist= vdslist->next;
delete vd;
}else if(vd!=vdslist && vd->next!=NULL){
Modified: trunk/2010/2_nag.cpp
==============================================================================
--- trunk/2010/2_nag.cpp (original)
+++ trunk/2010/2_nag.cpp Wed Jul 4 10:39:35 2007
@@ -18,7 +18,7 @@
#include "2_hw_mem.h"
#include "2010.h"
-#define dwnSmp 1
+#define dwnSmp 3
#define FREQ_CHROM 4433618.75
#define FREQ_HS 15625.0
@@ -244,9 +244,10 @@ void nag_decrypt(void){
if(nagSizeAll!=NAG_LINES) error(Nagra);
+#ifdef debug
printf("%d %d\n", sizeof(u_short), sizeof(BEST2) );
+#endif
- printf("HALLOX\n");
static bool first=true;
static char *sinPerLineMulX=new char[(NAG_LINES+32)<<8];
@@ -626,7 +627,7 @@ void nag_decrypt(void){
if(!isWss) wssLine=32;
byte * const lowRes=(byte*) newAlign( wndx>>dwnSmpSize, 8);
- for(int line=0; line<NAG_LINES; line++){
+ for(int line=0; line<NAG_LINES; line++){ // vtune some btb misspredicts
byte * const linep= actVid2MemBufp->b + nagLineLogi2Phys(line)*(vgax<<1);
// printf("%d\n", line);
doDwnSmp(lowRes, linep);
@@ -639,11 +640,9 @@ void nag_decrypt(void){
}
}
if(mmx) asm("emms\n\t");
- printf("H1\n");
deleteAlign(decoLowRes);
deleteAlign(lowRes);
- printf("H2\n");
if(iState){
T2=uclock();
@@ -678,7 +677,6 @@ void nag_decrypt(void){
keysListPos[keysListPoses]= best2[l].keyNdx;
keysListPoses++;
}
- printf("H3\n");
if(iState){
T2=uclock();
@@ -736,7 +734,9 @@ void nag_decrypt(void){
int perm[NAG_LINES];
getPerm(bestKey, perm);
+#ifdef debug
printf("%X\n", bestKey);
+#endif
delete [] bestCoeffs; // kill this
@@ -915,7 +915,6 @@ void nag_decrypt(void){
T1=T2;
}
- printf("H4\n");
/*
for(int i=0; i<NAG_LINES; i++){
const int p=nagLineLogi2Phys(i);
@@ -992,12 +991,12 @@ void nag_decrypt(void){
}
- if(outy == wndy){
+/* if(outy == wndy){
for(int i=0; i<outy; i+=2){
memcpy(actVid2MemBufp->b + ((i+1) * vgax<<1),
actVid2MemBufp->b + (i * vgax<<1) , wndx << 1);
- }
- }
+ }
+ } */
delete [] copyBuff;
Modified: trunk/2010/2_nag_a.asm
==============================================================================
--- trunk/2010/2_nag_a.asm (original)
+++ trunk/2010/2_nag_a.asm Wed Jul 4 10:39:35 2007
@@ -36,12 +36,12 @@ _nagraCorr__Fiiiii:
and edx, 0xFFFFFFF8
- sub edx, 8
+ sub edx, byte 8
add esi, edx
add edi, edx
- xor edx, -1
+ xor edx, byte -1
mov eax, [_mmx]
inc edx
@@ -68,21 +68,26 @@ _nagraCorr__Fiiiii:
movq mm6, [mmx_clip]
+align16
MMXloop:
- psubusb mm2, mm1 ; U
- psubusb mm1, mm0 ; V 1
- movq mm0, [esp + esi + 8] ; U
- por mm2, mm1 ; V 1
- movq mm1, [esp + edi + 8] ; U
- paddusb mm2, mm6 ; V 1
- movq mm4, mm2 ; U
- punpcklbw mm2, mm3 ; V 1
- punpckhbw mm4, mm3 ; U
- paddusw mm5, mm2 ; V 1
- paddusw mm5, mm4 ; U
- movq mm2, mm0 ; V 1
- add esp, 8 ; U
- jnc MMXloop ; V 1
+ psubusb mm2, mm1 ; p01wMM2rMM1MM2 0
+ psubusb mm1, mm0 ; p01wMM1rMM0MM1 1
+ por mm2, mm1 ; p01wMM2rMM1MM2 0
+ movq mm0, [esp + esi + 8] ; p2 wMM0rESPESI 1
+ paddusb mm2, mm6 ; p01wMM2rMM6MM2 1
+ movq mm4, mm2 ; p01wMM4rMM2 0
+ punpcklbw mm2, mm3 ; p1 wMM2rMM3MM2 1
+ punpckhbw mm4, mm3 ; p1 wMM4rMM3MM4 (1)
+ paddusw mm5, mm2 ; p01wMM5rMM2MM5 1
+ paddusw mm5, mm4 ; p01wMM5rMM4MM5 (1)
+ movq mm2, mm0 ; p01wMM2rMM0 0
+ movq mm1, [esp + edi + 8] ; p2 wMM1rESPEDI 2
+ add esp, byte 8 ; p01wESPrESP 1
+ jnc MMXloop ; p1 rFLAG
+ ; 5+ cyc deco
+ ; 5- cyc rat
+ ; 6 cyc exec (0/1 satur)
+
psubusb mm2, mm1
psubusb mm1, mm0
@@ -201,3 +206,5 @@ xyz times 8 dd 0
align8
mmx_clip times 8 db 0
+
+align16
Modified: trunk/2010/2_vc.cpp
==============================================================================
--- trunk/2010/2_vc.cpp (original)
+++ trunk/2010/2_vc.cpp Wed Jul 4 10:39:35 2007
@@ -37,15 +37,14 @@
#define BAD_THRESHOLD1 0.8
#define BAD_THRESHOLD2 200
#define EDGE_LIMIT 70
-#define MAX_EDGE 500
-#define EDGE_LOG_COEFF 10
+#define MAX_EDGE 500 // ? 2000 ?
#define PHASE_EXP 4
#define PHASE_LIMIT 40
#define MAX_PHASE_PENALTY 1000
#define AMP_THRESHOLD 200
#define PHASE_COEFF (MAX_PHASE_PENALTY/pow(PHASE_LIMIT, PHASE_EXP))
-#define EDGE_COEFF (MAX_EDGE/log(EDGE_LIMIT*EDGE_LOG_COEFF))
+#define EDGE_COEFF (MAX_EDGE/log(EDGE_LIMIT))
#define NUM_PHASE_SMP (vcDiff>>PHASE_CMP_STEP)
@@ -85,15 +84,18 @@ void vc_decrypt(void){
const int lowResSize= mmx ? DWN_SMP : (DWN_SMP-1);
+#ifdef debug
printf("xx%f %f %f\n", PHASE_POINT/PI*180, PHASE_SEG/PI*180, PHASE_SMP/PI*180);
+#endif
int vScore=0;
// showPoints=true;
const double freqPix2= double(wndx) / double(x_field) * FREQ_PIX;
+#ifdef debug
printf("xy%d %d\n", wndx, x_field);
-
+#endif
// 18
// const int vcStartX=int(double(16 -scales_x+3)/x_field*wndx + .5) + some - 10;
const int vcStartX=int(double(16 -scales_x+3)/x_field*wndx + .5);
@@ -121,7 +123,9 @@ void vc_decrypt(void){
const int vcEndX= vcStartX + int( double(POINT_LAND + NO_POINT_LAND)
* freqPix2 / FREQ_SMP + .5);
+#ifdef debug
printf("vcEndX %d\n",vcEndX);
+#endif
if(vcEndX>wndx) error(error_code(-123));
const int vcDiff=vcEndX - vcStartX;
@@ -147,7 +151,7 @@ void vc_decrypt(void){
static int phaseLut[256];
static bool first= true;
- static byte phasePerCutP[256];
+ static int phasePerCutP[256];
static char sin1PerCutP[256];
static char cos1PerCutP[256];
static char sin2PerCutP[256];
@@ -158,11 +162,11 @@ void vc_decrypt(void){
fields++;
if(first){
- some=10;
+ if(first) some=10;
for(int i=0; i<512; i++){
const int j= min(i+1, EDGE_LIMIT);
- edgeLut[i]=int( log(j*EDGE_LOG_COEFF)*EDGE_COEFF );
+ edgeLut[i]=int( log(j)*EDGE_COEFF );
}
for(int i=0; i<256; i++){
@@ -291,6 +295,7 @@ void vc_decrypt(void){
|| bestDiff+(bestDiff>>1) > addapBuff[ cutP - (step>>1) ]
|| bestDiff+(bestDiff>>1) > addapBuff[ cutP + (step>>1) ]){
+
int diff;
if(mmx){
diff = vc_corr_mmx(
@@ -525,14 +530,15 @@ void vc_decrypt(void){
const byte a= (line & 1) ^ ( vScore>0 ) ? -ang: ang;
- const byte phase1= a + phasePerCutP[ lastCutP ];
+ const int phase1= (a + phasePerCutP[ lastCutP ]) & 0xFF;
+ const int lastPhaseErrLastCutP= lastPhaseErr[lastCutP] + 128;
// FIX (ASM_OPTIMIZE)
if(chromAmp>AMP_THRESHOLD){
for(int newCutP= newCutPStart; newCutP <= newCutPEnd; newCutP++){
- const byte phaseErr= phase1 + phasePerCutP[ newCutP ];
+ const int phaseErr= (phase1 + phasePerCutP[ newCutP ]) & 0xFF;
const int val= lastVal[lastCutP] - driftPenalty[drift]
- phaseLut[ phaseErr ]
- - phaseLut[ (phaseErr - lastPhaseErr[lastCutP] + 128) & 0xFF];// useless without phase errors from decomb-filter
+ - phaseLut[ (phaseErr - lastPhaseErrLastCutP) & 0xFF];// useless without phase errors from decomb-filter
if(newVal[newCutP] < val){
newVal[newCutP]= val;
@@ -547,7 +553,7 @@ void vc_decrypt(void){
else{
linep[ ((vcStartX - 1)<<1) + 1]=255;
for(int newCutP= newCutPStart; newCutP <= newCutPEnd; newCutP++){
- const byte phaseErr= phase1 + phasePerCutP[ newCutP ];
+ const int phaseErr= (phase1 + phasePerCutP[ newCutP ]) & 0xFF;
const int val= lastVal[lastCutP] - driftPenalty[drift];
if(newVal[newCutP] < val){
@@ -773,10 +779,12 @@ void vc_decrypt(void){
bestCutP= dir[(line<<8) + bestCutP];
}
+#ifdef debug
for(int line=1; line<wndy-1; line++)
{
fprintf(pF, "%d\n", cutpp[line]);
}
+#endif
/*
float avgCutP=0;
@@ -788,8 +796,9 @@ void vc_decrypt(void){
printf("avg%f\n", avgCutP-float(vcDiff)/2.0);
*/
+#ifdef debug
printf("vScore %d\n", vScore);
-
+#endif
if(iState){
T2=uclock();
@@ -811,11 +820,11 @@ void vc_decrypt(void){
infoPosY+=5;
T1=T2;
}
-
+/*
c.init(255, 0, 0, false);
sprintf(textbuf[6],"%d l %d f", cutPs, fields);
gprint(infoPosX, infoPosY+=10, c.col, textbuf[6]);
-
+ */
delete [] bestDir;
delete [] dir;
Modified: trunk/2010/2_vc_a.asm
==============================================================================
--- trunk/2010/2_vc_a.asm (original)
+++ trunk/2010/2_vc_a.asm Wed Jul 4 10:39:35 2007
@@ -45,7 +45,7 @@ _vc_corr__Fiiiiii:
xor ebp, ebp
xor edx, edx
- cmp esp, 4
+ cmp esp, byte 4
jb skip1
mov eax, [esp + esi]
@@ -91,7 +91,7 @@ skip1:
mov esp, [count2]
xor edx, edx
- cmp esp, 4
+ cmp esp, byte 4
jb skip2
mov eax, [esp + esi]
@@ -123,7 +123,7 @@ NoMMXloop2:
or eax, 0x80008000 ; V 1
xor edx, ecx ; U
mov ebx, [esp + edi + 8] ; V 1
- add esp, 8 ; U
+ add esp, byte 8 ; U
jnc NoMMXloop2 ; V 1
skip2:
@@ -147,7 +147,6 @@ skip2:
ret
-
_vc_corr_mmx__Fiiiiii:
; int3
push ebx
@@ -162,29 +161,29 @@ _vc_corr_mmx__Fiiiiii:
mov ebx, edi
mov ecx, [mcount1]
- and ecx, -8
- and ebx, 7
+ and ecx, byte -8
+ and ebx, byte 7
shl ebx, 3
- and edi, -8
+ and edi, byte -8
add esi, ecx
movd mm6, ebx
pxor mm7, mm7
- xor ebx, -1
+ xor ebx, byte -1
add edi, ecx
- add ebx, 65
- xor ecx, -1
+ add ebx, byte 65
+ xor ecx, byte -1
inc ecx
movd mm5, ebx
pxor mm4,mm4
- cmp ecx, 8
+ cmp ecx, byte 8
jb skipMMX1
movq mm3, [ecx + edi ]
@@ -197,24 +196,28 @@ _vc_corr_mmx__Fiiiiii:
por mm1, mm3
+align16
MMXLoop:
- movq mm2, mm0 ; U
- psubusb mm0, mm1 ; V 1
- movq mm3, [ecx + edi + 8 ] ; U
- psubusb mm1, mm2 ; V 1
- por mm1, mm0 ; U
- psrlq mm3, mm6 ; V 1
- movq mm2, mm1 ; U
- punpcklbw mm1, mm4 ; V 1
- punpckhbw mm2, mm4 ; U
- paddusw mm7, mm1 ; V 1
- movq mm1, [ecx + edi + 16] ; U
- paddusw mm7, mm2 ; V 1
- movq mm0, [ecx + esi + 8 ] ; U
- psllq mm1, mm5 ; V 1
- por mm1, mm3 ; U
- add ecx, 8 ; V 1
- jnc MMXLoop ; U 1
+ movq mm2, [ecx + esi ] ;0p2 wMM0rECXESI 1
+ psubusb mm0, mm1 ; p01wMM0rMM1MM0 0
+ psubusb mm1, mm2 ; p01wMM1rMM2MM1 0
+ movq mm3, [ecx + edi + 8 ] ;0p2 wMM3rECXEDI 1
+ por mm1, mm0 ; p01wMM1rMM0MM1 0
+ movq mm0, [ecx + esi + 8 ] ; p2 wMM0rECXESI 1
+ psrlq mm3, mm6 ;0p0 wMM3rMM6 1
+ movq mm2, mm1 ; p01wMM2rMM1 0
+ punpcklbw mm1, mm4 ; p0 wMM1rMM4MM1 1
+ punpckhbw mm2, mm4 ;0p0 wMM2rMM4MM2 10
+ paddusw mm7, mm1 ; p01wMM7rMM1MM7 1
+ paddusw mm7, mm2 ; p01wMM7rMM2MM7 10
+ movq mm1, [ecx + edi + 16] ;0p2 wMM1rECXEDI 2
+ psllq mm1, mm5 ; p0 wMM1rMM5 1
+ por mm1, mm3 ; p01wMM1rMM3MM1 1?
+ add ecx, byte 8 ;0p01wECXrECX 1
+ jnc MMXLoop ; p1 rFLAG
+; 8 4 1 4 7 cyc asy
+; 6+ cyc deco
+; 7 cyc rat
skipMMX1:
@@ -224,27 +227,27 @@ skipMMX1:
mov ebx, edi
mov ecx, [mcount2]
- and ecx, -8
- and ebx, 7
+ and ecx, byte -8
+ and ebx, byte 7
shl ebx, 3
- and edi, -8
+ and edi, byte -8
add esi, ecx
movd mm6, ebx
- xor ebx, -1
+ xor ebx, byte -1
add edi, ecx
- add ebx, 65
- xor ecx, -1
+ add ebx, byte 65
+ xor ecx, byte -1
inc ecx
movd mm5, ebx
- cmp ecx, 8
+ cmp ecx, byte 8
jb skipMMX2
movq mm3, [ecx + edi ]
@@ -257,24 +260,25 @@ skipMMX1:
por mm1, mm3
+align16
MMXLoop2:
- movq mm2, mm0 ; U
- psubusb mm0, mm1 ; V 1
- movq mm3, [ecx + edi + 8 ] ; U
- psubusb mm1, mm2 ; V 1
- por mm1, mm0 ; U
- psrlq mm3, mm6 ; V 1
- movq mm2, mm1 ; U
- punpcklbw mm1, mm4 ; V 1
- punpckhbw mm2, mm4 ; U
- paddusw mm7, mm1 ; V 1
- movq mm1, [ecx + edi + 16] ; U
- paddusw mm7, mm2 ; V 1
- movq mm0, [ecx + esi + 8 ] ; U
- psllq mm1, mm5 ; V 1
- por mm1, mm3 ; U
- add ecx, 8 ; V 1
- jnc MMXLoop2 ; U 1
+ movq mm2, [ecx + esi ] ;0p2 wMM0rECXESI 1
+ psubusb mm0, mm1 ; p01wMM0rMM1MM0 0
+ psubusb mm1, mm2 ; p01wMM1rMM2MM1 0
+ movq mm3, [ecx + edi + 8 ] ;0p2 wMM3rECXEDI 1
+ por mm1, mm0 ; p01wMM1rMM0MM1 0
+ movq mm0, [ecx + esi + 8 ] ; p2 wMM0rECXESI 1
+ psrlq mm3, mm6 ;0p0 wMM3rMM6 1
+ movq mm2, mm1 ; p01wMM2rMM1 0
+ punpcklbw mm1, mm4 ; p0 wMM1rMM4MM1 1
+ punpckhbw mm2, mm4 ;0p0 wMM2rMM4MM2 10
+ paddusw mm7, mm1 ; p01wMM7rMM1MM7 1
+ paddusw mm7, mm2 ; p01wMM7rMM2MM7 10
+ movq mm1, [ecx + edi + 16] ;0p2 wMM1rECXEDI 2
+ psllq mm1, mm5 ; p0 wMM1rMM5 1
+ por mm1, mm3 ; p01wMM1rMM3MM1 1?
+ add ecx, byte 8 ;0p01wECXrECX 1
+ jnc MMXLoop2 ; p1 rFLAG
skipMMX2:
@@ -302,3 +306,4 @@ ret
+align16
Modified: trunk/2010/makefile
==============================================================================
--- trunk/2010/makefile (original)
+++ trunk/2010/makefile Wed Jul 4 10:39:35 2007
@@ -1,7 +1,7 @@
.SUFFIXES: .c .cc .cpp .h .o .asm .S .s
OPTS = -g
-OPTS = -O3 -mpentium -fomit-frame-pointer -g -Wall -Wstrict-prototypes -malign-double -ffast-math -funroll-loops -fno-default-inline
+OPTS = -O5 -mpentiumpro -fomit-frame-pointer -g -Wall -Wstrict-prototypes -malign-double -ffast-math -funroll-loops -fno-default-inline
AFLAGS = -f coff
.c.o: ; gcc $(CFLAGS) -c $<
@@ -14,13 +14,15 @@ AFLAGS = -f coff
.asm.o: ; nasm $(AFLAGS) $<
-O = 2010.o 2_file.o 2_grafix.o 2_hw.o 2_gfunc.o \
- 2_hw_asm.o 2_71x6.o 2_hw_mem.o 2_crypt.o\
- 2_glob_a.o 2_txt.o 2_cryp_a.o
+O = 2_glob_a.o 2_cryp_a.o 2_hw_a.o\
+ 2010.o 2_file.o 2_grafix.o 2_hw.o 2_gfunc.o \
+ 2_hw_asm.o 2_71x6.o 2_hw_mem.o 2_crypt.o 2_txt.o
+
-OC = 2010.o 2_file.o 2_grafix.o 2_hw.o 2_gfunc.o \
- 2_hw_asm.o 2_71x6.o 2_hw_mem.o 2_crypt.o 2_vc.o 2_nag.o\
- 2_nag_a.o 2_vc_a.o 2_glob_a.o 2_txt.o 2_cryp_a.o
+OC = 2_nag_a.o 2_vc_a.o 2_glob_a.o 2_cryp_a.o 2_hw_a.o\
+ 2010.o 2_file.o 2_grafix.o 2_hw.o 2_gfunc.o \
+ 2_hw_asm.o 2_71x6.o 2_hw_mem.o 2_crypt.o 2_vc.o 2_nag.o 2_txt.o
+
default: ;$(MAKE) 2010 CFLAGS="$(OPTS)"
@@ -36,7 +38,7 @@ clean: ; del $(O) 2010
2_gfunc.o : 2_gfunc.cpp 2_all.h 2_gfunc.h 2010.h
2_grafix.o : 2_grafix.cpp 2_all.h 2_grafix.h 2010.h 2_gfunc.h 2_hw.h 2_file.h
2_hw.o : 2_hw.cpp 2_all.h 2_hw.h 2010.h 2_hw_asm.h 2_hw_mem.h\
- 2_71x6.h 2_mmx.h
+ 2_71x6.h 2_mmx.h 2_hw_a.h
2_hw_mem.o : 2_hw_mem.cpp 2_all.h 2_hw_mem.h 2010.h
2_71x6.o : 2_71x6.cpp 2_all.h 2_71x6.h 2_hw.h 2_hw_mem.h 2010.h 2_hw_asm.h
2_crypt.o : 2_crypt.cpp 2_all.h 2_crypt.h 2_71x6.h 2_vc.h 2_nag.h 2_txt.h
@@ -51,4 +53,5 @@ clean: ; del $(O) 2010
2_nag_a.o : 2_nag_a.asm 2_all_a.asm
2_vc_a.o : 2_vc_a.asm 2_all_a.asm
2_cryp_a.o : 2_cryp_a.asm 2_all_a.asm
+2_hw_a.o : 2_hw_a.asm 2_all_a.asm
More information about the Mndiff-dev mailing list