[MN-dev] [mndiff]: r59 - in trunk/2010: 2010.cpp 2_all_a.asm 2_cryp_a.asm 2_cryp_a.h 2_crypt.cpp 2_glob_a.asm 2_hw.cpp 2_hw_a.asm 2_hw_a.h 2_hw_mem.cpp 2_nag.cpp 2_nag_a.asm 2_vc.cpp 2_vc_a.asm makefile

michael subversion at mplayerhq.hu
Wed Jul 4 10:39:35 CEST 2007


Author: michael
Date: Wed Jul  4 10:39:35 2007
New Revision: 59

Log:
next version from unknown date


Added:
   trunk/2010/2_hw_a.asm
   trunk/2010/2_hw_a.h
Modified:
   trunk/2010/2010.cpp
   trunk/2010/2_all_a.asm
   trunk/2010/2_cryp_a.asm
   trunk/2010/2_cryp_a.h
   trunk/2010/2_crypt.cpp
   trunk/2010/2_glob_a.asm
   trunk/2010/2_hw.cpp
   trunk/2010/2_hw_mem.cpp
   trunk/2010/2_nag.cpp
   trunk/2010/2_nag_a.asm
   trunk/2010/2_vc.cpp
   trunk/2010/2_vc_a.asm
   trunk/2010/makefile

Modified: trunk/2010/2010.cpp
==============================================================================
--- trunk/2010/2010.cpp	(original)
+++ trunk/2010/2010.cpp	Wed Jul  4 10:39:35 2007
@@ -1,3 +1,4 @@
+
 //2010 0.1 Copyright (c) 1998 Michael Niedermayer
 
 #include <stdlib.h>
@@ -55,7 +56,7 @@ int _crt0_startup_flags = _CRT0_FLAG_NEA
  byte font[4096];
  int vid=0;
  int bpp;
- int *yuv2RgbLut;
+ unsigned short *yuv2RgbLut;
  int edge_lut[256];
  MENULEVEL menuLevel=mLNorm;
  int infoPosX;
@@ -174,7 +175,8 @@ int main(int argc, char **argv){
     NoFlip=1;
   }
   else if(stricmp(argv[i],"-NORGB")==0){
-    yuvMode=2;
+    yuvMode=
+    grabYuvMode=2;
   }
   else if(stricmp(argv[i],"-maxBuf")==0){
     i++;
@@ -228,35 +230,23 @@ int main(int argc, char **argv){
  }
 
  printf("Building YUV to RGB LookUp Tables!\n");
- yuv2RgbLut=new int[128*4*2];
- for(int i=0; i<128; i++){
-   int j;
-   if(i>=64) j=i-128;
-   else      j=i;                        
-   double uv= double(j);
-
-   int vr= int( uv * 1.375       ) & 0x1FF;
-   int vg= int(-uv * 0.703125 * 2) & 0x3FF;
-   int ug= int(-uv * 0.34375  * 2) & 0x3FF;
-   int ub= int( uv * 1.734375    ) & 0x1FF;
-   int yn= i;
-   int yg=  i<<1;
-
-// 565565
-// 655655
+ yuv2RgbLut=new unsigned short[256*128];
+ for(int u=-16; u<=15; u++)
+    {
+    for(int v=-16; v<=15; v++)
+        {
+        for(int y=0; y<=31; y++)
+            {
+            int r= minmax(0, int( y + double(v) * 1.375                       ), 31); 
+            int g= minmax(0, int((y - double(v) * 0.703 - double(u) * 0.343)*2), 63); 
+            int b= minmax(0, int( y                     + double(u) * 1.734   ), 31); 
 
-//   B R G
-//    G B R
+            const int xLut= y + ((u&0x1F)<<5) + ((v&0x1F)<<10);
+            yuv2RgbLut[ xLut ]= b + (g<<5) + (r<<11);
 
-   yuv2RgbLut[(i<<1)       ]= ub                     +(ug<<21)         ;
-   yuv2RgbLut[(i<<1) + 1   ]=    ug         +(ub<<11)                  ;
-   yuv2RgbLut[(i<<1) + 256 ]= yn   +(yn<<11)                           ;
-   yuv2RgbLut[(i<<1) + 257 ]=    yg                                    ;
-   yuv2RgbLut[(i<<1) + 512 ]=       (vr<<11)         +(vg<<21)         ;
-   yuv2RgbLut[(i<<1) + 513 ]=    vg                           +(vr<<22);
-   yuv2RgbLut[(i<<1) + 768 ]=                        +(yg<<21)         ;
-   yuv2RgbLut[(i<<1) + 769 ]=               +(yn<<11)         +(yn<<22);
- }
+            }
+        }
+    }
 
  if((vib=get_vesa_info())==0) error(NoVESA);
  if(vib->ver < 0x200) error(VESAVer);
@@ -294,11 +284,6 @@ int main(int argc, char **argv){
 
  vgax=mib->Xres;
  vgay=mib->Yres;
-/* if(yuvMode!=0) mc=1;
- else mc=2;
- if(yuvMode==2) mc2=1;
- else mc2=2;
-*/
  mc=mc2=1;
  xresc=vgax<<mc;
  bpp=mib->bpp;
@@ -436,7 +421,7 @@ int main(int argc, char **argv){
                        case 'I'  : iState= !iState;
                        break;
                        case 'v'  :
-                       case 'V'  : iVSmooth++; iVSmooth%=3; 
+                       case 'V'  : iVSmooth++; iVSmooth%=3;
                        break;
                        case 'h'  :
                        case 'H'  : helpState= !helpState;
@@ -531,7 +516,7 @@ int main(int argc, char **argv){
                                    cryptStd= vc;
                                    if(yuvMode==0) yuvMode=1;
                                    grabYuvMode=yuvMode;
-                                   iVSmooth=0;
+//                                   iVSmooth=2;
                                    setStdScale();
                                    contGrab();
                        break;
@@ -539,7 +524,7 @@ int main(int argc, char **argv){
                                    cryptStd= nag;
                                    if(yuvMode==0) yuvMode=1;
                                    grabYuvMode=yuvMode;
-                                   iVSmooth=0;
+//                                   iVSmooth=2;
                                    setStdScale();
                                    contGrab();
                        break;

Modified: trunk/2010/2_all_a.asm
==============================================================================
--- trunk/2010/2_all_a.asm	(original)
+++ trunk/2010/2_all_a.asm	Wed Jul  4 10:39:35 2007
@@ -17,7 +17,38 @@
 
 ;%define int3 db 0xCC
 
+%define lt(a,b) ( ((a)-(b)) >>31)
+%define gt(a,b) ( ((b)-(a)) >>31)
+%define le(a,b) ( ((a)-(b)-1) >>31)
+%define ge(a,b) ( ((b)-(a)-1) >>31)
+%define eq(a,b) (ge(a,b) & le(a,b))
+; lt/gt/le/ge/eq(a,b) above: take bit 31 of a difference, i.e. 1 when the relation holds and 0 otherwise
+; NOTE(review): assumes 32-bit expression evaluation with a logical >> -- confirm for the assembler version in use
 %define align2  times ($$-$) & 1  nop
-%define align4  times ($$-$) & 3  nop
-%define align8  times ($$-$) & 7  nop
-%define align16 times ($$-$) & 15 nop
+;%define align4  times ($$-$) & 3  nop
+;%define align8  times ($$-$) & 7  nop
+;%define align16 times ($$-$) & 15 nop
+
+%macro align4 0
+ times ge(($$-$)&3, 3) cmp ebx, byte 0     ; ($$-$)&3 = bytes missing to a 4-byte boundary; 3 missing -> one 3-byte cmp
+ times ge(($$-$)&3, 2) cmp ebx, ebx        ; $ is re-read on every line; 2 still missing -> one 2-byte cmp
+ times ge(($$-$)&3, 1) nop                 ; 1 still missing -> nop (the cmp fillers overwrite the flags, nop does not)
+%endmacro
+
+%macro align8 0
+ times ge(($$-$)&7, 6) cmp ebx, 0          ; 6 or 7 bytes missing: 6-byte filler when encoded with an imm32
+ times ge(($$-$)&7, 5) cmp eax, 0          ; 5 missing: 5-byte filler when encoded with an imm32
+ times ge(($$-$)&7, 3) cmp ebx, byte 0     ; 3 or 4 missing: 3-byte filler
+ times ge(($$-$)&7, 2) cmp ebx, ebx        ; 2 missing: 2-byte filler
+ times ge(($$-$)&7, 1) nop                 ; last byte; NOTE(review): padding is wrong if the assembler shrinks the imm32 forms to imm8 -- confirm
+%endmacro
+
+%macro align16 0
+ times eq(($$-$)&15, 10) cmp eax, 0        ; exactly 10 missing: 5 bytes here, the other 5 come from the ge(..,5) line below
+ times ge(($$-$)&15, 6) cmp ebx, 0         ; 6 or more missing: 6-byte filler when encoded with an imm32
+ times ge(($$-$)&15, 6) cmp ebx, 0         ; same line again: fires a second time when 12..15 bytes were missing
+ times ge(($$-$)&15, 5) cmp eax, 0         ; 5 missing: 5-byte filler when encoded with an imm32
+ times ge(($$-$)&15, 3) cmp ebx, byte 0    ; 3 or 4 missing: 3-byte filler
+ times ge(($$-$)&15, 2) cmp ebx, ebx       ; 2 missing: 2-byte filler
+ times ge(($$-$)&15, 1) nop                ; last byte; NOTE(review): padding is wrong if the assembler shrinks the imm32 forms to imm8 -- confirm
+%endmacro

Modified: trunk/2010/2_cryp_a.asm
==============================================================================
--- trunk/2010/2_cryp_a.asm	(original)
+++ trunk/2010/2_cryp_a.asm	Wed Jul  4 10:39:35 2007
@@ -1,11 +1,13 @@
-;2010 0.1 Copyright (C) Michael Niedermayer 1998
+;2010 0.1 Copyright (C) Michael Niedermayer 1999
 
 %include "2_all_a.asm"
 
-segment .data
+segment .data 
 
 global _VSmooth1RGB1555__Fiii
 global _VSmooth2RGB1555__Fiii
+global _Decomb1__Fiii
+global _Decomb2__Fiii
 
 %define pBuf1    par1
 %define iVgaX21  par2
@@ -15,6 +17,109 @@ global _VSmooth2RGB1555__Fiii
 %define iVgaX22  par3
 %define nOp2     par4
 
+%define pYuv3    par1
+%define nOp3     par2
+%define pBuf3    par3
+
+_Decomb1__Fiii:                 ; void Decomb1(int, int, int) in 2_cryp_a.h; operands read here as pYuv3, nOp3, pBuf3
+ push eax                       ; all six general registers used or not are saved and restored
+ push ebx
+
+ push ecx
+ push edx
+
+ push esi
+ push edi
+
+ mov esi, [pYuv3]               ; esi = source line
+ mov edi, [pBuf3]               ; edi = word buffer that is read and rewritten
+ mov ecx, [nOp3]                ; ecx = byte count
+ add esi, ecx                   ; both pointers now address the end of their data
+ add edi, ecx
+ xor ecx, byte -1
+ inc ecx                        ; ecx = -nOp3, used as a negative index that counts up
+
+ movq mm7, [decombInc]          ; 0x0100 in each of the four words
+align16
+Decomb1Loop:
+ movq mm0, [esi + ecx]          ; four source words
+ psllw mm0, 8                   ; each word: low byte moved into the high byte, low byte zeroed
+ movq mm2, [edi + ecx]          ; four buffer words
+ psubsw mm0, mm2                ; mm0 = shifted source - buffer (signed, saturating)
+ pcmpgtw mm2, mm0               ; 0xFFFF in each word where buffer > difference (signed compare)
+ pand mm2, mm7                  ; keep decombInc only in those words
+ paddsw mm0, mm2                ; add it (signed, saturating)
+ movq [edi + ecx], mm0          ; result replaces the buffer words; the source is not written
+ add ecx, byte 8                ; next 8 bytes; NOTE(review): with nOp3 == 0 this add never carries on the first pass
+  jnc Decomb1Loop                ; repeat until the add carries (index wrapped to 0)
+
+ pop edi                        ; no emms here: MMX state is left for the caller to clear
+ pop esi
+
+ pop edx
+ pop ecx
+
+ pop ebx
+ pop eax
+
+ret
+
+_Decomb2__Fiii:                 ; void Decomb2(int, int, int) in 2_cryp_a.h; operands read here as pYuv3, nOp3, pBuf3
+ push eax                       ; all six general registers used or not are saved and restored
+ push ebx
+
+ push ecx
+ push edx
+
+ push esi
+ push edi
+
+ mov esi, [pYuv3]               ; esi = source line (read and rewritten)
+ mov edi, [pBuf3]               ; edi = word buffer (read and rewritten)
+ mov ecx, [nOp3]                ; ecx = byte count
+ add esi, ecx                   ; both pointers now address the end of their data
+ add edi, ecx
+ xor ecx, byte -1
+ inc ecx                        ; ecx = -nOp3, used as a negative index that counts up
+
+ movq mm6, [decombYMask]        ; 0xFF00 in each word: keeps the high byte of every source word
+ movq mm7, [decombInc]          ; 0x0100 in each word
+align16
+Decomb2Loop:                     ; buffer update as in Decomb1Loop, then (source & decombYMask) | (result >> 7) is stored to the source
+ movq mm0, [esi + ecx]           ;0p2 wMM0rESIECX 1
+ movq mm1, mm0                   ; p01wMM1rMM0    0   test mov mem
+ pand mm1, mm6                   ; p01wMM1rMM6MM1 1
+ psllw mm0, 8                    ;0p1 wMM0rMM0    0
+ movq mm2, [edi + ecx]           ; p2 wMM2rEDIECX 1
+ psubsw mm0, mm2                 ; p01wMM0rMM2MM0 0
+ pcmpgtw mm2, mm0                ;0p01wMM2rMM0MM2 0
+ pand mm2, mm7                   ; p01wMM2rMM7MM2 1
+ paddsw mm0, mm2                 ; p01wMM0rMM2MM0 0
+ movq [edi + ecx], mm0           ;0p3     rEDIECX 2    p4     rMM0 0
+ psrlw mm0, 7                    ; p1 wMM0rMM0    0
+ por mm1, mm0                    ; p01wMM1rMM0MM1 ?0
+ movq [esi + ecx], mm1           ;0p3     rESIECX 2    p4     rMM1 0
+ add ecx, byte 8                 ; p01wECXrECX   (1)
+  jnc Decomb2Loop                ; p1     rFLAG   0
+
+ ; 8 0 3 2 2 2 -> exec 6 cyc
+ ; deco 5 cyc
+
+
+
+
+ pop edi                        ; no emms here: MMX state is left for the caller to clear
+ pop esi
+
+ pop edx
+ pop ecx
+
+ pop ebx
+ pop eax
+
+ret
+
+
 _VSmooth1RGB1555__Fiii:
  push eax
  push ebx
@@ -26,12 +131,12 @@ _VSmooth1RGB1555__Fiii:
  push edi
 
  mov esi, [pBuf1]
- add esi, 8
+ add esi, byte 8
 
  mov ecx, [nOp1]
  add esi, ecx
 
- xor ecx, -1
+ xor ecx, byte -1
  mov ebx, [iVgaX21]
 
  inc ecx
@@ -51,6 +156,7 @@ _VSmooth1RGB1555__Fiii:
 
 
  ; more opt possible (+2reg -1psrlw)
+align16
 MMXLoop1:
  movq mm5, mm0
  movq mm4, mm1
@@ -88,7 +194,7 @@ MMXLoop1:
  movq mm1, [ebx+ecx+8]
  por mm3, mm5
 
- add ecx, 8
+ add ecx, byte 8
   jnc MMXLoop1
 
  pop edi
@@ -100,8 +206,6 @@ MMXLoop1:
  pop ebx
  pop eax
 
- emms
-
 ret
 
 _VSmooth2RGB1555__Fiii:
@@ -118,10 +222,10 @@ _VSmooth2RGB1555__Fiii:
 
  mov esi, [pBuf2]
 
- add esi, 8
+ add esi, byte 8
  mov ebp, [nOp2]
 
- xor ebp, -1
+ xor ebp, byte -1
  mov ebx, [iVgaX22]
 
  add esi, ebx
@@ -139,7 +243,7 @@ _VSmooth2RGB1555__Fiii:
  mov ecx, eax
  add edi, esi
 
- xor ecx, -1
+ xor ecx, byte -1
 
  inc ecx
 
@@ -148,6 +252,7 @@ _VSmooth2RGB1555__Fiii:
  movq mm7, [mask2]
 
  ; lots of optimize possible 
+align16
 MMXLoop2:
  ;AGI
  movq mm0, [esi+ecx]
@@ -199,12 +304,14 @@ MMXLoop2:
  pop ebx
  pop eax
 
- emms
-
 ret
 
-xyz times 8 dd 0           
+xyz times 8 dd 0
 
 align8
 mask1 times 2 dd 0x7C1F03E0
 mask2 times 2 dd 0x03E07C1F
+decombInc   times 2 dd 0x01000100
+decombYMask times 2 dd 0xFF00FF00
+
+align16

Modified: trunk/2010/2_cryp_a.h
==============================================================================
--- trunk/2010/2_cryp_a.h	(original)
+++ trunk/2010/2_cryp_a.h	Wed Jul  4 10:39:35 2007
@@ -5,4 +5,7 @@
 void VSmooth1RGB1555(int, int, int);
 void VSmooth2RGB1555(int, int, int);
 
+void Decomb1(int, int, int);
+void Decomb2(int, int, int);
+
 #endif

Modified: trunk/2010/2_crypt.cpp
==============================================================================
--- trunk/2010/2_crypt.cpp	(original)
+++ trunk/2010/2_crypt.cpp	Wed Jul  4 10:39:35 2007
@@ -5,6 +5,7 @@
 #include <time.h>
 #include "2_all.h"
 #include "2_crypt.h"
+#include "2_hw_mem.h"
 #include "2_cryp_a.h"
 #include "2_vc.h"
 #include "2_nag.h"
@@ -30,7 +31,7 @@ extern int some;
 static void VSmooth(void);
 static void Decomb(int q);
 
-int iVSmooth=1;
+int iVSmooth=2;
 bool showPoints=false;
 
  asm("__esp: .long 0\n\t");
@@ -82,149 +83,35 @@ void decrypt(void){
 
 
 static void Decomb(int q){
- int temp[max_x];
+ int *temp= (int*)newAlign(max_x * sizeof(int), 8);
  const int outy1= outy>>1;
 
  const int stride=oneField ? (vgax<<1) : (vgax<<2);
  const int o=oneField ? 0 : 1;
  const int end=oneField ? outy : outy1;
 
- for(int i=0; i<wndx; i++) temp[i]=128<<8;
+// for(int i=0; i<wndx; i++) temp[i]=128<<8;
+ for(int i=0; i<wndx; i++) temp[i]=0;
 
  if(q==1){
   int linep= stride*256;
   if(o) linep+=vgax<<1;
   for(int line=256; line>=0; line--){
-   asm(//"int $3                       \n\t"
-       "pushl %%ebp                    \n\t"    //U
-       "movl %%eax, %%ebp              \n\t"    // V 1
-       "addl %%ebp, %%edi              \n\t"    //U
-       "addl %%ebp, %%esi              \n\t"    // V 1
-       "addl %%ebp, %%esi              \n\t"    //U  1
-       "negl %%ebp                     \n\t"    //UV 1
-       "1:                             \n\t"
-       "xorl %%eax, %%eax              \n\t"    //U
-       "xorl %%ebx, %%ebx              \n\t"    // V 1
-       "movb  (%%edi, %%ebp   ), %%al  \n\t"    //U
-       "movb 2(%%edi, %%ebp   ), %%bl  \n\t"    // V 1
-       "addb $128, %%al                \n\t"    //U
-       "addb $128, %%bl                \n\t"    // V 1
-       "shll $9, %%eax                 \n\t"    //U
-       "movl  (%%esi, %%ebp, 2), %%ecx \n\t"    // V 1
-       "shll $9, %%ebx                 \n\t"    //U
-       "subl %%ecx, %%eax              \n\t"    // V 1
-       "movl 4(%%esi, %%ebp, 2), %%edx \n\t"    //U
-       "cmpl %%ecx, %%eax              \n\t"    // V 1
-       "sbbl %%ecx, %%ecx              \n\t"    //U
-       "subl %%edx, %%ebx              \n\t"    // V 1
-       "andl %0, %%ecx                 \n\t"    //U
-       "cmpl %%edx, %%ebx              \n\t"    // V 1
-       "sbbl %%edx, %%edx              \n\t"    //U
-       "addl %%ecx, %%eax              \n\t"    // V 1
-       "andl %0, %%edx                 \n\t"    //U
-       "movl %%eax,  (%%esi, %%ebp, 2) \n\t"    // V 1
-       "addl %%edx, %%ebx              \n\t"    //U  1
-       "movl %%ebx, 4(%%esi, %%ebp, 2) \n\t"    //U
-
-       "addl $4, %%ebp                 \n\t"    // V 1
-       " jnc 1b                        \n\t"    //U  1
-       "popl %%ebp                     \n\t"    //U
-           :
-           : "i" (COMB_FIX), "a" ((wndx<<1) - 2),
-             "D" (actVid2MemBufp->b + linep + 2), "S" (int(temp) + 4)
-           : "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi");
-   linep-=stride;
+    Decomb1(int(actVid2MemBufp->b + linep), (wndx<<1), int(temp));
+    linep-=stride;
   }
  }
 
  int linep= 0;
  if(o) linep+=vgax<<1;
  for(int line=0; line<end; line++){
-   asm(//"int $3                       \n\t"
-       "pushl %%ebp                    \n\t"    //U
-       "movl %%eax, %%ebp              \n\t"    // V 1
-       "addl %%ebp, %%edi              \n\t"    //U
-       "addl %%ebp, %%esi              \n\t"    // V 1
-       "addl %%ebp, %%esi              \n\t"    //U  1
-       "negl %%ebp                     \n\t"    //UV 1
-       "1:                             \n\t"
-       "xorl %%eax, %%eax              \n\t"    //U
-       "xorl %%ebx, %%ebx              \n\t"    // V 1
-       "movb  (%%edi, %%ebp   ), %%al  \n\t"    //U
-       "movb 2(%%edi, %%ebp   ), %%bl  \n\t"    // V 1
-       "addb $128, %%al                \n\t"    //U
-       "addb $128, %%bl                \n\t"    // V 1
-       "shll $9, %%eax                 \n\t"    //U
-       "movl  (%%esi, %%ebp, 2), %%ecx \n\t"    // V 1
-       "shll $9, %%ebx                 \n\t"    //U
-       "subl %%ecx, %%eax              \n\t"    // V 1
-       "movl 4(%%esi, %%ebp, 2), %%edx \n\t"    //U
-       "cmpl %%ecx, %%eax              \n\t"    // V 1
-       "sbbl %%ecx, %%ecx              \n\t"    //U
-       "subl %%edx, %%ebx              \n\t"    // V 1
-       "andl %0, %%ecx                 \n\t"    //U
-       "cmpl %%edx, %%ebx              \n\t"    // V 1
-       "sbbl %%edx, %%edx              \n\t"    //U
-       "addl %%ecx, %%eax              \n\t"    // V 1
-       "andl %0, %%edx                 \n\t"    //U
-       "movl %%eax,  (%%esi, %%ebp, 2) \n\t"    // V 1
-       "addl %%edx, %%ebx              \n\t"    //U
-       "subb $128, %%ah                \n\t"    // V 1
-       "movl %%ebx, 4(%%esi, %%ebp, 2) \n\t"    //U
-       "subb $128, %%bh                \n\t"    // V 1
-       "movb %%ah,   (%%edi, %%ebp   ) \n\t"    //U
-       "movb %%bh,  2(%%edi, %%ebp   ) \n\t"    // V 1
-
-       "addl $4, %%ebp                 \n\t"    //U
-       " jnc 1b                        \n\t"    // V 1
-       "popl %%ebp                     \n\t"    //U
-           :
-           : "i" (COMB_FIX), "a" ((wndx<<1) - 2),
-             "D" (actVid2MemBufp->b + linep + 2), "S" (int(temp) + 4)
-           : "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi");
+   Decomb2(int(actVid2MemBufp->b + linep), (wndx<<1), int(temp));
    linep+=stride;
  }
 
+ asm("emms\n\t");
 
-
-
-/*
- asm(//"int $3                    \n\t"
-     "1:                          \n\t"
-     "movl (%%esi      ), %%eax   \n\t"    //U         LINE-1
-     "movl (%%esi,%%edi), %%ebx   \n\t"    // V 1      LINE
-     "movl %%ebx, %%ecx           \n\t"    //U
-     "andl $0x00FF00FF, %%eax     \n\t"    // V 1
-     "andl $0x00FF00FF, %%ebx     \n\t"    //U
-     "addl $0x00800080, %%eax     \n\t"    // V 1
-     "xorl $0x00800080, %%eax     \n\t"    //U
-     "addl %%ebx, %%ebx           \n\t"    // V 1
-     "orl  $0x02000200, %%ebx     \n\t"    //U
-     "andl $0xFF00FF00, %%ecx     \n\t"    // V 1
-     "subl %%eax, %%ebx           \n\t"    //U
-     "addl $4, %%esi              \n\t"    // V 1 
-     "movl %%ebx, %%eax           \n\t"    //U
-     "andl $0x01F801F8, %%ebx     \n\t"    // V 1
-     "shrl $3, %%ebx              \n\t"    //U
-     "nop                         \n\t"    // V 1 FIX ME
-     "addl $0x00600060, %%ebx     \n\t"    //U
-     "nop                         \n\t"    // V 1 FIX ME
-     "xorl $0x00600060, %%ebx     \n\t"    //U
-     "nop                         \n\t"    // V 1 FIX ME
-     "subl %%ebx, %%eax           \n\t"    //U
-     "nop                         \n\t"    // V 1 FIX ME
-     "andl $0x00FF00FF, %%eax     \n\t"    //U
-     "nop                         \n\t"    // V 1 FIX ME
-     "orl %%ecx, %%eax            \n\t"    //U
-     "decl %%edx                  \n\t"    // V 1
-     "movl %%eax, -4(%%esi, %%edi)\n\t"    //U
-     " jnz 1b                     \n\t"    // V 1
-
-         :
-         : "D" (oneField ? (vgax<<1) : (vgax<<2)), "S" (actVid2MemBufp->b),
-           "d" (vgax * wndy>>1)
-         : "%eax", "%ebx", "%ecx", "%edx", "%esi");
- */
+ deleteAlign(temp);
 }
 
 static void VSmooth(void){
@@ -236,40 +123,5 @@ static void VSmooth(void){
     else
         VSmooth2RGB1555(int(actVid2MemBufp->b), vgax<<1, vgax*(wndy-3)<<1);
     }
-/*
- else{
-   asm(//"int $3                      \n\t"
-       "1:                          \n\t"
-       "movl (%%esi        ), %%eax \n\t"    //U         LINE
-       "movl (%%esi,%%edi,2), %%ebx \n\t"    // V 1      LINE + 2
-       "shrl $2, %%eax              \n\t"    //U
-       "andl $0xFCFCFCFC, %%ebx     \n\t"    // V 1        
-       "shrl $2, %%ebx              \n\t"    //U
-       "andl $0x3F3F3F3F, %%eax     \n\t"    // V 1
-       "addl $0x00200020, %%eax     \n\t"    //U
-       "addl $0x00200020, %%ebx     \n\t"    // V 1
-       "xorl $0x00200020, %%eax     \n\t"    //U
-       "xorl $0x00200020, %%ebx     \n\t"    // V 1
-       "addl %%eax, %%ebx           \n\t"    //U
-       "movl (%%esi,%%edi  ), %%eax \n\t"    // V 1      LINE + 1
-       "shrl $1, %%eax              \n\t"    //U
-       "andl $0x7F7F7F7F, %%ebx     \n\t"    // V 1  
-       "andl $0x7F7F7F7F, %%eax     \n\t"    //U
-       "addl $0x00400040, %%ebx     \n\t"    // V 1
-       "addl $0x00400040, %%eax     \n\t"    //U
-       "xorl $0x00400040, %%ebx     \n\t"    // V 1
-       "xorl $0x00400040, %%eax     \n\t"    //U
-       "addl $4, %%esi              \n\t"    // V 1
-       "addl %%eax, %%ebx           \n\t"    //U
-       "decl %%ecx                  \n\t"    // V 1
-       "movl %%ebx, -4(%%esi      ) \n\t"    //U         LINE
-       " jnz 1b                     \n\t"    // V 1
-           :
-           : "D" (vgax<<1), "S" (actVid2MemBufp->b),
-             "c" (vgax * (wndy-2) >> 1) 
-           : "%eax", "%ebx", "%ecx", "%esi");
-
-
- }
-  */
+ asm("emms\n\t");
 }

Modified: trunk/2010/2_glob_a.asm
==============================================================================
--- trunk/2010/2_glob_a.asm	(original)
+++ trunk/2010/2_glob_a.asm	Wed Jul  4 10:39:35 2007
@@ -12,7 +12,7 @@ global _asm_temp4
 
 segment .bss
 
-align4
+;align4
 
 esp_save   resd 1
 _asm_temp  resd 1

Modified: trunk/2010/2_hw.cpp
==============================================================================
--- trunk/2010/2_hw.cpp	(original)
+++ trunk/2010/2_hw.cpp	Wed Jul  4 10:39:35 2007
@@ -11,6 +11,7 @@
 #include <time.h>
 #include "2_all.h"
 #include "2_hw.h"
+#include "2_hw_a.h"
 #include "2_hw_asm.h"
 #include "2_hw_mem.h"
 #include "2_71x6.h"
@@ -26,13 +27,13 @@
  extern volatile AllocList *alloclist;
  extern volatile VID2MEMBUF *vid2MemBuf, *actVid2MemBufp;
  extern volatile int grabVid2MemBuf, actVid2MemBuf;
- extern volatile int one_field;
+ extern volatile bool oneField;
  extern volatile int active_meteor;
  extern volatile meteor meteors[8];
  extern volatile int fields;
  extern byte *vidbuf;
- extern int vgax, vgay, xresc, xresvb, vb, mc, wndy, wndx;
- extern int *yuv2RgbLut;
+ extern int vgax, vgay, xresc, xresvb, vb, mc, wndy, wndx, outy;
+ extern unsigned short *yuv2RgbLut;
  extern int single;
  extern int in_int;
  extern volatile int frames;
@@ -40,6 +41,7 @@
  extern bool iState;
  extern int infoPosX, infoPosY;
  extern TVSTD TVStd;
+ extern int iVSmooth;
 
  bool drop=false;
  bool allowDrop=true;
@@ -51,133 +53,13 @@
 
  int VID2MEMBUF::num;
 
-void mem2vid(byte *to, byte *from, int num, int bpp){
+void mem2vid(byte *to, byte *from, int num, int bpp, int y){
 
  if(yuvMode!=0){
-   asm(//"int $3                            \n\t"
-       "pushl %%ebp                       \n\t"
-       "movl %%eax, %%ebp                 \n\t"
-       "movl %%esp, (__esp)               \n\t"
-
-       "addl %%ecx, %%esi                 \n\t"
-       "addl %%ecx, %%edi                 \n\t"
-       "negl %%ecx                        \n\t"
-       "movl (%%esi, %%ecx), %%eax        \n\t"
-       "movl %%esi, (__temp1)             \n\t"
-
-       "1:                                \n\t"
-       "movl %%eax, %%edx                 \n\t"
-       "andl $0xFE, %%eax                 \n\t"
-
-       "movl %%edx, %%ebp                 \n\t"
-       "andl $0xFE00, %%edx               \n\t"
-
-       "shrl $8, %%edx                    \n\t"
-       "movl     (%%ebx, %%eax, 4), %%esp \n\t"
-
-       "shrl $16, %%ebp                   \n\t"
-       "movl    4(%%ebx, %%eax, 4), %%eax \n\t"
-
-       "movl 1024(%%ebx, %%edx, 4), %%esi \n\t"
-       "movl 1028(%%ebx, %%edx, 4), %%edx \n\t"
-
-       "addl %%esi, %%esp                 \n\t"
-       "addl %%edx, %%eax                 \n\t"
-
-       "movl %%ebp, %%edx                 \n\t"
-       "andl $0xFE, %%ebp                 \n\t"
-
-       "shrl $8, %%edx                    \n\t"
-       "andl $0x7FEFF9FF, %%esp           \n\t"
-
-       "andl $0xFE, %%edx                 \n\t"
-       "movl 2048(%%ebx, %%ebp, 4), %%esi \n\t"
-
-       "andl $0x7FCFFBFF, %%eax           \n\t"
-       "movl 2052(%%ebx, %%ebp, 4), %%ebp \n\t"
-
-       "addl %%esi, %%esp                 \n\t"
-       "addl %%ebp, %%eax                 \n\t"
-
-       "andl $0x7FEFF9FF, %%esp           \n\t"
-       "movl 3072(%%ebx, %%edx, 4), %%esi \n\t"
-
-       "andl $0x7FCFFBFF, %%eax           \n\t"
-       "movl 3076(%%ebx, %%edx, 4), %%edx \n\t"
-
-       "addl %%esi, %%esp                 \n\t"
-       "addl %%edx, %%eax                 \n\t"
-
-       "test $0x600C0180, %%esp           \n\t"
-       " jz 2f                            \n\t"
-
-       "movl %%esp, %%esi                 \n\t"
-       "movl %%esp, %%edx                 \n\t"
-
-       "shrl $7, %%esi                    \n\t"
-       "andl $0x40080100, %%edx           \n\t"
-
-       "shrl $8, %%edx                    \n\t"
-       "andl $0x00400801, %%esi           \n\t"
-
-       "addl $0xEFDFFBFF, %%edx           \n\t"
-       "addl $0xEFDFFBFF, %%esi           \n\t"
-
-       "xorl $0x10200400, %%edx           \n\t"
-       "xorl $0xEFDFFBFF, %%esi           \n\t"
-
-       "orl %%esi, %%esp                  \n\t"
-
-       "andl %%edx, %%esp                 \n\t"
-
-       "2:                                \n\t"
-       "test $0x600C0300, %%eax           \n\t"
-       " jz 3f                            \n\t"
-
-       "movl %%eax, %%esi                 \n\t"
-       "movl %%eax, %%edx                 \n\t"
-
-       "shrl $8, %%esi                    \n\t"
-       "andl $0x40080200, %%edx           \n\t"
-
-       "shrl $9, %%edx                    \n\t"
-       "andl $0x00200401, %%esi           \n\t"
-
-       "addl $0xEFEFFDFF, %%edx           \n\t"
-       "addl $0xEFEFFDFF, %%esi           \n\t"
-
-       "xorl $0x10100200, %%edx           \n\t"
-       "xorl $0xEFEFFDFF, %%esi           \n\t"
-
-       "orl %%esi, %%eax                  \n\t"
-
-       "andl %%edx, %%eax                 \n\t"
-
-       "3:                                \n\t"
-
-       "shrl $2, %%esp                    \n\t"
-       "movl (__temp1), %%esi             \n\t"
-
-       "shll $3, %%eax                    \n\t"
-       "andl $0x07E0F81F, %%esp           \n\t"
-
-       "andl $0xF81F07E0, %%eax           \n\t"
-
-       "orl %%eax, %%esp                  \n\t"
-
-       "movl %%esp, (%%edi, %%ecx)        \n\t"
-
-       "movl 4(%%esi, %%ecx), %%eax       \n\t"
-       "addl $4, %%ecx                    \n\t"
-
-       "jnc 1b                            \n\t"
-
-       "movl (__esp), %%esp               \n\t"
-       "popl %%ebp                        \n\t"
-       :
-       : "b" (yuv2RgbLut), "S" (from), "D" (to), "c" (num<<1)
-       : "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi");
-
+   if((y&1) && !oneField && wndy==outy)
+     CopyYuv2Rgb2(int(from), int(to), num<<1, int(yuv2RgbLut));
+   else
+     CopyYuv2Rgb1(int(from), int(to), num<<1, int(yuv2RgbLut));
  }else{
    if(strangeRgb16){
      asm("addl %%ecx, %%esi             \n\t"
@@ -316,13 +198,13 @@ void copy_vidbuffer(void){
    if(page_flip==0){
      int p=0;
      for(int y=0; y<goody; y++){
-       mem2vid(vidbuf+p,              actVid2MemBufp->b+p, goodx, bpp);
+       mem2vid(vidbuf+p,              actVid2MemBufp->b+p, goodx, bpp, y);
        p+=vgax<<1;
      }
    }else if(page_flip==2){
      int p=0;
      for(int y=0; y<goody; y++){
-       mem2vid(vidbuf+xresvb*vgay+p,  actVid2MemBufp->b+p, goodx, bpp);
+       mem2vid(vidbuf+xresvb*vgay+p,  actVid2MemBufp->b+p, goodx, bpp, y);
        p+=vgax<<1;
      }
      page_flip=1;
@@ -330,7 +212,7 @@ void copy_vidbuffer(void){
    }else{
      int p=0;
      for(int y=0; y<goody; y++){
-       mem2vid(vidbuf+p,              actVid2MemBufp->b+p, goodx, bpp);
+       mem2vid(vidbuf+p,              actVid2MemBufp->b+p, goodx, bpp, y);
        p+=vgax<<1;
      }
      page_flip=2;
@@ -339,6 +221,8 @@ void copy_vidbuffer(void){
  }
  drop=false;
 
+ asm("emms\n\t");
+
  if(iState){
    T2=uclock();
  }

Added: trunk/2010/2_hw_a.asm
==============================================================================
--- (empty file)
+++ trunk/2010/2_hw_a.asm	Wed Jul  4 10:39:35 2007
@@ -0,0 +1,231 @@
+;2010 0.1 Copyright (C) Michael Niedermayer 1999
+
+%include "2_all_a.asm"
+
+segment .data
+
+extern esp_save
+
+global _CopyYuv2Rgb1__Fiiii
+global _CopyYuv2Rgb2__Fiiii
+
+%define pYuv     par2
+%define pRgb     par3
+%define nOp      par4
+%define pLut     par5
+%define nbX      par6
+
+_CopyYuv2Rgb1__Fiiii:             ; void CopyYuv2Rgb1(int, int, int, int) in 2_hw_a.h; operands read here as pYuv, pRgb, nOp, pLut
+ push eax                         ; seven registers are saved; ebp is only used by the commented-out variant below
+ push ebx
+ push ecx
+ push edx
+ push esi
+ push edi
+ push ebp
+
+ mov [esp_save], esp              ; esp is reused as the table base, so it is parked in esp_save
+
+ mov esi, [pYuv]                  ; esi = source pixels
+ mov edi, [pRgb]                  ; edi = destination pixels
+ mov ecx, [nOp]                   ; ecx = byte count
+ mov esp, [pLut]                  ; loaded last; presumably the parN operands are esp-relative -- TODO confirm
+ add esi, ecx                     ; both pointers now address the end of their data
+ add edi, ecx
+ xor ecx, byte -1
+ inc ecx                          ; ecx = -nOp; NOTE(review): no stack is available until esp is restored
+
+ movq mm6, [uMask]                ; 0x000000F8 per dword
+ movq mm7, [vMask]                ; 0x00F80000 per dword
+align16
+CopyLoop1:                         ; per pass: 4 source words -> 4 table indices (word>>11 | masked byte 0 <<2 | masked byte 2 >>9) -> 4 output words
+ movq mm0, [esi+ecx]               ;4p2 wMM0rESIECX  1    0 3
+ movq mm1, [esi+ecx]               ;4p2 wMM1rESIECX  10   1 4
+ movq mm2, [esi+ecx]               ;4p2 wMM2rESIECX  10   2 5
+
+ psrlw mm0, 11                     ;4p1 wMM0rMM0     0    1
+ ;iFetch
+ pand mm1, mm6                     ;3p01wMM1rMM6MM1  1    2
+ pand mm2, mm7                     ;3p01wMM2rMM7MM2  1    3
+ pslld mm1, 2                      ;4p1 wMM1rMM1     0    3
+
+ psrld mm2, 9                      ;4p1 wMM2rMM2     0    4
+ ;iFetch
+ por mm1, mm2                      ;3p01wMM1rMM2MM1  0    5
+ movq mm2, mm1                     ;3p01wMM2rMM1     0
+ pslld mm1, 16                     ;4p1 wMM1rMM1     0
+
+ por mm0, mm2                      ;3p01wMM0rMM2MM0  0
+ por mm0, mm1                      ;3p01wMM0rMM1MM0 ?0
+ ;iFetch
+ movd eax, mm0                     ;3p01wEAXrMM0     0
+ movzx ebx, ax                     ;3p01wEBXrEAX     0
+ shr eax, 16                       ;3p0 wEAXrEAX     0
+
+ movzx eax, word [esp + eax*2]     ;4p2 wEAXrESPEAX  10
+ shl eax, 16                       ;3p0 wEAXrEAX     0
+ ;iFetch
+ movzx ebx, word [esp + ebx*2]     ;4p2 wEBXrESPEBX  1
+ psrlq mm0, 32                     ;4p1 wMM0rMM0     0
+ or eax, ebx                       ;2p01wEAXrEBXEAX  0
+
+ mov [edi+ecx], eax                ;3p3     rEDIECX  2    0   p4     rEAX 0
+ movd eax, mm0                     ;3p01wEAXrMM0    ?00
+ ;iFetch
+ movzx ebx, ax                     ;3p01wEBXrEAX     0
+ shr eax, 16                       ;3p0 wEAXrEAX     0
+ movzx ebx, word [esp + ebx*2]     ;4p2 wEBXrESPEBX  1
+
+ movzx eax, word [esp + eax*2]     ;4p2 wEAXrESPEAX  10
+ ;iFetch
+ shl eax, 16                       ;3p0 wEAXrEAX     0
+ or eax, ebx                       ;2p01wEAXrEBXEAX  0
+
+ mov [edi+ecx+4], eax              ;3p3     rEDIECX  2    1   p4     rEAX 0
+ add ecx, byte 8                   ;3p01wECXrECX     110  0
+  jnc CopyLoop1                    ;2p1     rFLAG         0
+ ; 13 4 6 7 2 2  12+ cyc 
+ ; fetch/decode 14 cyc
+
+
+
+
+; movzx eax, byte [esi+ecx  ]       ;p2 wEAXrESIECX
+; movzx ebp, byte [esi+ecx+1]       ;p2 wEBPrESIECX
+; movzx edx, byte [esi+ecx+2]       ;p2 wEDXrESIECX
+; movzx ebx, byte [esi+ecx+3]       ;p2 wEBXrESIECX
+; and edx, 0xF8                     ;p01wEDXrEDX
+; and eax, 0xF8                     ;p01wEAXrEAX
+; shr ebp, 3                        ;p0 wEBPrEBP
+; shr ebx, 3                        ;p0 wEBXrEBX
+; shl edx, 7                        ;p0 wEDXrEDX
+; lea eax, [edx + eax*4]            ;p0 wEAXrEDXEAX
+; or ebp, eax                       ;p01wEBPrEAXEBP
+; or ebx, eax                       ;p01wEBXrEAXEBX 
+; movzx ebp, word [esp + ebp*2]     ;p2 wEBPrESPEBP
+; movzx ebx, word [esp + ebx*2]     ;p2 wEBXrESPEBX
+; shl ebx, 16                       ;p0 wEBXrEBX    
+; or ebp, ebx                       ;p01wEBPrEBXEBP
+; mov [edi+ecx], ebp                ;p3     rEDIECX     p4     rEBP
+; add ecx, byte 4                   ;p01wECXrECX
+;  jnc NEAR CopyLoop1               ;p1     rFLAG
+ ; 6 5 1 6 1 1    7+ cyc asy
+
+ mov esp, [esp_save]              ; stack pointer restored before any pop
+
+ pop ebp                          ; no emms here: MMX state is left for the caller to clear
+ pop edi
+ pop esi
+ pop edx
+ pop ecx
+ pop ebx
+ pop eax
+ret
+
+_CopyYuv2Rgb2__Fiiii:             ; void CopyYuv2Rgb2(int, int, int, int) in 2_hw_a.h; operands read here as pYuv, pRgb, nOp, pLut
+ push eax                         ; seven registers are saved and restored
+ push ebx
+ push ecx
+ push edx
+ push esi
+ push edi
+ push ebp
+
+ mov [esp_save], esp              ; esp is reused as the table base, so it is parked in esp_save
+
+ mov esi, [pYuv]                  ; esi = source pixels
+ mov edi, [pRgb]                  ; edi = destination pixels
+ mov ecx, [nOp]                   ; ecx = byte count
+ mov esp, [pLut]                  ; loaded last; presumably the parN operands are esp-relative -- TODO confirm
+ add esi, ecx                     ; both pointers now address the end of their data
+ add edi, ecx
+ xor ecx, byte -1
+ inc ecx                          ; ecx = -nOp; NOTE(review): no stack is available until esp is restored
+
+ movq mm7, [antiSigned]           ; 0x80 in the low byte of every word
+ movq mm6, [bit0Mask]             ; 0xFE in every byte
+ movq mm5, [bit20Mask]            ; 0xF8 in every byte
+
+align16
+CopyLoop2:                         ; per pass: average 8 bytes of the rows at -1600 and +1600, then look up 4 output words
+ movq mm0, [esi+ecx-1600]         ; NOTE(review): row distance is the constant 1600 bytes; nbX is defined in this file but never used
+ movq mm1, [esi+ecx+1600]
+ paddb mm0, mm7                   ; add 0x80 to the low byte of each word (wraps per byte)
+ paddb mm1, mm7
+ pand mm0, mm6                    ; clear bit 0 of every byte so the word shift cannot leak into the neighbouring byte
+ pand mm1, mm6
+ psrlw mm0, 1                     ; halve every byte
+ psrlw mm1, 1
+ paddb mm0, mm1                   ; sum of the halves = per-byte average of the two rows
+ psubb mm0, mm7                   ; take the 0x80 bias off again
+ pand mm0, mm5                    ; keep the top 5 bits of every byte
+ movd ebx, mm0                    ; low 4 bytes -> first output pair
+ psrlq mm0, 32                    ; high 4 bytes kept for the second output pair
+
+ movzx eax, bl                    ; byte 0
+ movzx ebp, bh                    ; byte 1
+ shr ebx, 16
+ movzx edx, bl                    ; byte 2
+ movzx ebx, bh                    ; byte 3
+
+ shr ebp, 3                       ; bytes 1 and 3 -> 5-bit values in index bits 0..4
+ shr ebx, 3
+ shl edx, 7                       ; byte 2 -> index bits 10..14
+ lea eax, [edx + eax*4]           ; byte 0 -> index bits 5..9, combined with byte 2
+ or ebp, eax                      ; first 15-bit index; same bit layout as the xLut index built in 2010.cpp in this commit
+ or ebx, eax                      ; second index shares the byte 0 / byte 2 part
+
+ movzx ebp, word [esp + ebp*2]    ; 16-bit table entries
+ movzx ebx, word [esp + ebx*2]    ; (esp holds pLut here)
+ shl ebx, 16                      ; second entry goes to the upper half
+ or ebp, ebx
+
+ mov [edi+ecx], ebp               ; two output words
+
+
+ movd ebx, mm0                    ; same sequence for the remaining 4 bytes
+
+ movzx eax, bl
+ movzx ebp, bh
+ shr ebx, 16
+ movzx edx, bl
+ movzx ebx, bh
+
+ shr ebp, 3
+ shr ebx, 3
+ shl edx, 7
+ lea eax, [edx + eax*4]
+ or ebp, eax                      ; third index
+ or ebx, eax                      ; fourth index
+
+ movzx ebp, word [esp + ebp*2]
+ movzx ebx, word [esp + ebx*2]    ; table lookups as above
+ shl ebx, 16                      ; pack the pair
+ or ebp, ebx
+
+ mov [edi+ecx+4], ebp             ; two more output words
+
+ add ecx, byte 8                  ; next 8 bytes; the add carries when the index wraps to 0
+  jnc NEAR CopyLoop2              ; repeat until that carry
+
+ mov esp, [esp_save]              ; stack pointer restored before any pop
+
+ pop ebp                          ; no emms here: MMX state is left for the caller to clear
+ pop edi
+ pop esi
+ pop edx
+ pop ecx
+ pop ebx
+ pop eax
+ret
+
+xyz times 8 dd 0                      ; 32 zero bytes; not referenced anywhere else in this file
+
+align8
+antiSigned times 2 dd 0x00800080      ; 0x80 in the low byte of every word; bias added and removed in CopyLoop2
+bit0Mask times 2 dd 0xFEFEFEFE        ; clears bit 0 of every byte ahead of the word-wise >>1 in CopyLoop2
+bit20Mask times 2 dd 0xF8F8F8F8       ; keeps the top 5 bits of every byte (CopyLoop2)
+uMask times 2 dd 0x000000F8           ; top 5 bits of byte 0 of each dword (CopyLoop1)
+vMask times 2 dd 0x00F80000           ; top 5 bits of byte 2 of each dword (CopyLoop1)
+
+align16

Added: trunk/2010/2_hw_a.h
==============================================================================
--- (empty file)
+++ trunk/2010/2_hw_a.h	Wed Jul  4 10:39:35 2007
@@ -0,0 +1,8 @@
+//2010 0.1 Copyright (C) Michael Niedermayer 1998
+#ifndef n2_hw_a_h                      // include guard
+#define n2_hw_a_h
+
+void CopyYuv2Rgb1(int, int, int, int); // definition not visible here; presumably the assembly routine of the same name in 2_hw_a.asm - TODO confirm
+void CopyYuv2Rgb2(int, int, int, int); // assembly routine (label _CopyYuv2Rgb2__Fiiii); the asm names its inputs pYuv, pRgb, nOp, pLut - argument order TODO confirm
+
+#endif                                 // n2_hw_a_h

Modified: trunk/2010/2_hw_mem.cpp
==============================================================================
--- trunk/2010/2_hw_mem.cpp	(original)
+++ trunk/2010/2_hw_mem.cpp	Wed Jul  4 10:39:35 2007
@@ -25,14 +25,18 @@ bool checkAlign(void *p, int align){    
 
 void *newAlign(int size, int align){        
   int v= int(new char[size + align + 3]);
+#ifdef debug
   printf("anew %d\n", v);
+#endif
   int v2= ( (v+align-1+4) & ~(align-1) );
   ((int*)v2)[-1]= v;
   return (void*)v2;
 }
 
 void deleteAlign(void *v){        
+#ifdef debug
   printf("adel %d\n", int(((char**)v)[-1]) );
+#endif
   delete [] ((char**)v)[-1];
 }
 
@@ -360,7 +364,6 @@ void vds_free(dds *vds_info){
    vd=vd->next;
 
  if(vd==vdslist && vd->vds_info.phys==vds_info->phys ){
-   printf("HALLO1\n");
    vdslist= vdslist->next;
    delete vd;
  }else if(vd!=vdslist && vd->next!=NULL){

Modified: trunk/2010/2_nag.cpp
==============================================================================
--- trunk/2010/2_nag.cpp	(original)
+++ trunk/2010/2_nag.cpp	Wed Jul  4 10:39:35 2007
@@ -18,7 +18,7 @@
 #include "2_hw_mem.h"
 #include "2010.h"
 
-#define dwnSmp 1
+#define dwnSmp 3
 
 #define FREQ_CHROM     4433618.75
 #define FREQ_HS        15625.0
@@ -244,9 +244,10 @@ void nag_decrypt(void){
 
  if(nagSizeAll!=NAG_LINES) error(Nagra);
 
+#ifdef debug
  printf("%d %d\n", sizeof(u_short), sizeof(BEST2) );
+#endif
 
- printf("HALLOX\n");
  static bool first=true;
 
  static char *sinPerLineMulX=new char[(NAG_LINES+32)<<8];
@@ -626,7 +627,7 @@ void nag_decrypt(void){
  if(!isWss) wssLine=32;
 
  byte * const lowRes=(byte*) newAlign( wndx>>dwnSmpSize, 8);
- for(int line=0; line<NAG_LINES; line++){
+ for(int line=0; line<NAG_LINES; line++){       // vtune some btb misspredicts
    byte * const linep= actVid2MemBufp->b + nagLineLogi2Phys(line)*(vgax<<1);
 //   printf("%d\n", line);
    doDwnSmp(lowRes, linep);
@@ -639,11 +640,9 @@ void nag_decrypt(void){
    }
  }
  if(mmx) asm("emms\n\t");
- printf("H1\n");
 
  deleteAlign(decoLowRes);
  deleteAlign(lowRes);
- printf("H2\n");
 
  if(iState){
    T2=uclock();
@@ -678,7 +677,6 @@ void nag_decrypt(void){
    keysListPos[keysListPoses]= best2[l].keyNdx;
    keysListPoses++;
  }
- printf("H3\n");
 
  if(iState){
    T2=uclock();
@@ -736,7 +734,9 @@ void nag_decrypt(void){
 
  int perm[NAG_LINES];
  getPerm(bestKey, perm);
+#ifdef debug
  printf("%X\n", bestKey);
+#endif
 
  delete [] bestCoeffs;   // kill this
 
@@ -915,7 +915,6 @@ void nag_decrypt(void){
    T1=T2;
  }
 
- printf("H4\n");
 /*
  for(int i=0; i<NAG_LINES; i++){
    const int p=nagLineLogi2Phys(i);
@@ -992,12 +991,12 @@ void nag_decrypt(void){
 
  }
 
- if(outy == wndy){
+/* if(outy == wndy){
    for(int i=0; i<outy; i+=2){
      memcpy(actVid2MemBufp->b + ((i+1) * vgax<<1),
             actVid2MemBufp->b + (i * vgax<<1)    , wndx << 1);
-   }
- }
+   } 
+ }   */
 
  delete [] copyBuff;
 

Modified: trunk/2010/2_nag_a.asm
==============================================================================
--- trunk/2010/2_nag_a.asm	(original)
+++ trunk/2010/2_nag_a.asm	Wed Jul  4 10:39:35 2007
@@ -36,12 +36,12 @@ _nagraCorr__Fiiiii:
 
  and edx, 0xFFFFFFF8
 
- sub edx, 8
+ sub edx, byte 8
 
  add esi, edx
  add edi, edx
 
- xor edx, -1
+ xor edx, byte -1
  mov eax, [_mmx]
 
  inc edx
@@ -68,21 +68,26 @@ _nagraCorr__Fiiiii:
 
  movq mm6, [mmx_clip]
 
+align16
 MMXloop:
- psubusb mm2, mm1            ; U
- psubusb mm1, mm0            ;  V 1
- movq mm0, [esp + esi + 8]   ; U
- por mm2, mm1                ;  V 1
- movq mm1, [esp + edi + 8]   ; U
- paddusb mm2, mm6            ;  V 1
- movq mm4, mm2               ; U
- punpcklbw mm2, mm3          ;  V 1
- punpckhbw mm4, mm3          ; U
- paddusw mm5, mm2            ;  V 1
- paddusw mm5, mm4            ; U
- movq mm2, mm0               ;  V 1
- add esp, 8                  ; U
-  jnc MMXloop                ;  V 1
+ psubusb mm2, mm1            ; p01wMM2rMM1MM2   0
+ psubusb mm1, mm0            ; p01wMM1rMM0MM1   1
+ por mm2, mm1                ; p01wMM2rMM1MM2   0
+ movq mm0, [esp + esi + 8]   ; p2 wMM0rESPESI   1
+ paddusb mm2, mm6            ; p01wMM2rMM6MM2   1
+ movq mm4, mm2               ; p01wMM4rMM2      0
+ punpcklbw mm2, mm3          ; p1 wMM2rMM3MM2   1
+ punpckhbw mm4, mm3          ; p1 wMM4rMM3MM4  (1)
+ paddusw mm5, mm2            ; p01wMM5rMM2MM5   1
+ paddusw mm5, mm4            ; p01wMM5rMM4MM5  (1)
+ movq mm2, mm0               ; p01wMM2rMM0      0
+ movq mm1, [esp + edi + 8]   ; p2 wMM1rESPEDI   2
+ add esp, byte 8             ; p01wESPrESP      1
+  jnc MMXloop                ; p1     rFLAG
+  ; 5+ cyc deco
+  ; 5- cyc rat
+  ; 6 cyc exec (0/1 satur)
+
 
  psubusb mm2, mm1            
  psubusb mm1, mm0            
@@ -201,3 +206,5 @@ xyz times 8 dd 0
 
 align8
 mmx_clip times 8 db 0
+
+align16

Modified: trunk/2010/2_vc.cpp
==============================================================================
--- trunk/2010/2_vc.cpp	(original)
+++ trunk/2010/2_vc.cpp	Wed Jul  4 10:39:35 2007
@@ -37,15 +37,14 @@
 #define BAD_THRESHOLD1 0.8
 #define BAD_THRESHOLD2 200
 #define EDGE_LIMIT 70
-#define MAX_EDGE 500
-#define EDGE_LOG_COEFF 10
+#define MAX_EDGE 500                // ? 2000 ?
 #define PHASE_EXP 4
 #define PHASE_LIMIT 40
 #define MAX_PHASE_PENALTY 1000
 #define AMP_THRESHOLD 200
 
 #define PHASE_COEFF (MAX_PHASE_PENALTY/pow(PHASE_LIMIT, PHASE_EXP))
-#define EDGE_COEFF (MAX_EDGE/log(EDGE_LIMIT*EDGE_LOG_COEFF))
+#define EDGE_COEFF (MAX_EDGE/log(EDGE_LIMIT))
 
 #define NUM_PHASE_SMP (vcDiff>>PHASE_CMP_STEP)
 
@@ -85,15 +84,18 @@ void vc_decrypt(void){
 
  const int lowResSize= mmx ? DWN_SMP : (DWN_SMP-1);
 
+#ifdef debug
  printf("xx%f %f %f\n", PHASE_POINT/PI*180, PHASE_SEG/PI*180, PHASE_SMP/PI*180);
+#endif
 
  int vScore=0;
 
 // showPoints=true;
 
  const double freqPix2= double(wndx) / double(x_field) * FREQ_PIX;
+#ifdef debug
  printf("xy%d %d\n", wndx, x_field);
-
+#endif
                            //  18
 // const int vcStartX=int(double(16 -scales_x+3)/x_field*wndx + .5) + some - 10;
  const int vcStartX=int(double(16 -scales_x+3)/x_field*wndx + .5);
@@ -121,7 +123,9 @@ void vc_decrypt(void){
  const int vcEndX= vcStartX + int( double(POINT_LAND + NO_POINT_LAND)
                                                * freqPix2 / FREQ_SMP + .5);
 
+#ifdef debug
  printf("vcEndX %d\n",vcEndX);
+#endif
  if(vcEndX>wndx) error(error_code(-123));
  const int vcDiff=vcEndX - vcStartX;
 
@@ -147,7 +151,7 @@ void vc_decrypt(void){
  static int phaseLut[256];
  static bool first= true;
 
- static byte phasePerCutP[256];
+ static int phasePerCutP[256];
  static char sin1PerCutP[256];
  static char cos1PerCutP[256];
  static char sin2PerCutP[256];
@@ -158,11 +162,11 @@ void vc_decrypt(void){
  fields++;
 
  if(first){
-   some=10;
+   if(first) some=10;
 
    for(int i=0; i<512; i++){
      const int j= min(i+1, EDGE_LIMIT);
-     edgeLut[i]=int( log(j*EDGE_LOG_COEFF)*EDGE_COEFF );
+     edgeLut[i]=int( log(j)*EDGE_COEFF );
    }
 
    for(int i=0; i<256; i++){                           
@@ -291,6 +295,7 @@ void vc_decrypt(void){
           || bestDiff+(bestDiff>>1) > addapBuff[ cutP - (step>>1) ]
           || bestDiff+(bestDiff>>1) > addapBuff[ cutP + (step>>1) ]){
 
+
          int diff;
          if(mmx){
            diff = vc_corr_mmx(
@@ -525,14 +530,15 @@ void vc_decrypt(void){
 
        const byte a= (line & 1) ^ ( vScore>0 ) ? -ang: ang;
 
-       const byte phase1= a + phasePerCutP[ lastCutP ];
+       const int phase1= (a + phasePerCutP[ lastCutP ]) & 0xFF;
+       const int lastPhaseErrLastCutP= lastPhaseErr[lastCutP] + 128;
           // FIX (ASM_OPTIMIZE)
        if(chromAmp>AMP_THRESHOLD){
          for(int newCutP= newCutPStart; newCutP <= newCutPEnd; newCutP++){
-           const byte phaseErr= phase1 + phasePerCutP[ newCutP ];
+           const int phaseErr= (phase1 + phasePerCutP[ newCutP ]) & 0xFF;
            const int val=  lastVal[lastCutP] - driftPenalty[drift]
                          - phaseLut[ phaseErr ]
-                         - phaseLut[ (phaseErr - lastPhaseErr[lastCutP] + 128) & 0xFF];// useless without phase errors from decomb-filter
+                         - phaseLut[ (phaseErr - lastPhaseErrLastCutP) & 0xFF];// useless without phase errors from decomb-filter
      
            if(newVal[newCutP] < val){
              newVal[newCutP]= val;
@@ -547,7 +553,7 @@ void vc_decrypt(void){
        else{
          linep[ ((vcStartX -  1)<<1) + 1]=255;
          for(int newCutP= newCutPStart; newCutP <= newCutPEnd; newCutP++){
-           const byte phaseErr= phase1 + phasePerCutP[ newCutP ];
+           const int phaseErr= (phase1 + phasePerCutP[ newCutP ]) & 0xFF;
            const int val=  lastVal[lastCutP] - driftPenalty[drift];
      
            if(newVal[newCutP] < val){
@@ -773,10 +779,12 @@ void vc_decrypt(void){
    bestCutP= dir[(line<<8) + bestCutP];
  }
 
+#ifdef debug
  for(int line=1; line<wndy-1; line++)
     {
     fprintf(pF, "%d\n", cutpp[line]);
     }
+#endif
 
 /*
  float avgCutP=0;
@@ -788,8 +796,9 @@ void vc_decrypt(void){
  printf("avg%f\n", avgCutP-float(vcDiff)/2.0);
   */
   
+#ifdef debug
  printf("vScore %d\n", vScore);
-
+#endif
 
  if(iState){
    T2=uclock();
@@ -811,11 +820,11 @@ void vc_decrypt(void){
    infoPosY+=5;
    T1=T2;                           
  }
-
+/*
  c.init(255, 0, 0, false);
  sprintf(textbuf[6],"%d l %d f", cutPs, fields);
  gprint(infoPosX, infoPosY+=10, c.col, textbuf[6]);
-
+  */
 
   delete [] bestDir;
   delete [] dir;

Modified: trunk/2010/2_vc_a.asm
==============================================================================
--- trunk/2010/2_vc_a.asm	(original)
+++ trunk/2010/2_vc_a.asm	Wed Jul  4 10:39:35 2007
@@ -45,7 +45,7 @@ _vc_corr__Fiiiiii:
  xor ebp, ebp
  xor edx, edx
 
- cmp esp, 4
+ cmp esp, byte 4
   jb skip1
 
  mov eax, [esp + esi]
@@ -91,7 +91,7 @@ skip1:
  mov esp, [count2]
  xor edx, edx
 
- cmp esp, 4
+ cmp esp, byte 4
   jb skip2
 
  mov eax, [esp + esi]       
@@ -123,7 +123,7 @@ NoMMXloop2:
  or  eax, 0x80008000         ;  V 1
  xor edx, ecx                ; U
  mov ebx, [esp + edi + 8]    ;  V 1
- add esp, 8                  ; U
+ add esp, byte 8             ; U
   jnc NoMMXloop2             ;  V 1
 
 skip2:
@@ -147,7 +147,6 @@ skip2:
 
 ret
 
-
 _vc_corr_mmx__Fiiiiii:
 ; int3
  push ebx
@@ -162,29 +161,29 @@ _vc_corr_mmx__Fiiiiii:
  mov ebx, edi
  mov ecx, [mcount1]
 
- and ecx, -8
- and ebx, 7
+ and ecx, byte -8
+ and ebx, byte 7
 
  shl ebx, 3
- and edi, -8
+ and edi, byte -8
 
  add esi, ecx
 
  movd mm6, ebx         
  pxor mm7, mm7
 
- xor ebx, -1
+ xor ebx, byte -1
  add edi, ecx
 
- add ebx, 65           
- xor ecx, -1
+ add ebx, byte 65           
+ xor ecx, byte -1
 
  inc ecx
 
  movd mm5, ebx        
  pxor mm4,mm4
 
- cmp ecx, 8
+ cmp ecx, byte 8
   jb skipMMX1
 
  movq mm3, [ecx + edi    ]
@@ -197,24 +196,28 @@ _vc_corr_mmx__Fiiiiii:
  
  por mm1, mm3
 
+align16
 MMXLoop:
- movq mm2, mm0               ; U
- psubusb mm0, mm1            ;  V 1
- movq mm3, [ecx + edi + 8 ]  ; U
- psubusb mm1, mm2            ;  V 1
- por mm1, mm0                ; U
- psrlq mm3, mm6              ;  V 1
- movq mm2, mm1               ; U
- punpcklbw mm1, mm4          ;  V 1
- punpckhbw mm2, mm4          ; U
- paddusw mm7, mm1            ;  V 1
- movq mm1, [ecx + edi + 16]  ; U
- paddusw mm7, mm2            ;  V 1
- movq mm0, [ecx + esi + 8 ]  ; U
- psllq mm1, mm5              ;  V 1
- por mm1, mm3                ; U
- add ecx, 8                  ;  V 1
-  jnc MMXLoop                ; U  1
+ movq mm2, [ecx + esi     ]  ;0p2 wMM0rECXESI   1
+ psubusb mm0, mm1            ; p01wMM0rMM1MM0   0
+ psubusb mm1, mm2            ; p01wMM1rMM2MM1   0
+ movq mm3, [ecx + edi + 8 ]  ;0p2 wMM3rECXEDI   1
+ por mm1, mm0                ; p01wMM1rMM0MM1   0
+ movq mm0, [ecx + esi + 8 ]  ; p2 wMM0rECXESI   1
+ psrlq mm3, mm6              ;0p0 wMM3rMM6      1
+ movq mm2, mm1               ; p01wMM2rMM1      0
+ punpcklbw mm1, mm4          ; p0 wMM1rMM4MM1   1
+ punpckhbw mm2, mm4          ;0p0 wMM2rMM4MM2   10  
+ paddusw mm7, mm1            ; p01wMM7rMM1MM7   1
+ paddusw mm7, mm2            ; p01wMM7rMM2MM7   10
+ movq mm1, [ecx + edi + 16]  ;0p2 wMM1rECXEDI   2
+ psllq mm1, mm5              ; p0 wMM1rMM5      1
+ por mm1, mm3                ; p01wMM1rMM3MM1   1?
+ add ecx, byte 8             ;0p01wECXrECX      1
+  jnc MMXLoop                ; p1     rFLAG
+; 8  4 1 4    7 cyc asy
+; 6+ cyc deco
+; 7 cyc rat
 
 skipMMX1:
 
@@ -224,27 +227,27 @@ skipMMX1:
  mov ebx, edi
  mov ecx, [mcount2]
 
- and ecx, -8
- and ebx, 7
+ and ecx, byte -8
+ and ebx, byte 7
 
  shl ebx, 3
- and edi, -8
+ and edi, byte -8
 
  add esi, ecx
 
  movd mm6, ebx         
 
- xor ebx, -1
+ xor ebx, byte -1
  add edi, ecx
 
- add ebx, 65           
- xor ecx, -1
+ add ebx, byte 65           
+ xor ecx, byte -1
 
  inc ecx
 
  movd mm5, ebx        
 
- cmp ecx, 8
+ cmp ecx, byte 8
   jb skipMMX2
 
  movq mm3, [ecx + edi    ]
@@ -257,24 +260,25 @@ skipMMX1:
  
  por mm1, mm3
 
+align16
 MMXLoop2:
- movq mm2, mm0               ; U
- psubusb mm0, mm1            ;  V 1
- movq mm3, [ecx + edi + 8 ]  ; U
- psubusb mm1, mm2            ;  V 1
- por mm1, mm0                ; U
- psrlq mm3, mm6              ;  V 1
- movq mm2, mm1               ; U
- punpcklbw mm1, mm4          ;  V 1
- punpckhbw mm2, mm4          ; U
- paddusw mm7, mm1            ;  V 1
- movq mm1, [ecx + edi + 16]  ; U
- paddusw mm7, mm2            ;  V 1
- movq mm0, [ecx + esi + 8 ]  ; U
- psllq mm1, mm5              ;  V 1
- por mm1, mm3                ; U
- add ecx, 8                  ;  V 1
-  jnc MMXLoop2               ; U  1
+ movq mm2, [ecx + esi     ]  ;0p2 wMM0rECXESI   1
+ psubusb mm0, mm1            ; p01wMM0rMM1MM0   0
+ psubusb mm1, mm2            ; p01wMM1rMM2MM1   0
+ movq mm3, [ecx + edi + 8 ]  ;0p2 wMM3rECXEDI   1
+ por mm1, mm0                ; p01wMM1rMM0MM1   0
+ movq mm0, [ecx + esi + 8 ]  ; p2 wMM0rECXESI   1
+ psrlq mm3, mm6              ;0p0 wMM3rMM6      1
+ movq mm2, mm1               ; p01wMM2rMM1      0
+ punpcklbw mm1, mm4          ; p0 wMM1rMM4MM1   1
+ punpckhbw mm2, mm4          ;0p0 wMM2rMM4MM2   10  
+ paddusw mm7, mm1            ; p01wMM7rMM1MM7   1
+ paddusw mm7, mm2            ; p01wMM7rMM2MM7   10
+ movq mm1, [ecx + edi + 16]  ;0p2 wMM1rECXEDI   2
+ psllq mm1, mm5              ; p0 wMM1rMM5      1
+ por mm1, mm3                ; p01wMM1rMM3MM1   1?
+ add ecx, byte 8             ;0p01wECXrECX      1
+  jnc MMXLoop2               ; p1     rFLAG
 
 skipMMX2:
 
@@ -302,3 +306,4 @@ ret
 
 
 
+align16

Modified: trunk/2010/makefile
==============================================================================
--- trunk/2010/makefile	(original)
+++ trunk/2010/makefile	Wed Jul  4 10:39:35 2007
@@ -1,7 +1,7 @@
 .SUFFIXES: .c .cc .cpp .h .o .asm .S .s
 
 OPTS = -g
-OPTS = -O3 -mpentium -fomit-frame-pointer -g -Wall -Wstrict-prototypes -malign-double -ffast-math -funroll-loops -fno-default-inline
+OPTS = -O5 -mpentiumpro -fomit-frame-pointer -g -Wall -Wstrict-prototypes -malign-double -ffast-math -funroll-loops -fno-default-inline
 AFLAGS = -f coff
 
 .c.o:   ; gcc $(CFLAGS) -c $<
@@ -14,13 +14,15 @@ AFLAGS = -f coff
 
 .asm.o: ; nasm $(AFLAGS) $<
 
-O = 2010.o 2_file.o 2_grafix.o 2_hw.o 2_gfunc.o \
-    2_hw_asm.o 2_71x6.o 2_hw_mem.o 2_crypt.o\
-    2_glob_a.o 2_txt.o 2_cryp_a.o
+O = 2_glob_a.o 2_cryp_a.o 2_hw_a.o\
+    2010.o 2_file.o 2_grafix.o 2_hw.o 2_gfunc.o \
+    2_hw_asm.o 2_71x6.o 2_hw_mem.o 2_crypt.o 2_txt.o 
+    
 
-OC = 2010.o 2_file.o 2_grafix.o 2_hw.o 2_gfunc.o \
-     2_hw_asm.o 2_71x6.o 2_hw_mem.o 2_crypt.o 2_vc.o 2_nag.o\
-     2_nag_a.o 2_vc_a.o 2_glob_a.o 2_txt.o 2_cryp_a.o
+OC = 2_nag_a.o 2_vc_a.o 2_glob_a.o 2_cryp_a.o 2_hw_a.o\
+     2010.o 2_file.o 2_grafix.o 2_hw.o 2_gfunc.o \
+     2_hw_asm.o 2_71x6.o 2_hw_mem.o 2_crypt.o 2_vc.o 2_nag.o 2_txt.o 
+     
 
 default: ;$(MAKE) 2010 CFLAGS="$(OPTS)"
 
@@ -36,7 +38,7 @@ clean: ; del $(O) 2010
 2_gfunc.o  : 2_gfunc.cpp  2_all.h 2_gfunc.h 2010.h 
 2_grafix.o : 2_grafix.cpp 2_all.h 2_grafix.h 2010.h 2_gfunc.h 2_hw.h 2_file.h
 2_hw.o     : 2_hw.cpp     2_all.h 2_hw.h 2010.h 2_hw_asm.h 2_hw_mem.h\
-                             2_71x6.h 2_mmx.h
+                             2_71x6.h 2_mmx.h 2_hw_a.h
 2_hw_mem.o : 2_hw_mem.cpp 2_all.h 2_hw_mem.h 2010.h 
 2_71x6.o   : 2_71x6.cpp   2_all.h 2_71x6.h 2_hw.h 2_hw_mem.h 2010.h 2_hw_asm.h
 2_crypt.o  : 2_crypt.cpp  2_all.h 2_crypt.h 2_71x6.h 2_vc.h 2_nag.h 2_txt.h
@@ -51,4 +53,5 @@ clean: ; del $(O) 2010
 2_nag_a.o  : 2_nag_a.asm  2_all_a.asm
 2_vc_a.o   : 2_vc_a.asm   2_all_a.asm
 2_cryp_a.o : 2_cryp_a.asm 2_all_a.asm
+2_hw_a.o   : 2_hw_a.asm   2_all_a.asm
 



More information about the Mndiff-dev mailing list