[MN-dev] [mndiff]: r52 - in trunk/2010: 2010.cc 2_71x6.cc 2_all.h 2_all_a.asm 2_crypt.cc 2_file.cc 2_grafix.cc 2_nag.cc 2_nag_a.asm 2_vc.cc 2_vc_a.asm 2_vc_a.h 2d

michael subversion at mplayerhq.hu
Wed Jul 4 09:25:09 CEST 2007


Author: michael
Date: Wed Jul  4 09:25:09 2007
New Revision: 52

Log:
next version (unknown date)


Modified:
   trunk/2010/2010.cc
   trunk/2010/2_71x6.cc
   trunk/2010/2_all.h
   trunk/2010/2_all_a.asm
   trunk/2010/2_crypt.cc
   trunk/2010/2_file.cc
   trunk/2010/2_grafix.cc
   trunk/2010/2_nag.cc
   trunk/2010/2_nag_a.asm
   trunk/2010/2_vc.cc
   trunk/2010/2_vc_a.asm
   trunk/2010/2_vc_a.h
   trunk/2010/2d

Modified: trunk/2010/2010.cc
==============================================================================
--- trunk/2010/2010.cc	(original)
+++ trunk/2010/2010.cc	Wed Jul  4 09:25:09 2007
@@ -32,10 +32,11 @@
  extern int active_meteor;
  extern int yuvmode;
  extern bool mmx;
- extern int *best1;
- extern BEST2 *best2;
- extern u_short *keysList;
- extern int *decoLine;
+// extern short *linesPerKeyTab;
+// extern int *best1;
+// extern BEST2 *best2;
+// extern u_short *keysList;
+// extern int *decoLine;
  extern int bright, contr, satur;
  extern int showCont;
  extern CRYPTSTD cryptStd;
@@ -44,6 +45,7 @@
  extern bool helpState;
 
  int maxBuf=2;
+ int some=0;
 
 int _crt0_startup_flags = _CRT0_FLAG_NEARPTR | _CRT0_FLAG_NONMOVE_SBRK;
  int grabf;
@@ -111,6 +113,7 @@ int error(error_code e){
                             "         -m n (Meteor Card) in case there are more than 1 (:\n"
                             "         -NoFlip (Disable Page-Fliping)\n"
                             "         -NoMMX  (Disable MMX-Support)\n"
+                            "         -MaxBuf (num of Buffers)\n"
                             "         -? / -h (This stuff)\n");
                      break;
   default          : printg_t("\nUnknown error!");
@@ -122,10 +125,6 @@ int error(error_code e){
 
 void exitt(void){
  close_hw();
- delete [] decoLine;
- delete [] best1;
- delete [] best2;
- delete [] keysList;
 }
 
 int main(int argc, char **argv){
@@ -333,11 +332,9 @@ int main(int argc, char **argv){
 
 
 
-   if(kbhit()){
+   while(kbhit()){
      int key= getkey();
 
-     while(kbhit()) getkey();
-
      switch(menuLevel){
        case mLNorm : switch(key){
                        case 'g'  :
@@ -392,6 +389,10 @@ int main(int argc, char **argv){
                        break;
                        case '2'  : delay--; if(delay<0) delay=0;
                        break;
+                       case '3'  : some++;
+                       break;
+                       case '4'  : some--; if(some<0) some=0;
+                       break;
                      }
        break;
        case mLTVStd : switch(key){

Modified: trunk/2010/2_71x6.cc
==============================================================================
--- trunk/2010/2_71x6.cc	(original)
+++ trunk/2010/2_71x6.cc	Wed Jul  4 09:25:09 2007
@@ -17,6 +17,7 @@
 //#define wait_time 0.010
 #define wait_time 0.020
 
+//#define NoColorDeco
 
 extern meteor meteors[8];
 extern __Go32_Info_Block _go32_info_block;
@@ -48,8 +49,8 @@ extern int vgax, vgay;
 int v_smooth=1;
 
 int bright=0x80, satur=0x40, contr=0x40;
-//int scales_x=0x03, scales_y= 0x10;               3 / 574 / 3 nag 800600
-//int scalee_x=0x03, scalee_y= 0x12;               
+//int scales_x=0x03, scales_y= 0x11;           
+//int scalee_x=0x03, scalee_y= 0x11;               
 int scales_x=0x03, scales_y= 0x11;           
 int scalee_x=0x03, scalee_y= 0x11;               
 
@@ -521,7 +522,11 @@ void init_meteor(void){
  write_saa7196(0x05, 0xF4);/* 7:0  Horizontal Sync Start after PH1 for 50hz */
 
 
+#ifndef NoColorDeco
  write_saa7196(0x06, 0x46);             //46
+#else
+ write_saa7196(0x06, 0xC6);             //46
+#endif
                            /*   7  Input mode =0 CVBS, =1 S-Video
                                 6  Pre filter
                               5:4  Aperture Bandpass characteristics
@@ -541,9 +546,12 @@ void init_meteor(void){
                                 2  S-VHS bit
                                 1  GPSW2
                                 0  GPSW1                                   */
-write_saa7196(0x10, 0x00);   //BAK
 //write_saa7196(0x10, 0x02);   
-// write_saa7196(0x10, 0x04);
+#ifndef NoColorDeco
+ write_saa7196(0x10, 0x00);
+#else
+ write_saa7196(0x10, 0x04);
+#endif
                            /*   2  Select HREF Position
                               1:0  Vertical noise reduction                */
  write_saa7196(0x11, 0x2C);/* 7:0  Chrominance gain conrtol for QAM        */

Modified: trunk/2010/2_all.h
==============================================================================
--- trunk/2010/2_all.h	(original)
+++ trunk/2010/2_all.h	Wed Jul  4 09:25:09 2007
@@ -10,6 +10,8 @@
 #define max(a,b)   ((a)>(b) ? (a) : (b))
 #define min(a,b)   ((a)<(b) ? (a) : (b))
 
+#define minmax(a,b,c)   ((a)>(b) ? (a) : ( (b)<(c) ? (b) : (c) ))
+
 #define limit(a,b,c) if( (a)<(b) || (a)>(c) ) error(-201)
 
 
@@ -25,6 +27,7 @@
 struct COL{
   long yuv;
   short rgb16;
+  byte r, g, b;
 };
 
 /*
@@ -65,12 +68,15 @@ class color{
  public:
    COL col;
    color(void) { };
-   void init(byte y, byte u, byte v, byte yuv_mode){
+   void init(int y, int u, int v, byte yuv_mode){       // fix (yuv_mode==0)
      col.yuv=   u | (y<<8) | (v<<16) | (y<<24);
-     const int r= (int( y + 1.375   *v             )>>3) & 0x1F; 
-     const int g= (int( y - 0.703125*v - 0.34375*u )>>2) & 0x1F; 
-     const int b= (int( y              + 1.734375*u)>>3) & 0x1F;
-     col.rgb16= b | (g<<5) | (r<<11);
+     col.r=minmax(0, int( y + 1.375   *v             ), 255);
+     col.g=minmax(0, int( y - 0.703125*v - 0.34375*u ), 255);
+     col.b=minmax(0, int( y              + 1.734375*u), 255);
+     const int r5= ((col.r>>3) & 0x1F);
+     const int g6= ((col.g>>2) & 0x3F); 
+     const int b5= ((col.b>>3) & 0x1F);
+     col.rgb16= b5 | (g6<<5) | (r5<<11);
    };
 } ;
 

Modified: trunk/2010/2_all_a.asm
==============================================================================
--- trunk/2010/2_all_a.asm	(original)
+++ trunk/2010/2_all_a.asm	Wed Jul  4 09:25:09 2007
@@ -1,13 +1,19 @@
 ; 2010 0.1 Copyright (C) Michael Niedermayer 1998
 
-%define par1 (esp + 28)
-%define par2 (esp + 32)
-%define par3 (esp + 36)
-%define par4 (esp + 40)
-%define par5 (esp + 44)
-%define par6 (esp + 48)
-%define par7 (esp + 52)
-%define par8 (esp + 56)
+%define par_5 (esp + 04)
+%define par_4 (esp + 08)
+%define par_3 (esp + 12)
+%define par_2 (esp + 16)
+%define par_1 (esp + 20)
+%define par0  (esp + 24)
+%define par1  (esp + 28)
+%define par2  (esp + 32)
+%define par3  (esp + 36)
+%define par4  (esp + 40)
+%define par5  (esp + 44)
+%define par6  (esp + 48)
+%define par7  (esp + 52)
+%define par8  (esp + 56)
 
 ;%define int3 db 0xCC
 

Modified: trunk/2010/2_crypt.cc
==============================================================================
--- trunk/2010/2_crypt.cc	(original)
+++ trunk/2010/2_crypt.cc	Wed Jul  4 09:25:09 2007
@@ -18,17 +18,17 @@ extern volatile bool oneField;
 extern int satur;
 
 static void vSmooth(void);
-static void decomb(void);
+static void decomb(int q);
 
 bool vSmoothState=true;
-int show_points=0;
+bool showPoints=false;
 
  asm("__esp: .long 0\n\t");
 
 void decrypt(void){
 
- if( cryptStd == nag && satur!=0 ) decomb();
-// decomb();
+ if( cryptStd == nag && satur!=0 ) decomb(0);
+ if( cryptStd == vc  && satur!=0 ) decomb(1);
 
  if(cryptStd==vc)       vc_decrypt();
  else if(cryptStd==nag) nag_decrypt();
@@ -38,8 +38,7 @@ void decrypt(void){
 }
 
 
-static void decomb(void){
- int segs=1;
+static void decomb(int q){
  int linep, llinep, line, i;
  int j, k;
  int temp[max_x];
@@ -48,32 +47,43 @@ static void decomb(void){
  const int stride=oneField ? (vgax<<1) : (vgax<<2);
  const int o=oneField ? 0 : 1;
  const int end=oneField ? outy : outy1;
- const int segSize= end/segs;
 
-   int startp= 0;
-   for(int seg=0; seg<segs; seg++){
+ for(i=0; i<wndx; i++){
+   temp[i]=0;
+ }
 
+ if(q==1){
+   linep= stride*128;
+   if(o) linep+=vgax<<1;
+   for(line=128; line>=0; line--){
      for(i=0; i<wndx; i++){
-       temp[i]=0;
+
+       j = char(actVid2MemBufp->b[linep  + (i<<1)]);
+       j=(j + j) - temp[i];
+       if(j<temp[i] && (line&2)) j++;
+       if     (j> 127) j= 127;
+       else if(j<-128) j=-128;
+       temp[i] = j;
      }
+     linep-=stride;
+   }
+ }
 
-     linep= startp;
-     if(o) linep+=vgax<<1;
-     for(line=0; line<segSize; line++){
-       for(i=0; i<wndx; i++){
+ linep= 0;
+ if(o) linep+=vgax<<1;
+ for(line=0; line<end; line++){
+   for(i=0; i<wndx; i++){
 
-         j = char(actVid2MemBufp->b[linep  + (i<<1)]);
-         j=(j + j) - temp[i];
-         if(j<temp[i]) j++;
-         if     (j> 127) j= 127;
-         else if(j<-128) j=-128;
-         temp[i] = j;
-         actVid2MemBufp->b[linep  + (i<<1)] = (char)(j);
-       }
-       linep+=stride;
-     }
-     startp+= segSize*stride;
+     j = char(actVid2MemBufp->b[linep  + (i<<1)]);
+     j=(j + j) - temp[i];
+     if(j<temp[i] && (line&2)) j++;
+     if     (j> 127) j= 127;
+     else if(j<-128) j=-128;
+     temp[i] = j;
+     actVid2MemBufp->b[linep  + (i<<1)] = (char)(j);
    }
+   linep+=stride;
+ }
 
 /*
  asm(//"int $3                    \n\t"

Modified: trunk/2010/2_file.cc
==============================================================================
--- trunk/2010/2_file.cc	(original)
+++ trunk/2010/2_file.cc	Wed Jul  4 09:25:09 2007
@@ -10,13 +10,12 @@
  extern int vgax, vgay, wndx, wndy;
  extern int fysize;
  extern byte font[4096];
- extern VID2MEMBUF actVid2MemBufp;
+ extern VID2MEMBUF *actVid2MemBufp;
  extern int g_mode;
  extern int yuvmode;
  extern char *yuv2rgb_lut;
 
 void grab(void){
-/*
  int i, j;
  FILE *fg;
  char grab_name[16];
@@ -47,24 +46,42 @@ void grab(void){
 
  fwrite(&bmpi, sizeof(bmpi), 1, fg);
  for(i=wndy-1; i>=0; i--){
-   for(j=0; j<wndx; j++){
-     if(yuvmode==0) fwrite((void*)(&actVid2MemBufp.b[(j + i*vgax)<<2]), 1, 3, fg);
-     else {
-       if(j>=wndx>>1) break;
+   if(yuvmode==0){
+     for(j=0; j<wndx; j++){
+/*       const int b=  (actVid2MemBufp->b[ (j + i*vgax)<<1   ] & 0x1F) << 3;
+       const int g= ((actVid2MemBufp->b[ (j + i*vgax)<<1   ] & 0xE0) >> 3)
+                   +((actVid2MemBufp->b[((j + i*vgax)<<1)+1] & 0x07) << 5);
+       const int r=   actVid2MemBufp->b[((j + i*vgax)<<1)+1] & 0xF8;
+*/
+       const int b=  (actVid2MemBufp->b[ (j + i*vgax)<<1   ] & 0x1F) << 3;
+       const int g= ((actVid2MemBufp->b[ (j + i*vgax)<<1   ] & 0xE0) >> 2)
+                   +((actVid2MemBufp->b[((j + i*vgax)<<1)+1] & 0x03) << 6);
+       const int r=  (actVid2MemBufp->b[((j + i*vgax)<<1)+1] & 0x7C) << 1;
 
-       mem2vid(fwbuf, (s_yuv*)(&actVid2MemBufp.b[(j + (i*vgax>>1))<<2]), 2, 32);
+       const int bgr= b + (g<<8) + (r<<16);
 
-       fwbuf[3]=fwbuf[4];
-       fwbuf[4]=fwbuf[5];
-       fwbuf[5]=fwbuf[6];
+       fwrite(&bgr, 1, 3, fg);
+     }
+   }
+   else{
+     for(j=0; j<wndx; j+=2){
+       const int u = char(actVid2MemBufp->b[ ((j + i*vgax)<<1)     ]);
+       const int y1=      actVid2MemBufp->b[ ((j + i*vgax)<<1) + 1 ];
+       const int v = char(actVid2MemBufp->b[ ((j + i*vgax)<<1) + 2 ]);
+       const int y2=      actVid2MemBufp->b[ ((j + i*vgax)<<1) + 3 ];
+       color c[2];
+       c[0].init(y1, u, v, 1);
+       c[1].init(y2, u, v, 1);
+       byte bgr2[6]={ c[0].col.b, c[0].col.g, c[0].col.r,
+                      c[1].col.b, c[1].col.g, c[1].col.r };
 
-       fwrite(fwbuf, 1, 6, fg);
+       fwrite(bgr2, 1, 6, fg);
      }
    } 
  }
 
  fclose(fg);
-  */
+
 }
 
 long fsize(FILE *f){

Modified: trunk/2010/2_grafix.cc
==============================================================================
--- trunk/2010/2_grafix.cc	(original)
+++ trunk/2010/2_grafix.cc	Wed Jul  4 09:25:09 2007
@@ -22,6 +22,7 @@
  extern MENULEVEL menuLevel;
  extern int infoPosY;
  extern VID2MEMBUF *vid2MemBuf;
+ extern int some;
 
  bool helpState=true;
  bool iState=false;
@@ -78,13 +79,20 @@ void showStuff(void){
    gprint(10, infoPosY+=10, c.col, textbuf);
 
    sprintf(textbuf,"%d Buffers", vid2MemBuf[0].num);
-   gprint(150, infoPosY   , c.col, textbuf);
+   gprint(120, infoPosY   , c.col, textbuf);
+
+   if(yuvmode==0) sprintf(textbuf,"RGB16");
+   else           sprintf(textbuf,"YUV422");
+   gprint(220, infoPosY   , c.col, textbuf);
 
    sprintf(textbuf,"%d Corrupted Fields", corr_errors);
    gprint(10, infoPosY+=10, c.col, textbuf);
 
    sprintf(textbuf,"%d Address error's", addr_errors);
    gprint(10, infoPosY+=10, c.col, textbuf);
+
+   sprintf(textbuf,"%d Some", some);
+   gprint(10, infoPosY+=10, c.col, textbuf);
    infoPosY+=5;
  }
 

Modified: trunk/2010/2_nag.cc
==============================================================================
--- trunk/2010/2_nag.cc	(original)
+++ trunk/2010/2_nag.cc	Wed Jul  4 09:25:09 2007
@@ -1,5 +1,7 @@
 //2010 0.1 Copyright (C) Michael Niedermayer 1998
 
+//FIX search multiply (use deltas)
+
 #include <string.h>
 #include <stdlib.h>
 #include <stdio.h>
@@ -21,18 +23,19 @@ extern vgax, vgay, wndx, wndy, outy, y_f
 extern VID2MEMBUF *actVid2MemBufp;
 extern volatile int scales_x, scalee_x;
 extern volatile int scales_y, scalee_y;
-extern int one_field;
 extern u_long asm_temp, asm_temp4;
 extern bool iState;
 extern int yuvmode;
 extern bool mmx;
 
-int *decoLine=NULL;
-int decoLines;
-int keyTable[256];
-int *best1=NULL;
-BEST2 *best2=NULL;           
-u_short *keysList=NULL;
+static int alg2Lines;
+static short *linesPerKeyTab=NULL;
+static int *decoLine=NULL;
+static int decoLines;
+static int keyTable[256];
+static int *best1=NULL;
+static BEST2 *best2=NULL;           
+static u_short *keysList=NULL;
 static int outy1;
 static int nagStart2;
 static int nagEnd2;
@@ -154,11 +157,15 @@ static inline void doDwnSmp(byte * const
 
 static inline int nagLineLogi2Phys(const int logi){
   int phys;
+#ifdef CHECK
   if(logi>=287) error(-100);
+#endif
 
   if(logi<32) phys= nagStart1 + ( logi    <<1);
   else        phys= nagStart2 + ((logi-32)<<1);
+#ifdef CHECK
   if(phys>=576) error(-101);
+#endif
   return phys;
 }
 
@@ -244,8 +251,8 @@ static inline void mod4Fix(byte *p, cons
             "c" (-wndx<<2) 
           : "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi");
 
-
-  /*
+    /*
+  
 
   const bool xchg[4]={false, true, false, true};
   const int  neg [4]={    0,    0,    -1,   -1};
@@ -268,33 +275,44 @@ static inline void mod4Fix(byte *p, cons
       p[x+2]= (v^neg[type]) - neg[type];
 
     }
-  } */
-}
+  }   */
+}     
 
-void corrV(byte * p1, byte * p2, const int type, int *vPhase){
+static inline void corrV(byte * p1, byte * p2, const int type, int *vPhase){
   const bool xchg[4]={false, true, false, true};
   const int  neg [4]={    0,    0,    -1,   -1};
 
-  for(int x=0; x<(wndx<<1); x+=4){
-    int u1= char(p1[x  ]);            // perhaps u><v
-    int v1= char(p1[x+2]);
+  if(xchg[type]){
 
-    int u2= char(p2[x  ]);            // perhaps u><v
-    int v2= char(p2[x+2]);
+    for(int x=0; x<(wndx<<1); x+=32){        // 32 FASTER until asm
+      int v1= char(p1[x  ]);            
+      int u1= char(p1[x+2]);
 
-    if(xchg[type]){
-      const int t=u1;
-      u1=v1;
-      v1=t;
+      const int u2= char(p2[x  ]);            
+      const int v2= char(p2[x+2]);
+
+      u1=(u1^neg[type]) - neg[type];
+      v1=(v1^neg[type]) - neg[type];
+   
+      *vPhase+= mabs(u1-u2) + mabs(v1-v2);
+      *vPhase-= mabs(u1+u2) + mabs(v1+v2);
     }
+  }
+  else{
+    for(int x=0; x<(wndx<<1); x+=32){          // 32 FASTER until asm
+      int u1= char(p1[x  ]);            
+      int v1= char(p1[x+2]);
 
-    u1=(u1^neg[type]) - neg[type];
-    v1=(v1^neg[type]) - neg[type];
+      const int u2= char(p2[x  ]);            
+      const int v2= char(p2[x+2]);
 
-    *vPhase+= mabs(u1-u2) + mabs(v1-v2);
-    *vPhase-= mabs(u1+u2) + mabs(v1+v2);
+      u1=(u1^neg[type]) - neg[type];
+      v1=(v1^neg[type]) - neg[type];
+   
+      *vPhase+= mabs(u1-u2) + mabs(v1-v2);
+      *vPhase-= mabs(u1+u2) + mabs(v1+v2);
+    }
   }
-
 }
 
 void nag_decrypt(void){
@@ -338,17 +356,21 @@ void nag_decrypt(void){
    decoLine[12]=213;
    decoLine[13]=222;
 
+   alg2Lines=80;
+
    FILE *f= fopen("key.txt", "r");
    if(f==NULL) error(KeyFile);
    for(int i=0; i<256; i++)
      if( fscanf(f, "%d", &keyTable[i])!=1 ) error(KeyFile);
    fclose(f);
-     
+
    f=fopen("2010.dat", "rb");
    if(f!=NULL){
-     int decoLines2;
+     int decoLines2, alg2Lines2;
      fread(&decoLines2,   sizeof(int), 1, f);
+     fread(&alg2Lines2,   sizeof(int), 1, f);
      if(decoLines2!=decoLines) goto badFile;
+     if(alg2Lines2!=alg2Lines) goto badFile;
 
      int decoLine2[NAG_LINES];
      fread(decoLine2,     sizeof(int), decoLines2, f);
@@ -356,16 +378,20 @@ void nag_decrypt(void){
 
      int best2Size;
      int keysListSize;
-     fread(&best2Size,    sizeof(int), 1, f);
-     fread(&keysListSize, sizeof(int), 1, f);
+     int linesPerKeyTabSize;
+     fread(&best2Size,          sizeof(int), 1, f);
+     fread(&keysListSize,       sizeof(int), 1, f);
+     fread(&linesPerKeyTabSize, sizeof(int), 1, f);
 
-     best1=    new int[decoLines * NAG_LINES];
-     best2=    new BEST2[best2Size];
-     keysList= new u_short[keysListSize];
+     best1=          new int[decoLines * NAG_LINES];
+     best2=          new BEST2[best2Size];
+     keysList=       new u_short[keysListSize];
+     linesPerKeyTab= new short[linesPerKeyTabSize];
 
-     fread(best1,    sizeof(int),      decoLines * NAG_LINES, f);
-     fread(best2,    sizeof(BEST2),    best2Size            , f);
-     fread(keysList, sizeof(u_short),  keysListSize         , f);
+     fread(best1,          sizeof(int),      decoLines * NAG_LINES, f);
+     fread(best2,          sizeof(BEST2),    best2Size            , f);
+     fread(keysList,       sizeof(u_short),  keysListSize         , f);
+     fread(linesPerKeyTab, sizeof(short),    linesPerKeyTabSize   , f);
 
      badFile:;
        fclose(f);
@@ -463,18 +489,74 @@ void nag_decrypt(void){
      delete [] keysTempStart;
      delete [] state;
 
+     const int startW= (NAG_LINES-alg2Lines)>>1;
+     const int endW=   (NAG_LINES+alg2Lines)>>1;
+     int linesPerKeyTabSize=0;
+     int minLines=1000;
+     for(int key=0; key<KEYS; key++){
+       int perm[NAG_LINES];
+       getPerm(key, perm);
+       int lines=0;
+       for(int clearLine=1; clearLine<NAG_LINES; clearLine++){
+         int l1=perm[clearLine  ];
+         int l2=perm[clearLine-1];
+         if( l1 >= startW && l1 < endW && l2 >= startW && l2 < endW){
+           linesPerKeyTabSize++;
+           lines++;
+         }
+       }
+       if(lines<minLines) minLines=lines;  // linesPerKeyTabSize is bigger than necessary
+     }
+     linesPerKeyTab= new short[linesPerKeyTabSize+1];
+
+     printf("minLines %d\n", minLines);
+
+     int linesPerKeyTabP=0;
+     for(int key=0; key<KEYS; key++){
+       int perm[NAG_LINES];
+       getPerm(key, perm);
+       int lines=0;
+       for(int clearLine=1; clearLine<NAG_LINES; clearLine++){
+         int l1=perm[clearLine  ];
+         int l2=perm[clearLine-1];
+         if( l1 >= startW && l1 < endW && l2 >= startW && l2 < endW){
+           if(l1>l2){
+             int lt=l1;
+             l1=l2;
+             l2=lt;
+           }
+           l1-=startW;
+           l2-=startW;
+           l1*=alg2Lines;
+           linesPerKeyTab[linesPerKeyTabP]=l1+l2;
+           linesPerKeyTabP++;
+           lines++;
+           if(lines>=minLines) break;
+         }
+       }
+       linesPerKeyTab[linesPerKeyTabP]|=1<<15;
+
+     }
+     linesPerKeyTab[linesPerKeyTabP]=-1;
+
+     linesPerKeyTabSize=linesPerKeyTabP+1;
+
      f=fopen("2010.dat", "wb");
      if(f==NULL) error(DatWrite);
 
      fwrite(&decoLines,    sizeof(int), 1, f);
+     fwrite(&alg2Lines,    sizeof(int), 1, f);
+
      fwrite(decoLine,      sizeof(int), decoLines, f);
 
-     fwrite(&best2Size,    sizeof(int), 1, f);
-     fwrite(&keysListSize, sizeof(int), 1, f);
+     fwrite(&best2Size,          sizeof(int), 1, f);
+     fwrite(&keysListSize,       sizeof(int), 1, f);
+     fwrite(&linesPerKeyTabSize, sizeof(int), 1, f);
 
-     fwrite(best1,    sizeof(int),      decoLines * NAG_LINES, f);
-     fwrite(best2,    sizeof(BEST2),    best2Size            , f);
-     fwrite(keysList, sizeof(u_short),  keysListSize         , f);
+     fwrite(best1,          sizeof(int),      decoLines * NAG_LINES, f);
+     fwrite(best2,          sizeof(BEST2),    best2Size            , f);
+     fwrite(keysList,       sizeof(u_short),  keysListSize         , f);
+     fwrite(linesPerKeyTab, sizeof(short),    linesPerKeyTabSize   , f);
 
      fclose(f);
 
@@ -535,7 +617,9 @@ void nag_decrypt(void){
      bestCoeffs[dL].line[1]= bestCoeffs[dL].line[0];
      bestCoeffs[dL].line[0]= t;
    }
+#ifdef CHECK
    limit(bestCoeffs[dL].line[0], 0, 285);
+#endif
 
    int l= best1[ bestCoeffs[dL].line[0]     + dL*NAG_LINES ];
    int r= best1[ bestCoeffs[dL].line[0] + 1 + dL*NAG_LINES ];
@@ -598,16 +682,77 @@ void nag_decrypt(void){
 
  delete [] keysListPos;
 
- int perm[NAG_LINES];
- getPerm(bestKey, perm);
- printf("%X\n", bestKey);
-
  if(iState){
    T2=uclock();
    sprintf(textbuf[3],"%f FindKey", (float)(T2-T1)/UCLOCKS_PER_SEC);
    T1=T2;
  }
 
+ if(bestNum<3){                                     
+   short *coeff= new short[alg2Lines*alg2Lines];
+
+   byte *lowRes= new byte[alg2Lines*wndx>>dwnSmpSize];
+
+   const int startW= (NAG_LINES-alg2Lines)>>1;
+   const int endW=   (NAG_LINES+alg2Lines)>>1;
+
+   for(int line=0; line<alg2Lines; line++){
+     const int physLine= nagLineLogi2Phys(line+startW);
+     byte * const linep= actVid2MemBufp->b + physLine*(vgax<<1);
+     byte * const lowResp= lowRes + line*(wndx>>dwnSmpSize);
+     doDwnSmp(lowResp ,linep);
+   }
+
+   int coeffp=0;
+   int line0p=0;
+   for(int line0=0; line0<alg2Lines; line0++){
+     int line1p=0;
+     for(int line1=line0; line1<alg2Lines; line1++){
+       BESTCOEFFS out;
+       out.coeff[0]=
+       out.coeff[1]=100000;
+       nagraCorr(int(line0p), int(line1p), wndx>>dwnSmpSize, int(&out), 0);
+       coeff[coeffp + line1]=short(out.coeff[0]);
+       line1p+= wndx>>dwnSmpSize;
+     }
+     coeffp+= alg2Lines;
+     line0p+= wndx>>dwnSmpSize;
+   }
+   if(mmx) asm("emms\n\t");
+
+   int linesPerKeyTabP=0;
+   bestNum=100000;
+   int key=0;
+   int num=0;
+   for(;;){
+     if(linesPerKeyTab[linesPerKeyTabP]==-1) break;
+     num+= coeff[ linesPerKeyTab[linesPerKeyTabP] & !(1<<15) ];
+
+     if(linesPerKeyTab[linesPerKeyTabP] & (1<<15) ){
+       if(num<bestNum){
+         bestNum=num;
+         bestKey=key;
+         num=0;
+       }
+       key++;
+     }
+     linesPerKeyTabP++;
+   }
+
+   delete [] lowRes;
+   delete [] coeff;
+ }
+
+ if(iState){
+   T2=uclock();
+   sprintf(textbuf[4],"%f Algo2", (float)(T2-T1)/UCLOCKS_PER_SEC);
+   T1=T2;
+ }
+
+ int perm[NAG_LINES];
+ getPerm(bestKey, perm);
+ printf("%X\n", bestKey);
+
  delete [] bestCoeffs;   // kill this
 
 
@@ -616,7 +761,7 @@ void nag_decrypt(void){
  int lastVType[2];
  int vPhase=0;
  for(int i=0; i<NAG_LINES; i++){
-   if(perm[i]>=32 && i-perm[i]<=0){
+   if(perm[i]>=32 && i-perm[i]<=-16){          // fewer to test (FASTER)
      const cleMod4= (i      ) & 3;
      const encMod4= (perm[i]) & 3;
      const type= cleMod4 ^ encMod4;
@@ -767,7 +912,7 @@ void nag_decrypt(void){
 
  if(iState){
    T2=uclock();
-   sprintf(textbuf[4],"%f ReOrder", (float)(T2-T1)/UCLOCKS_PER_SEC);
+   sprintf(textbuf[6],"%f ReOrder", (float)(T2-T1)/UCLOCKS_PER_SEC);
    T1=T2;
  }
 
@@ -785,8 +930,8 @@ void nag_decrypt(void){
    gprint(110, 110, c.col, textbuf[3]);
    gprint(120, 120, c.col, textbuf[4]);
    gprint(130, 130, c.col, textbuf[5]);
-/*   gprint(140, 140, c.col, textbuf[6]);
-   gprint(150, 150, c.col, textbuf[7]);
+   gprint(140, 140, c.col, textbuf[6]);
+/*   gprint(150, 150, c.col, textbuf[7]);
    gprint(160, 160, c.col, textbuf[8]);
    gprint(170, 170, c.col, textbuf[9]);*/
    T1=T2;

Modified: trunk/2010/2_nag_a.asm
==============================================================================
--- trunk/2010/2_nag_a.asm	(original)
+++ trunk/2010/2_nag_a.asm	Wed Jul  4 09:25:09 2007
@@ -58,7 +58,7 @@ _nagraCorr__Fiiiii:
  test eax, eax
   jz NEAR NoMMX
 
- movq mm3, [mmx_zero]
+ pxor mm3, mm3
 
  movq mm0, [esp + esi]
  movq mm5, mm3
@@ -200,5 +200,4 @@ ret
 xyz times 8 dd 0
 
 align8
-mmx_zero times 2 dd 0
 mmx_clip times 8 db 0

Modified: trunk/2010/2_vc.cc
==============================================================================
--- trunk/2010/2_vc.cc	(original)
+++ trunk/2010/2_vc.cc	Wed Jul  4 09:25:09 2007
@@ -3,119 +3,266 @@
 #include <stdio.h>
 #include <time.h>
 #include <string.h>
+#include <math.h>
 #include "2_all.h"
 #include "2_crypt.h"
 #include "2_vc.h"
 #include "2_71x6.h"
 #include "2_gfunc.h"
 #include "2_vc_a.h"
+#include "2_hw_mem.h"
+#include "2010.h"
+                      
+#define FREQ_SMP       2.1e7
+#define FREQ_CHROM     4433618.75
+#define FREQ_HS        15625.0
+#define FREQ_PIX       (FREQ_HS * 1888.0 * 0.5)
+#define POINT_LAND     765.0
+#define NO_POINT_LAND1 145.0              // GUESSED! 
+#define NO_POINT_LAND2 145.0              // GUESSED! 
+#define NO_POINT_LAND3 10.0               // GUESSED!
+ // ? POINT_LAND + NO_POINT_LAND(1|2) = 910   (FIFO SIZE)
+ // ? NO_POINT_LAND + POINT_LAND = 1065       (PHASE ~+-60deg)
 
-#define addap_start 16
-#define addap_end 2
-#define vc_dead 7.5
-#define check 1
+ // 142 142 16
+ // 143 143 14
+ // 144 144 12
+ // 145 145 10
 
-extern vgax, vgay, wndx, wndy;
+#define NO_POINT_LAND  (NO_POINT_LAND1 + NO_POINT_LAND2 + NO_POINT_LAND3)
+#define PHASE_POINT    (fmod(FREQ_CHROM / (FREQ_SMP/3.0), 1.0) * PI * 2.0)
+#define PHASE_SMP      (fmod(FREQ_CHROM / (FREQ_SMP    ), 1.0) * PI * 2.0)
+#define PHASE_SEG      (fmod(FREQ_CHROM / FREQ_SMP \
+                          * (POINT_LAND + NO_POINT_LAND), 1.0) * PI * 2.0)
+
+
+#define FIX_POINT 10
+#define STEP_START 16
+#define STEP_END 2
+#define VC_DEAD 7.5
+#define DWN_SMP 1
+#define MAX_DRIFT 2
+#define PHASE_CMP_STEP 3
+#define BAD_THRESHOLD1 0.8
+#define BAD_THRESHOLD2 200
+#define PHASE_DIFF_TH 35
+
+#define NUM_PHASE_SMP (vcDiff>>PHASE_CMP_STEP)
+
+extern int vgax, vgay, wndx, wndy, x_field;
 extern VID2MEMBUF *actVid2MemBufp;
 extern volatile int scales_x, scalee_x;
 extern volatile int scales_y, scalee_y;
-extern int one_field;
-extern int show_points;
+extern bool showPoints;
 extern bool iState;
 extern int yuvmode;
-extern int edge_lut[256];
-
+extern int some;
+extern bool mmx;
 
-static int *new_val=new int[max_x], *last_val=new int[max_x];
+// FIX ABS ??
 
-static byte copy_buff[max_x<<1];
+static inline float atan3(const float f1, const float f2){
+  float out;
+                     // library atan2 crashed (cause unknown); fpatan is used directly instead
+  asm(//"int $3\n\t"
+      "fpatan \n\t"
+      : "=t"(out)
+      : "0"(f2), "u"(f1)
+      : "st(1)"
+                          );
 
+  return out;
+}
 
 void vc_decrypt(void){
- int i, j, k=0, l;
- int line, cutp, diff, linep, llinep, add_step, start_step;
- int edgep;
- int bestp=0, bestdiff;
- int diffall;
- int temp;
- int cutpoint[max_y];
- int addap_buff[max_x];
- int drift;
- int best_edge=0;
- int best_val, best_edge_val;
- int best_dir=0;
+ int cutPointX[max_y];
+ int addapBuff[max_x];
  char textbuf[9][128];
  long T1=0, T2;
  long alg1=0, alg2=0, alg3=0;
  color c;
 
- show_points=1;
- const int vc_endx=wndx;
- const int vc_startx=int(double(22-scales_x+3)/double(768-scales_x+scalee_x)*wndx);
+ int tempScore[2]={0 ,0};
 
- const int vc_diff=vc_endx - vc_startx;
+ for(int i=0; i<360; i++){
+   double d=i;
 
- int *dir=new int[vc_diff*(wndy + 1)];
+//   asm("int $3\n\t");
+   double a=atan2(cos(d/180*PI), sin(d/180*PI));
+   double a2=atan3(cos(d/180*PI), sin(d/180*PI));
+   if(a<0.0) a+=PI*2;
+   if(a2<0.0) a2+=PI*2;
+   printf("xyz %f %f\n",a,a2);
+ }
 
- const int mindist=int(double(vc_diff) / vc_dead);
+ const int lowResSize= mmx ? DWN_SMP : (DWN_SMP-1);
 
- cutpoint[0]=0;
+ printf("xx%f %f %f\n", PHASE_POINT/PI*180, PHASE_SEG/PI*180, PHASE_SMP/PI*180);
 
- char *datbuf[2];
+ double temp[1000][2];
 
- datbuf[0]= new char[vc_diff];
- datbuf[1]= new char[vc_diff];
+ int vScore=0;
+
+// showPoints=true;
+
+ const double freqPix2= double(wndx) / double(x_field) * FREQ_PIX;
+ printf("xy%d %d\n", wndx, x_field);
+
+                           //  18
+ const int vcStartX=int(double(16 -scales_x+3)/x_field*wndx + .5);
+
+ static int cutP2X[256];
+ static int x2CutP[max_x];
+// static byte atanLut[ 1<< (ATAN_LUT_SIZE<<1) ]
+
+ static int wndxBak= -1;
+ if(wndx != wndxBak){
+   for(int x=0; x<max_x; x++){       
+
+     const int cutP=int( double(x) / ( 3.0 * freqPix2) * FREQ_SMP
+                             - (NO_POINT_LAND1 + NO_POINT_LAND3/2)/3.0 + .5);
+     x2CutP[x]= cutP;
+   }
+   for(int cutP=0; cutP<256; cutP++){
+     const int x=int( double(cutP*3 + NO_POINT_LAND1 + NO_POINT_LAND3/2)
+                                                 * freqPix2 / FREQ_SMP + .5);
+     cutP2X[cutP]= x;
+   }
+   wndxBak=wndx;
+ }
+ const int vcEndX= vcStartX + int( double(POINT_LAND + NO_POINT_LAND)
+                                               * freqPix2 / FREQ_SMP + .5);
+ printf("vcEndX %d\n",vcEndX);
+ if(vcEndX>wndx) error(error_code(-123));
+ const int vcDiff=vcEndX - vcStartX;
+
+ const int minDist= cutP2X[0]-1;
+
+ cutPointX[0]=0;
+
+ char *lowRes[2];
+
+ lowRes[0]= (char*)newAlign(vcDiff, 8);
+ lowRes[1]= (char*)newAlign(vcDiff, 8);
+
+ int *convVec= new int[vcDiff>>(PHASE_CMP_STEP-1)];
 
  if(iState){
    T1=uclock();
  }
 
- for(i = 0; i < vc_diff; i++) last_val[i]=1;
+ static int edgeLut[512];
+ static bool first= true;
+ static int score[40];
 
- llinep=-(vgax<<2);
- linep=-(vgax<<1);
- edgep=0;
- for(line=0; line<wndy; line++){
+ static byte phasePerCutP[256];
+
+ if(first){
+   some=10;
+
+   edgeLut[0]=0;
+   for(int i=1; i<512; i++){
+     const int j= min(i, 70);
+     edgeLut[i]=int( log(j*10)*100.0 );
+   }
+   first=false;
+
+   for(int cutP=0; cutP<256; cutP++){
+     const double phase= PHASE_SMP*(NO_POINT_LAND1 + NO_POINT_LAND3 + 1) + PHASE_POINT*cutP;
+     phasePerCutP[cutP]= (byte)( phase * 128 / PI );
+     printf("ppcp %d %d\n", cutP, phasePerCutP[cutP]*360/256);
+   }
+
+   for(int i=0; i<40; i++) score[i]=0;
+
+ }
+
+ const float sinSegPhase= sin(PHASE_SEG);
+ const float cosSegPhase= cos(PHASE_SEG);
+
+ const float sinSegPhase2= sin(PHASE_SEG*2);
+ const float cosSegPhase2= cos(PHASE_SEG*2);
+
+ bool *black= new bool[wndy];
+ int *phaseDiff= new int[(vcDiff*wndy)>>PHASE_CMP_STEP];
+ int *chromAmp= new int[wndy];
+
+ int llinep=-(vgax<<2);
+ int linep=-(vgax<<1);
+ for(int line=0; line<wndy; line++){
    llinep+=vgax<<1;
    linep+=vgax<<1;
-   bestdiff=0x1FFFFFFF;
-   diffall=0;
-   start_step=0;
 
-   char *dtemp = datbuf[0];
-   datbuf[0] = datbuf[1];
-   datbuf[1] = dtemp;
+   char *dTemp = lowRes[0];
+   lowRes[0] = lowRes[1];
+   lowRes[1] = dTemp;
 
-   asm(//"int $3 \n\t"
-       "pushl %%ebp          \n\t"
+   if(mmx){
+     asm(//"int $3 \n\t"
+         "pushl %%ebp          \n\t"
 
-       "1:                   \n\t"
-       "movw (%%edi), %%ax   \n\t"   // U        load 32 byte in L1-cache
-       "xorl %%ebx, %%ebx    \n\t"   //  V 1
-       "movl %1, %%ebp       \n\t"   // U
-       "xorl %%eax, %%eax    \n\t"   //  V 1
-       "2:                   \n\t"
-       "addl %%eax, %%ebx    \n\t"   // U
-       "xorl %%eax, %%eax    \n\t"   //  V 1
-       "movb (%%esi), %%al   \n\t"   // U
-       "addl $2, %%esi       \n\t"   //  V 1
-       "decl %%ebp           \n\t"   // U
-       " jnz 2b              \n\t"   //  V 1
-       "addl $2, %%edi       \n\t"   // U
-       "addl %%eax, %%ebx    \n\t"   //  V 1
-       "shrl %0, %%ebx       \n\t"   // U 
-       "decl %%edx           \n\t"   //  V 1
-       "movw %%bx, -2(%%edi) \n\t"   // U
-       " jnz 1b              \n\t"   //  V 1
+         "1:                   \n\t"
+         "movb (%%edi), %%al   \n\t"   // U        load 32 byte in L1-cache
+         "xorl %%ebx, %%ebx    \n\t"   //  V 1
+         "movl %1, %%ebp       \n\t"   // U
+         "xorl %%eax, %%eax    \n\t"   //  V 1
+         "2:                   \n\t"
+         "addl %%eax, %%ebx    \n\t"   // U
+         "xorl %%eax, %%eax    \n\t"   //  V 1
+         "movb (%%esi), %%al   \n\t"   // U
+         "addl $2, %%esi       \n\t"   //  V 1
+         "decl %%ebp           \n\t"   // U
+         " jnz 2b              \n\t"   //  V 1
+         "incl %%edi           \n\t"   // U
+         "addl %%eax, %%ebx    \n\t"   //  V 1
+         "shrl %0, %%ebx       \n\t"   // U         
+         "decl %%edx           \n\t"   //  V 1
+         "movb %%bl, -1(%%edi) \n\t"   // U
+         " jnz 1b              \n\t"   //  V 1
   
-       "popl %%ebp            \n\t"
-        : 
-        : "I" (check), "i" (1<<check),
-          "S" (actVid2MemBufp->b + linep + 1 + (vc_startx<<1)) ,
-          "d" (vc_diff>>check) ,
-          "D" (datbuf[1]) 
-        : "%eax", "%ebx", "%edx", "%esi", "%edi");
+         "popl %%ebp            \n\t"
+          : 
+          : "I" (DWN_SMP), "i" (1<<DWN_SMP),
+            "S" (actVid2MemBufp->b + linep + 1 + (vcStartX<<1)) ,
+            "d" (vcDiff>>DWN_SMP) ,
+            "D" (lowRes[1]) 
+          : "%eax", "%ebx", "%edx", "%esi", "%edi");
+
+   }
+   else{
+     asm(//"int $3 \n\t"
+         "pushl %%ebp          \n\t"
+
+         "1:                   \n\t"
+         "movw (%%edi), %%ax   \n\t"   // U        load 32 byte in L1-cache
+         "xorl %%ebx, %%ebx    \n\t"   //  V 1
+         "movl %1, %%ebp       \n\t"   // U
+         "xorl %%eax, %%eax    \n\t"   //  V 1
+         "2:                   \n\t"
+         "addl %%eax, %%ebx    \n\t"   // U
+         "xorl %%eax, %%eax    \n\t"   //  V 1
+         "movb (%%esi), %%al   \n\t"   // U
+         "addl $2, %%esi       \n\t"   //  V 1
+         "decl %%ebp           \n\t"   // U
+         " jnz 2b              \n\t"   //  V 1
+         "addl $2, %%edi       \n\t"   // U
+         "addl %%eax, %%ebx    \n\t"   //  V 1
+         "shrl %0, %%ebx       \n\t"   // U         
+         "decl %%edx           \n\t"   //  V 1
+         "movw %%bx, -2(%%edi) \n\t"   // U
+         " jnz 1b              \n\t"   //  V 1
+  
+         "popl %%ebp            \n\t"
+          : 
+          : "I" (DWN_SMP), "i" (1<<DWN_SMP),
+            "S" (actVid2MemBufp->b + linep + 1 + (vcStartX<<1)) ,
+            "d" (vcDiff>>DWN_SMP) ,
+            "D" (lowRes[1]) 
+          : "%eax", "%ebx", "%edx", "%esi", "%edi");
+   }
+
 
+   black[line]=false; //fix this
 
    if(iState){
      T2=uclock();
@@ -125,138 +272,274 @@ void vc_decrypt(void){
 
    if(line==0) continue;
 
-   for(add_step=addap_start; add_step>addap_end; add_step>>=1){
-     start_step>>=1;
-     if(add_step==(addap_start>>1) && start_step==0){
-       add_step=addap_start;
-       start_step=addap_start>>1;
-     }
-     for(cutp=start_step; cutp<vc_diff; cutp+=add_step){
-       if(cutp==start_step || start_step==0 || cutp+add_step>=vc_diff
-          || bestdiff+(bestdiff>>1) > addap_buff[cutp - (add_step>>1)]
-          || bestdiff+(bestdiff>>1) > addap_buff[cutp + (add_step>>1)]){
+   int bestDiff=0x1FFFFFFF;
+   int avgDiff=0;             // for detecting bad lines
+   int avgDiffNum=0;          // for detecting bad lines
+   int bestCutPX=0;
 
+   bool firstPass=true;
+   for(int step=STEP_START; step>STEP_END; step>>=1){
 
-         diff = vc_corr(
-               int(datbuf[1] + (((                  + cutp) << (1-check))&~1)),
-               int(datbuf[0] + ((( - cutp + vc_diff + cutp) << (1-check))&~1)),
-                  -(( cutp << (1-check) ) & ~1),
-               int(datbuf[1] + (((   cutp + vc_diff - cutp) << (1-check))&~1)),
-               int(datbuf[0] + (((        + vc_diff - cutp) << (1-check))&~1)),
-                  -(( (vc_diff-cutp) << (1-check) ) & ~1) ) ;
+     const int cutPStart= firstPass ? 0 : step>>1;
 
-         if(bestdiff>diff){
-           bestdiff=diff;
-           bestp=cutp;
+     for(int cutP=cutPStart; cutP<vcDiff; cutP+=step){
+       if(firstPass || cutP+step>=vcDiff || cutP==step>>1 
+          || bestDiff+(bestDiff>>1) > addapBuff[ cutP - (step>>1) ]
+          || bestDiff+(bestDiff>>1) > addapBuff[ cutP + (step>>1) ]){
+
+         int diff;
+         if(mmx){
+           diff = vc_corr_mmx(
+                  int(lowRes[1]                                ),
+                  int(lowRes[0] + (( vcDiff - cutP) >> DWN_SMP)),
+                      cutP >> DWN_SMP ,
+                  int(lowRes[1] + (( cutP         ) >> DWN_SMP)),
+                  int(lowRes[0]                                ),
+                      (vcDiff-cutP) >> DWN_SMP                  );
+           asm("emms\n\t");
          }
-         addap_buff[cutp]=diff;
-         if(start_step==0) diffall+=diff;
+         else{
+           diff = vc_corr(
+                  int(lowRes[1] + (((        + cutP) >> (DWN_SMP-1))&~1)),
+                  int(lowRes[0] + ((( vcDiff       ) >> (DWN_SMP-1))&~1)),
+                     -(( cutP >> (DWN_SMP-1) ) & ~1),
+                  int(lowRes[1] + ((( vcDiff       ) >> (DWN_SMP-1))&~1)),
+                  int(lowRes[0] + ((( vcDiff - cutP) >> (DWN_SMP-1))&~1)),
+                     -(( (vcDiff-cutP) >> (DWN_SMP-1) ) & ~1)             );
+
+         }
+
+         if(bestDiff>diff){
+           bestDiff=diff;
+           bestCutPX=cutP;
+         }
+
+         avgDiff+=diff;
+         avgDiffNum++;
+
+         addapBuff[cutP]=diff;
        }
-       else addap_buff[cutp]=0x7FFFFFFF;
+       else addapBuff[cutP]=0x1FFFFFFF;
      }
+     if(firstPass) step<<=1;
+     firstPass=false;
    }
-   bestp+= cutpoint[line-1];
-   if(bestp>=vc_diff) bestp-=vc_diff;
-   cutpoint[line]=bestp;
-   if(bestdiff==0) bestdiff++;
-   temp=diffall * addap_start * 32 / (vc_diff * bestdiff);
-        if(temp<42) drift=wndx;
-   else if(temp<56) drift=addap_end;
-   else             drift=(addap_end>>1) | 1;
 
+   if( int( avgDiff*BAD_THRESHOLD1/double(avgDiffNum) ) - BAD_THRESHOLD2
+                                                                 < bestDiff )
+       black[line]=true;
+
+   if(line>2){
+
+     int sumI=0;
+     for(int x=-4; x<0; x+=2){
+       const int nx= x + (vcDiff & ~1);
+       const int ul= char(actVid2MemBufp->b[ linep + (((vcStartX & ~1) +  x)<<1) + 0]);
+       const int vl= char(actVid2MemBufp->b[ linep + (((vcStartX & ~1) +  x)<<1) + 2]);
+       const int un= char(actVid2MemBufp->b[ linep + (((vcStartX & ~1) + nx)<<1) + 0]);
+       const int vn= char(actVid2MemBufp->b[ linep + (((vcStartX & ~1) + nx)<<1) + 2]);
+       sumI+= ul*vn - vl*un;
+     }
+//     actVid2MemBufp->b[linep + (((vcStartX & ~1) - 4)<<1) + 1]=255;
+  
+     if(line>10 && line<250){
+       if(line & 1){
+         vScore+= sumI;
+       }else{
+         vScore-= sumI;
+       }
+     }
+              
+   }
+
+   bestCutPX+= cutPointX[line-1];
+   if(bestCutPX>=vcDiff) bestCutPX-=vcDiff;
+   cutPointX[line]=bestCutPX;
 
    if(iState){
      T2=uclock();
      alg1+=T2-T1;
      T1=T2;
    }
+ }
 
-   edgep +=vc_diff;
+ delete [] convVec;
+ deleteAlign(lowRes[0]);
+ deleteAlign(lowRes[1]);
 
-   best_edge_val=0;
+ int *dir=new int[257*(wndy + 1)];
 
-   for(i = 0; i < vc_diff; i++){
-     l= i + cutpoint[line];
-     if(l >= vc_diff) l-=vc_diff;
-     if(l < mindist || l >= vc_diff - mindist){
-       new_val[i]=0;
-       dir[edgep + i]=best_edge;    //FIX ME (not nes)
+ const int driftPenalty[MAX_DRIFT*2+1]={10000, 1000, 0, 1000, 10000};
+
+ int *newVal=new int[257], *lastVal=new int[257];
+ memset(lastVal, 0, 257<<2);
+
+ llinep=-(vgax<<2);
+ linep=-(vgax<<1);
+ for(int line=0; line<wndy; line++){
+   llinep+=vgax<<1;
+   linep+=vgax<<1;
+
+   memset(newVal, 0, 257<<2);
+   bool noLeft=true;
+
+   dir[line*257 + 256]=-1;
+
+   if(line==0) continue;
+
+   int relCutPX= cutPointX[line] - cutPointX[line-1];
+   if(relCutPX<0) relCutPX+=vcDiff;
+
+   chromAmp[line]=0;
+   for(int x=0; x<vcDiff; x+= 1<<PHASE_CMP_STEP){
+     int nx= x + (relCutPX & ~1);
+     if(nx>=vcDiff) nx-= (vcDiff & ~1);
+     const int ul= char(actVid2MemBufp->b[llinep + (((vcStartX & ~1) +  x)<<1) + 0]);
+     const int vl= char(actVid2MemBufp->b[llinep + (((vcStartX & ~1) +  x)<<1) + 2]);
+     const int un= char(actVid2MemBufp->b[ linep + (((vcStartX & ~1) + nx)<<1) + 0]);
+     const int vn= char(actVid2MemBufp->b[ linep + (((vcStartX & ~1) + nx)<<1) + 2]);
+     const int p= x>>(PHASE_CMP_STEP-1);
+     convVec[p  ]= ul*un + vl*vn;
+     convVec[p+1]= ul*vn - vl*un;
+     chromAmp[line]+= un*un + vn*vn;
+   }
+
+//   actVid2MemBufp->b[linep + ((vcStartX +  0)<<1) + 1]=255;
+//   actVid2MemBufp->b[linep + ((vcStartX +  relCutPX)<<1) + 1]=200;
+
+   int sumR0=0, sumI0=0;
+   int sumR1=0, sumI1=0;
+   for(int x=0; x<vcDiff; x+= 1<<PHASE_CMP_STEP){
+     const int p= x>>(PHASE_CMP_STEP-1);
+     if(x+relCutPX<vcDiff){
+       sumR0+= convVec[p  ];
+       sumI0+= convVec[p+1];
      }
-     else if(drift==wndx){
-       new_val[i]=1;
-       dir[edgep + i]=best_edge;
+     else{
+       sumR1+= convVec[p  ];
+       sumI1+= convVec[p+1];
+     }
+   }
+
+   const float s= ( (line & 1) ^ ( vScore>0 ) ) ? 1 : -1;
+
+   int sumR0C= int( sumR0*cosSegPhase   + sumI0*sinSegPhase*s);
+   int sumI0C= int(-sumR0*sinSegPhase*s + sumI0*cosSegPhase);
+   int sumR1C= int( sumR1*cosSegPhase   + sumI1*sinSegPhase*s);
+   int sumI1C= int(-sumR1*sinSegPhase*s + sumI1*cosSegPhase);
+
+
+   int ffR0=0    , ffI0=0;
+   int ssR0=sumR0, ssI0=sumI0;
+   int ffR1=0    , ffI1=0;
+   int ssR1=sumR1, ssI1=sumI1;
+   const int phaseP= line*vcDiff>>PHASE_CMP_STEP;
+   for(int x=0; x<vcDiff; x+= 1<<PHASE_CMP_STEP){
+     const int p= x>>(PHASE_CMP_STEP-1);
+     const int q= x>> PHASE_CMP_STEP;
+     if(x+relCutPX<vcDiff){
+       ssR0-=convVec[p  ];
+       ssI0-=convVec[p+1];
+
+       const int r= ffR0 + sumR1C;
+       const int i= ffI0 + sumI1C;
+
+       phaseDiff[phaseP + q  ]= int( atan3(i, r)/PI*128 ) & 0xFF;
+/*
+       phaseDiff[phaseP + q  ]= ffR0 ;//+ sumR1C;
+       phaseDiff[phaseP + q+1]= ffI0 ;//+ sumI1C;
+       phaseDiff[phaseP + q+2]= ssR0;
+       phaseDiff[phaseP + q+3]= ssI0;
+  */
+       ffR0+=convVec[p  ];
+       ffI0+=convVec[p+1];
      }
      else{
+       ssR1-=convVec[p  ];
+       ssI1-=convVec[p+1];
 
-       diff=0;
-       int edge_x= l == 0 ? (l - 2) + vc_diff : (l - 2);
-       do{
-         if((edge_x++) >= vc_diff) edge_x-= vc_diff;
-         diff+= actVid2MemBufp->b[linep + ((vc_startx + edge_x)<<1) + 1];
-       }while( edge_x != l );
-       do{
-         if((edge_x++) >= vc_diff) edge_x-= vc_diff;
-         diff-= actVid2MemBufp->b[linep + ((vc_startx + edge_x)<<1) + 1];
-       }while( edge_x != l + 2);
+       const int r= ffR1 + sumR0C;
+       const int i= ffI1 + sumI0C;
 
-       diff= edge_lut[ mabs(diff) ];
+       phaseDiff[phaseP + q  ]= int( atan3(i, r)/PI*128 ) & 0xFF;
+/*
+       phaseDiff[phaseP + q  ]= ffR1 ;//+ sumR0C;
+       phaseDiff[phaseP + q+1]= ffI1 ;//+ sumI0C;
+       phaseDiff[phaseP + q+2]= ssR1;
+       phaseDiff[phaseP + q+3]= ssI1;
+  */
+       ffR1+=convVec[p  ];
+       ffI1+=convVec[p+1];
+     }
+   }     
+            
+   for(int lastCutP = 0; lastCutP < 256; lastCutP++){
+     if(black[line] || lastVal[lastCutP]==0) continue;
 
-       new_val[i]= diff;
+     int newXTemp= cutPointX[line] - cutPointX[line-1] + cutP2X[lastCutP];
+     if(newXTemp <  0     ) newXTemp+= vcDiff;
+     if(newXTemp >= vcDiff) newXTemp-= vcDiff;
+     if(newXTemp >= vcDiff) newXTemp-= vcDiff;
 
-       best_val=0;
+     const int newCutPStart= max(x2CutP[ newXTemp ] - MAX_DRIFT, 0);
+     const int newCutPEnd=   min(x2CutP[ newXTemp ] + MAX_DRIFT, 256);
 
-       for(j = i - (drift<<2); j <  i + (drift<<2); j++){
-         k=j;
-         int drift_p;
-         const int drift_t=mabs(j - i);
-              if(drift_t <=     1   ) drift_p= 0;
-         else if(drift_t <= drift   ) drift_p= 2;
-         else if(drift_t <= drift<<1) drift_p=10;
-         else if(drift_t <= drift*3 ) drift_p=20;
-         else                         drift_p=40;
+     int drift= newCutPStart - (x2CutP[ newXTemp ] - MAX_DRIFT);
 
-              if(k <  0      ) k+= vc_diff;
-         else if(k >= vc_diff) k-= vc_diff;
+     const int p=  (line*vcDiff>>PHASE_CMP_STEP)
+                  +(cutP2X[ lastCutP ]>>PHASE_CMP_STEP);
 
-         if(last_val[k] - drift_p > best_val){
-           best_val=last_val[k] - drift_p;
-           best_dir=k;
-           if(best_edge_val < best_val){
-             best_edge_val=best_val;
-             best_edge=best_dir;
-           }
-         }
+     int a= phaseDiff[p  ];
+     if( (line & 1) ^ ( vScore>0 ) ) a= -a;
 
-       }
+     const int phase1= a + phasePerCutP[ lastCutP ];
+   
+     for(int newCutP= newCutPStart; newCutP <= newCutPEnd; newCutP++){
+       int val= lastVal[lastCutP] - driftPenalty[drift];
+       int phase= phase1 + phasePerCutP[ newCutP ];
+       phase&= 0xFF;
+       phase= mabs( phase - 128 );
+       if(chromAmp[line]>3000 && phase>=PHASE_DIFF_TH) val-=100000;
+       if(chromAmp[line]<=3000) 
+         actVid2MemBufp->b[linep + ((vcStartX -  1)<<1) + 1]=255;
+     
+       if(newVal[newCutP] < val){
+         newVal[newCutP]= val;
+         dir[line*257 + newCutP]= lastCutP;
 
-       if(best_val!=0){
-         new_val[i]+= best_val;
-         dir[edgep + i]= best_dir;
-       }else{
-         new_val[i]=0;
-         dir[edgep + i]= 0;    //FIX ME (Not Nes)
-         for(j=k+1; i<vc_diff ; j++){
-           if(j >= vc_diff) j-=vc_diff;
-           if(last_val[j]==0){
-             i++;
-             l= i + cutpoint[line];
-             if(l >= vc_diff) l-=vc_diff;
-             new_val[i]=0;
-             dir[edgep + i]= 0;    //FIX ME (Not Nes)
-           }
-           else break;
-         }              
-         if(i >=  vc_diff && best_edge_val==0){
-           i=0;
-           drift=wndx;
-         }
+         noLeft=false;
        }
+       drift++;
      }
+
    }
 
-   int *temp_val=last_val;
-   last_val=new_val;
-   new_val=temp_val;
+   for(int newCutP = 0; newCutP < 256; newCutP++){
+     if(black[line]) continue;
+     if(!noLeft && newVal[newCutP]==0) continue;
+
+     const int newX= cutP2X[ newCutP ];
+
+     const int diff=edgeLut[ mabs(
+      +actVid2MemBufp->b[linep + ((vcStartX + newX - 2)<<1) + 1]
+      +actVid2MemBufp->b[linep + ((vcStartX + newX - 1)<<1) + 1]
+      -actVid2MemBufp->b[linep + ((vcStartX + newX    )<<1) + 1]
+      -actVid2MemBufp->b[linep + ((vcStartX + newX + 1)<<1) + 1] ) ];
+
+     if(newVal[newCutP]==0) newVal[newCutP] =  diff + 1000000000;
+     else                   newVal[newCutP] += diff;
+
+     if(newVal[256] < newVal[newCutP]){
+       newVal[256]= newVal[newCutP];
+       dir[line*257 + 256]= newCutP;
+     }
+
+   }
+   
+
+   int *tempVal=lastVal;
+   lastVal=newVal;
+   newVal=tempVal;
 
    if(iState){
      T2=uclock();
@@ -266,6 +549,10 @@ void vc_decrypt(void){
 
  }
 
+ delete [] newVal;
+ delete [] lastVal;
+
+
  if(iState){
    T2=uclock();
    sprintf(textbuf[0],"%f Corr", (float)(alg1)/UCLOCKS_PER_SEC);
@@ -274,56 +561,187 @@ void vc_decrypt(void){
    T1=T2;
  }
 
- i=best_edge;
- for(line=wndy-1; line>0; line--){
-   if(show_points==0){
-     cutpoint[line]+=i;
-     if(cutpoint[line] >= vc_diff) cutpoint[line]-= vc_diff;
-   }
-   else{
-     j=cutpoint[line]+i;
-     if(j >= vc_diff) j-= vc_diff;
-     actVid2MemBufp->b[(line * vgax<<1) + ((vc_startx + j)<<1)+1]=255;
-     actVid2MemBufp->b[(line * vgax<<1) + ((vc_startx + j)<<1)+3]=0;
+ 
+ if(showPoints){
+   int bestCutP=-1;
+   for(int line=wndy-1; line>0; line--){
+     if(dir[line*257 + 256]==-1){
+       bestCutP=-1;
+       continue;
+     }
+     if(bestCutP==-1) bestCutP= dir[line*257 + 256];
+     if(bestCutP<0 || bestCutP>=256){
+       printf("one%d\n", bestCutP);
+       bestCutP=-1;
+       continue;
+     }
+
+     const int bestX= cutP2X[ bestCutP ];
+     actVid2MemBufp->b[(line * vgax<<1) + ((vcStartX + bestX)<<1)+1]=255;
+     actVid2MemBufp->b[(line * vgax<<1) + ((vcStartX + bestX)<<1)+3]=0;
+     actVid2MemBufp->b[(line * vgax<<1) + ((vcStartX + bestX)<<1)+5]=255;
+
+     bestCutP= dir[line*257 + bestCutP];
    }
-   i=dir[vc_diff*line + i];
  }
 
+ 
  if(iState){
    T2=uclock();
    sprintf(textbuf[3],"%f Misc", (float)(T2-T1)/UCLOCKS_PER_SEC);
    T1=T2;
  }
 
- linep=-(vgax<<1);
-// vc_diff&= ~1;
- for(line=0; line<wndy; line++){
+ int cutpp[1000];
+ byte copy_buff[max_x<<1];
 
-//   cutpoint[line]&= ~1;
+ int bestCutP=-1;
+ linep=(wndy-1) * (vgax<<1);
+ for(int line=wndy-2; line>0; line--){
+   linep-= vgax<<1;
 
-   linep+=vgax<<1;
-   if(show_points!=0){
-     actVid2MemBufp->b[linep+(vc_startx<<1)+1]=255;
-     actVid2MemBufp->b[linep+(vc_startx<<1)+3]=0;
+   cutpp[line]=0;
+
+   if(black[line]){
+     actVid2MemBufp->b[linep + ((vcStartX - 4)<<1) + 1]=255;
+     actVid2MemBufp->b[linep + ((vcStartX - 4)<<1) + 3]=255;
    }
-   memcpy(copy_buff, actVid2MemBufp->b + linep + (vc_startx<<1), vc_diff<<1);
-   memcpy(actVid2MemBufp->b + linep + (vc_startx<<1), copy_buff + (cutpoint[line]<<1),
-                                                 (vc_diff-cutpoint[line])<<1);
-   memcpy(actVid2MemBufp->b + linep + (vc_startx<<1) + ((vc_diff-cutpoint[line])<<1),
-                                                                   copy_buff,
-                                                         cutpoint[line]<<1);
+
+   if(showPoints){
+     actVid2MemBufp->b[linep+(vcStartX<<1)+1]=255;
+     actVid2MemBufp->b[linep+(vcStartX<<1)+3]=0;
+   }
+   if(dir[line*257 + 256]==-1){
+     bestCutP=-1;
+     continue;
+   }
+   if(bestCutP==-1) bestCutP=dir[line*257 + 256];
+
+   if(bestCutP<0 || bestCutP>=256){
+     printf("two%d\n", bestCutP);
+     bestCutP=-1;
+     continue;
+   }
+
+   const int bestX= cutP2X[ bestCutP ];
+
+   int cutX= (showPoints ? cutPointX[line] : bestX );
+
 /*
-   if(show_points!=0){
-     for(i=0; i<vc_diff; i++){
-       j=dir[line * vc_diff + i];
-       if(j<0 || j>=vc_diff) j=0;
+   const int cutP= min(x2CutP[cutX], 0);          // fix ? ness, max?
 
-       actVid2MemBufp->b[linep + ((vc_startx + j) <<1)+1]=255;
+   const float s1= sin( -float(phasePerCutP[ cutP ] + some)/128*PI + PHASE_SEG );
+   const float c1= cos( -float(phasePerCutP[ cutP ] + some)/128*PI + PHASE_SEG );
+   const float s2= sin( -float(phasePerCutP[ cutP ] + some)/128*PI             );
+   const float c2= cos( -float(phasePerCutP[ cutP ] + some)/128*PI             );
+
+   for(int x=0; x<vcDiff; x+=2){
+     const char u=char(actVid2MemBufp->b[linep+((vcStartX + x)<<1)  ]);
+     const char v=char(actVid2MemBufp->b[linep+((vcStartX + x)<<1)+2]);
+     char u1, v1;
+
+     float s, c;
+     if(x<cutP){
+       s= s1;
+       c= c1;
      }
-   }   
-     */
+     else{
+       s= s2;
+       c= c2;
+     }
+
+
+     if( (line & 1) ^ ( vScore>0 ) ){
+       v1=(char)( -u*c - v*s );
+       u1=(char)( -v*c + u*s );
+     }
+     else{
+       v1=(char)( -u*c + v*s );
+       u1=(char)( -v*c - u*s );
+     }
+
+     printf("uv %d %d  %d %d \n", u,v,u1,v1);
+     actVid2MemBufp->b[linep+((vcStartX + x)<<1)  ]= u1;
+     actVid2MemBufp->b[linep+((vcStartX + x)<<1)+2]= v1;
+
+   }
+
+   cutX&= ~1;
+   */
+   memcpy(copy_buff, actVid2MemBufp->b + linep + (vcStartX<<1), vcDiff<<1);
+
+   memcpy(actVid2MemBufp->b + linep + (vcStartX<<1), copy_buff + (cutX<<1),
+                                                 (vcDiff-cutX)<<1);
+   memcpy(actVid2MemBufp->b + linep + (vcStartX<<1) + ((vcDiff-cutX)<<1),
+                                                                   copy_buff,
+                                                         cutX<<1);
+
+   cutpp[line]=cutX;
+
+
+   bestCutP= dir[line*257 + bestCutP];
  }
 
+ for(int line=80; line<200; line++){
+     if( (line & 1) ^ ( vScore>0 ) ) printf(" e");
+     else                            printf(" o");
+
+     const int p= (cutpp[line-1]>>PHASE_CMP_STEP)
+                + (line*vcDiff>>PHASE_CMP_STEP) ;
+     int a= phaseDiff[p  ];
+
+     int relCutp= cutpp[line-1] + cutpp[line];
+     if(relCutp>=vcDiff) relCutp-=vcDiff;
+
+     printf("c%03d %03d %03d %03d\n", relCutp, a*180/128, x2CutP[cutpp[line-1]], x2CutP[cutpp[line]]);
+        //SIGSEGV
+
+     const int cut1= x2CutP[ cutpp[line-1] ];
+     const int cut2= x2CutP[ cutpp[line  ] ];
+
+     if(cut1<30 || cut1>220) continue;
+     if(cut2<30 || cut2>220) continue;
+
+     for(int offset=-20; offset<20; offset++){
+         int diff;
+         if( (line & 1) ^ ( vScore>0 ) ){
+           diff=  phasePerCutP[ cut1 + offset ]
+                + phasePerCutP[ cut2 + offset ]
+                - a;
+         }
+         else{
+           diff=  phasePerCutP[ cut1 + offset ]
+                + phasePerCutP[ cut2 + offset ]
+                + a;
+         }
+
+         diff&= 0xFF;
+
+         diff=mabs( diff-128 );
+         if(diff<PHASE_DIFF_TH && chromAmp[line]>3000) diff=1;
+         else if(chromAmp[line]>3000) diff=100000;
+         else diff=0;
+         score[offset+20]+=diff;
+     }
+
+//     int cutP2=cutpp[line] - cutpp[line-2];
+//     if(cutP2<0) cutP2+= vcDiff;
+ }
+
+ for(int i=0; i<40; i++)
+   printf("score %d   %d\n", score[i], i-20);
+
+ double avgCutP=0;
+ for(int line=50; line<200; line++){
+   avgCutP+=double(cutpp[line]);
+ }
+ avgCutP/=(200.0-50.0);
+ printf("avg%f\n", avgCutP-double(vcDiff)/2.0);
+  
+  
+ printf("vScore %d\n", vScore);
+
+
  if(iState){
    T2=uclock();
    sprintf(textbuf[4],"%f Rotate", (float)(T2-T1)/UCLOCKS_PER_SEC);
@@ -338,7 +756,9 @@ void vc_decrypt(void){
 
 
   delete [] dir;
-  delete [] datbuf[0];
-  delete [] datbuf[1];
+  delete [] black;
+  delete [] phaseDiff;
+  delete [] chromAmp;
 }
 
+

Modified: trunk/2010/2_vc_a.asm
==============================================================================
--- trunk/2010/2_vc_a.asm	(original)
+++ trunk/2010/2_vc_a.asm	Wed Jul  4 09:25:09 2007
@@ -4,9 +4,11 @@
 
 segment .data
 
+extern _mmx
 extern esp_save
 extern _asm_temp
 global _vc_corr__Fiiiiii
+global _vc_corr_mmx__Fiiiiii
 
 %define line1a       par1
 %define line1b       par2
@@ -15,6 +17,13 @@ global _vc_corr__Fiiiiii
 %define line2b       par5
 %define count2       par6
 
+%define mline1a       par_1
+%define mline1b       par0 
+%define mcount1       par1
+%define mline2a       par2
+%define mline2b       par3
+%define mcount2       par4
+
 _vc_corr__Fiiiiii:
  push ebx
 
@@ -139,5 +148,157 @@ skip2:
 ret
 
 
+_vc_corr_mmx__Fiiiiii:        ; int vc_corr_mmx(int,int,int,int,int,int): MMX sum of absolute byte differences over two buffer pairs, result in eax
+; int3
+ push ebx                     ; save every general register that is modified below
+ push esi
+
+ push edi
+ push ecx
+; ---- pair 1: mline1a against mline1b, mcount1 bytes (mline*/mcount* are %defines of par_1..par4, which are declared outside this routine) ----
+ mov esi, [mline1a]           ; esi = address of line a (read directly, no realignment)
+ mov edi, [mline1b]           ; edi = address of line b (read as aligned quadwords, realigned by shifting)
+
+ mov ebx, edi
+ mov ecx, [mcount1]
+
+ and ecx, -8                  ; round the byte count down to a multiple of 8
+ and ebx, 7                   ; ebx = misalignment of line b in bytes
+
+ shl ebx, 3                   ; ... converted to a bit count
+ and edi, -8                  ; align the line b pointer down to 8 bytes
+
+ add esi, ecx                 ; esi = end of the line a range; ecx is negated below and used as index
+
+ movd mm6, ebx                ; mm6 = right-shift count applied to the lower aligned quadword
+ pxor mm7, mm7                ; mm7 = accumulator, four 16-bit lanes
+
+ xor ebx, -1
+ add edi, ecx                 ; edi = end of the (aligned) line b range
+
+ add ebx, 65                  ; with the xor above: ebx = 64 - misalignment bits
+ xor ecx, -1
+
+ inc ecx                      ; with the xor above: ecx = -(rounded count)
+
+ movd mm5, ebx                ; mm5 = left-shift count applied to the upper aligned quadword
+ pxor mm4,mm4                 ; mm4 = 0, used to zero-extend bytes to words
+
+ cmp ecx, 8
+  jb skipMMX1                 ; unsigned compare on a multiple of 8: taken only when ecx == 0 (rounded count is zero)
+; preload the first 8 bytes of both lines; the line b bytes are pieced together from two aligned quadwords
+ movq mm3, [ecx + edi    ]
+
+ movq mm1, [ecx + edi + 8]
+ psrlq mm3, mm6
+
+ movq mm0, [ecx + esi    ]    ; mm0 = 8 bytes of line a
+ psllq mm1, mm5
+ 
+ por mm1, mm3                 ; mm1 = 8 realigned bytes of line b
+
+MMXLoop:                      ; each pass adds |a - b| of 8 bytes to mm7 and preloads the next 8 bytes
+ movq mm2, mm0               ; U     keep a copy of the line a bytes
+ psubusb mm0, mm1            ;  V 1  mm0 = a - b, saturated at 0
+ movq mm3, [ecx + edi + 8 ]  ; U     next pass: lower aligned quadword of line b
+ psubusb mm1, mm2            ;  V 1  mm1 = b - a, saturated at 0
+ por mm1, mm0                ; U     mm1 = |a - b| per byte
+ psrlq mm3, mm6              ;  V 1
+ movq mm2, mm1               ; U
+ punpcklbw mm1, mm4          ;  V 1  low four differences widened to words
+ punpckhbw mm2, mm4          ; U     high four differences widened to words
+ paddusw mm7, mm1            ;  V 1  accumulate with unsigned saturation
+ movq mm1, [ecx + edi + 16]  ; U     next pass: upper aligned quadword of line b
+ paddusw mm7, mm2            ;  V 1
+ movq mm0, [ecx + esi + 8 ]  ; U     next pass: 8 bytes of line a
+ psllq mm1, mm5              ;  V 1
+ por mm1, mm3                ; U     next pass: realigned line b bytes
+ add ecx, 8                  ;  V 1
+  jnc MMXLoop                ; U  1  carry is set only when ecx wraps to 0, which ends the loop
+; NOTE(review): the last pass still executes the preloads, so it reads up to 16 bytes past the aligned end of line b and 8 past line a -- presumably the buffers are padded; confirm
+skipMMX1:
+; ---- pair 2: mline2b is read directly, mline2a is realigned, mcount2 bytes; mm7 keeps accumulating and mm4 is still zero ----
+ mov esi, [mline2b]         
+ mov edi, [mline2a]
+
+ mov ebx, edi
+ mov ecx, [mcount2]
+
+ and ecx, -8                  ; round the byte count down to a multiple of 8
+ and ebx, 7
+
+ shl ebx, 3                   ; misalignment of mline2a in bits
+ and edi, -8
+
+ add esi, ecx
+
+ movd mm6, ebx                ; mm6 = right-shift count for this pair
+
+ xor ebx, -1
+ add edi, ecx
+
+ add ebx, 65                  ; with the xor above: ebx = 64 - misalignment bits
+ xor ecx, -1
+
+ inc ecx                      ; with the xor above: ecx = -(rounded count)
+
+ movd mm5, ebx                ; mm5 = left-shift count for this pair
+
+ cmp ecx, 8
+  jb skipMMX2                 ; taken only when ecx == 0 (rounded count is zero)
+
+ movq mm3, [ecx + edi    ]
+
+ movq mm1, [ecx + edi + 8]
+ psrlq mm3, mm6
+
+ movq mm0, [ecx + esi    ]
+ psllq mm1, mm5
+ 
+ por mm1, mm3                 ; mm1 = first 8 realigned bytes of mline2a
+
+MMXLoop2:                     ; same loop body as MMXLoop, operating on the second pair
+ movq mm2, mm0               ; U
+ psubusb mm0, mm1            ;  V 1
+ movq mm3, [ecx + edi + 8 ]  ; U
+ psubusb mm1, mm2            ;  V 1
+ por mm1, mm0                ; U     mm1 = absolute difference per byte
+ psrlq mm3, mm6              ;  V 1
+ movq mm2, mm1               ; U
+ punpcklbw mm1, mm4          ;  V 1
+ punpckhbw mm2, mm4          ; U
+ paddusw mm7, mm1            ;  V 1
+ movq mm1, [ecx + edi + 16]  ; U
+ paddusw mm7, mm2            ;  V 1
+ movq mm0, [ecx + esi + 8 ]  ; U
+ psllq mm1, mm5              ;  V 1
+ por mm1, mm3                ; U
+ add ecx, 8                  ;  V 1
+  jnc MMXLoop2               ; U  1
+
+skipMMX2:
+; horizontal sum of the four 16-bit lanes of mm7
+ movq mm0, mm7
+ psrlq mm7, 32
+
+ paddusw mm7, mm0             ; lane 0 = lane0 + lane2, lane 1 = lane1 + lane3
+
+ movq mm0, mm7
+ psrlq mm7, 16
+
+ paddusw mm7, mm0             ; lane 0 = sum of all four lanes (every add saturates at 0xFFFF)
+
+ movd eax, mm7
+
+ and eax, 0xFFFF              ; return only the low 16-bit lane
+ pop ecx                      ; restore registers in reverse push order
+
+ pop edi
+ pop esi
+
+ pop ebx
+; no emms is executed here; the C++ caller in this commit issues "emms" right after the call
+ret
+
 
 

Modified: trunk/2010/2_vc_a.h
==============================================================================
--- trunk/2010/2_vc_a.h	(original)
+++ trunk/2010/2_vc_a.h	Wed Jul  4 09:25:09 2007
@@ -3,5 +3,6 @@
 #define n2_vc_a_h
 
 int vc_corr(int, int, int, int, int, int);
+int vc_corr_mmx(int, int, int, int, int, int);
 
 #endif

Modified: trunk/2010/2d
==============================================================================
--- trunk/2010/2d	(original)
+++ trunk/2010/2d	Wed Jul  4 09:25:09 2007
@@ -1,2 +1,10 @@
-fix asm_code in bfd stuff
-deComb overFlow
+load ?
+vsmooth
+vc color
+vc rightdrift ?
+
+vc (black like)
+vc exact start/end lines (last one not in 300x400)
+vc use amp to reject phase
+crypt decomb (math proofs and fixPoint Arith)
+double2float



More information about the Mndiff-dev mailing list