[MN-dev] [mndiff]: r54 - in trunk/2010: 2010.cc 2_71x6.cc 2_71x6.h 2_crypt.cc 2_grafix.cc 2_hw.cc 2_nag.cc 2_nag.h 2_vc.cc

michael subversion at mplayerhq.hu
Wed Jul 4 09:37:52 CEST 2007


Author: michael
Date: Wed Jul  4 09:37:52 2007
New Revision: 54

Log:
version from 1998-11-01 20:54


Modified:
   trunk/2010/2010.cc
   trunk/2010/2_71x6.cc
   trunk/2010/2_71x6.h
   trunk/2010/2_crypt.cc
   trunk/2010/2_grafix.cc
   trunk/2010/2_hw.cc
   trunk/2010/2_nag.cc
   trunk/2010/2_nag.h
   trunk/2010/2_vc.cc

Modified: trunk/2010/2010.cc
==============================================================================
--- trunk/2010/2010.cc	(original)
+++ trunk/2010/2010.cc	Wed Jul  4 09:37:52 2007
@@ -12,6 +12,7 @@
 #include <crt0.h>
 #include <ctype.h>
 #include <signal.h>
+#include <time.h>
 #include "2_all.h"
 #include "2010.h"
 #include "2_hw.h"
@@ -43,10 +44,12 @@
  extern TVSTD TVStd;
  extern bool iState;
  extern bool helpState;
+ extern bool allowDrop;
 
  int maxBuf=2;
  int some=0;
 
+
 int _crt0_startup_flags = _CRT0_FLAG_NEARPTR | _CRT0_FLAG_NONMOVE_SBRK;
  int grabf;
  int fysize;
@@ -56,8 +59,17 @@ int _crt0_startup_flags = _CRT0_FLAG_NEA
  int *yuv2RgbLut;
  int edge_lut[256];
  MENULEVEL menuLevel=mLNorm;
+ int infoPosX;
  int infoPosY;
  bool strangeRgb16= true;   //  commandLineOpt FIX
+ long uclockWaste;
+
+static inline void getUclockWaste(void){   // estimate the average cost of one uclock() call; result goes to the global uclockWaste
+  long T1=uclock();                        // starting timestamp
+  for(int i=0; i<1000; i++) uclock();      // 1000 calls whose return values are discarded
+  uclockWaste= (uclock()-T1)/1000;         // elapsed ticks / 1000; the closing uclock() call is part of the measured span
+  printf("uclockWaste %d\n", int(uclockWaste));   // report the value, converted to int for %d
+}
 
 void printg_t(char *text){
  close_hw();
@@ -314,12 +326,14 @@ int main(int argc, char **argv){
   
  init_hw();
  init_meteor();
+ getUclockWaste();
  static int delay=0;
 
  bool quit=false;
  while(!quit){
 
    infoPosY=0;
+   infoPosX=10;
 
    decrypt();
    showStuff();
@@ -337,6 +351,9 @@ int main(int argc, char **argv){
 
      switch(menuLevel){
        case mLNorm : switch(key){
+                       case 'p'  :
+                       case 'P'  : getkey();
+                       break;
                        case 'g'  :
                        case 'G'  : key=0, grabf=1;
                        break;
@@ -385,6 +402,9 @@ int main(int argc, char **argv){
                        case 'h'  :
                        case 'H'  : helpState= !helpState;
                        break;
+                       case 'd'  :
+                       case 'D'  : allowDrop= !allowDrop;
+                       break;
                        case '1'  : delay++;
                        break;
                        case '2'  : delay--; if(delay<0) delay=0;
@@ -424,7 +444,13 @@ int main(int argc, char **argv){
                        case '5'  : TVStd= PAL;
                                    cryptStd= nag;
                                    if(yuvmode==0) yuvmode=1;
-                                   //if(oneField && yuvmode==2) error(MemAlloc);
+                                   vSmoothState=false;
+                                   setStdScale();
+                                   contGrab();
+                       break;
+                       case '6'  : TVStd= TXTPAL;
+                                   cryptStd= nix;
+                                   if(yuvmode==0) yuvmode=1;
                                    vSmoothState=false;
                                    setStdScale();
                                    contGrab();

Modified: trunk/2010/2_71x6.cc
==============================================================================
--- trunk/2010/2_71x6.cc	(original)
+++ trunk/2010/2_71x6.cc	Wed Jul  4 09:37:52 2007
@@ -43,14 +43,13 @@ extern volatile int stride;
 extern volatile bool oneField;
 extern int wndx, wndy, outy;
 extern int single;
+extern int some;
 
 extern int vgax, vgay;
 
 int v_smooth=1;
 
 int bright=0x80, satur=0x40, contr=0x40;
-//int scales_x=0x03, scales_y= 0x11;           
-//int scalee_x=0x03, scalee_y= 0x11;               
 int scales_x=0x03, scales_y= 0x11;           
 int scalee_x=0x03, scalee_y= 0x11;               
 
@@ -318,24 +317,27 @@ void setStdScale(void){
  saa7116 *a_saa7116=(saa7116*)meteors[active_meteor].saa7116;
  int i;
 
-/* if(yuvmode!=0) mc=1;
- else mc=2;*/
  mc=1;
  xresc=vgax<<mc;
 
  wndx=vgax;
 
+ if(TVStd==TXTPAL)
+   scales_y=0x1 , scalee_y=0x1;
+ else 
+   scales_y=0x11, scalee_y=0x11;
+
  if(TVStd==NTSC)
     y_field=240-scales_y+scalee_y, x_field=640-scales_x+scalee_x;
  else
     y_field=288-scales_y+scalee_y, x_field=768-scales_x+scalee_x;
 
- if(vgay-70 <= y_field && cryptStd!=nag) oneField=true;
- else                                    oneField=false;
+ if(vgay-70 <= y_field && (cryptStd!=nag && TVStd!=TXTPAL)) oneField=true;
+ else                                                       oneField=false;
 
  int yrest;
- if(cryptStd!=nag || vgay-70 > y_field) yrest= vgay;
- else                                   yrest= vgay << 1;
+ if((cryptStd!=nag && TVStd!=TXTPAL) || vgay-70 > y_field) yrest= vgay;
+ else                                                      yrest= vgay << 1;
 
  if     (!oneField && yrest>y_field*2){
    outy= y_field*2;
@@ -349,6 +351,8 @@ void setStdScale(void){
    outy= yrest;
  }
 
+ if(TVStd==TXTPAL) wndx=vgax;
+
  if(wndx > vgax){
 //   outy= int( double(outy) * double(x_field) / double(vgax) ); // ?! JOKE
    wndx= vgax;
@@ -365,8 +369,8 @@ void setStdScale(void){
  if(!oneField) stride=(vgax + vgax - wndx)<<mc;
  else          stride=(vgax        - wndx)<<mc;
 
- if(cryptStd!=nag || vgay-70 > y_field) wndy= outy;
- else                                   wndy= outy >> 1;
+ if((cryptStd!=nag && TVStd!=TXTPAL) || vgay-70 > y_field) wndy= outy;
+ else                                                      wndy= outy >> 1;
 
 
  if(TVStd==SECAM) write_saa7196(0x0D, 0x85);
@@ -410,6 +414,7 @@ void setStdScale(void){
                            /* 7:0  [7:0] Pixel number per line on input    */
  write_saa7196(0x23, scales_x);/* 7:0  [7:0] Horizontal start position of scaling win*/
  i=saa7196_buf[0x24] & 0xE0;
+ i=(some<<5) & 0xE0;
  write_saa7196(0x24, i | ((x_field>>6) & 0x0C) | (wndx>>8));
                            /* 7:5  Horizontal decimation filter
                                 4  [8] Horizontal start position of scaling win
@@ -421,7 +426,9 @@ void setStdScale(void){
  write_saa7196(0x26, y_field & 0xFF);
                            /* 7:0  [7:0] Line number per input field       */
  write_saa7196(0x27, scales_y);/* 7:0  [7:0] Vertical start of scaling window  */
- i=saa7196_buf[0x28] & 0xE0;
+// i=saa7196_buf[0x28] & 0xE0;
+ if(TVStd==TXTPAL) i=0;
+ else              i=0x80;
  if(oneField) write_saa7196(0x28, i | ((y_field>>6) & 0x0C) | (outy>>8));
  else         write_saa7196(0x28, i | ((y_field>>6) & 0x0C) | (outy>>9));
                            /*   7  Adaptive filter switch
@@ -436,6 +443,7 @@ void setStdScale(void){
  printf("%d %d %d %d %d %d %d %d\n",vgax, vgay , wndx, wndy, outy,
                      y_field , x_field, stride);
 
+    // FIX (clear vga mem)
 }
 
 void init_meteor(void){

Modified: trunk/2010/2_71x6.h
==============================================================================
--- trunk/2010/2_71x6.h	(original)
+++ trunk/2010/2_71x6.h	Wed Jul  4 09:37:52 2007
@@ -11,6 +11,7 @@
 
 enum TVSTD{
  PAL,
+ TXTPAL,
  NTSC,
  SECAM,
 };

Modified: trunk/2010/2_crypt.cc
==============================================================================
--- trunk/2010/2_crypt.cc	(original)
+++ trunk/2010/2_crypt.cc	Wed Jul  4 09:37:52 2007
@@ -1,11 +1,16 @@
 //2010 0.1 Copyright (C) Michael Niedermayer 1998
 
 #include <string.h>
+#include <stdio.h>
+#include <time.h>
 #include "2_all.h"
 #include "2_crypt.h"
 #include "2_vc.h"
 #include "2_nag.h"
 #include "2_71x6.h"
+#include "2_gfunc.h"
+
+#define COMB_FIX 256
 
 extern volatile TVSTD TVStd;
 extern volatile CRYPTSTD cryptStd;
@@ -16,6 +21,9 @@ extern volatile int scales_y, scalee_y;
 extern int yuvmode;
 extern volatile bool oneField;
 extern int satur;
+extern bool iState;
+extern int infoPosX, infoPosY;
+extern int some;
 
 static void vSmooth(void);
 static void decomb(int q);
@@ -27,20 +35,40 @@ bool showPoints=false;
 
 void decrypt(void){
 
+ long T1=0, T2;              // uclock() timestamps bracketing the decomb and decrypt stages below
+ char textbuf[2][128];       // [0] holds the "DeComb" timing line, [1] the "DeCrypt" timing line
+ if(iState){                 // timing is only taken while iState is set
+   T1=uclock();
+ }
+
  if( cryptStd == nag && satur!=0 ) decomb(0);
  if( cryptStd == vc  && satur!=0 ) decomb(1);
 
+ if(iState){
+   T2=uclock();
+   sprintf(textbuf[0],"%f DeComb", (float)(T2-T1)/UCLOCKS_PER_SEC);   // elapsed ticks converted to seconds
+   T1=T2;                    // the decrypt interval starts where the decomb interval ended
+ }
+
  if(cryptStd==vc)       vc_decrypt();
  else if(cryptStd==nag) nag_decrypt();
 
+ if(iState){
+   T2=uclock();
+   color c;
+   c.init(255, 0, 0, yuvmode);
+   sprintf(textbuf[1],"%f DeCrypt", (float)(T2-T1)/UCLOCKS_PER_SEC);  // elapsed ticks converted to seconds
+   gprint(infoPosX, infoPosY+=10,  c.col, textbuf[0]);                // each text line advances infoPosY by 10 before drawing
+   gprint(infoPosX, infoPosY+=10,  c.col, textbuf[1]);
+   infoPosY+=5;              // extra 5 added after this group of lines
+ }
+
  if(!oneField && vSmoothState) vSmooth();
 
 }
 
 
 static void decomb(int q){
- int linep, llinep, line, i;
- int j, k;
  int temp[max_x];
  const int outy1= outy>>1;
 
@@ -48,43 +76,105 @@ static void decomb(int q){
  const int o=oneField ? 0 : 1;
  const int end=oneField ? outy : outy1;
 
- for(i=0; i<wndx; i++){
-   temp[i]=0;
- }
+ for(int i=0; i<wndx; i++) temp[i]=128<<8;
 
  if(q==1){
-   linep= stride*256;
-   if(o) linep+=vgax<<1;
-   for(line=256; line>=0; line--){
-     for(i=0; i<wndx; i++){
+  int linep= stride*256;
+  if(o) linep+=vgax<<1;
+  for(int line=256; line>=0; line--){
+   asm(//"int $3                       \n\t"
+       "pushl %%ebp                    \n\t"    //U
+       "movl %%eax, %%ebp              \n\t"    // V 1
+       "addl %%ebp, %%edi              \n\t"    //U
+       "addl %%ebp, %%esi              \n\t"    // V 1
+       "addl %%ebp, %%esi              \n\t"    //U  1
+       "negl %%ebp                     \n\t"    //UV 1
+       "1:                             \n\t"
+       "xorl %%eax, %%eax              \n\t"    //U
+       "xorl %%ebx, %%ebx              \n\t"    // V 1
+       "movb  (%%edi, %%ebp   ), %%al  \n\t"    //U
+       "movb 2(%%edi, %%ebp   ), %%bl  \n\t"    // V 1
+       "addb $128, %%al                \n\t"    //U
+       "addb $128, %%bl                \n\t"    // V 1
+       "shll $9, %%eax                 \n\t"    //U
+       "movl  (%%esi, %%ebp, 2), %%ecx \n\t"    // V 1
+       "shll $9, %%ebx                 \n\t"    //U
+       "subl %%ecx, %%eax              \n\t"    // V 1
+       "movl 4(%%esi, %%ebp, 2), %%edx \n\t"    //U
+       "cmpl %%ecx, %%eax              \n\t"    // V 1
+       "sbbl %%ecx, %%ecx              \n\t"    //U
+       "subl %%edx, %%ebx              \n\t"    // V 1
+       "andl %0, %%ecx                 \n\t"    //U
+       "cmpl %%edx, %%ebx              \n\t"    // V 1
+       "sbbl %%edx, %%edx              \n\t"    //U
+       "addl %%ecx, %%eax              \n\t"    // V 1
+       "andl %0, %%edx                 \n\t"    //U
+       "movl %%eax,  (%%esi, %%ebp, 2) \n\t"    // V 1
+       "addl %%edx, %%ebx              \n\t"    //U  1
+       "movl %%ebx, 4(%%esi, %%ebp, 2) \n\t"    //U
 
-       j = char(actVid2MemBufp->b[linep  + (i<<1)]);
-       j=(j + j) - temp[i];
-       if(j<temp[i] && (line&2)) j++;
-       if     (j> 127) j= 127;
-       else if(j<-128) j=-128;
-       temp[i] = j;
-     }
-     linep-=stride;
-   }
+       "addl $4, %%ebp                 \n\t"    // V 1
+       " jnc 1b                        \n\t"    //U  1
+       "popl %%ebp                     \n\t"    //U
+           :
+           : "i" (COMB_FIX), "a" ((wndx<<1) - 2),
+             "D" (actVid2MemBufp->b + linep + 2), "S" (int(temp) + 4)
+           : "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi");
+   linep-=stride;
+  }
  }
 
- linep= 0;
+ int linep= 0;
  if(o) linep+=vgax<<1;
- for(line=0; line<end; line++){
-   for(i=0; i<wndx; i++){
+ for(int line=0; line<end; line++){
+   asm(//"int $3                       \n\t"
+       "pushl %%ebp                    \n\t"    //U
+       "movl %%eax, %%ebp              \n\t"    // V 1
+       "addl %%ebp, %%edi              \n\t"    //U
+       "addl %%ebp, %%esi              \n\t"    // V 1
+       "addl %%ebp, %%esi              \n\t"    //U  1
+       "negl %%ebp                     \n\t"    //UV 1
+       "1:                             \n\t"
+       "xorl %%eax, %%eax              \n\t"    //U
+       "xorl %%ebx, %%ebx              \n\t"    // V 1
+       "movb  (%%edi, %%ebp   ), %%al  \n\t"    //U
+       "movb 2(%%edi, %%ebp   ), %%bl  \n\t"    // V 1
+       "addb $128, %%al                \n\t"    //U
+       "addb $128, %%bl                \n\t"    // V 1
+       "shll $9, %%eax                 \n\t"    //U
+       "movl  (%%esi, %%ebp, 2), %%ecx \n\t"    // V 1
+       "shll $9, %%ebx                 \n\t"    //U
+       "subl %%ecx, %%eax              \n\t"    // V 1
+       "movl 4(%%esi, %%ebp, 2), %%edx \n\t"    //U
+       "cmpl %%ecx, %%eax              \n\t"    // V 1
+       "sbbl %%ecx, %%ecx              \n\t"    //U
+       "subl %%edx, %%ebx              \n\t"    // V 1
+       "andl %0, %%ecx                 \n\t"    //U
+       "cmpl %%edx, %%ebx              \n\t"    // V 1
+       "sbbl %%edx, %%edx              \n\t"    //U
+       "addl %%ecx, %%eax              \n\t"    // V 1
+       "andl %0, %%edx                 \n\t"    //U
+       "movl %%eax,  (%%esi, %%ebp, 2) \n\t"    // V 1
+       "addl %%edx, %%ebx              \n\t"    //U
+       "subb $128, %%ah                \n\t"    // V 1
+       "movl %%ebx, 4(%%esi, %%ebp, 2) \n\t"    //U
+       "subb $128, %%bh                \n\t"    // V 1
+       "movb %%ah,   (%%edi, %%ebp   ) \n\t"    //U
+       "movb %%bh,  2(%%edi, %%ebp   ) \n\t"    // V 1
 
-     j = char(actVid2MemBufp->b[linep  + (i<<1)]);
-     j=(j + j) - temp[i];
-     if(j<temp[i] && (line&2)) j++;
-     if     (j> 127) j= 127;
-     else if(j<-128) j=-128;
-     temp[i] = j;
-     actVid2MemBufp->b[linep  + (i<<1)] = (char)(j);
-   }
+       "addl $4, %%ebp                 \n\t"    //U
+       " jnc 1b                        \n\t"    // V 1
+       "popl %%ebp                     \n\t"    //U
+           :
+           : "i" (COMB_FIX), "a" ((wndx<<1) - 2),
+             "D" (actVid2MemBufp->b + linep + 2), "S" (int(temp) + 4)
+           : "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi");
    linep+=stride;
  }
 
+
+
+
 /*
  asm(//"int $3                    \n\t"
      "1:                          \n\t"

Modified: trunk/2010/2_grafix.cc
==============================================================================
--- trunk/2010/2_grafix.cc	(original)
+++ trunk/2010/2_grafix.cc	Wed Jul  4 09:37:52 2007
@@ -20,9 +20,11 @@
  extern int single;
  extern int bright, contr, satur;
  extern MENULEVEL menuLevel;
+ extern int infoPosX;
  extern int infoPosY;
  extern VID2MEMBUF *vid2MemBuf;
  extern int some;
+ extern bool allowDrop;
 
  bool helpState=true;
  bool iState=false;
@@ -33,14 +35,15 @@ void showStuff(void){
  color c;
 
  if(helpState){
-   char text[5][256]={ "F1/F2 Brightness     s TVStandart",
+   char text[6][256]={ "F1/F2 Brightness     s TVStandart",
                        "F3/F4 Contrast       i debugInfo",
                        "F5/F6 Saturation     h Help",
                        "g     ScreenShot     q Quit",
-                       "v     smoothIterlace"};
-   for(int i=0; i<5; i++){
+                       "v     smoothIterlace p Pause",
+                       "d     allowDrop"};
+   for(int i=0; i<6; i++){
      c.init(255, 0, 0, yuvmode);
-     gprint(10, infoPosY+=10, c.col, text[i]);
+     gprint(infoPosX, infoPosY+=10, c.col, text[i]);
    }
    infoPosY+=5;
  }
@@ -50,21 +53,21 @@ void showStuff(void){
    sprintf(textbuf,"%3.1f%% brightness", bright/2.55);
    c.init(255, 0, 0, yuvmode);
 
-   gprint(10, infoPosY+=10, c.col, textbuf);
+   gprint(infoPosX, infoPosY+=10, c.col, textbuf);
    sprintf(textbuf,"%3.1f%% contrast"  , contr/1.27);
-   gprint(10, infoPosY+=10, c.col, textbuf);
+   gprint(infoPosX, infoPosY+=10, c.col, textbuf);
    sprintf(textbuf,"%3.1f%% saturation", satur/1.27);
-   gprint(10, infoPosY+=10, c.col, textbuf);
+   gprint(infoPosX, infoPosY+=10, c.col, textbuf);
    infoPosY+=5;
  }
 
  if(menuLevel==mLTVStd){
    char text[3][256]={ "1. PAL   4. VideoCrypt (PAL)",
                        "2. NTSC  5. NagraVision (PAL/SECAM)",
-                       "3. SECAM"};
+                       "3. SECAM 6. Pal-TeleText"};
    for(int i=0; i<3; i++){
      c.init(255, 0, 0, yuvmode);
-     gprint(10, infoPosY+=10, c.col, text[i]);
+     gprint(infoPosX, infoPosY+=10, c.col, text[i]);
    }
    infoPosY+=5;
  }
@@ -76,23 +79,28 @@ void showStuff(void){
    c.init(255, 0, 0, yuvmode);
 
    sprintf(textbuf,"%2.2f Fps",1/((float)(T1-T2)/UCLOCKS_PER_SEC));
-   gprint(10, infoPosY+=10, c.col, textbuf);
+   gprint(infoPosX, infoPosY+=10, c.col, textbuf);
 
-   sprintf(textbuf,"%d Buffers", vid2MemBuf[0].num);
-   gprint(120, infoPosY   , c.col, textbuf);
+   sprintf(textbuf,"%d Buf", vid2MemBuf[0].num);
+   gprint(infoPosX+110, infoPosY   , c.col, textbuf);
 
    if(yuvmode==0) sprintf(textbuf,"RGB16");
    else           sprintf(textbuf,"YUV422");
-   gprint(220, infoPosY   , c.col, textbuf);
+   gprint(infoPosX+170, infoPosY   , c.col, textbuf);
+
+   if(allowDrop){
+     sprintf(textbuf,"allowDrop");
+     gprint(infoPosX+230, infoPosY   , c.col, textbuf);
+   }
 
    sprintf(textbuf,"%d Corrupted Fields", corr_errors);
-   gprint(10, infoPosY+=10, c.col, textbuf);
+   gprint(infoPosX, infoPosY+=10, c.col, textbuf);
 
    sprintf(textbuf,"%d Address error's", addr_errors);
-   gprint(10, infoPosY+=10, c.col, textbuf);
+   gprint(infoPosX, infoPosY+=10, c.col, textbuf);
 
    sprintf(textbuf,"%d Some", some);
-   gprint(10, infoPosY+=10, c.col, textbuf);
+   gprint(infoPosX, infoPosY+=10, c.col, textbuf);
    infoPosY+=5;
  }
 

Modified: trunk/2010/2_hw.cc
==============================================================================
--- trunk/2010/2_hw.cc	(original)
+++ trunk/2010/2_hw.cc	Wed Jul  4 09:37:52 2007
@@ -1,7 +1,7 @@
 //2010 0.1 Copyright (C) Michael Niedermayer 1998
 
 #include <stdlib.h>
-#include <stdio.h>  //FIX ME (ONLY TEMP NEEDED)
+#include <stdio.h>
 #include <pc.h>
 #include <go32.h>
 #include <dpmi.h>
@@ -15,6 +15,7 @@
 #include "2_hw_mem.h"
 #include "2_71x6.h"
 #include "2_mmx.h"
+#include "2_gfunc.h"
 #include "2010.h"
 
  extern byte lock_start, lock_end;
@@ -36,7 +37,11 @@
  extern int in_int;
  extern volatile int frames;
  extern bool strangeRgb16;
+ extern bool iState;
+ extern int infoPosX, infoPosY;
 
+ bool drop=false;
+ bool allowDrop=true;
  int g_mode=0;
  extern u_short my_cs, my_ds;
  asm("__esp: .long 0\n\t");
@@ -46,7 +51,6 @@
  int VID2MEMBUF::num;
 
 void mem2vid(byte *to, byte *from, int num, int bpp){
- int i;
 
  if(yuvmode!=0){
    asm(//"int $3                            \n\t"
@@ -293,30 +297,48 @@ void set_start_disp(int x, int y){
 
 void copy_vidbuffer(void){
 
- if(page_flip==0){
-   int p=0;
-   for(int y=0; y<wndy; y++){
-     mem2vid(vidbuf+p,              actVid2MemBufp->b+p, wndx, bpp);
-     p+=vgax<<1;
-   }
- }else if(page_flip==2){
-   int p=0;
-   for(int y=0; y<wndy; y++){
-     mem2vid(vidbuf+xresvb*vgay+p,  actVid2MemBufp->b+p, wndx, bpp);
-     p+=vgax<<1;
-   }
-   page_flip=1;
-   set_start_disp(0, vgay);
- }else{
-   int p=0;
-   for(int y=0; y<wndy; y++){
-     mem2vid(vidbuf+p,              actVid2MemBufp->b+p, wndx, bpp);
-     p+=vgax<<1;
+ static long T1=0, T2=0;
+ if(iState){
+   char textBuf[128];
+   sprintf(textBuf, "%f vid2Mem", (float)(T2-T1)/UCLOCKS_PER_SEC);
+   color c;
+   c.init(255, 0, 0, yuvmode);
+   gprint(infoPosX, infoPosY+=10,  c.col, textBuf);
+   T1=uclock();
+   infoPosY+=5;
+ }
+
+ if(!drop || !allowDrop){
+
+   if(page_flip==0){
+     int p=0;
+     for(int y=0; y<wndy; y++){
+       mem2vid(vidbuf+p,              actVid2MemBufp->b+p, wndx, bpp);
+       p+=vgax<<1;
+     }
+   }else if(page_flip==2){
+     int p=0;
+     for(int y=0; y<wndy; y++){
+       mem2vid(vidbuf+xresvb*vgay+p,  actVid2MemBufp->b+p, wndx, bpp);
+       p+=vgax<<1;
+     }
+     page_flip=1;
+     set_start_disp(0, vgay);
+   }else{
+     int p=0;
+     for(int y=0; y<wndy; y++){
+       mem2vid(vidbuf+p,              actVid2MemBufp->b+p, wndx, bpp);
+       p+=vgax<<1;
+     }
+     page_flip=2;
+     set_start_disp(0, 0);
    }
-   page_flip=2;
-   set_start_disp(0, 0);
  }
+ drop=false;
 
+ if(iState){
+   T2=uclock();
+ }
 
  if( vid2MemBuf[actVid2MemBuf].state==Working ){
    vid2MemBuf[actVid2MemBuf].state=Empty;

Modified: trunk/2010/2_nag.cc
==============================================================================
--- trunk/2010/2_nag.cc	(original)
+++ trunk/2010/2_nag.cc	Wed Jul  4 09:37:52 2007
@@ -6,6 +6,7 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include <time.h>
+#include <math.h>
 #include "2_all.h"
 #include "2_crypt.h"
 #include "2_vc.h"
@@ -19,7 +20,11 @@
 
 #define dwnSmp 1
 
-extern vgax, vgay, wndx, wndy, outy, y_field;
+#define FREQ_CHROM     4433618.75
+#define FREQ_HS        15625.0
+#define PHASE_DRIFT_PER_LINE (-fmod(FREQ_CHROM/FREQ_HS*4.0, 1.0)/4.0*PI*2.0)
+
+extern vgax, vgay, wndx, wndy, outy, y_field, x_field;
 extern VID2MEMBUF *actVid2MemBufp;
 extern volatile int scales_x, scalee_x;
 extern volatile int scales_y, scalee_y;
@@ -27,9 +32,11 @@ extern u_long asm_temp, asm_temp4;
 extern bool iState;
 extern int yuvmode;
 extern bool mmx;
+extern int infoPosX;
+extern int infoPosY;
+extern int some;
+extern bool drop;
 
-static int alg2Lines;
-static short *linesPerKeyTab=NULL;
 static int *decoLine=NULL;
 static int decoLines;
 static int keyTable[256];
@@ -44,8 +51,22 @@ static int nagEnd1;
 static int nagSize1;
 static int nagSize2;
 static int nagSizeAll;
+static int rawWssThreshold=10000;
 
-static inline void getPerm(const int key, int * const perm){
+// fix inlines 
+
+static int qsortHelper(const void *x0, const void *x1){   // qsort() comparator for SINFO records: orders by line0, then line1, then key
+  SINFO *sInfo0 = (SINFO*)(x0);            // cast drops const; the records are only read here
+  SINFO *sInfo1 = (SINFO*)(x1);
+  int r;
+  r=sInfo0->line0 - sInfo1->line0;         // primary key; NOTE(review): assumes the difference fits in int (SINFO is declared elsewhere) - confirm
+  if(r!=0) return r;
+  r=sInfo0->line1 - sInfo1->line1;         // secondary key, consulted only when line0 is equal
+  if(r!=0) return r;
+  return sInfo0->key - sInfo1->key;        // final tie-break; 0 means all three fields are equal
+}
+
+static void getPerm(const int key, int * const cle2enc){
   int keyNdx= (key >> 7) & 0xFF;
   const int keyInc= ((key & 0x7F) << 1) + 1;
   int buffer[32];
@@ -53,13 +74,13 @@ static inline void getPerm(const int key
   for(int i=0; i<32; i++) buffer[i]= i;
 
   for(int i=0; i<255; i++){
-    perm[i]= buffer[ keyTable[keyNdx] ];
+    cle2enc[i]= buffer[ keyTable[keyNdx] ];
     buffer[ keyTable[keyNdx] ]= i + 32;
     keyNdx= (keyNdx + keyInc) & 0xFF;
   }
 
   for(int i=0; i<32; i++){
-    perm[i+255]= buffer[i];
+    cle2enc[i+255]= buffer[i];
   }
 }
 
@@ -169,115 +190,6 @@ static inline int nagLineLogi2Phys(const
   return phys;
 }
 
-static inline void mod4Fix(byte *p, const int type){
-  asm(//"int $3                          \n\t"
-      "cmpl $2, %%edi                  \n\t"
-      " jb 1f                          \n\t"
-      "cmpl $3, %%edi                  \n\t"
-      " jb 2f                          \n\t"
-      "3:                              \n\t"
-      "  movb  (%%esi, %%ecx), %%al    \n\t"
-      "  movb 4(%%esi, %%ecx), %%bl    \n\t"
-
-      "  movb 2(%%esi, %%ecx), %%ah    \n\t"
-      "  movb 6(%%esi, %%ecx), %%bh    \n\t"
-      "7:                              \n\t"
-      "  xorb $-1, %%al                \n\t"
-      "  xorb $-1, %%bl                \n\t"
-
-      "  xorb $-1, %%ah                \n\t"
-      "  xorb $-1, %%bh                \n\t"
-
-      "  movb %%al, 2(%%esi, %%ecx)    \n\t"
-      "  movb %%bl, 6(%%esi, %%ecx)    \n\t"
-
-      "  movb %%ah,  (%%esi, %%ecx)    \n\t"
-      "  movb %%bh, 4(%%esi, %%ecx)    \n\t"
-
-      "  movb  8(%%esi, %%ecx), %%al    \n\t"
-      "  movb 12(%%esi, %%ecx), %%bl    \n\t"
-
-      "  movb 10(%%esi, %%ecx), %%ah    \n\t"
-      "  movb 14(%%esi, %%ecx), %%bh    \n\t"
-
-      "  addl $8, %%ecx                \n\t"
-      "jnc 7b                          \n\t"
-      " jmp 8f                         \n\t"
-
-      "2:                              \n\t"
-      "  movl   (%%esi, %%ecx), %%eax  \n\t"
-      "  movl  4(%%esi, %%ecx), %%ebx  \n\t"
-      "7:                              \n\t"
-      "  xorl $0x00FF00FF, %%eax       \n\t"
-      "  xorl $0x00FF00FF, %%ebx       \n\t"
-
-      "  movl %%eax,  (%%esi, %%ecx)   \n\t"
-      "  movl %%ebx, 4(%%esi, %%ecx)   \n\t"
-
-      "  movl  8(%%esi, %%ecx), %%eax  \n\t"
-      "  movl 12(%%esi, %%ecx), %%ebx  \n\t"
-
-      "  addl $8, %%ecx                \n\t"
-      "jnc 7b                          \n\t"
-      " jmp 8f                         \n\t"
-
-      "1:                              \n\t"
-      "  movb  (%%esi, %%ecx), %%al    \n\t"
-      "  movb 4(%%esi, %%ecx), %%bl    \n\t"
-
-      "  movb 2(%%esi, %%ecx), %%ah    \n\t"
-      "  movb 6(%%esi, %%ecx), %%bh    \n\t"
-      "7:                              \n\t"
-      "  movb %%al, 2(%%esi, %%ecx)    \n\t"      // AGI 1 CYC
-      "  movb %%bl, 6(%%esi, %%ecx)    \n\t"
-
-      "  movb %%ah,  (%%esi, %%ecx)    \n\t"
-      "  movb %%bh, 4(%%esi, %%ecx)    \n\t"
-
-      "  movb  8(%%esi, %%ecx), %%al   \n\t"
-      "  movb 12(%%esi, %%ecx), %%bl   \n\t"
-
-      "  movb 10(%%esi, %%ecx), %%ah   \n\t"
-      "  movb 14(%%esi, %%ecx), %%bh   \n\t"
-
-      "  addl $8, %%ecx                \n\t"
-      "jnc 7b                          \n\t"
-
-      "8:                              \n\t"
-
-          :
-          : "S" (int(p) + (wndx<<2) ),
-            "D" (type) ,
-            "c" (-wndx<<2) 
-          : "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi");
-
-    /*
-  
-
-  const bool xchg[4]={false, true, false, true};
-  const int  neg [4]={    0,    0,    -1,   -1};
-
-
-  if(xchg[type]){
-    for(int x=0; x<(wndx<<1); x+=4){
-      const byte u= p[x  ];
-      const byte v= p[x+2];
-
-      p[x  ]= (v^neg[type]) - neg[type];
-      p[x+2]= (u^neg[type]) - neg[type];
-    }
-  }else{
-    for(int x=0; x<(wndx<<1); x+=4){
-      const byte u= p[x  ];
-      const byte v= p[x+2];
-
-      p[x  ]= (u^neg[type]) - neg[type];
-      p[x+2]= (v^neg[type]) - neg[type];
-
-    }
-  }   */
-}     
-
 static inline void corrV(byte * p1, byte * p2, const int type, int *vPhase){
   const bool xchg[4]={false, true, false, true};
   const int  neg [4]={    0,    0,    -1,   -1};
@@ -326,37 +238,57 @@ void nag_decrypt(void){
  nagSizeAll= nagSize1 + nagSize2;
  char textbuf[10][128];
  color c;
- long T1, T2;
  const int dwnSmpSize= mmx ? dwnSmp : dwnSmp-1;
+ long T1, T2;
+ T1=uclock();
 
  if(nagSizeAll!=NAG_LINES) error(Nagra);
 
- T1=uclock();
-
  printf("%d %d\n", sizeof(u_short), sizeof(BEST2) );
 
  printf("HALLOX\n");
  static bool first=true;
 
+ static char *sinPerLineMulX=new char[(NAG_LINES+32)<<8];
+ static char *cosPerLineMulX=new char[(NAG_LINES+32)<<8];
+ static long long *sinPerLine=new long long[NAG_LINES+32];
+ static long long *cosPerLine=new long long[NAG_LINES+32];
+
  if(first){
-   decoLines=14;
+   decoLines=32;
    decoLine=new int[decoLines];
-   decoLine[ 0]= 91;
-   decoLine[ 1]=103;
-   decoLine[ 2]=112;
-   decoLine[ 3]=124;
-   decoLine[ 4]=131;
-   decoLine[ 5]=145;
-   decoLine[ 6]=152;
-   decoLine[ 7]=163;
-   decoLine[ 8]=175;
-   decoLine[ 9]=181;
-   decoLine[10]=194;
-   decoLine[11]=200;
-   decoLine[12]=213;
-   decoLine[13]=222;
-
-   alg2Lines=80;
+   decoLine[ 0]= 14;
+   decoLine[ 1]= 33;
+   decoLine[ 2]= 47;
+   decoLine[ 3]= 64;
+   decoLine[ 4]= 81;
+   decoLine[ 5]= 84;
+   decoLine[ 6]= 90;
+   decoLine[ 7]= 95;
+   decoLine[ 8]=103;
+   decoLine[ 9]=106;
+   decoLine[10]=110;
+   decoLine[11]=115;
+   decoLine[12]=121;
+   decoLine[13]=125;
+   decoLine[14]=127;
+   decoLine[15]=136;
+   decoLine[16]=142;
+   decoLine[17]=145;
+   decoLine[18]=149;
+   decoLine[19]=153;
+   decoLine[20]=161;
+   decoLine[21]=165;
+   decoLine[22]=172;
+   decoLine[23]=176;
+   decoLine[24]=180;
+   decoLine[25]=185;
+   decoLine[26]=189;
+   decoLine[27]=195;
+   decoLine[28]=211;
+   decoLine[29]=227;
+   decoLine[30]=243;
+   decoLine[31]=259;
 
    FILE *f= fopen("key.txt", "r");
    if(f==NULL) error(KeyFile);
@@ -364,13 +296,25 @@ void nag_decrypt(void){
      if( fscanf(f, "%d", &keyTable[i])!=1 ) error(KeyFile);
    fclose(f);
 
+   const float a=PHASE_DRIFT_PER_LINE;
+   for(int l=0; l<NAG_LINES+32; l++){
+     for(int v=0; v<256; v++){
+       int r=v;
+       if(v>127) r=v-256;
+       sinPerLineMulX[v + (l<<8)]= char(sin(a*l)*r);
+       cosPerLineMulX[v + (l<<8)]= char(cos(a*l)*r);
+     }
+     sinPerLine[l]= int(sin(a*l)*255) & 0xFFFF;
+     cosPerLine[l]= int(cos(a*l)*255) & 0xFFFF;
+   }
+
+
    f=fopen("2010.dat", "rb");
    if(f!=NULL){
-     int decoLines2, alg2Lines2;
+     int s=0;
+     int decoLines2;
      fread(&decoLines2,   sizeof(int), 1, f);
-     fread(&alg2Lines2,   sizeof(int), 1, f);
      if(decoLines2!=decoLines) goto badFile;
-     if(alg2Lines2!=alg2Lines) goto badFile;
 
      int decoLine2[NAG_LINES];
      fread(decoLine2,     sizeof(int), decoLines2, f);
@@ -378,20 +322,20 @@ void nag_decrypt(void){
 
      int best2Size;
      int keysListSize;
-     int linesPerKeyTabSize;
      fread(&best2Size,          sizeof(int), 1, f);
      fread(&keysListSize,       sizeof(int), 1, f);
-     fread(&linesPerKeyTabSize, sizeof(int), 1, f);
 
-     best1=          new int[decoLines * NAG_LINES];
+     best1=          new int[decoLines * NAG_LINES * 33];
      best2=          new BEST2[best2Size];
      keysList=       new u_short[keysListSize];
-     linesPerKeyTab= new short[linesPerKeyTabSize];
 
-     fread(best1,          sizeof(int),      decoLines * NAG_LINES, f);
-     fread(best2,          sizeof(BEST2),    best2Size            , f);
-     fread(keysList,       sizeof(u_short),  keysListSize         , f);
-     fread(linesPerKeyTab, sizeof(short),    linesPerKeyTabSize   , f);
+     fread(best1,          sizeof(int),      decoLines * NAG_LINES * 33, f);
+     fread(best2,          sizeof(BEST2),    best2Size                 , f);
+     fread(keysList,       sizeof(u_short),  keysListSize              , f);
+
+     for(int i=0; i<decoLines * NAG_LINES * 33; i++) s+=best1[i];
+     for(int i=0; i<best2Size                 ; i++) s+=best2[i].line;
+     for(int i=0; i<keysListSize              ; i++) s+=keysList[i];
 
      badFile:;
        fclose(f);
@@ -399,6 +343,102 @@ void nag_decrypt(void){
    }
 
    if(best1==NULL){
+     int best2Size=0;
+     int keysListSize=0;
+     int best2i=0;
+     int keysListi=0;
+     for(int phase=0; phase<2; phase++){
+       if(phase==1){
+         best1=    new int[NAG_LINES * decoLines * 33];
+         for(int i=0; i<NAG_LINES * decoLines * 33; i++) best1[i]=-1;
+         best2=    new BEST2[best2Size];
+         keysList= new u_short[keysListSize];
+       }
+       for(int wssLine=0; wssLine<33; wssLine++){
+         for(int dl=0; dl<decoLines; dl++){  // builds the table entries for decoLine[dl]
+           int sInfos=0;
+           SINFO *sInfo= new SINFO[KEYS];  // released at the end of this iteration, after its last use
+           int *cle2enc= new int[NAG_LINES];
+           for(int key=0; key<KEYS; key++){
+             int keyNdx= (key >> 7) & 0xFF;
+//             const int keyInc= ((key & 0x7F) << 1) + 1;
+
+             if(keyTable[keyNdx]!=wssLine && wssLine!=32) continue;  // wssLine==32 lets every key through
+             getPerm(key, cle2enc);
+
+             if(phase==0) keysListSize++;  // phase 0 only counts; phase 1 fills the tables
+             int clearLine;
+             for(clearLine=0; clearLine<NAG_LINES; clearLine++){
+               if(cle2enc[clearLine] == decoLine[dl]) break;
+             }
+             if(clearLine==NAG_LINES) asm("int $3\n\t");  // decoLine[dl] missing from cle2enc: breakpoint trap
+
+             int line0= cle2enc[(clearLine-1<0)
+                                       ? (clearLine+2) : (clearLine-1)];
+             int line1= cle2enc[(clearLine+1>=NAG_LINES)
+                                       ? (clearLine-2) : (clearLine+1)];
+
+             if(line1<line0){  // store the pair ordered so that line0 <= line1
+               const int t=line0;
+               line0=line1;
+               line1=t;
+             }
+
+             sInfo[sInfos]=(SINFO){line0: line0, line1: line1, key: key};
+             sInfos++;
+           }
+           delete [] cle2enc;  // no longer read below; sInfo must stay alive for qsort and both phases
+           qsort(sInfo, sInfos, sizeof(SINFO), qsortHelper);
+
+           if(phase==0){
+             u_long last0=510;
+             u_long last1=510;
+             for(int i=0; i<sInfos; i++){
+               if(last0!=sInfo[i].line0 || last1!=sInfo[i].line1){
+                 last0=sInfo[i].line0;
+                 last1=sInfo[i].line1;
+                 best2Size++;  // one best2 entry per distinct (line0, line1) run
+               }
+             }
+           }
+           else{
+             u_long last0=510;
+             u_long last1=510;
+             int best1i=-1;
+             for(int sInfoi=0; sInfoi<sInfos; sInfoi++){
+               if(sInfo[sInfoi].line0 != last0){
+                 last0= sInfo[sInfoi].line0;
+                 best1i= wssLine*NAG_LINES*decoLines + dl*NAG_LINES + last0;
+                 best1[best1i]=best2i;
+                 last1=510;
+               }
+               if(sInfo[sInfoi].line1 != last1){
+                 last1= sInfo[sInfoi].line1;
+                 best2[best2i]=(BEST2){line: last1, keyNdx: keysListi};
+                 best2i++;
+                 if(keysListi>0) keysList[keysListi-1]|=KEYS;  // flag the last key of the previous run
+               }
+               keysList[keysListi]=sInfo[sInfoi].key;
+               keysListi++;
+             }
+             if(keysListi>0) keysList[keysListi-1]|=KEYS;
+           }
+           delete [] sInfo;  // moved here: it was freed before qsort and the phase loops read it
+         }
+       }
+     }
+ 
+     for(int best1i=NAG_LINES*decoLines*33-1; best1i>=0; best1i--){
+       if(best1[best1i]==-1) best1[best1i]= best2i;
+       else best2i=best1[best1i];
+     }
+
+
+
+
+ /*
+
+
 
      bool *state= new bool[NAG_LINES * NAG_LINES * decoLines];
      int *keysTempStart= new int[NAG_LINES * NAG_LINES * decoLines];
@@ -443,11 +483,11 @@ void nag_decrypt(void){
      // fix compression for keysList
      int keysListSize= KEYS*decoLines;
 
-     best1= new int[NAG_LINES * decoLines];
+     best1= new int[NAG_LINES * decoLines * 33];
      best2= new BEST2[best2Size];
      keysList= new u_short[keysListSize];
 
-     for(int i=0; i<NAG_LINES * decoLines; i++){
+     for(int i=0; i<NAG_LINES * decoLines * 33; i++){
        best1[i]=-1;
      }
 
@@ -489,74 +529,22 @@ void nag_decrypt(void){
      delete [] keysTempStart;
      delete [] state;
 
-     const int startW= (NAG_LINES-alg2Lines)>>1;
-     const int endW=   (NAG_LINES+alg2Lines)>>1;
-     int linesPerKeyTabSize=0;
-     int minLines=1000;
-     for(int key=0; key<KEYS; key++){
-       int perm[NAG_LINES];
-       getPerm(key, perm);
-       int lines=0;
-       for(int clearLine=1; clearLine<NAG_LINES; clearLine++){
-         int l1=perm[clearLine  ];
-         int l2=perm[clearLine-1];
-         if( l1 >= startW && l1 < endW && l2 >= startW && l2 < endW){
-           linesPerKeyTabSize++;
-           lines++;
-         }
-       }
-       if(lines<minLines) minLines=lines;  // linesPerKeyTabSize bigger then nes.
-     }
-     linesPerKeyTab= new short[linesPerKeyTabSize+1];
-
-     printf("minLines %d\n", minLines);
-
-     int linesPerKeyTabP=0;
-     for(int key=0; key<KEYS; key++){
-       int perm[NAG_LINES];
-       getPerm(key, perm);
-       int lines=0;
-       for(int clearLine=1; clearLine<NAG_LINES; clearLine++){
-         int l1=perm[clearLine  ];
-         int l2=perm[clearLine-1];
-         if( l1 >= startW && l1 < endW && l2 >= startW && l2 < endW){
-           if(l1>l2){
-             int lt=l1;
-             l1=l2;
-             l2=lt;
-           }
-           l1-=startW;
-           l2-=startW;
-           l1*=alg2Lines;
-           linesPerKeyTab[linesPerKeyTabP]=l1+l2;
-           linesPerKeyTabP++;
-           lines++;
-           if(lines>=minLines) break;
-         }
-       }
-       linesPerKeyTab[linesPerKeyTabP]|=1<<15;
-
-     }
-     linesPerKeyTab[linesPerKeyTabP]=-1;
+     */
 
-     linesPerKeyTabSize=linesPerKeyTabP+1;
 
      f=fopen("2010.dat", "wb");
      if(f==NULL) error(DatWrite);
 
      fwrite(&decoLines,    sizeof(int), 1, f);
-     fwrite(&alg2Lines,    sizeof(int), 1, f);
 
      fwrite(decoLine,      sizeof(int), decoLines, f);
 
      fwrite(&best2Size,          sizeof(int), 1, f);
      fwrite(&keysListSize,       sizeof(int), 1, f);
-     fwrite(&linesPerKeyTabSize, sizeof(int), 1, f);
 
-     fwrite(best1,          sizeof(int),      decoLines * NAG_LINES, f);
-     fwrite(best2,          sizeof(BEST2),    best2Size            , f);
-     fwrite(keysList,       sizeof(u_short),  keysListSize         , f);
-     fwrite(linesPerKeyTab, sizeof(short),    linesPerKeyTabSize   , f);
+     fwrite(best1,          sizeof(int),      decoLines * NAG_LINES * 33, f);
+     fwrite(best2,          sizeof(BEST2),    best2Size                 , f);
+     fwrite(keysList,       sizeof(u_short),  keysListSize              , f);
 
      fclose(f);
 
@@ -564,6 +552,29 @@ void nag_decrypt(void){
  }
  first=false;
 
+ const int wssStartX=int(double(7   -scales_x+3)/x_field*wndx + .5);
+ const int wssEndX=  int(double(164 -scales_x+3)/x_field*wndx + .5);
+ const int wssLenX= wssEndX - wssStartX;
+
+ static wssDat[max_x];
+
+ static int lastVgaX=0;
+ static wssThreshold=0;
+ if(lastVgaX!=vgax){
+   const byte wssRaw[53]= {1,1,1,1,1,  0,0,0,  1,1,1, 0,0,0,  1,1,1, 0,0,0,
+                           1,1,1,  0,0,0,  1,1,1,  0,0,0,  1,1,1,1,  0,0,0,
+                           1,1,1,1,  0,0,0,0,0,  1,1,1,1,1};
+   const float delta= 53.0 / float(wssLenX);
+   float pos=0;
+   for(int i=wssStartX; i<wssEndX; i++){  // stretch the 53-entry wssRaw pattern over wssLenX pixels
+     wssDat[i]= wssRaw[int(pos)]*255;  // sample before advancing: pos would reach 53 on the last pixel, past wssRaw[52]
+     pos+= delta;
+   }
+
+   wssThreshold= rawWssThreshold*wssLenX>>7;
+   lastVgaX=vgax;
+ }
+
  BESTCOEFFS *bestCoeffs= new BESTCOEFFS[decoLines];
  for(int i=0; i<decoLines; i++){
    bestCoeffs[i].coeff[0]=
@@ -583,6 +594,37 @@ void nag_decrypt(void){
    T1=T2;
  }
 
+ bool isWss;
+ int wssLine=0;
+ {
+   int minSum=0x7FFFFFFF;
+   for(int line=0; line<32; line++){
+     int sum=0;
+     byte * const linep= actVid2MemBufp->b + nagLineLogi2Phys(line)*(vgax<<1);
+     for(int x=wssStartX; x<wssEndX; x++){
+       sum+= mabs( int(linep[(x<<1) + 1]) - int(wssDat[x]) );
+     }
+     if(sum<minSum){
+       minSum=sum;
+       wssLine=line;
+     }
+   }
+/*
+   byte * const linep= actVid2MemBufp->b + nagLineLogi2Phys(wssLine)*(vgax<<1);
+   for(int x=wssStartX; x<wssEndX; x++){
+     linep[(x<<1) + 1]= (byte)(mabs( int(linep[(x<<1) + 1]) - int(wssDat[x]) ));
+   }
+ */
+   
+   if(minSum < wssThreshold){
+     isWss=true;
+     actVid2MemBufp->b[nagLineLogi2Phys(wssLine)*(vgax<<1) + (wssStartX<<1) + 3]=0;
+     actVid2MemBufp->b[nagLineLogi2Phys(wssLine)*(vgax<<1) + (wssStartX<<1) + 5]=255;
+   }
+   else isWss=false;
+ }
+ if(!isWss) wssLine=32;
+
  byte * const lowRes=(byte*) newAlign( wndx>>dwnSmpSize, 8);
  for(int line=0; line<NAG_LINES; line++){
    byte * const linep= actVid2MemBufp->b + nagLineLogi2Phys(line)*(vgax<<1);
@@ -621,8 +663,8 @@ void nag_decrypt(void){
    limit(bestCoeffs[dL].line[0], 0, 285);
 #endif
 
-   int l= best1[ bestCoeffs[dL].line[0]     + dL*NAG_LINES ];
-   int r= best1[ bestCoeffs[dL].line[0] + 1 + dL*NAG_LINES ];
+   int l= best1[ bestCoeffs[dL].line[0]     + dL*NAG_LINES + wssLine*decoLines*NAG_LINES];
+   int r= best1[ bestCoeffs[dL].line[0] + 1 + dL*NAG_LINES + wssLine*decoLines*NAG_LINES];
    if(l == r) continue;
    r--;
 
@@ -645,107 +687,50 @@ void nag_decrypt(void){
  }
 
  bool didSome;
- int lastLowKey=-1;
+ int lastLowKey=0xFFFF;
  int bestKey=0;
  int bestNum=0;
+ int maxKeys=0;
  do{
    didSome=false;
-   u_short lowKey=0xFFFF;
+   int lowKey=0xFFFF;
    int lowNum=0;
-   for(int kLP=0; kLP<keysListPoses; kLP++){
-     if(keysListPos[kLP]==-1) continue;
+   for(int kLPi=0; kLPi<keysListPoses; kLPi++){
 
-     if( int(keysList[ keysListPos[kLP] ] & (KEYS-1) )==lastLowKey ){
-       if( (keysList[ keysListPos[kLP] ] & KEYS)==KEYS ){
-         keysListPos[kLP]=-1;
+     if( (keysList[ keysListPos[kLPi] ] & (KEYS-1))==lastLowKey ){
+       if( (keysList[ keysListPos[kLPi] ] & KEYS)==KEYS ){
+         keysListPoses--;
+         keysListPos[kLPi]= keysListPos[keysListPoses];
          continue;
        }
-       keysListPos[kLP]++;
+       keysListPos[kLPi]++;
+       maxKeys++;
      }
-
-     if( int(keysList[ keysListPos[kLP] ] & (KEYS-1)) == lowKey ){
+                           
+     if( (keysList[ keysListPos[kLPi] ] & (KEYS-1)) == lowKey ){
        lowNum++;
+       if(lowNum>bestNum){
+         bestNum=lowNum;
+         bestKey=lowKey;
+       }
      }
-     if( int(keysList[ keysListPos[kLP] ] & (KEYS-1)) < lowKey ){
-       lowKey= int(keysList[ keysListPos[kLP] ] & (KEYS-1));
+     else if( (keysList[ keysListPos[kLPi] ] & (KEYS-1)) < lowKey ){
+       lowKey= keysList[ keysListPos[kLPi] ] & (KEYS-1);
        lowNum=0;
      }
      didSome= true;
    }
    lastLowKey=lowKey;
-//   printf("KEY %d %d\n", lowNum, lowKey);
-   if(lowNum>bestNum){
-     bestNum=lowNum;
-     bestKey=lowKey;
-   }
  }while(didSome);
 
  delete [] keysListPos;
 
- if(iState){
-   T2=uclock();
-   sprintf(textbuf[3],"%f FindKey", (float)(T2-T1)/UCLOCKS_PER_SEC);
-   T1=T2;
- }
-
- if(bestNum<3){                                     
-   short *coeff= new short[alg2Lines*alg2Lines];
-
-   byte *lowRes= new byte[alg2Lines*wndx>>dwnSmpSize];
-
-   const int startW= (NAG_LINES-alg2Lines)>>1;
-   const int endW=   (NAG_LINES+alg2Lines)>>1;
-
-   for(int line=0; line<alg2Lines; line++){
-     const int physLine= nagLineLogi2Phys(line+startW);
-     byte * const linep= actVid2MemBufp->b + physLine*(vgax<<1);
-     byte * const lowResp= lowRes + line*(wndx>>dwnSmpSize);
-     doDwnSmp(lowResp ,linep);
-   }
-
-   int coeffp=0;
-   int line0p=0;
-   for(int line0=0; line0<alg2Lines; line0++){
-     int line1p=0;
-     for(int line1=line0; line1<alg2Lines; line1++){
-       BESTCOEFFS out;
-       out.coeff[0]=
-       out.coeff[1]=100000;
-       nagraCorr(int(line0p), int(line1p), wndx>>dwnSmpSize, int(&out), 0);
-       coeff[coeffp + line1]=short(out.coeff[0]);
-       line1p+= wndx>>dwnSmpSize;
-     }
-     coeffp+= alg2Lines;
-     line0p+= wndx>>dwnSmpSize;
-   }
-   if(mmx) asm("emms\n\t");
-
-   int linesPerKeyTabP=0;
-   bestNum=100000;
-   int key=0;
-   int num=0;
-   for(;;){
-     if(linesPerKeyTab[linesPerKeyTabP]==-1) break;
-     num+= coeff[ linesPerKeyTab[linesPerKeyTabP] & !(1<<15) ];
-
-     if(linesPerKeyTab[linesPerKeyTabP] & (1<<15) ){
-       if(num<bestNum){
-         bestNum=num;
-         bestKey=key;
-         num=0;
-       }
-       key++;
-     }
-     linesPerKeyTabP++;
-   }
-
-   delete [] lowRes;
-   delete [] coeff;
- }
+ if(bestNum<2) drop=true;
 
  if(iState){
    T2=uclock();
-   sprintf(textbuf[4],"%f Algo2", (float)(T2-T1)/UCLOCKS_PER_SEC);
+   sprintf(textbuf[6],"%2d%6d%6s", bestNum, maxKeys, isWss ? "WSS" : "NoWSS");
+   sprintf(textbuf[3],"%f FindKey", (float)(T2-T1)/UCLOCKS_PER_SEC);
    T1=T2;
  }
 
@@ -756,7 +741,6 @@ void nag_decrypt(void){
  delete [] bestCoeffs;   // kill this
 
 
-
  byte *lastV[2]={NULL, NULL};
  int lastVType[2];
  int vPhase=0;
@@ -777,50 +761,157 @@ void nag_decrypt(void){
  if(vPhase>0) vPhase=1;
  else         vPhase=0;
 
+ const bool xchg[4]={false, true, false, true};
+ const int  neg [4]={    0,    0,    -1,   -1};
 
- byte *lowDriftLinep= actVid2MemBufp->b;  // initalize in case nothing found else PF
- int firstI=0;
- for(int i=0; i<NAG_LINES; i++){
-   if(perm[i]>=32 && i-perm[i]<=0){
-     const cleMod4= (i       + vPhase ) & 3;
-     const encMod4= (perm[i] + vPhase ) & 3;
+ char *pGood=(char*)(actVid2MemBufp->b);
+ for(int line=NAG_LINES-1; line>0; line--){
+
+   char *p= (char*)(actVid2MemBufp->b + nagLineLogi2Phys( perm[line] ) * (vgax<<1));
+
+   if(perm[line]>=32){
+     const cleMod4= (line       + vPhase ) & 3;
+     const encMod4= (perm[line] + vPhase ) & 3;
      const type= cleMod4 ^ encMod4;
 
-     lowDriftLinep= actVid2MemBufp->b + nagLineLogi2Phys( perm[i] ) * (vgax<<1);
+     const int l=(line-perm[line]+32)<<8;
 
-     if(cleMod4!=encMod4) mod4Fix(lowDriftLinep, type);
+     pGood= p;
 
-     firstI= i;
-     break;
-   }
- }
-  
- for(int clearLine=0 ;clearLine<NAG_LINES; clearLine++){
-   byte * const linep= actVid2MemBufp->b + nagLineLogi2Phys( perm[clearLine] ) * (vgax<<1);
+     if(mmx){
+       long long colorAsmDataTab[4];
+       colorAsmDataTab[2]=0x00FF000000FF0000LL;
+       colorAsmDataTab[3]=0xFF00FF00FF00FF00LL;
 
-   if(perm[clearLine]>=32 && clearLine-perm[clearLine]<=0
-                                                  && firstI!=clearLine){
-     const cleMod4= (clearLine       + vPhase ) & 3;
-     const encMod4= (perm[clearLine] + vPhase ) & 3;
-     const type= cleMod4 ^ encMod4;
+       if(xchg[type]){
+         colorAsmDataTab[0]=
+                        cosPerLine[l>>8]       + (sinPerLine[l>>8]<<16)
+          +(            cosPerLine[l>>8] <<32) + (sinPerLine[l>>8]<<48);
+         colorAsmDataTab[1]=
+           ((0xFFFFLL - sinPerLine[l>>8])<<7 ) + (cosPerLine[l>>8]<<23)
+          +((0xFFFFLL - sinPerLine[l>>8])<<39) + (cosPerLine[l>>8]<<55);
+         if((perm[line] & 1) ^ vPhase){
+           colorAsmDataTab[0]^=0xFFFF0000FFFF0000LL;
+           colorAsmDataTab[1]^=0x0000FFFF0000FFFFLL;
+         }
+       }
+       else{
+         colorAsmDataTab[0]=
+           ((0xFFFFLL - sinPerLine[l>>8])    ) + (cosPerLine[l>>8]<<16)
+          +((0xFFFFLL - sinPerLine[l>>8])<<32) + (cosPerLine[l>>8]<<48);
+         colorAsmDataTab[1]=
+           (            cosPerLine[l>>8] <<7 ) + (sinPerLine[l>>8]<<23)
+          +(            cosPerLine[l>>8] <<39) + (sinPerLine[l>>8]<<55);
+         if((perm[line] & 1) ^ vPhase){
+           colorAsmDataTab[0]^=0x0000FFFF0000FFFFLL;
+           colorAsmDataTab[1]^=0xFFFF0000FFFF0000LL;
+         }
+       }
 
-     lowDriftLinep= actVid2MemBufp->b + nagLineLogi2Phys( perm[clearLine] ) * (vgax<<1);
 
-     if(cleMod4!=encMod4) mod4Fix(lowDriftLinep, type);
+       if(neg[type]){
+         colorAsmDataTab[0]^=0xFFFFFFFFFFFFFFFFLL;
+         colorAsmDataTab[1]^=0xFFFFFFFFFFFFFFFFLL;
+       }
+
+       asm("movq    (%%eax), %%mm1       \n\t"
+           "movq   8(%%eax), %%mm2       \n\t"
+           "movq  16(%%eax), %%mm3       \n\t"
+           "movq  24(%%eax), %%mm4       \n\t"
+
+           "movq  (%%esi, %%ecx), %%mm0  \n\t"
+           "movq %%mm1, %%mm5            \n\t"
+           "movq %%mm0, %%mm6            \n\t"
+           "psllw $8, %%mm0              \n\t"
+           "pmaddwd %%mm0, %%mm5         \n\t"
+           "pmaddwd %%mm2, %%mm0         \n\t"
+           "1:                           \n\t"
+             // Stall 1 Cyc
+           ".byte 0x0f, 0xdb, 0xeb       \n\t" // no more bugs PLEEEEZE "pand %%mm3, %%mm5            \n\t"
+           ".byte 0x0f, 0xdb, 0xf4       \n\t" // no more bugs PLEEEEZE "pand %%mm4, %%mm6            \n\t"
+
+           "psrld $23, %%mm0             \n\t"
+           "por %%mm6, %%mm5             \n\t"
+
+           "por %%mm5, %%mm0             \n\t"
+
+           "movq  %%mm0, (%%esi, %%ecx)  \n\t"
+
+           "movq 8(%%esi, %%ecx), %%mm0  \n\t"
+           "movq %%mm1, %%mm5            \n\t"
+
+           "movq %%mm0, %%mm6            \n\t"
+           "psllw $8, %%mm0              \n\t"
+
+           "pmaddwd %%mm0, %%mm5         \n\t"
+           "addl $8, %%ecx               \n\t"
+
+           "pmaddwd %%mm2, %%mm0         \n\t"
+           "  jnc 1b                     \n\t"
+           "emms                         \n\t"
+            :
+            : "a" (int(colorAsmDataTab)),
+              "S" (int(p) + (wndx<<1)),
+              "c" (-wndx<<1) 
+            : "%eax", "%ecx");
+     }
+     else{
+       for(int x=0; x<wndx; x+=4){
+         const int u= int(* p   );
+         const int v= int(*(p+2));
+
+         char u1;
+         char v1;
+         if((perm[line] & 1) ^ vPhase){
+           u1=+ cosPerLineMulX[l+u] - sinPerLineMulX[l+v];
+           v1=+ sinPerLineMulX[l+u] + cosPerLineMulX[l+v];
+         }
+         else{
+           u1=+ cosPerLineMulX[l+u] + sinPerLineMulX[l+v];
+           v1=- sinPerLineMulX[l+u] + cosPerLineMulX[l+v];
+         }
+
+         if(xchg[type]){
+           * p   = *(p+4)= (v1^neg[type]) - neg[type];
+           *(p+2)= *(p+6)= (u1^neg[type]) - neg[type];
+
+           p +=8;
+
+         }else{
+           * p   = *(p+4)= (u1^neg[type]) - neg[type];
+           *(p+2)= *(p+6)= (v1^neg[type]) - neg[type];
+
+          p +=8;
+       
+         }
+       }
+     }
    }
    else{
-     for(int x=0; x<(wndx<<1); x+=4){
-       linep[x  ]= lowDriftLinep[x  ];
-       linep[x+2]= lowDriftLinep[x+2];
-     }
+     asm("1:                           \n\t"
+         "movb  (%%esi, %%ecx), %%al   \n\t"
+         "movb 2(%%esi, %%ecx), %%bl   \n\t"
+         "movb %%al,  (%%edi, %%ecx)   \n\t"
+         "movb %%bl, 2(%%edi, %%ecx)   \n\t"
+         "movb 4(%%esi, %%ecx), %%al   \n\t"
+         "movb 6(%%esi, %%ecx), %%bl   \n\t"
+         "movb %%al, 4(%%edi, %%ecx)   \n\t"
+         "movb %%bl, 6(%%edi, %%ecx)   \n\t"
+         "movb 8(%%edi, %%ecx), %%al   \n\t"
+         "addl $8, %%ecx               \n\t"
+         "  jnc 1b                     \n\t"
+          :
+          : "D" (int(p    ) + (wndx<<1)),
+            "S" (int(pGood) + (wndx<<1)) ,
+            "c" (-wndx<<1) 
+          : "%eax", "%ebx", "%ecx");
    }
  }
 
 
-
  if(iState){
    T2=uclock();
-   sprintf(textbuf[5],"%f ColorDeco", (float)(T2-T1)/UCLOCKS_PER_SEC);
+   sprintf(textbuf[4],"%f ColorDeco", (float)(T2-T1)/UCLOCKS_PER_SEC);
    T1=T2;
  }
 
@@ -854,7 +945,7 @@ void nag_decrypt(void){
 
  for(int i=0; i<NAG_LINES; i++){
    const int encodLine= nagLineLogi2Phys(perm[i]);
-   const int clearLine= i;
+   const int clearLine= i << ((outy==wndy) ? 1 : 0);
 
    enc2cleVec[ encodLine ]= clearLine;
    cle2encVec[ clearLine ]= encodLine;
@@ -912,28 +1003,26 @@ void nag_decrypt(void){
 
  if(iState){
    T2=uclock();
-   sprintf(textbuf[6],"%f ReOrder", (float)(T2-T1)/UCLOCKS_PER_SEC);
+   sprintf(textbuf[5],"%f ReOrder", (float)(T2-T1)/UCLOCKS_PER_SEC);
    T1=T2;
  }
 
 
-
-
-
-
  if(iState){
-   T2=uclock();
+   T2=uclock();          
    c.init(255, 0, 0, yuvmode);
-   gprint(80,  80,  c.col, textbuf[0]);
-   gprint(90,  90,  c.col, textbuf[1]);
-   gprint(100, 100, c.col, textbuf[2]);
-   gprint(110, 110, c.col, textbuf[3]);
-   gprint(120, 120, c.col, textbuf[4]);
-   gprint(130, 130, c.col, textbuf[5]);
-   gprint(140, 140, c.col, textbuf[6]);
-/*   gprint(150, 150, c.col, textbuf[7]);
-   gprint(160, 160, c.col, textbuf[8]);
-   gprint(170, 170, c.col, textbuf[9]);*/
+   gprint(infoPosX, infoPosY+=10,  c.col, textbuf[0]);
+   gprint(infoPosX, infoPosY+=10,  c.col, textbuf[1]);
+   gprint(infoPosX, infoPosY+=10, c.col, textbuf[2]);
+   gprint(infoPosX, infoPosY+=10, c.col, textbuf[3]);
+   gprint(infoPosX, infoPosY+=10, c.col, textbuf[4]);
+   gprint(infoPosX, infoPosY+=10, c.col, textbuf[5]);
+   gprint(infoPosX, infoPosY+=10, c.col, textbuf[6]);
+/*   gprint(infoPosX, infoPosY+=10, c.col, textbuf[8]);
+   gprint(infoPosX, infoPosY+=10, c.col, textbuf[7]);
+   gprint(infoPosX, infoPosY+=10, c.col, textbuf[8]);
+   gprint(infoPosX, infoPosY+=10, c.col, textbuf[9]);*/
+   infoPosY+=5;
    T1=T2;
  }
 

Modified: trunk/2010/2_nag.h
==============================================================================
--- trunk/2010/2_nag.h	(original)
+++ trunk/2010/2_nag.h	Wed Jul  4 09:37:52 2007
@@ -16,6 +16,12 @@ struct BESTCOEFFS{
   int coeff[2];
 };
 
+struct SINFO{  // sort record used by nag_decrypt(): one (line0, line1, key) triple per accepted key
+  unsigned line0:9;  // smaller of the two neighbouring cle2enc[] entries (nag_decrypt swaps so line0 <= line1)
+  unsigned line1:9;  // larger of the two neighbouring cle2enc[] entries
+  unsigned key:15;  // key value the pair was derived from; the three widths total 33 bits
+};
+
 void nag_decrypt(void);
 
 #endif

Modified: trunk/2010/2_vc.cc
==============================================================================
--- trunk/2010/2_vc.cc	(original)
+++ trunk/2010/2_vc.cc	Wed Jul  4 09:37:52 2007
@@ -36,6 +36,8 @@
 #define DWN_SMP 1
 #define MAX_DRIFT 2
 #define DRIFT_TAB {1200, 400, 0, 400, 1200}
+//#define MAX_DRIFT 3
+//#define DRIFT_TAB {2400, 1200, 400, 0, 400, 1200, 2400}
 #define PHASE_CMP_STEP 3
 #define BAD_THRESHOLD1 0.8
 #define BAD_THRESHOLD2 200
@@ -61,6 +63,10 @@ extern bool iState;
 extern int yuvmode;
 extern int some;
 extern bool mmx;
+extern int infoPosX;
+extern int infoPosY;
+extern long uclockWaste;
+
 
 static inline float atan3(const float f1, const float f2){
   float out;
@@ -80,7 +86,6 @@ void vc_decrypt(void){
  int addapBuff[max_x];
  char textbuf[9][128];
  long T1=0, T2;
- long alg1=0, alg2=0, alg3=0;
  color c;
 
  const int lowResSize= mmx ? DWN_SMP : (DWN_SMP-1);
@@ -133,12 +138,15 @@ void vc_decrypt(void){
 
  int *convVec= new int[vcDiff>>(PHASE_CMP_STEP-1)];
 
+ long TCorr=0, TReSmp=0, TUVDetect=0, TPhaseDiff1=0,
+      TPhaseDiff2_Drift=0, TEdgeDetect=0;
+
  if(iState){
    T1=uclock();
  }
 
  static int edgeLut[512];
- static int phaseLut[128];
+ static int phaseLut[256];
  static bool first= true;
 
  static byte phasePerCutP[256];
@@ -155,8 +163,8 @@ void vc_decrypt(void){
      edgeLut[i]=int( log(j*EDGE_LOG_COEFF)*EDGE_COEFF );
    }
 
-   for(int i=0; i<128; i++){                           
-     const int j= max(min(i, PHASE_LIMIT), 1);
+   for(int i=0; i<256; i++){                           
+     const int j= max(min(mabs(i-128), PHASE_LIMIT), 1);
      phaseLut[i]=int( pow(j, PHASE_EXP)*PHASE_COEFF );
    }
 
@@ -181,8 +189,8 @@ void vc_decrypt(void){
 
  bool *black= new bool[wndy];
 
- int llinep=-(vgax<<2);
- int linep=-(vgax<<1);
+ byte * llinep=&actVid2MemBufp->b[ -(vgax<<2) ];
+ byte * linep= &actVid2MemBufp->b[ -(vgax<<1) ];
  for(int line=0; line<wndy; line++){
    llinep+=vgax<<1;
    linep+=vgax<<1;
@@ -217,7 +225,7 @@ void vc_decrypt(void){
          "popl %%ebp            \n\t"
           : 
           : "I" (DWN_SMP), "i" (1<<DWN_SMP),
-            "S" (actVid2MemBufp->b + linep + 1 + (vcStartX<<1)) ,
+            "S" (linep + 1 + (vcStartX<<1)) ,
             "d" (vcDiff>>DWN_SMP) ,
             "D" (lowRes[1]) 
           : "%eax", "%ebx", "%edx", "%esi", "%edi");
@@ -249,7 +257,7 @@ void vc_decrypt(void){
          "popl %%ebp            \n\t"
           : 
           : "I" (DWN_SMP), "i" (1<<DWN_SMP),
-            "S" (actVid2MemBufp->b + linep + 1 + (vcStartX<<1)) ,
+            "S" (linep + 1 + (vcStartX<<1)) ,
             "d" (vcDiff>>DWN_SMP) ,
             "D" (lowRes[1]) 
           : "%eax", "%ebx", "%edx", "%esi", "%edi");
@@ -260,7 +268,7 @@ void vc_decrypt(void){
 
    if(iState){
      T2=uclock();
-     alg3+=T2-T1;
+     TReSmp+=T2-T1-uclockWaste;
      T1=T2;
    }
 
@@ -323,18 +331,24 @@ void vc_decrypt(void){
                                                                  < bestDiff )
        black[line]=true;
 
+   if(iState){
+     T2=uclock();
+     TCorr+=T2-T1-uclockWaste;
+     T1=T2;
+   }
+
    if(line>2){
 
      int sumI=0;
      for(int x=-2; x<2; x+=2){
        const int nx= x + (vcDiff & ~1);
-       const int ul= char(actVid2MemBufp->b[ linep + (((vcStartX & ~1) +  x)<<1) + 0]);
-       const int vl= char(actVid2MemBufp->b[ linep + (((vcStartX & ~1) +  x)<<1) + 2]);
-       const int un= char(actVid2MemBufp->b[ linep + (((vcStartX & ~1) + nx)<<1) + 0]);
-       const int vn= char(actVid2MemBufp->b[ linep + (((vcStartX & ~1) + nx)<<1) + 2]);
+       const int ul= char(linep[ (((vcStartX & ~1) +  x)<<1) + 0]);
+       const int vl= char(linep[ (((vcStartX & ~1) +  x)<<1) + 2]);
+       const int un= char(linep[ (((vcStartX & ~1) + nx)<<1) + 0]);
+       const int vn= char(linep[ (((vcStartX & ~1) + nx)<<1) + 2]);
        sumI+= ul*vn - vl*un;
-//       actVid2MemBufp->b[linep + (((vcStartX & ~1) - x)<<1) + 1]=255;
-//       actVid2MemBufp->b[linep + (((vcStartX & ~1) - nx)<<1) + 1]=255;
+//       linep[ (((vcStartX & ~1) - x)<<1) + 1]=255;
+//       linep[ (((vcStartX & ~1) - nx)<<1) + 1]=255;
      }
   
      if(line>10 && line<250){
@@ -353,7 +367,7 @@ void vc_decrypt(void){
 
    if(iState){
      T2=uclock();
-     alg1+=T2-T1;
+     TUVDetect+=T2-T1-uclockWaste;
      T1=T2;
    }
  }
@@ -361,7 +375,8 @@ void vc_decrypt(void){
  deleteAlign(lowRes[0]);
  deleteAlign(lowRes[1]);
 
- byte *dir=new byte[257*(wndy + 1)];
+ byte *dir=new byte[(wndy + 1)<<8];
+ int *bestDir=new int[wndy + 1];
 
  const int driftPenalty[MAX_DRIFT*2+1]= DRIFT_TAB;
 
@@ -371,10 +386,8 @@ void vc_decrypt(void){
  byte *newPhaseErr=new byte[256], *lastPhaseErr=new byte[256];
  memset(lastPhaseErr, 0, 256);
 
- long tst1=0, tst2=0, tst3=0;
-
- llinep=-(vgax<<2);
- linep=-(vgax<<1);
+ llinep=&actVid2MemBufp->b[ -(vgax<<2) ];
+ linep= &actVid2MemBufp->b[ -(vgax<<1) ];
  for(int line=0; line<wndy; line++){
    llinep+=vgax<<1;
    linep+=vgax<<1;
@@ -383,7 +396,7 @@ void vc_decrypt(void){
    memset(newPhaseErr, 0, 256);
    bool noLeft=true;
 
-   dir[line*257 + 256]=0;
+   bestDir[line]=-1;
 
    if(line==0) continue;
 
@@ -394,22 +407,23 @@ void vc_decrypt(void){
    for(int x=0; x<vcDiff; x+= 1<<PHASE_CMP_STEP){
      int nx= x + (relCutPX & ~1);
      if(nx>=vcDiff) nx-= (vcDiff & ~1);
-     const int ul= char(actVid2MemBufp->b[llinep + (((vcStartX & ~1) +  x)<<1) + 0]);
-     const int vl= char(actVid2MemBufp->b[llinep + (((vcStartX & ~1) +  x)<<1) + 2]);
-     const int un= char(actVid2MemBufp->b[ linep + (((vcStartX & ~1) + nx)<<1) + 0]);
-     const int vn= char(actVid2MemBufp->b[ linep + (((vcStartX & ~1) + nx)<<1) + 2]);
+     const int ul= char(llinep[ (((vcStartX & ~1) +  x)<<1) + 0]);
+     const int vl= char(llinep[ (((vcStartX & ~1) +  x)<<1) + 2]);
+     const int un= char( linep[ (((vcStartX & ~1) + nx)<<1) + 0]);
+     const int vn= char( linep[ (((vcStartX & ~1) + nx)<<1) + 2]);
      const int p= x>>(PHASE_CMP_STEP-1);
      convVec[p  ]= ul*un + vl*vn;
      convVec[p+1]= ul*vn - vl*un;
      chromAmp+= mabs(un) + mabs(vn);
    }
+   if(line<4) chromAmp=1;
    
-//   actVid2MemBufp->b[linep + ((vcStartX +  0)<<1) + 1]=255;
-//   actVid2MemBufp->b[linep + ((vcStartX +  relCutPX)<<1) + 1]=200;
+//   linep[ ((vcStartX +  0)<<1) + 1]=255;
+//   linep[ ((vcStartX +  relCutPX)<<1) + 1]=200;
 
    if(iState){
      T2=uclock();
-     tst1+=T2-T1;
+     TPhaseDiff1+=T2-T1-uclockWaste;
      T1=T2;
    }
 
@@ -504,32 +518,46 @@ void vc_decrypt(void){
        const byte a= (line & 1) ^ ( vScore>0 ) ? -ang: ang;
 
        const byte phase1= a + phasePerCutP[ lastCutP ];
-   
-       for(int newCutP= newCutPStart; newCutP <= newCutPEnd; newCutP++){
-         int val= lastVal[lastCutP] - driftPenalty[drift];
-         const byte phaseErr= phase1 + phasePerCutP[ newCutP ];
-
-         if(chromAmp>AMP_THRESHOLD && line>4)
-           val-=  phaseLut[ mabs( char(phaseErr - 128) ) ]
-                + phaseLut[ mabs( char(phaseErr - lastPhaseErr[lastCutP]) ) ];// useless without phase errors from decomb-filter
-         else actVid2MemBufp->b[linep + ((vcStartX -  1)<<1) + 1]=255;
+          // FIX (ASM_OPTIMIZE)
+       if(chromAmp>AMP_THRESHOLD){
+         for(int newCutP= newCutPStart; newCutP <= newCutPEnd; newCutP++){
+           const byte phaseErr= phase1 + phasePerCutP[ newCutP ];
+           const int val=  lastVal[lastCutP] - driftPenalty[drift]
+                         - phaseLut[ phaseErr ]
+                         - phaseLut[ (phaseErr - lastPhaseErr[lastCutP] + 128) & 0xFF];// useless without phase errors from decomb-filter
      
-         if(newVal[newCutP] < val){
-           newVal[newCutP]= val;
-           dir[line*257 + newCutP]= lastCutP;
-           newPhaseErr[newCutP]= phaseErr;
+           if(newVal[newCutP] < val){
+             newVal[newCutP]= val;
+             dir[(line<<8) + newCutP]= lastCutP;
+             newPhaseErr[newCutP]= phaseErr;
         
-           noLeft=false;
+             noLeft=false;
+           }
+           drift++;
+         }
+       }
+       else{
+         linep[ ((vcStartX -  1)<<1) + 1]=255;
+         for(int newCutP= newCutPStart; newCutP <= newCutPEnd; newCutP++){
+           const byte phaseErr= phase1 + phasePerCutP[ newCutP ];
+           const int val=  lastVal[lastCutP] - driftPenalty[drift];
+     
+           if(newVal[newCutP] < val){
+             newVal[newCutP]= val;
+             dir[(line<<8) + newCutP]= lastCutP;
+             newPhaseErr[newCutP]= phaseErr;
+        
+             noLeft=false;
+           }
+           drift++;
          }
-         drift++;
        }
-
      }
    }
 
    if(iState){
      T2=uclock();
-     tst2+=T2-T1;
+     TPhaseDiff2_Drift+=T2-T1-uclockWaste;
      T1=T2;
    }
 
@@ -537,7 +565,7 @@ void vc_decrypt(void){
      for(int newCutP = 0; newCutP < 256; newCutP++){
        if(!noLeft && newVal[newCutP]==0) continue;
 
-       const byte * const p= &actVid2MemBufp->b[ linep + ((cutP2X[ newCutP ] + vcStartX)<<1) + 1 - 4 ];
+       const byte * const p= &linep[ ((cutP2X[ newCutP ] + vcStartX)<<1) + 1 - 4 ];
 
        const int diff=edgeLut[ mabs( + int(* p   ) + int(*(p+2))
                                      - int(*(p+4)) - int(*(p+6)) ) ];
@@ -547,7 +575,7 @@ void vc_decrypt(void){
 
        if(newVal[256] < newVal[newCutP]){
          newVal[256]= newVal[newCutP];
-         dir[line*257 + 256]= max(1, newCutP);        // FIX (QUICKHACK)
+         bestDir[line]= newCutP;
        }
 
      }
@@ -555,7 +583,7 @@ void vc_decrypt(void){
    
    if(iState){
      T2=uclock();
-     tst3+=T2-T1;
+     TEdgeDetect+=T2-T1-uclockWaste;
      T1=T2;
    }
 
@@ -566,13 +594,6 @@ void vc_decrypt(void){
    byte *tempErr=lastPhaseErr;
    lastPhaseErr=newPhaseErr;
    newPhaseErr=tempErr;
-
-   if(iState){
-     T2=uclock();
-     alg2+=T2-T1;
-     T1=T2;
-   }
-
  }
 
  delete [] convVec;
@@ -586,11 +607,11 @@ void vc_decrypt(void){
  if(showPoints){
    int bestCutP=-1;
    for(int line=wndy-1; line>0; line--){
-     if(dir[line*257 + 256]==0){
+     if(bestDir[line]==-1){
        bestCutP=-1;
        continue;
      }
-     if(bestCutP==-1) bestCutP= dir[line*257 + 256];
+     if(bestCutP==-1) bestCutP= bestDir[line];
      if(bestCutP<0 || bestCutP>=256){
        printf("one%d\n", bestCutP);
        bestCutP=-1;
@@ -602,23 +623,16 @@ void vc_decrypt(void){
      actVid2MemBufp->b[(line * vgax<<1) + ((vcStartX + bestX)<<1)+3]=0;
      actVid2MemBufp->b[(line * vgax<<1) + ((vcStartX + bestX)<<1)+5]=255;
 
-     bestCutP= dir[line*257 + bestCutP];
+     bestCutP= dir[(line<<8) + bestCutP];
    }
  }
 
- 
- if(iState){
-   T2=uclock();
-   sprintf(textbuf[3],"%f Misc", (float)(T2-T1)/UCLOCKS_PER_SEC);
-   T1=T2;
- }
-
  int cutpp[1000];
  byte copy_buff[max_x<<1];
 
  int bestCutP=-1;
- linep=(wndy-1) * (vgax<<1);
- llinep=wndy    * (vgax<<1);
+ linep= &actVid2MemBufp->b[ (wndy-1) * (vgax<<1) ];
+ llinep=&actVid2MemBufp->b[  wndy    * (vgax<<1) ];
  for(int line=wndy-2; line>0; line--){
    linep-= vgax<<1;
    llinep-= vgax<<1;
@@ -626,19 +640,19 @@ void vc_decrypt(void){
    cutpp[line]=0;
 
    if(black[line]){
-     actVid2MemBufp->b[linep + ((vcStartX - 4)<<1) + 1]=255;
-     actVid2MemBufp->b[linep + ((vcStartX - 4)<<1) + 3]=255;
+     linep[ ((vcStartX - 4)<<1) + 1 ]=255;
+     linep[ ((vcStartX - 4)<<1) + 3 ]=255;
    }
 
    if(showPoints){
-     actVid2MemBufp->b[linep+(vcStartX<<1)+1]=255;
-     actVid2MemBufp->b[linep+(vcStartX<<1)+3]=0;
+     linep[ (vcStartX<<1)+1 ]=255;
+     linep[ (vcStartX<<1)+3 ]=0;
    }
-   if(dir[line*257 + 256]==0){
+   if(bestDir[line]==-1){
      bestCutP=-1;
      continue;
    }
-   if(bestCutP==-1) bestCutP=dir[line*257 + 256];
+   if(bestCutP==-1) bestCutP=dir[(line<<8) + 256];
 
    if(bestCutP<0 || bestCutP>=256){
      printf("two%d\n", bestCutP);
@@ -658,7 +672,7 @@ void vc_decrypt(void){
    const char c2= cos2PerCutP[ cutP ];
 
    if( (line & 1) ^ ( vScore>0 ) ){
-     char *p= (char*)( &actVid2MemBufp->b[ linep+((vcStartX & ~1)<<1) ] );
+     char *p= (char*)( &linep[ ((vcStartX & ~1)<<1) ] );
      int x=0;
      for(; x<cutX; x+=4){
        const char u= * p;
@@ -708,33 +722,48 @@ void vc_decrypt(void){
 
 
  
-//   actVid2MemBufp->b[linep+((vcStartX)<<1)+1]= 255;
+//   linep[ ((vcStartX)<<1)+1 ]= 255;
    
-   memcpy(copy_buff, actVid2MemBufp->b + linep + (vcStartX<<1), vcDiff<<1);
+   memcpy(copy_buff, linep + (vcStartX<<1), vcDiff<<1);
 
-   memcpy(actVid2MemBufp->b + linep + (vcStartX<<1), copy_buff + (cutX<<1),
-                                                 (vcDiff-cutX)<<1);
-   memcpy(actVid2MemBufp->b + linep + (vcStartX<<1) + ((vcDiff-cutX)<<1),
-                                                                   copy_buff,
-                                                         cutX<<1);
+   memcpy(linep + (vcStartX<<1), copy_buff + (cutX<<1), (vcDiff-cutX)<<1);
+   memcpy(linep + (vcStartX<<1) + ((vcDiff-cutX)<<1), copy_buff, cutX<<1);
 
-   if( (line & 1) ^ ( vScore>0 ) ^ 1 ){
-     char *pl= (char*)( &actVid2MemBufp->b[llinep+ ((vcStartX<<1) & ~3) ] );
-     char *pn= (char*)( &actVid2MemBufp->b[ linep+ ((vcStartX<<1) & ~3) ] );
-     for(int x=0; x<vcDiff; x+=4){
+   if( !( (line & 1) ^ ( vScore>0 ) ) ){
+     char *pl= (char*)( &llinep[ ((vcStartX<<1) & ~3) ] );
+     char *pn= (char*)( &linep[  ((vcStartX<<1) & ~3) ] );
+/*     for(int x=0; x<vcDiff; x+=4){
        * pn   = *(pn+4)= * pl;
        *(pn+2)= *(pn+6)= *(pl+2);
        pn+=8;
        pl+=8;
-     }
+     }*/
+     asm("1:                           \n\t"
+         "movb  (%%esi, %%ecx), %%al   \n\t"
+         "movb 2(%%esi, %%ecx), %%bl   \n\t"
+         "movb %%al,  (%%edi, %%ecx)   \n\t"
+         "movb %%bl, 2(%%edi, %%ecx)   \n\t"
+//         "movb 4(%%esi, %%ecx), %%al   \n\t"
+//         "movb 6(%%esi, %%ecx), %%bl   \n\t"
+         "movb %%al, 4(%%edi, %%ecx)   \n\t"
+         "movb %%bl, 6(%%edi, %%ecx)   \n\t"
+//         "movb 8(%%edi, %%ecx), %%al   \n\t"
+         "addl $8, %%ecx               \n\t"
+         "  jnc 1b                     \n\t"
+          :
+          : "D" (int(pn) + (vcDiff<<1)),
+            "S" (int(pl) + (vcDiff<<1)) ,
+            "c" (-vcDiff<<1) 
+          : "%eax", "%ebx", "%ecx");
    }
 
    cutpp[line]=cutX;
 
 
-   bestCutP= dir[line*257 + bestCutP];
+   bestCutP= dir[(line<<8) + bestCutP];
  }
 
+/*
  float avgCutP=0;
  for(int line=50; line<200; line++){
    avgCutP+=float(cutpp[line]);
@@ -742,36 +771,36 @@ void vc_decrypt(void){
 
  avgCutP/=(200.0-50.0);
  printf("avg%f\n", avgCutP-float(vcDiff)/2.0);
-  
+  */
   
  printf("vScore %d\n", vScore);
 
 
  if(iState){
    T2=uclock();
-   sprintf(textbuf[0],"%f Corr", (float)(alg1)/UCLOCKS_PER_SEC);
-   sprintf(textbuf[1],"%f Edge-Search", (float)(alg2)/UCLOCKS_PER_SEC);
-   sprintf(textbuf[2],"%f ReSample", (float)(alg3)/UCLOCKS_PER_SEC);
-   sprintf(textbuf[4],"%f Rotate", (float)(T2-T1)/UCLOCKS_PER_SEC);
-   sprintf(textbuf[5],"%f tst1", (float)(tst1)/UCLOCKS_PER_SEC);
-   sprintf(textbuf[6],"%f tst2", (float)(tst2)/UCLOCKS_PER_SEC);
-   sprintf(textbuf[7],"%f tst3", (float)(tst3)/UCLOCKS_PER_SEC);
+   sprintf(textbuf[0],"%f ReSample", (float)(TReSmp)/UCLOCKS_PER_SEC);
+   sprintf(textbuf[1],"%f Correlate", (float)(TCorr)/UCLOCKS_PER_SEC);
+   sprintf(textbuf[2],"%f UVDetect", (float)(TUVDetect)/UCLOCKS_PER_SEC);
+   sprintf(textbuf[3],"%f FindPhaseDiff1", (float)(TPhaseDiff1)/UCLOCKS_PER_SEC);
+   sprintf(textbuf[4],"%f FindPhaseDiff2 / Drift", (float)(TPhaseDiff2_Drift)/UCLOCKS_PER_SEC);
+   sprintf(textbuf[5],"%f EdgeDetect", (float)(TEdgeDetect)/UCLOCKS_PER_SEC);
+   sprintf(textbuf[6],"%f Rotate / ColorFix", (float)(T2-T1-uclockWaste)/UCLOCKS_PER_SEC);
    c.init(255, 0, 0, yuvmode);
-   gprint(80,  80,  c.col, textbuf[0]);
-   gprint(90,  90,  c.col, textbuf[1]);
-   gprint(100, 100, c.col, textbuf[2]);
-   gprint(110, 110, c.col, textbuf[3]);
-   gprint(120, 120, c.col, textbuf[4]);
-   gprint(130, 130, c.col, textbuf[5]);
-   gprint(140, 140, c.col, textbuf[6]);
-   gprint(150, 150, c.col, textbuf[7]);
+   gprint(infoPosX, infoPosY+=10, c.col, textbuf[0]);
+   gprint(infoPosX, infoPosY+=10, c.col, textbuf[1]);
+   gprint(infoPosX, infoPosY+=10, c.col, textbuf[2]);
+   gprint(infoPosX, infoPosY+=10, c.col, textbuf[3]);
+   gprint(infoPosX, infoPosY+=10, c.col, textbuf[4]);
+   gprint(infoPosX, infoPosY+=10, c.col, textbuf[5]);
+   gprint(infoPosX, infoPosY+=10, c.col, textbuf[6]);
+   infoPosY+=5;
    T1=T2;                           
  }
 
 
+  delete [] bestDir;
   delete [] dir;
   delete [] black;
-//  delete [] phaseDiff;
 }
 
 



More information about the Mndiff-dev mailing list