[MPlayer-dev-eng] [PATCH] DC10+ TVOut e.a. major update

Thu Jan 24 11:29:56 CET 2002

Hello,

This patch removes the libjpeg dependencies and includes a custom
(mostly copy-pasted) JPEG encoder. The encoder uses the DCT and the
bit-writing operations from libavcodec, so vo_zr now requires
libavcodec. Compilation is tested with and without MMX. (Note: this patch
doesn't depend on the patch that I sent to ffmpeg-devel; it includes a
custom version of the broken function for the time being.)

Enhancements:
-better speed and accuracy because of the MMX dct from libavcodec (on
non-MMX systems jpeg_fdct_ifast is used)
-horizontal decimation support
-playback in black and white (optional for speed increase (factor of 2))
-support added for YUY2 colorspace 
-smarter 'scaling decision', no need for -zrfi anymore

Please apply.

Greetings,

Rik.

--------
Nothing is ever a total loss; it can always serve as a bad example.
-------------- next part --------------
diff -Naur main/Makefile main.dev/Makefile

--- main/Makefile	Wed Jan 23 23:24:56 2002
+++ main.dev/Makefile	Wed Jan 23 23:26:21 2002
@@ -41,7 +41,7 @@
 VO_LIBS = -Llibvo -lvo
 VO_INC = -Ilibvo
 endif
-V_LIBS = $(X_LIB) $(MP1E_LIB) $(GGI_LIB) $(MLIB_LIB) $(JPEG_LIB) $(PNG_LIB) $(SDL_LIB) $(SVGA_LIB) $(AA_LIB) $(DIRECTFB_LIB)
+V_LIBS = $(X_LIB) $(MP1E_LIB) $(GGI_LIB) $(MLIB_LIB) $(PNG_LIB) $(SDL_LIB) $(SVGA_LIB) $(AA_LIB) $(DIRECTFB_LIB)
 
 AO_LIBS = -Llibao2 -lao2
 A_LIBS = $(ALSA_LIB) $(NAS_LIB) $(MAD_LIB) $(VORBIS_LIB) $(SGIAUDIO_LIB)
diff -Naur main/configure main.dev/configure
--- main/configure	Fri Jan 18 18:35:30 2002
+++ main.dev/configure	Wed Jan 23 23:26:21 2002
@@ -705,7 +705,6 @@
 _xv=auto
 _sdl=auto
 _nas=auto
-_jpeg=auto
 _png=auto
 _gl=auto
 _ggi=auto
@@ -778,8 +777,6 @@
   --disable-sdl)	_sdl=no		;;
   --enable-nas)		_nas=yes	;;
   --disable-nas)	_nas=no		;;
-  --enable-jpeg)	_jpeg=yes	;;
-  --disable-jpeg)	_jpeg=no	;;
   --enable-png)		_png=yes	;;
   --disable-png)	_png=no		;;
   --enable-gl)		_gl=yes		;;
@@ -1863,35 +1860,6 @@
 fi
 echores "$_dvb"
 
-echocheck "JPEG support"
-if test "$_jpeg" = auto; then
-  _jpeg=no
-cat > $TMPC << EOF
-#include <stdio.h>
-#include <jpeglib.h>
-int main(void) { return 0; }
-EOF
-  cc_check -ljpeg  && _jpeg=yes
-fi
-echores "$_jpeg"
-
-echocheck "zr"
-if test "$_zr" = yes ; then
-  if test "$_jpeg" = yes ; then
-    _ld_jpeg='-ljpeg'
-    _def_zr='#define HAVE_ZR 1'
-    _vosrc="$_vosrc vo_zr.c"
-    _vomodules="zr $_vomodules"
-    echores "$_zr"
-  else
-    echores "jpeglib is required by zr, sorry"
-    _def_zr='#undef HAVE_ZR'
-  fi
-else
-  _def_zr='#undef HAVE_ZR'
-  echores "$_zr"
-fi
-
 echocheck "PNG support"
 if test "$_png" = auto ; then
   _png=no
@@ -2530,6 +2498,21 @@
   _codecmodules="libavcodec.so $_codecmodules"
 fi
 
+echocheck "zr"
+if test "$_zr" = yes ; then
+  if test "$_libavcodec" = yes ; then
+    _def_zr='#define HAVE_ZR 1'
+    _vosrc="$_vosrc vo_zr.c jpeg_enc.c"
+    _vomodules="zr $_vomodules"
+    echores "$_zr"
+  else
+    echores "libavcodec (static) is required by zr, sorry"
+    _def_zr='#undef HAVE_ZR'
+  fi
+else
+  _def_zr='#undef HAVE_ZR'
+  echores "$_zr"
+fi
 
 # FIXME : variables don't have a "standard" name so check this one day
 if test "$_divx4linux" = auto ; then
@@ -2866,7 +2849,7 @@
 CC = $_cc
 AWK = $_awk
 # OPTFLAGS = -O4 $_profile $_debug $_march $_mcpu -pipe -fomit-frame-pointer -ffast-math
-OPTFLAGS = -Djpeg_fdct_ifast=jpeg_fdct_ifast2 $CFLAGS
+OPTFLAGS = $CFLAGS
 EXTRA_INC = $_inc_extra $_inc_gtk
 WIN32_PATH = -DWIN32_PATH=\\"$_win32libdir\\"
 
@@ -2889,7 +2872,6 @@
 GGI_LIB = $_ld_ggi
 MLIB_LIB =  $_ld_mlib
 MLIB_INC = $_inc_mlib
-JPEG_LIB = $_ld_jpeg
 PNG_LIB = $_ld_png
 SDL_LIB = $_ld_sdl
 SVGA_LIB = $_ld_svga
diff -Naur main/libvo/jpeg_enc.c main.dev/libvo/jpeg_enc.c
--- main/libvo/jpeg_enc.c	Thu Jan  1 01:00:00 1970
+++ main.dev/libvo/jpeg_enc.c	Thu Jan 24 10:53:41 2002
@@ -0,0 +1,899 @@
+/* Straightforward (to be) optimized JPEG encoder for the YUV422 format 
+ * based on mjpeg code from ffmpeg. 
+ *
+ * Copyright (c) 2002, Rik Snel
+ * Parts from ffmpeg Copyright (c) 2000, 2001 Gerard Lantau
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * For an excellent introduction to the JPEG format, see:
+ * http://www.ece.purdue.edu/~bourman/grad-labs/lab8/pdf/lab.pdf
+ */
+
+
+/* stuff from libavcodec/common.h */
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "config.h"
+#ifdef USE_FASTMEMCPY
+#include "fastmemcpy.h"
+#endif
+#include "../mp_msg.h"
+#include "../libavcodec/common.h"
+#include "../libavcodec/dsputil.h"
+
+
+int height, width, fields, cheap_upsample, qscale, bw = 0, first = 1;
+
+/* from dsputils.c */
+
+DCTELEM **blck;
+
+extern void (*av_fdct)(DCTELEM *b);
+
+UINT8 zr_zigzag_direct[64] = {
+    0, 1, 8, 16, 9, 2, 3, 10,
+    17, 24, 32, 25, 18, 11, 4, 5,
+    12, 19, 26, 33, 40, 48, 41, 34,
+    27, 20, 13, 6, 7, 14, 21, 28,
+    35, 42, 49, 56, 57, 50, 43, 36,
+    29, 22, 15, 23, 30, 37, 44, 51,
+    58, 59, 52, 45, 38, 31, 39, 46,
+    53, 60, 61, 54, 47, 55, 62, 63
+};
+
+/* bit output */
+
+PutBitContext pb;
+
+/* from mpegvideo.c */
+
+#define QMAT_SHIFT 25
+#define QMAT_SHIFT_MMX 19
+
+static const unsigned short aanscales[64] = {
+    /* precomputed values scaled up by 14 bits */
+    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
+    22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
+    21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
+    19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
+    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
+    12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
+    8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
+    4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
+};
+
+
+static unsigned int simple_mmx_permutation[64]={
+	0x00, 0x08, 0x01, 0x09, 0x04, 0x0C, 0x05, 0x0D,
+	0x10, 0x18, 0x11, 0x19, 0x14, 0x1C, 0x15, 0x1D,
+	0x02, 0x0A, 0x03, 0x0B, 0x06, 0x0E, 0x07, 0x0F,
+	0x12, 0x1A, 0x13, 0x1B, 0x16, 0x1E, 0x17, 0x1F,
+	0x20, 0x28, 0x21, 0x29, 0x24, 0x2C, 0x25, 0x2D,
+	0x30, 0x38, 0x31, 0x39, 0x34, 0x3C, 0x35, 0x3D,
+	0x22, 0x2A, 0x23, 0x2B, 0x26, 0x2E, 0x27, 0x2F,
+	0x32, 0x3A, 0x33, 0x3B, 0x36, 0x3E, 0x37, 0x3F,
+};
+
+#if 0
+void block_permute(short int *block)
+{
+    int tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
+    int i;
+
+    for(i=0;i<8;i++) {
+        tmp1 = block[1];
+        tmp2 = block[2];
+        tmp3 = block[3];
+        tmp4 = block[4];
+        tmp5 = block[5];
+        tmp6 = block[6];
+        block[1] = tmp2;
+        block[2] = tmp4;
+        block[3] = tmp6;
+        block[4] = tmp1;
+        block[5] = tmp3;
+        block[6] = tmp5;
+        block += 8;
+    }
+}
+#endif
+
+int q_intra_matrix[64];
+
+static int dct_quantize(DCTELEM *block, int n,
+                        int qscale)
+{
+    int i, j, level, last_non_zero, q;
+    const int *qmat;
+
+    av_fdct (block);
+
+    /* Forward-DCT (the call above) and quantise one 8x8 block in place.
+       mjpeg_encode_frame() uses this variant when av_fdct is
+       jpeg_fdct_ifast.  n and qscale are not used by the body. */
+
+    /* The DC coefficient is divided by a fixed 64, with rounding.  The
+       AC coefficients are multiplied by the reciprocals in
+       q_intra_matrix, which convert_matrix() fills in, and scaled back
+       down below.  The return value is the zigzag index of the last
+       nonzero AC coefficient, or 0 if there is none. */
+    q = 64;   
+    /* note: block[0] is assumed to be positive */
+    block[0] = (block[0] + (q >> 1)) / q;
+    i = 1;
+    last_non_zero = 0;
+
+    qmat = q_intra_matrix;
+    for(;i<64;i++) {
+        j = zr_zigzag_direct[i];
+        level = block[j];
+        level = level * qmat[j];
+        /* XXX: slight error for the low range. Test should be equivalent to
+           (level <= -(1 << (QMAT_SHIFT - 3)) || level >= (1 <<
+           (QMAT_SHIFT - 3)))
+        */
+        if (((level << (31 - (QMAT_SHIFT - 3))) >> (31 - (QMAT_SHIFT - 3))) != 
+            level) {
+            level = level / (1 << (QMAT_SHIFT - 3));
+            /* XXX: currently, this code is not optimal: levels are
+               clamped to -255..255 here, while dct_quantize_mmx()
+               allows a wider range.
+               NOTE(review): the narrower clamp looks inherited from
+               the MPEG code this was copied from -- confirm.
+            */
+            if (level > 255)
+                level = 255;
+            else if (level < -255)
+                level = -255;
+            block[j] = level;
+            last_non_zero = i;
+        } else {
+            block[j] = 0;
+        }
+	
+    }
+    return last_non_zero;
+}
+
+static int dct_quantize_mmx(DCTELEM *block, int n, int qscale)
+{
+    /* Forward-DCT and quantise one 8x8 block in place.
+     * mjpeg_encode_frame() uses this variant whenever av_fdct is not
+     * jpeg_fdct_ifast.
+     *
+     * block:     64 samples on entry, 64 quantised coefficients on
+     *            return
+     * n, qscale: not used by the body
+     *
+     * Returns the zigzag index of the last nonzero AC coefficient, or
+     * 0 when all AC coefficients were quantised to zero.
+     * The result is what encode_block() reads from block_last_index[]. */
+    int i, j, level, last_non_zero, q;
+    const int *qmat;
+
+    av_fdct (block);
+
+    /* the DC coefficient is not quantised with the matrix: it is
+     * divided by a fixed 8, with rounding */
+    q = 8;
+
+    /* note: block[0] is assumed to be positive */
+    block[0] = (block[0] + (q >> 1)) / q;
+    i = 1;
+    last_non_zero = 0;
+    /* For this DCT convert_matrix() stores
+     * (1 << QMAT_SHIFT_MMX) / (qscale * quantiser) in q_intra_matrix, so
+     * the multiplication below followed by the division by
+     * 1 << (QMAT_SHIFT_MMX - 3) is a fixed point division by the
+     * quantiser when convert_matrix() was called with qscale 8, as
+     * mjpeg_encoder_init() does. */
+    qmat = q_intra_matrix;
+
+    /* AC coefficients, visited in zigzag order */
+    for(;i<64;i++) {
+        j = zr_zigzag_direct[i];
+        level = block[j];
+        level = level * qmat[j];
+        /* XXX: slight error for the low range. Test should be equivalent to
+           (level <= -(1 << (QMAT_SHIFT_MMX - 3)) || level >= (1 <<
+           (QMAT_SHIFT_MMX - 3)))
+        */
+        if (((level << (31 - (QMAT_SHIFT_MMX - 3))) >> (31 - (QMAT_SHIFT_MMX - 3))) != 
+            level) {
+            level = level / (1 << (QMAT_SHIFT_MMX - 3));
+            /* Baseline JPEG with 8 bit samples codes AC coefficients
+             * in the size categories 1..10, i.e. -1023..1023.  A
+             * level of -1024 would be category 11, for which the AC
+             * Huffman table (val_ac_luminance) has no code, so
+             * encode_block() would emit a broken stream.  Clamp
+             * symmetrically. */
+            if (level > 1023)
+                level = 1023;
+            else if (level < -1023)
+                level = -1023;
+            block[j] = level;
+            last_non_zero = i;
+        } else {
+            /* magnitude below one quantiser step */
+            block[j] = 0;
+        }
+    }
+
+    return last_non_zero;
+}
+
+static void convert_matrix(int *qmat, const unsigned short *quant_matrix, 
+		int qscale)
+{
+    int i;
+    /* Fill qmat[0..63] with fixed point reciprocals of qscale * quant_matrix[i], scaled for the DCT in av_fdct. */
+    if (av_fdct == jpeg_fdct_ifast) {
+        for(i=0;i<64;i++) {
+            /* 16 <= qscale * quant_matrix[i] <= 7905 */
+            /* 19952 <= aanscales[i] * qscale * quant_matrix[i] <= 249205026 */
+            /* the aanscales[] factor is folded into the divisor for this DCT */
+            qmat[i] = (int)(((unsigned long long)1 << (QMAT_SHIFT + 11)) / 
+                            (aanscales[i] * qscale * quant_matrix[i]));
+        }
+    } else {
+        for(i=0;i<64;i++) {
+            /* Bounds inherited from the original code: it supposes
+               16 <= quant_matrix[i] <= 255, hence
+               (1 << QMAT_SHIFT_MMX) / 16 >= qmat[i] >= (1 << QMAT_SHIFT_MMX) / 7905.
+               NOTE(review): intra_matrix may hold values below 16 -- confirm. */
+            qmat[i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
+        }
+    }
+}
+
+#define SOF0	0xC0
+#define SOI	0xD8
+#define	EOI	0xD9
+#define DQT	0xDB
+#define DHT	0xC4
+#define SOS	0xDA
+
+/* this is almost the quantisation table, used for luminance and chrominance */
+/*short int zr_default_intra_matrix[64] = {
+    16,  11,  10,  16,  24,  40,  51,  61,
+    12,  12,  14,  19,  26,  58,  60,  55,
+    14,  13,  16,  24,  40,  57,  69,  56,
+    14,  17,  22,  29,  51,  87,  80,  62,
+    18,  22,  37,  56,  68, 109, 103,  77,
+    24,  35,  55,  64,  81, 104, 113,  92,
+    49,  64,  78,  87, 103, 121, 120, 101,
+    72,  92,  95,  98, 112, 100, 103,  99
+};*/
+/*
+short int default_intra_matrix[64] = {
+	8, 16, 19, 22, 26, 27, 29, 34,
+	16, 16, 22, 24, 27, 29, 34, 37,
+	19, 22, 26, 27, 29, 34, 34, 38,
+	22, 22, 26, 27, 29, 34, 37, 40,
+	22, 26, 27, 29, 32, 35, 40, 48,
+	26, 27, 29, 32, 35, 40, 48, 58,
+	26, 27, 29, 34, 38, 46, 56, 69,
+	27, 29, 35, 38, 46, 56, 69, 83
+};
+*/
+extern short int default_intra_matrix[64];
+
+short int intra_matrix[64];
+
+/* Set up the standard Huffman tables (cf. JPEG standard section K.3) */
+/* IMPORTANT: these are only valid for 8-bit data precision! */
+static const unsigned char bits_dc_luminance[17] =
+{ /* 0-base */ 0, 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 };
+static const unsigned char val_dc_luminance[] =
+{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
+
+#if 0
+static const unsigned char bits_dc_chrominance[17] =
+{ /* 0-base */ 0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 };
+static const unsigned char val_dc_chrominance[] =
+{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
+#endif
+
+static const unsigned char bits_ac_luminance[17] =
+{ /* 0-base */ 0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0x7d };
+static const unsigned char val_ac_luminance[] =
+{ 0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
+  0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
+  0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
+  0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
+  0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
+  0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
+  0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
+  0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
+  0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
+  0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
+  0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
+  0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
+  0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
+  0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
+  0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
+  0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
+  0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
+  0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
+  0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
+  0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
+  0xf9, 0xfa 
+};
+
+#if 0
+static const unsigned char bits_ac_chrominance[17] =
+{ /* 0-base */ 0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 0x77 };
+
+static const unsigned char val_ac_chrominance[] =
+{ 0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
+  0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
+  0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
+  0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
+  0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,
+  0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
+  0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,
+  0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
+  0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+  0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
+  0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
+  0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+  0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,
+  0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,
+  0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
+  0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
+  0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,
+  0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
+  0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
+  0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
+  0xf9, 0xfa 
+};
+#endif
+
+unsigned char huff_size_dc_luminance[12];
+unsigned short huff_code_dc_luminance[12];
+#if 0
+unsigned char huff_size_dc_chrominance[12];
+unsigned short huff_code_dc_chrominance[12];
+#endif
+
+unsigned char huff_size_ac_luminance[256];
+unsigned short huff_code_ac_luminance[256];
+#if 0
+unsigned char huff_size_ac_chrominance[256];
+unsigned short huff_code_ac_chrominance[256];
+#endif 
+
+int last_dc[3];
+int block_last_index[4];
+
+/* isn't this function nicer than the one in the libjpeg ? */
+static void build_huffman_codes(unsigned char *huff_size, 
+		unsigned short *huff_code, const unsigned char *bits_table, 
+		const unsigned char *val_table)
+{
+    /* Assign codes length by length: for every length 1..16 the next
+     * bits_table[len] symbols of val_table get consecutive code values. */
+    int len, left, next_code = 0;
+    const unsigned char *sym = val_table;
+
+    for (len = 1; len <= 16; len++) {
+        for (left = bits_table[len]; left > 0; left--) {
+            huff_size[*sym] = len;
+            huff_code[*sym] = next_code++;
+            sym++;
+        }
+        /* codes of the next length are one bit longer */
+        next_code <<= 1;
+    }
+}
+
+int zr_mjpeg_init()
+{
+    /* build all the huffman tables */
+    build_huffman_codes(huff_size_dc_luminance, huff_code_dc_luminance,
+                        bits_dc_luminance, val_dc_luminance);
+    //build_huffman_codes(huff_size_dc_chrominance, huff_code_dc_chrominance,
+    //                    bits_dc_chrominance, val_dc_chrominance);
+    build_huffman_codes(huff_size_ac_luminance, huff_code_ac_luminance,
+                        bits_ac_luminance, val_ac_luminance);
+    //build_huffman_codes(huff_size_ac_chrominance, huff_code_ac_chrominance,
+    //                    bits_ac_chrominance, val_ac_chrominance);
+    
+    return 0;
+}
+
+void zr_mjpeg_close()
+{
+}
+
+static inline void put_marker(PutBitContext *p, int code)
+{
+    put_bits(p, 8, 0xff);
+    put_bits(p, 8, code);
+}
+
+/* table_class: 0 = DC coef, 1 = AC coefs */
+static int put_huffman_table(int table_class, int table_id,
+                             const unsigned char *bits_table, 
+			     const unsigned char *value_table)
+{
+    PutBitContext *p = &pb;
+    int n, i;
+
+    put_bits(p, 4, table_class);
+    put_bits(p, 4, table_id);
+
+    n = 0;
+    for(i=1;i<=16;i++) {
+        n += bits_table[i];
+        put_bits(p, 8, bits_table[i]);
+    }
+
+    for(i=0;i<n;i++)
+        put_bits(p, 8, value_table[i]);
+
+    return n + 17;
+}
+
+/* Emit the DQT segment: a single 8 bit quantisation table with id 0. */
+static void jpeg_qtable_header()
+{
+    PutBitContext *p = &pb;
+    int i;
+
+    put_marker(p, DQT);
+    put_bits(p, 16, 2 + 1 * (1 + 64));
+    put_bits(p, 4, 0); /* 8 bit precision */
+    put_bits(p, 4, 0); /* table 0 */
+    for (i = 0; i < 64; i++) {
+        /* the entries of intra_matrix go out in zigzag scan order */
+        put_bits(p, 8, intra_matrix[zr_zigzag_direct[i]]);
+    }
+}
+
+static void jpeg_htable_header() {
+    /* Write two DHT segments, one with the DC and one with the AC
+     * luminance table (both as table id 0).  The 16 bit segment length
+     * is written as 0 first and patched in once the table is out. */
+    PutBitContext *p = &pb;
+    unsigned char *len_pos;
+    int seg_len;
+    /* DC table, class 0 */
+    put_marker(p, DHT);
+    flush_put_bits(p);
+    len_pos = p->buf_ptr;
+    put_bits(p, 16, 0); /* patched later */
+    seg_len = 2 + put_huffman_table(0, 0, bits_dc_luminance, val_dc_luminance);
+    len_pos[0] = seg_len >> 8;
+    len_pos[1] = seg_len;
+
+    /* AC table, class 1 */
+    put_marker(p, DHT);
+    flush_put_bits(p);
+    len_pos = p->buf_ptr;
+    put_bits(p, 16, 0); /* patched later */
+    seg_len = 2 + put_huffman_table(1, 0, bits_ac_luminance, val_ac_luminance);
+    len_pos[0] = seg_len >> 8;
+    len_pos[1] = seg_len;
+}
+
+void zr_mjpeg_picture_header()
+{
+    put_marker(&pb, SOI);
+
+    if (first) {
+    	jpeg_qtable_header();
+    	jpeg_htable_header();
+	first = 0;
+    }
+    put_marker(&pb, SOF0);
+
+    put_bits(&pb, 16, 17);
+    put_bits(&pb, 8, 8); /* 8 bits/component */
+    put_bits(&pb, 16, height);
+    put_bits(&pb, 16, width);
+    put_bits(&pb, 8, 3); /* 3 components */
+    
+    /* Y component */
+    put_bits(&pb, 8, 0); /* component number */
+    put_bits(&pb, 4, 2); /* H factor */
+    put_bits(&pb, 4, 1); /* V factor */
+    put_bits(&pb, 8, 0); /* select matrix */
+    
+    /* Cb component */
+    put_bits(&pb, 8, 1); /* component number */
+    put_bits(&pb, 4, 1); /* H factor */
+    put_bits(&pb, 4, 1); /* V factor */
+    put_bits(&pb, 8, 0); /* select matrix */
+
+    /* Cr component */
+    put_bits(&pb, 8, 2); /* component number */
+    put_bits(&pb, 4, 1); /* H factor */
+    put_bits(&pb, 4, 1); /* V factor */
+    put_bits(&pb, 8, 0); /* select matrix */
+
+
+    /* scan header */
+    put_marker(&pb, SOS);
+    put_bits(&pb, 16, 12); /* length */
+    put_bits(&pb, 8, 3); /* 3 components */
+    
+    /* Y component */
+    put_bits(&pb, 8, 0); /* index */
+    put_bits(&pb, 4, 0); /* DC huffman table index */
+    put_bits(&pb, 4, 0); /* AC huffman table index */
+    
+    /* Cb component */
+    put_bits(&pb, 8, 1); /* index */
+    put_bits(&pb, 4, 0); /* DC huffman table index */
+    put_bits(&pb, 4, 0); /* AC huffman table index */
+    
+    /* Cr component */
+    put_bits(&pb, 8, 2); /* index */
+    put_bits(&pb, 4, 0); /* DC huffman table index */
+    put_bits(&pb, 4, 0); /* AC huffman table index */
+
+    put_bits(&pb, 8, 0); /* Ss (not used) */
+    put_bits(&pb, 8, 63); /* Se (not used) */
+    put_bits(&pb, 8, 0); /* (not used) */
+}
+
+static void zr_flush_buffer(PutBitContext *s)
+{
+    int size;
+    if (s->write_data) {
+        size = s->buf_ptr - s->buf;
+        if (size > 0)
+            s->write_data(s->opaque, s->buf, size);
+        s->buf_ptr = s->buf;
+        s->data_out_size += size;
+    }
+}
+
+/* pad the end of the output stream with ones */
+void zr_jflush_put_bits(PutBitContext *s)
+{
+    unsigned int b;
+    s->bit_buf |= ~1U >> s->bit_cnt; /* set all the unused bits to one */
+
+    while (s->bit_cnt > 0) {
+        b = s->bit_buf >> 24;
+        *s->buf_ptr++ = b;
+        if (b == 0xff)
+            *s->buf_ptr++ = 0;
+        s->bit_buf<<=8;
+        s->bit_cnt-=8;
+    }
+    zr_flush_buffer(s);
+    s->bit_cnt=0;
+    s->bit_buf=0;
+}
+
+void zr_mjpeg_picture_trailer()
+{
+    zr_jflush_put_bits(&pb);
+    put_marker(&pb, EOI);
+}
+
+static inline void encode_dc(int val, unsigned char *huff_size, 
+		unsigned short *huff_code)
+{
+    int mant, nbits;
+    /* Write the DC difference val to pb, using the given Huffman table. */
+    if (val == 0) {
+        /* a zero difference is just the Huffman code of category 0 */
+        jput_bits(&pb, huff_size[0], huff_code[0]);
+        /* (no value bits follow in this case) */
+    } else {
+        mant = val;
+        if (val < 0) {
+            val = -val;
+            mant--;
+        }
+        
+        /* compute the log (XXX: optimize) */
+        nbits = 0;
+        while (val != 0) {
+            val = val >> 1;
+            nbits++;
+        }
+        /* nbits is now the number of bits needed for the magnitude */
+            
+        /* first the Huffman code of the size category nbits ... */
+        jput_bits(&pb, huff_size[nbits], huff_code[nbits]);
+        /* ... then the low nbits bits of mant; for a negative input
+         * mant was decremented above, so what is written is the low
+         * bits of (value - 1) */
+        jput_bits(&pb, nbits, mant & ((1 << nbits) - 1));
+        /* val is the DC difference computed by encode_block() */
+    }
+}
+
+static void encode_block(DCTELEM *b, int n)
+{
+    int mant, nbits, code, i, j;
+    int component, dc, run, last_index, val;
+    unsigned char *huff_size_ac;
+    unsigned short *huff_code_ac;
+    
+    /* DC coef */
+    component = (n <= 1 ? 0 : n - 2 + 1);
+    dc = b[0]; /* overflow is impossible */
+    /*for (i = 0; i < 8; i++) {
+	    printf("%i %i %i %i %i %i %i %i\n", b[8*i], b[8*i+1], b[8*i+2],
+			    b[8*i+3], b[8*i+4], b[8+i*5], b[8+i*6], b[8+i*7]);
+    }*/
+    val = dc - last_dc[component];
+    //if (n < 2) {
+        encode_dc(val, huff_size_dc_luminance, huff_code_dc_luminance);
+        huff_size_ac = huff_size_ac_luminance;
+        huff_code_ac = huff_code_ac_luminance;
+    //} else {
+    //    encode_dc(val, huff_size_dc_chrominance, huff_code_dc_chrominance);
+    //    huff_size_ac = huff_size_ac_chrominance;
+    //    huff_code_ac = huff_code_ac_chrominance;
+    //}
+    last_dc[component] = dc;
+    
+    /* AC coefs */
+    
+    run = 0;
+    last_index = block_last_index[n];
+    for(i=1;i<=last_index;i++) {
+        j = zr_zigzag_direct[i];
+        val = b[j];
+        if (val == 0) {
+            run++;
+        } else {
+            while (run >= 16) {
+		//printf("ac 16 white ");
+                jput_bits(&pb, huff_size_ac[0xf0], huff_code_ac[0xf0]);
+                run -= 16;
+            }
+            mant = val;
+            if (val < 0) {
+                val = -val;
+                mant--;
+            }
+            
+            /* compute the log (XXX: optimize) */
+            nbits = 0;
+            while (val != 0) {
+                val = val >> 1;
+                nbits++;
+            }
+            code = (run << 4) | nbits;
+
+	    //printf("ac ");
+            jput_bits(&pb, huff_size_ac[code], huff_code_ac[code]);
+        
+	    //printf("ac ");
+            jput_bits(&pb, nbits, mant & ((1 << nbits) - 1));
+            run = 0;
+        }
+    }
+
+    /* output EOB only if not already 64 values */
+    if (last_index < 63 || run != 0) {
+	//printf("ac EOB ");
+        jput_bits(&pb, huff_size_ac[0], huff_code_ac[0]);
+    }
+}
+
+void zr_mjpeg_encode_mb(DCTELEM **bla)
+{   /* entropy-code the already quantised blocks of one macroblock into pb */
+    encode_block(*(bla), 0);   /* first Y block */
+    encode_block(*(bla+1), 1); /* second Y block */
+    if (bw) {
+    	jput_bits(&pb, 12, 512+128+8+2); /* black and white: 2 times the
+				      * code for 'no color', 001010001010 */
+    } else {
+	    encode_block(*(bla+2), 2); /* U block */
+	    encode_block(*(bla+3), 3); /* V block */
+    }
+}
+
+int mb_width, mb_height, mb_x, mb_y;
+unsigned char *y_data, *u_data, *v_data;
+int y_ps, u_ps, v_ps, y_rs, u_rs, v_rs;
+char code[256*1024];
+/* This function can take all kinds of YUV colorspaces:
+ * YV12, YVYU, UYVY. The necessary parameters must be set up by the caller;
+ * y_ps means "y pixel size", y_rs means "y row size".
+ * For YUYV, for example, u = y + 1, v = y + 3, y_ps = 2, u_ps = 4,
+ * v_ps = 4, y_rs = u_rs = v_rs.
+ *  
+ * The data is straightened out at the moment it is put in DCT
+ * blocks, there are therefore no spurious memcopies involved */
+/* Notice that w must be a multiple of 16 and h must be a multiple of
+ * fields*8 */
+/* We produce YUV422 jpegs, the colors must be subsampled horizontally,
+ * if the colors are also subsampled vertically, then this function
+ * performs cheap upsampling (better solution will be: a DCT that is
+ * optimized in the case that every two rows are the same) */
+/* cu = 0 means 'No cheap upsampling'
+ * cu = 1 means 'perform cheap upsampling' */
+void mjpeg_encoder_init(int w, int h, 
+		unsigned char* y, int y_psize, int y_rsize, 
+		unsigned char* u, int u_psize, int u_rsize,
+		unsigned char* v, int v_psize, int v_rsize,
+		int f, int cu, int q, int b) { /* store the picture layout and build the quantisation tables */
+	int i;
+	mp_msg(MSGT_VO, MSGL_V, "JPEnc init: %dx%d %p %d %d %p %d %d %p %d %d\n",
+			w, h, y, y_psize, y_rsize, 
+			u, u_psize, u_rsize,
+			v, v_psize, v_rsize);
+	y_data = y; u_data = u; v_data = v;
+	y_ps = y_psize; u_ps = u_psize; v_ps = v_psize;
+	y_rs = y_rsize*f; /* row step used while encoding: f source rows */
+	u_rs = u_rsize*f; 
+	v_rs = v_rsize*f;
+	width = w;
+	height = h/f; /* number of rows encoded per mjpeg_encode_frame() call */
+	fields = f;
+	qscale = q;
+	cheap_upsample = cu; /* 1 = chroma rows are doubled, see the comment above */
+	mb_width = width/16; /* one macroblock covers 16x8 luma samples */
+	mb_height = height/8;
+	bw = b; /* nonzero: only the Y blocks are encoded */
+	zr_mjpeg_init();
+	i = 0;
+	intra_matrix[0] = default_intra_matrix[0]; /* the DC entry is not scaled by qscale */
+	for (i = 1; i < 64; i++) {
+		intra_matrix[i] = (default_intra_matrix[i]*qscale) >> 3; /* qscale 8 keeps the default values */
+	}
+	if (
+#ifdef HAVE_MMX
+			av_fdct != fdct_mmx && 
+#endif
+			av_fdct != jpeg_fdct_ifast) {
+		/* libavcodec is probably not yet initialized */
+		av_fdct = jpeg_fdct_ifast;
+#ifdef HAVE_MMX
+		dsputil_init_mmx();
+#endif
+	}
+	convert_matrix(q_intra_matrix, intra_matrix, 8);
+	blck = malloc(4*sizeof(DCTELEM*)); /* NOTE(review): malloc results are not checked, and a second call leaks the old blocks unless they are freed elsewhere -- confirm */
+	blck[0] = malloc(64*sizeof(DCTELEM));
+	blck[1] = malloc(64*sizeof(DCTELEM));
+	blck[2] = malloc(64*sizeof(DCTELEM));
+	blck[3] = malloc(64*sizeof(DCTELEM));
+}	
+
+int mjpeg_encode_frame(char *bufr, int field) {
+	/* Encode one field of the picture described to mjpeg_encoder_init()
+	 * as a JPEG image and return the number of bytes written to bufr.
+	 * bufr:  output buffer; it is passed to init_put_bits() with a size
+	 *        of 1024*256 bytes, so it must be at least that large.
+	 * field: if 1, the plane pointers are advanced by y_rs/2, u_rs/2 and
+	 *        v_rs/2 while encoding and restored before returning.
+	 * Every macroblock is two 8x8 Y blocks plus, unless bw is set, one
+	 * 8x8 U block and one 8x8 V block. */
+	int i, j, nblocks;
+	short int *dest;
+	unsigned char *source;
+
+	if (field == 1) {
+		y_data += y_rs/2;
+		u_data += u_rs/2;
+		v_data += v_rs/2;
+	}
+	/* initialize the buffer */
+	init_put_bits(&pb, bufr, 1024*256, NULL, NULL);
+
+	zr_mjpeg_picture_header();
+
+	last_dc[0] = 128; last_dc[1] = 128; last_dc[2] = 128;
+	/* black and white mode transforms only the two Y blocks */
+	nblocks = bw ? 2 : 4;
+	for (mb_y = 0; mb_y < mb_height; mb_y++) {
+		for (mb_x = 0; mb_x < mb_width; mb_x++) {
+			/* fill 2 Y macroblocks and one U and one V */
+			source = mb_y * 8 * y_rs + 16 * y_ps * mb_x + y_data;
+			dest = blck[0];
+			for (i = 0; i < 8; i++) {
+				for (j = 0; j < 8; j++) {
+					dest[j] = source[j*y_ps];
+				}
+				dest += 8;
+				source += y_rs;
+			}
+			source = mb_y * 8 * y_rs + (16*mb_x + 8)*y_ps + y_data;
+			dest = blck[1];
+			for (i = 0; i < 8; i++) {
+				for (j = 0; j < 8; j++) {
+					dest[j] = source[j*y_ps];
+				}
+				dest += 8;
+				source += y_rs;
+			}
+			if (!bw) {
+			if (cheap_upsample) {
+				/* every chroma source row fills two block rows */
+				source = mb_y*4*u_rs + 8*mb_x*u_ps + u_data;
+				dest = blck[2];
+				for (i = 0; i < 4; i++) {
+					for (j = 0; j < 8; j++) {
+						dest[j] = source[j*u_ps];
+						dest[j+8] = source[j*u_ps];
+					}
+					dest += 16;
+					source += u_rs;
+				}
+				source = mb_y*4*v_rs + 8*mb_x*v_ps + v_data;
+				dest = blck[3];
+				for (i = 0; i < 4; i++) {
+					for (j = 0; j < 8; j++) {
+						dest[j] = source[j*v_ps];
+						dest[j+8] = source[j*v_ps];
+					}
+					dest += 16;
+					/* V rows advance by v_rs, which may differ from u_rs */
+					source += v_rs;
+				}
+			} else {
+				source = mb_y*8*u_rs + 8*mb_x*u_ps + u_data;
+				dest = blck[2];
+				for (i = 0; i < 8; i++) {
+					for (j = 0; j < 8; j++) {
+						dest[j] = source[j*u_ps];
+					}
+					dest += 8;
+					source += u_rs;
+				}
+				source = mb_y*8*v_rs + 8*mb_x*v_ps + v_data;
+				dest = blck[3];
+				for (i = 0; i < 8; i++) {
+					for (j = 0; j < 8; j++) {
+						dest[j] = source[j*v_ps];
+					}
+					dest += 8;
+					source += v_rs;
+				}
+			}
+			}
+			/* so, **blck is filled now... */
+
+			/* DCT + quantise every filled block, with the
+			 * quantiser that goes with the DCT in av_fdct */
+			for (i = 0; i < nblocks; i++) {
+				if (av_fdct == jpeg_fdct_ifast)
+					block_last_index[i] =
+						dct_quantize(blck[i],
+								i, qscale);
+				else
+					block_last_index[i] =
+						dct_quantize_mmx(blck[i],
+								i, qscale);
+			}
+			zr_mjpeg_encode_mb(blck);
+		}
+	}
+	emms_c();
+	zr_mjpeg_picture_trailer();
+	flush_put_bits(&pb);
+	zr_mjpeg_close();
+	if (field == 1) {
+		y_data -= y_rs/2;
+		u_data -= u_rs/2;
+		v_data -= v_rs/2;
+	}
+	return pb.buf_ptr - pb.buf;
+}
+
diff -Naur main/libvo/vo_zr.c main.dev/libvo/vo_zr.c
--- main/libvo/vo_zr.c	Sun Jan 20 13:34:11 2002
+++ main.dev/libvo/vo_zr.c	Thu Jan 24 10:13:24 2002
@@ -18,7 +18,6 @@
 #include "zoran.h"
 
 #include "config.h"
-#define ZR_USES_LIBJPEG
 
 #include "video_out.h"
 #include "video_out_internal.h"
@@ -40,13 +39,15 @@
 
 static int image_width;
 static int image_height;
-static int off_y, off_c, stride; /* for use by 'draw slice' */
+static int off_y, off_c, stride; /* for use by 'draw slice/frame' */
 static int framenum;
 static int fields = 1; /* currently no interlacing */
-static int forceinter = 0;
+static int zrfd = 0;
+static int bw = 0; /* if bw == 1, then display in black&white */
 static int vdec = 1;
+static int hdec = 1;
 static int size;
-static int quality = 70;
+static int quality = 1;
 
 typedef struct {
 	int width;
@@ -80,134 +81,11 @@
 char *device = NULL;
 
 
-#ifdef ZR_USES_LIBJPEG
-#include<jpeglib.h>
-int ccount;
-unsigned char *ccbuf;
-struct jpeg_compress_struct cinfo;
-struct jpeg_destination_mgr jdest;
-struct jpeg_error_mgr jerr;
-
-/* minimal destination handler to output to buffer */
-METHODDEF(void) init_destination(struct jpeg_compress_struct *cinfo) {
-//	printf("init_destination called %p %d\n", ccbuf, ccount);
-	cinfo->dest->next_output_byte = (JOCTET*)(ccbuf+ccount);
-	cinfo->dest->free_in_buffer = MJPEG_SIZE - ccount;
-}
-
-METHODDEF(boolean) empty_output_buffer(struct jpeg_compress_struct *cinfo) {
-//	printf("empty_output_buffer called\n");
-	mp_msg(MSGT_VO, MSGL_ERR, "empty_output_buffer called, may not happen because buffer must me large enough\n");
-	return(FALSE);
-}
-
-METHODDEF(void) term_destination(struct jpeg_compress_struct *cinfo) {
-//	printf("term_destination called %p %d\n", ccbuf, ccount);
-	ccount = MJPEG_SIZE - cinfo->dest->free_in_buffer;
-}
-/* end of minimal destination handler */
-
-JSAMPARRAY ***jsi;
-
-#else
-#include "../libavcodec/avcodec.h"
-AVCodec *codec;
-AVCodecContext codec_context;
-AVPicture picture;
-#endif 
-
-static int jpegdct = JDCT_IFAST; 
-
-int init_codec() {
-#ifdef ZR_USES_LIBJPEG
-	int i, j, k;
-	cinfo.err = jpeg_std_error(&jerr);
-	jpeg_create_compress(&cinfo);
-
-	cinfo.dest = &jdest;
-	cinfo.dest->init_destination = init_destination;
-	cinfo.dest->empty_output_buffer = empty_output_buffer;
-	cinfo.dest->term_destination = term_destination;
-
-	cinfo.input_components = 3;
-
-	jpeg_set_defaults(&cinfo);
-
-	cinfo.image_width = image_width;
-	cinfo.image_height = image_height/fields;
-	cinfo.input_gamma = 1.0;
-	cinfo.in_color_space = JCS_YCbCr;
-	cinfo.raw_data_in = TRUE;
-	cinfo.comp_info[0].h_samp_factor = 2;
-	cinfo.comp_info[0].v_samp_factor = 1;
-	cinfo.comp_info[1].h_samp_factor = 1;
-	cinfo.comp_info[1].v_samp_factor = 1;
-	cinfo.comp_info[2].h_samp_factor = 1;
-	cinfo.comp_info[2].v_samp_factor = 1;
-	cinfo.dct_method = jpegdct;
-	jpeg_set_quality(&cinfo, quality, FALSE);
-	jsi = malloc(sizeof(JSAMPARRAY**)*fields);
-
-	/* Just some clutter to give libjpeg the pointers,
-	 * and I don't want to recalculate everything everytime
-	 * it is needed */
-	for (k = 0; k < fields; k++) {
-	jsi[k] = malloc(sizeof(JSAMPARRAY*)*image_height/(8*fields));
-
-	for (i = 0; i < image_height/(8*fields); i++) {
-		jsi[k][i] = malloc(3*sizeof(JSAMPARRAY));
-		jsi[k][i][0] = malloc(8*sizeof(JSAMPROW));
-		jsi[k][i][1] = malloc(8*sizeof(JSAMPROW));
-		jsi[k][i][2] = malloc(8*sizeof(JSAMPROW));
-		for (j = 0; j < 8; j++) {
-			jsi[k][i][0][j] = (JSAMPROW)(image + 
-					(fields*(8*i + j) + k)*image_width);
-			jsi[k][i][1][j] = (JSAMPROW)(image + size + 
-					(fields*(8*i + j)/2)*image_width/2);
-			jsi[k][i][2][j] = (JSAMPROW)(image + 3*size/2 + 
-					(fields*(8*i + j)/2)*image_width/2);
-		}
-	}
-
-	}
-#else
-	AVCodecContext *c = &codec_context;
-	codec = avcodec_find_encoder(CODEC_ID_MJPEG);
-	if (!codec) {
-		/* maybe libavcodec was not initialized */
-		avcodec_init();
-		avcodec_register_all();
-		codec = avcodec_find_encoder(CODEC_ID_MJPEG);
-		if (!codec) {
-			mp_msg(MSGT_VO, MSGL_ERR, "MJPG codec not found in libavcodec\n");
-			return 1;
-		}
-	}
-	/* put default values */
-	memset(c, 0, sizeof(*c));
-
-	c->width = image_width;
-	c->height = image_height;
-	c->bit_rate = 4000000;
-	c->frame_rate = 25*FRAME_RATE_BASE;
-	//c->gop_size = 1;
-	c->pix_fmt = PIX_FMT_YUV422P;
-
-	if (avcodec_open(c, codec) < 0) {
-		mp_msg(MSGT_VO, MSGL_ERR, "MJPG codec could not be opened\n");
-		return 1;
-	}
-
-	picture.data[0] = image;
-	picture.data[1] = image + size;
-	picture.data[2] = image + 3*size/2;
-	picture.linesize[0] = image_width;
-	picture.linesize[1] = image_width/2;
-	picture.linesize[2] = image_width/2;
-#endif 
-	return 0;
-}
-
+extern int mjpeg_encode_frame(char *bufr, int field); /* encodes field number 'field' into bufr; flip_page adds the return value to its output offset */
+extern void mjpeg_encoder_init(int w, int h, unsigned char *y, /* init() passes image_width/hdec and image_height as w and h */
+		int y_psize, int y_rsize, unsigned char *u, /* psize and rsize: presumably the step between samples and between rows of a component -- confirm in the encoder */
+		int u_psize, int u_rsize, unsigned char *v, /* y, u, v: start of each component's samples inside the image buffer */
+		int v_psize, int v_rsize, int f, int cu, int q, int b); /* init() passes fields, 1 (YV12) or 0 (YUY2), quality and bw as f, cu, q, b */
 
 int zoran_getcap() {
 	char* dev = device ? device : VO_ZR_DEFAULT_DEVICE;
@@ -252,10 +130,10 @@
 	return 0;
 }
 	
-int init_zoran() {
+int init_zoran(int zrhdec, int zrvdec) {
 	/* center the image, and stretch it as far as possible (try to keep
 	 * aspect) and check if it fits */
-	if (image_width > vc.maxwidth) {
+	if (image_width/hdec > vc.maxwidth) {
 		mp_msg(MSGT_VO, MSGL_ERR, "movie to be played is too wide, max width currenty %d\n", vc.maxwidth);
 		return 1;
 	}
@@ -266,23 +144,15 @@
 	}
 
 	zp.decimation = 0;
-	zp.HorDcm = (vc.maxwidth >= 2*(int)image_width) ? 2 : 1;
-	zp.VerDcm = 1;
-	if (zp.HorDcm == 2 && 4*image_width <= vc.maxwidth && 
-			4*image_height/fields <= vc.maxheight) {
-		zp.HorDcm = 4;
-		zp.VerDcm = 2;
-	}
-	if (((forceinter == 0 && vdec >= 2) || (forceinter == 1 && vdec == 4)) && 4*image_height/fields <= vc.maxheight) {
-		zp.VerDcm = 2;
-	}
+	zp.HorDcm = zrhdec; /* horizontal stretch factor chosen by init() (its stretchx) */
+	zp.VerDcm = zrvdec; /* vertical stretch factor chosen by init() (its stretchy) */
 	zp.TmpDcm = 1;
 	zp.field_per_buff = fields;
-	zp.img_x = (vc.maxwidth - zp.HorDcm*(int)image_width)/2;
+	zp.img_x = (vc.maxwidth - zp.HorDcm*(int)image_width/hdec)/2; /* centre horizontally; image_width/hdec is the width handed to the encoder */
 	zp.img_y = (vc.maxheight - zp.VerDcm*(3-fields)*(int)image_height)/4;
-	zp.img_width = zp.HorDcm*image_width;
+	zp.img_width = zp.HorDcm*image_width/hdec; /* displayed width: encoded width times the stretch factor */
 	zp.img_height = zp.VerDcm*image_height/fields;
-	mp_msg(MSGT_VO, MSGL_V, "zr: geometry (after 'scaling'): %dx%d+%d+%d fields=%d, w=%d, h=%d\n", zp.img_width, zp.img_height, zp.img_x, zp.img_y, fields, image_width, image_height);
+	mp_msg(MSGT_VO, MSGL_V, "zr: geometry (after 'scaling'): %dx%d+%d+%d fields=%d, w=%d, h=%d\n", zp.img_width, (3-fields)*zp.img_height, zp.img_x, zp.img_y, fields, image_width/hdec, image_height); /* verbose-only report of the geometry computed above */
 
 	if (ioctl(vdes, BUZIOC_S_PARAMS, &zp) < 0) {
 		mp_msg(MSGT_VO, MSGL_ERR, "error setting display parameters\n");
@@ -327,11 +197,15 @@
 static uint32_t init(uint32_t width, uint32_t height, uint32_t d_width, 
 	uint32_t d_height, uint32_t fullscreen, char *title, uint32_t format)
 {
-	int j;
+	int j, stretchx, stretchy;
 	/* this allows to crop parts from incoming picture,
 	 * for easy 512x240 -> 352x240 */
 	/* These values must be multples of 2 */
-
+	if (format != IMGFMT_YV12 && format != IMGFMT_YUY2) { /* the two formats query_format accepts */
+		mp_msg(MSGT_VO, MSGL_ERR, "vo_zr called with wrong format\n");
+		return 1; /* fail like the other error paths of init() instead of terminating the process */
+	}
+	stride = 2*width; /* row step draw_frame uses when walking the source picture */
 	if (g.set) {
 		if (g.width%2 != 0 || g.height%2 != 0 ||
 				g.xoff%2 != 0 || g.yoff%2 != 0) {
@@ -360,14 +234,63 @@
 	/* we must know the maximum resolution of the device
 	 * it differs for DC10+ and buz for example */
 	zoran_getcap(); /*must be called before init_zoran */
-	if (g.height/vdec > vc.maxheight/2 || (forceinter == 1 && vdec == 1))
+	/* make the scaling decision
+	 * we are capable of stretching the image in the horizontal
+	 * direction by factors 1, 2 and 4
+	 * we can stretch the image in the vertical direction by a factor
+	 * of 1 and 2 AND we must decide about interlacing */
+	if (g.width > vc.maxwidth/2 || g.height > vc.maxheight/2) { /* more than half the device size in some direction */
+		stretchx = 1;
+		stretchy = 1;
 		fields = 2;
-	printf("fields=%d\n", fields);
+		if (vdec == 2) {
+			fields = 1; /* vdec == 2: one field per buffer instead of two */
+		} else if (vdec == 4) {
+			stretchy = 2; /* vdec == 4: two fields, each stretched by 2 */
+		}
+		stretchx = hdec; /* stretch factor equals the requested horizontal decimation */
+	} else if (g.width > vc.maxwidth/4 || g.height > vc.maxheight/4) { /* between a quarter and half the device size */
+		stretchx = 2;
+		stretchy = 1;
+		fields = 1;
+		if (vdec == 2) {
+			stretchy = 2;
+		} else if (vdec == 4) {
+			if (!zrfd) { /* without -zrfd the decimation is limited to 2 */
+				mp_msg(MSGT_VO, MSGL_WARN, "vo_zr: vertical decimation too high, changing to 2 (use -zrfd to keep vdec=4)\n");
+				vdec = 2;
+			}
+			stretchy = 2; /* 2 is the largest vertical stretch, also when vdec stays 4 */
+		}
+		if (hdec == 2) {
+			stretchx = 4;
+		} else if (hdec == 4){
+			if (!zrfd) { /* without -zrfd the decimation is limited to 2 */
+				mp_msg(MSGT_VO, MSGL_WARN, "vo_zr: horizontal decimation too high, changing to 2 (use -zrfd to keep hdec=4)\n");
+				hdec = 2;
+			}
+			stretchx = 4; /* 4 is the largest horizontal stretch, also when hdec stays 4 */
+		}
+	} else {
+		/* output image is maximally stretched */
+		stretchx = 4;
+		stretchy = 2;
+		fields = 1;
+		if (vdec != 1 && !zrfd) { /* no stretch left over for decimation unless -zrfd is given */
+			mp_msg(MSGT_VO, MSGL_WARN, "vo_zr: vertical decimation too high, changing to 1 (use -zrfd to keep vdec=%d)\n", vdec);
+			vdec = 1;
+		}
+
+		if (hdec != 1 && !zrfd) { /* same rule for the horizontal direction */
+			mp_msg(MSGT_VO, MSGL_WARN, "vo_zr: horizontal decimation too high, changing to 1 (use -zrfd to keep hdec=%d)\n", hdec);
+			hdec = 1;
+		}
+	}
 	/* the height must be a multiple of fields*8 and the width
 	 * must be a multiple of 16 */
 	/* add some black borders to make it so, and center the image*/
 	image_height = fields*8*((g.height/vdec - 1)/(fields*8) + 1);
-	image_width = 16*((g.width - 1)/16 + 1);
+	image_width = (hdec*16)*((g.width - 1)/(hdec*16) + 1);
 	off_y = (image_height - g.height/vdec)/2;
 	if (off_y%2 != 0) off_y++;
 	off_y *= image_width;
@@ -377,7 +300,7 @@
 	off_c += (image_width - g.width)/4;
 	framenum = 0;
 	size = image_width*image_height;
-	mp_msg(MSGT_VO, MSGL_V, "input: %dx%d, cropped: %dx%d, output: %dx%d, off_y=%d, off_c=%d\n", width, height, g.width, g.height, image_width, image_height, off_y, off_c);
+	mp_msg(MSGT_VO, MSGL_V, "input: %dx%d, cropped: %dx%d, output: %dx%d, off_y=%d, off_c=%d\n", width/hdec, height, g.width, g.height, image_width, image_height, off_y, off_c);
 	
 	image = malloc(2*size); /* this buffer allows for YUV422 data,
 				 * so it is a bit too big for YUV420 */
@@ -386,20 +309,38 @@
 		return 1;
 	}
 	/* and make sure that the borders are _really_ black */
-	memset(image, 0, image_width*image_height);
-	memset(image + size, 0x80, image_width*image_height/4);
-	memset(image + 3*size/2, 0x80, image_width*image_height/4);
-
-	if (init_codec()) {
-		return 1;
+	switch (format) { /* clear the buffer to black and set up the encoder for the input layout */
+		case IMGFMT_YV12:
+			memset(image, 0, image_width*image_height); /* first component cleared to 0 */
+			memset(image + size, 0x80, image_width*image_height/4); /* second component, starting at offset size, set to 0x80 */
+			memset(image + 3*size/2, 0x80, image_width*image_height/4); /* third component, starting at offset 3*size/2, set to 0x80 */
+			mjpeg_encoder_init(image_width/hdec, image_height, /* encoded width is reduced by hdec */
+					image, hdec, image_width, /* y pointer, y_psize, y_rsize */
+					image + image_width*image_height, /* u pointer: same address as image + size */
+					hdec, image_width/2, /* u_psize, u_rsize */
+					image + 3*image_width*image_height/2, /* v pointer: same address as image + 3*size/2 */
+					hdec, image_width/2, fields, 1, /* v_psize, v_rsize, f, cu = 1 */
+					quality, bw);
+			break;
+		case IMGFMT_YUY2:
+			for (j = 0; j < 2*size; j+=4) { /* fill the whole 2*size byte buffer in 4-byte groups */
+				image[j] = 0; /* bytes 0 and 2 of a group are handed to the encoder as y */
+				image[j+1] = 0x80; /* byte 1 is where the u pointer below starts */
+				image[j+2] = 0;
+				image[j+3] = 0x80; /* byte 3 is where the v pointer below starts */
+			}
+			mjpeg_encoder_init(image_width/hdec, image_height,
+					image, hdec*2, image_width*2, /* y pointer, y_psize, y_rsize */
+					image + 1, hdec*4, image_width*2, /* u pointer, u_psize, u_rsize */
+					image + 3, hdec*4, image_width*2, /* v pointer, v_psize, v_rsize */
+					fields, 0, quality, bw); /* cu = 0 for this layout */
+			break;
+		default:
+			mp_msg(MSGT_VO, MSGL_FATAL, "internal inconsistency in vo_zr\n"); /* NOTE(review): no return here, execution continues to init_zoran -- confirm this is intended */
 	}
-	
-	if (init_zoran()) {
-#ifdef ZR_USES_LIBJPEG
-		jpeg_destroy_compress(&cinfo);
-#else
-		avcodec_close(&codec_context);
-#endif
+
+
+	if (init_zoran(stretchx, stretchy)) {
 		return 1;
 	}
 
@@ -414,12 +355,9 @@
 }
 
 static void flip_page (void) {
-#ifdef ZR_USES_LIBJPEG
 	int i, j, k;
-#else
-	AVCodecContext *c = &codec_context;
-#endif
-
+	/*FILE *fp;
+	char filename[100];*/
 	/* do we have a free buffer? */
 	if (queue-synco < zrq.count) {
 		frame = queue;
@@ -429,26 +367,20 @@
 		frame = zs.frame;
 		synco++;
 	}
-
-#ifdef ZR_USES_LIBJPEG
-	ccbuf = buf + frame*zrq.size;
-	ccount = 0;
-	k = fields;
-	for (j=0; j < k; j++) {
-
-	jpeg_start_compress(&cinfo, TRUE);
-	i=0;
-	while (cinfo.next_scanline < cinfo.image_height) {
-		jpeg_write_raw_data(&cinfo, jsi[j][i], 8);
-		i++;
-	}
-	jpeg_finish_compress(&cinfo);
-
-	}
-#else
-	avcodec_encode_video(c, buf + frame*zrq.size, MJPEG_SIZE, &picture);
-#endif
-
+	k=0; /* k: running byte offset into the buffer of the current frame */
+	for (i = 0; i < fields; i++) /* one encoder call per field, written back to back */
+		k+=mjpeg_encode_frame(buf+frame*zrq.size+k, i);
+	/* Warning, Quantization and Huffman tables are only
+	 * written in the first frame by default (to preserve bandwidth) */
+	/*sprintf(filename, "test%04d.jpg", framenum);
+	fp = fopen(filename, "w");
+	if (!fp) exit(1);
+	fwrite(buf+frame*zrq.size, 1, k, fp);
+	fclose(fp);*/
+	/*fp = fopen("test1.jpg", "r");
+	fread(buf+frame*zrq.size, 1, 2126, fp);
+	fclose(fp);*/
+	
 	if (ioctl(vdes, BUZIOC_QBUF_PLAY, &frame) < 0) 
 		mp_msg(MSGT_VO, MSGL_ERR,
 				"error queueing buffer for playback");
@@ -459,22 +391,27 @@
 }
 
 static uint32_t draw_frame(uint8_t * src[]) {
+	int i;
+	char *source, *dest;
+	/* copy the cropped region of the packed source (2 bytes per pixel) into image */
+	source = src[0] + g.yoff*stride + 2*g.xoff; /* source rows are stride bytes apart, not 2*image_width */
+	dest = image + 2*off_y;
+	for (i = 0; i < g.height/vdec; i++) { /* keep every vdec-th source row */
+		memcpy(dest, source, 2*g.width); /* only the cropped width, so the border bytes stay untouched */
+		dest += 2*image_width;
+		source += vdec*stride;
+	}
 	return 0;
 }
 
 static uint32_t query_format(uint32_t format) {
 	if(format==IMGFMT_YV12) return 1;
+	if(format==IMGFMT_YUY2) return 1; /* accepted as well; init() has a matching IMGFMT_YUY2 case */
 	return 0;
 }
 
 static void uninit(void) {
 	uninit_zoran();
-
-#ifdef ZR_USES_LIBJPEG
-	jpeg_destroy_compress(&cinfo);
-#else
-	avcodec_close(&codec_context);
-#endif
 }
 
 static void check_events(void) {
@@ -528,7 +465,7 @@
 		src+=stride[0];
 
 	}
-	{
+	if (!bw) {
     		// copy U+V:
 		uint8_t *src1=srcimg[1];
 		uint8_t *src2=srcimg[2];
@@ -565,11 +502,17 @@
 	strcpy(device, param);
 	mp_msg(MSGT_VO, MSGL_V, "zr: using device %s\n", device);
 	return 1;
-    } else if (!strcasecmp(opt, "zrfi")) {
+    } else if (!strcasecmp(opt, "zrbw")) {
+	    if (param != NULL) {
+		    return ERR_OUT_OF_RANGE;
+	    }
+	    bw = 1;
+	    return 1;
+    } else if (!strcasecmp(opt, "zrfd")) {
 	    if (param != NULL) {
 		    return ERR_OUT_OF_RANGE;
 	    }
-	    forceinter = 1;
+	    zrfd = 1;
 	    return 1;
     } else if (!strcasecmp(opt, "zrcrop")){
 	if (param == NULL) return ERR_MISSING_PARAM;
@@ -584,6 +527,11 @@
 	g.set = 1;
 	mp_msg(MSGT_VO, MSGL_V, "zr: cropping %s\n", param);
 	return 1;
+    }else if (!strcasecmp(opt, "zrhdec")) {
+        i = atoi(param); /* NOTE(review): param is not checked for NULL here, unlike the zrcrop branch -- confirm the caller always supplies it */
+	if (i != 1 && i != 2 && i != 4) return ERR_OUT_OF_RANGE; /* only the factors 1, 2 and 4 are accepted */
+	hdec = i;
+	return 1;
     }else if (!strcasecmp(opt, "zrvdec")) {
         i = atoi(param);
 	if (i != 1 && i != 2 && i != 4) return ERR_OUT_OF_RANGE;
@@ -591,23 +539,9 @@
 	return 1;
     }else if (!strcasecmp(opt, "zrquality")) {
         i = atoi(param);
-	if (i < 30 || i > 100) return ERR_OUT_OF_RANGE;
+	if (i < 1 || i > 20) return ERR_OUT_OF_RANGE;
 	quality = i;
 	return 1;
-    }else if (!strcasecmp(opt, "zrdct")) {
-	if (param == NULL) return ERR_MISSING_PARAM;
-	if (!strcasecmp(param, "IFAST")) {
-            jpegdct = JDCT_IFAST;
-	    return 1;
-	} else if (!strcasecmp(param, "ISLOW")) {
-            jpegdct = JDCT_ISLOW;
-	    return 1;
-	} else if (!strcasecmp(param, "FLOAT")) {
-            jpegdct = JDCT_FLOAT;
-	    return 1;
-	} else {
-           return ERR_OUT_OF_RANGE;
-        }
     }else if (!strcasecmp(opt, "zrnorm")) {
 	if (param == NULL) return ERR_MISSING_PARAM;
 	if (!strcasecmp(param, "NTSC")) {
@@ -631,11 +565,13 @@
 		    "              you want to see as an x-style geometry string\n"
 		    "              example: -zrcrop 352x288+16+0\n"
 		    "  -zrvdec     vertical decimation 1, 2 or 4\n"
-		    "  -zrfi       force interlacing ('wide screen')\n"
-		    "              (by default we only interlace if the movie\n"
-		    "              is higher than half of the screen height)\n"
-		    "  -zrquality  jpeg compression quality 30-100\n"
-                    "  -zrdct      specify DCT method: IFAST, ISLOW or FLOAT\n"
+		    "  -zrhdec     horizontal decimation 1, 2 or 4\n"
+		    "  -zrfd       decimation is only done if the primitive\n"
+		    "              hardware upscaler can correct for the decimation,\n"
+		    "              this switch allows you to see the effects\n"
+		    "              of too much decimation\n"
+		    "  -zrbw       display in black&white (speed increase)\n"
+		    "  -zrquality  jpeg compression quality [BEST] 1 - 20 [VERY BAD]\n"
 		    "  -zrdev      playback device (example -zrdev /dev/video1\n"
 		    "  -zrnorm     specify norm PAL/NTSC [dev: leave at current setting]\n"
 		    "\n"
@@ -652,16 +588,18 @@
     if(device)
       free(device);
     device=NULL;
-  } else if (!strcasecmp(param, "zrfi"))
-    forceinter=0;
+  } else if (!strcasecmp(param, "zrbw"))
+    bw=0;
+  else if (!strcasecmp(param, "zrfd"))
+    zrfd=0;
   else if (!strcasecmp(param, "zrcrop"))
     g.set = g.xoff = g.yoff = 0;
+  else if (!strcasecmp(param, "zrhdec"))
+    hdec = 1;
   else if (!strcasecmp(param, "zrvdec"))
     vdec = 1;
   else if (!strcasecmp(param, "zrquality"))
-    quality = 70;
-  else if (!strcasecmp(param, "zrdct"))
-    jpegdct = JDCT_IFAST;
+    quality = 1;
   else if (!strcasecmp(param, "zrnorm"))
     norm = VIDEO_MODE_AUTO;