[FFmpeg-devel] [PATCH] use x86_reg in libswscale

Reimar Döffinger Reimar.Doeffinger
Sun Nov 2 20:40:40 CET 2008


Hello,
I thought libpostprocess was the last one, but it seems libswscale did
not compile on mingw64 either.
Attached is a proposed patch. It changes the types to x86_reg in the
functions where that seems easily possible (there are no prototypes for
those functions anywhere) and otherwise casts the values (hm, actually it
seems like none of those cases remain).

Greetings,
Reimar Döffinger
-------------- next part --------------
Index: rgb2rgb_template.c
===================================================================
--- rgb2rgb_template.c	(revision 27878)
+++ rgb2rgb_template.c	(working copy)
@@ -1354,7 +1354,7 @@
 
 static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size)
 {
-    long idx = 15 - src_size;
+    x86_reg idx = 15 - src_size;
     const uint8_t *s = src-idx;
     uint8_t *d = dst-idx;
 #ifdef HAVE_MMX
@@ -1420,7 +1420,7 @@
 {
     unsigned i;
 #ifdef HAVE_MMX
-    long mmx_size= 23 - src_size;
+    x86_reg mmx_size= 23 - src_size;
     __asm__ volatile (
     "test             %%"REG_a", %%"REG_a"          \n\t"
     "jns                     2f                     \n\t"
@@ -1491,7 +1491,7 @@
                                            long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
 {
     long y;
-    const long chromWidth= width>>1;
+    const x86_reg chromWidth= width>>1;
     for (y=0; y<height; y++)
     {
 #ifdef HAVE_MMX
@@ -1643,7 +1643,7 @@
                                            long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
 {
     long y;
-    const long chromWidth= width>>1;
+    const x86_reg chromWidth= width>>1;
     for (y=0; y<height; y++)
     {
 #ifdef HAVE_MMX
@@ -1773,7 +1773,7 @@
                                       long lumStride, long chromStride, long srcStride)
 {
     long y;
-    const long chromWidth= width>>1;
+    const x86_reg chromWidth= width>>1;
     for (y=0; y<height; y+=2)
     {
 #ifdef HAVE_MMX
@@ -2011,7 +2011,7 @@
                                       long lumStride, long chromStride, long srcStride)
 {
     long y;
-    const long chromWidth= width>>1;
+    const x86_reg chromWidth= width>>1;
     for (y=0; y<height; y+=2)
     {
 #ifdef HAVE_MMX
@@ -2138,7 +2138,7 @@
                                        long lumStride, long chromStride, long srcStride)
 {
     long y;
-    const long chromWidth= width>>1;
+    const x86_reg chromWidth= width>>1;
 #ifdef HAVE_MMX
     for (y=0; y<height-2; y+=2)
     {
@@ -2211,7 +2211,7 @@
             MOVNTQ"                  %%mm0, (%1, %%"REG_a") \n\t"
             "add                        $8,      %%"REG_a"  \n\t"
             " js                        1b                  \n\t"
-            : : "r" (src+width*3), "r" (ydst+width), "g" (-width)
+            : : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width)
             : "%"REG_a, "%"REG_d
             );
             ydst += lumStride;
@@ -2455,7 +2455,7 @@
         "add                    $16, %%"REG_a"  \n\t"
         "cmp                     %3, %%"REG_a"  \n\t"
         " jb                     1b             \n\t"
-        ::"r"(dest), "r"(src1), "r"(src2), "r" (width-15)
+        ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15)
         : "memory", "%"REG_a""
         );
 #else
@@ -2481,7 +2481,7 @@
         "add                    $16, %%"REG_a"  \n\t"
         "cmp                     %3, %%"REG_a"  \n\t"
         " jb                     1b             \n\t"
-        ::"r"(dest), "r"(src1), "r"(src2), "r" (width-15)
+        ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15)
         : "memory", "%"REG_a
         );
 #endif
@@ -2619,14 +2619,14 @@
                                         long srcStride1, long srcStride2,
                                         long srcStride3, long dstStride)
 {
-    long y,x,w,h;
+    long y,w,h;
     w=width/2; h=height;
     for (y=0;y<h;y++){
     const uint8_t* yp=src1+srcStride1*y;
     const uint8_t* up=src2+srcStride2*(y>>2);
     const uint8_t* vp=src3+srcStride3*(y>>2);
     uint8_t* d=dst+dstStride*y;
-    x=0;
+    x86_reg x=0;
 #ifdef HAVE_MMX
     for (;x<w-7;x+=8)
     {
Index: swscale.c
===================================================================
--- swscale.c	(revision 27878)
+++ swscale.c	(working copy)
@@ -1435,13 +1435,13 @@
 static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *filter, int32_t *filterPos, int numSplits)
 {
     uint8_t *fragmentA;
-    long imm8OfPShufW1A;
-    long imm8OfPShufW2A;
-    long fragmentLengthA;
+    x86_reg imm8OfPShufW1A;
+    x86_reg imm8OfPShufW2A;
+    x86_reg fragmentLengthA;
     uint8_t *fragmentB;
-    long imm8OfPShufW1B;
-    long imm8OfPShufW2B;
-    long fragmentLengthB;
+    x86_reg imm8OfPShufW1B;
+    x86_reg imm8OfPShufW2B;
+    x86_reg fragmentLengthB;
     int fragmentPos;
 
     int xpos, i;
Index: swscale_template.c
===================================================================
--- swscale_template.c	(revision 27878)
+++ swscale_template.c	(working copy)
@@ -893,7 +893,7 @@
 
 static inline void RENAME(yuv2yuvX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
                                     int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
-                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, long dstW, long chrDstW)
+                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, x86_reg dstW, x86_reg chrDstW)
 {
 #ifdef HAVE_MMX
     if(!(c->flags & SWS_BITEXACT)){
@@ -936,15 +936,15 @@
 }
 
 static inline void RENAME(yuv2yuv1)(SwsContext *c, int16_t *lumSrc, int16_t *chrSrc,
-                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, long dstW, long chrDstW)
+                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, x86_reg dstW, x86_reg chrDstW)
 {
     int i;
 #ifdef HAVE_MMX
     if(!(c->flags & SWS_BITEXACT)){
-        long p= uDest ? 3 : 1;
+        x86_reg p= uDest ? 3 : 1;
         uint8_t *src[3]= {lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW};
         uint8_t *dst[3]= {dest, uDest, vDest};
-        long counter[3] = {dstW, chrDstW, chrDstW};
+        x86_reg counter[3] = {dstW, chrDstW, chrDstW};
 
         if (c->flags & SWS_ACCURATE_RND){
             while(p--){
@@ -1004,10 +1004,10 @@
  */
 static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
                                        int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
-                                       uint8_t *dest, long dstW, long dstY)
+                                       uint8_t *dest, x86_reg dstW, x86_reg dstY)
 {
 #ifdef HAVE_MMX
-    long dummy=0;
+    x86_reg dummy=0;
     if(!(c->flags & SWS_BITEXACT)){
         if (c->flags & SWS_ACCURATE_RND){
             switch(c->dstFormat){
@@ -1462,7 +1462,7 @@
 
 //FIXME yuy2* can read up to 7 samples too much
 
-static inline void RENAME(yuy2ToY)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused)
+static inline void RENAME(yuy2ToY)(uint8_t *dst, uint8_t *src, x86_reg width, uint32_t *unused)
 {
 #ifdef HAVE_MMX
     __asm__ volatile(
@@ -1487,7 +1487,7 @@
 #endif
 }
 
-static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused)
+static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, x86_reg width, uint32_t *unused)
 {
 #ifdef HAVE_MMX
     __asm__ volatile(
@@ -1524,7 +1524,7 @@
 
 /* This is almost identical to the previous, end exists only because
  * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
-static inline void RENAME(uyvyToY)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused)
+static inline void RENAME(uyvyToY)(uint8_t *dst, uint8_t *src, x86_reg width, uint32_t *unused)
 {
 #ifdef HAVE_MMX
     __asm__ volatile(
@@ -1548,7 +1548,7 @@
 #endif
 }
 
-static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused)
+static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, x86_reg width, uint32_t *unused)
 {
 #ifdef HAVE_MMX
     __asm__ volatile(
@@ -1584,7 +1584,7 @@
 }
 
 #define BGR2Y(type, name, shr, shg, shb, maskr, maskg, maskb, RY, GY, BY, S)\
-static inline void RENAME(name)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused)\
+static inline void RENAME(name)(uint8_t *dst, uint8_t *src, x86_reg width, uint32_t *unused)\
 {\
     int i;\
     for (i=0; i<width; i++)\
@@ -1605,7 +1605,7 @@
 BGR2Y(uint16_t, rgb15ToY, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RY    , GY<<5, BY<<10, RGB2YUV_SHIFT+7)
 
 #define BGR2UV(type, name, shr, shg, shb, maskr, maskg, maskb, RU, GU, BU, RV, GV, BV, S)\
-static inline void RENAME(name)(uint8_t *dstU, uint8_t *dstV, uint8_t *src, uint8_t *dummy, long width, uint32_t *unused)\
+static inline void RENAME(name)(uint8_t *dstU, uint8_t *dstV, uint8_t *src, uint8_t *dummy, x86_reg width, uint32_t *unused)\
 {\
     int i;\
     for (i=0; i<width; i++)\
@@ -1618,7 +1618,7 @@
         dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<((S)-1)))>>(S);\
     }\
 }\
-static inline void RENAME(name ## _half)(uint8_t *dstU, uint8_t *dstV, uint8_t *src, uint8_t *dummy, long width, uint32_t *unused)\
+static inline void RENAME(name ## _half)(uint8_t *dstU, uint8_t *dstV, uint8_t *src, uint8_t *dummy, x86_reg width, uint32_t *unused)\
 {\
     int i;\
     for (i=0; i<width; i++)\
@@ -1644,7 +1644,7 @@
 BGR2UV(uint16_t, rgb15ToUV, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RU    , GU<<5, BU<<10, RV    , GV<<5, BV<<10, RGB2YUV_SHIFT+7)
 
 #ifdef HAVE_MMX
-static inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, uint8_t *src, long width, int srcFormat)
+static inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, uint8_t *src, x86_reg width, int srcFormat)
 {
 
     if(srcFormat == PIX_FMT_BGR24){
@@ -1697,7 +1697,7 @@
     );
 }
 
-static inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV, uint8_t *src, long width, int srcFormat)
+static inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV, uint8_t *src, x86_reg width, int srcFormat)
 {
     __asm__ volatile(
         "movq                    24+%4, %%mm6       \n\t"
@@ -1756,7 +1756,7 @@
 }
 #endif
 
-static inline void RENAME(bgr24ToY)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused)
+static inline void RENAME(bgr24ToY)(uint8_t *dst, uint8_t *src, x86_reg width, uint32_t *unused)
 {
 #ifdef HAVE_MMX
     RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_BGR24);
@@ -1773,7 +1773,7 @@
 #endif /* HAVE_MMX */
 }
 
-static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused)
+static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, x86_reg width, uint32_t *unused)
 {
 #ifdef HAVE_MMX
     RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_BGR24);
@@ -1792,7 +1792,7 @@
     assert(src1 == src2);
 }
 
-static inline void RENAME(bgr24ToUV_half)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused)
+static inline void RENAME(bgr24ToUV_half)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, x86_reg width, uint32_t *unused)
 {
     int i;
     for (i=0; i<width; i++)
@@ -1807,7 +1807,7 @@
     assert(src1 == src2);
 }
 
-static inline void RENAME(rgb24ToY)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused)
+static inline void RENAME(rgb24ToY)(uint8_t *dst, uint8_t *src, x86_reg width, uint32_t *unused)
 {
 #ifdef HAVE_MMX
     RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_RGB24);
@@ -1824,7 +1824,7 @@
 #endif
 }
 
-static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused)
+static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, x86_reg width, uint32_t *unused)
 {
     int i;
     assert(src1==src2);
@@ -1843,7 +1843,7 @@
 #endif
 }
 
-static inline void RENAME(rgb24ToUV_half)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused)
+static inline void RENAME(rgb24ToUV_half)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, x86_reg width, uint32_t *unused)
 {
     int i;
     assert(src1==src2);
@@ -1859,7 +1859,7 @@
 }
 
 
-static inline void RENAME(palToY)(uint8_t *dst, uint8_t *src, long width, uint32_t *pal)
+static inline void RENAME(palToY)(uint8_t *dst, uint8_t *src, x86_reg width, uint32_t *pal)
 {
     int i;
     for (i=0; i<width; i++)
@@ -1870,7 +1870,7 @@
     }
 }
 
-static inline void RENAME(palToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *pal)
+static inline void RENAME(palToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, x86_reg width, uint32_t *pal)
 {
     int i;
     assert(src1 == src2);
@@ -1883,7 +1883,7 @@
     }
 }
 
-static inline void RENAME(monowhite2Y)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused)
+static inline void RENAME(monowhite2Y)(uint8_t *dst, uint8_t *src, x86_reg width, uint32_t *unused)
 {
     int i, j;
     for (i=0; i<width/8; i++){
@@ -1893,7 +1893,7 @@
     }
 }
 
-static inline void RENAME(monoblack2Y)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused)
+static inline void RENAME(monoblack2Y)(uint8_t *dst, uint8_t *src, x86_reg width, uint32_t *unused)
 {
     int i, j;
     for (i=0; i<width/8; i++){
@@ -1905,13 +1905,13 @@
 
 // bilinear / bicubic scaling
 static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW, int xInc,
-                                  int16_t *filter, int16_t *filterPos, long filterSize)
+                                  int16_t *filter, int16_t *filterPos, x86_reg filterSize)
 {
 #ifdef HAVE_MMX
     assert(filterSize % 4 == 0 && filterSize>0);
     if (filterSize==4) // Always true for upscaling, sometimes for down, too.
     {
-        long counter= -2*dstW;
+        x86_reg counter= -2*dstW;
         filter-= counter*2;
         filterPos-= counter/2;
         dst-= counter/2;
@@ -1957,7 +1957,7 @@
     }
     else if (filterSize==8)
     {
-        long counter= -2*dstW;
+        x86_reg counter= -2*dstW;
         filter-= counter*4;
         filterPos-= counter/2;
         dst-= counter/2;
@@ -2015,7 +2015,7 @@
     else
     {
         uint8_t *offset = src+filterSize;
-        long counter= -2*dstW;
+        x86_reg counter= -2*dstW;
         //filter-= counter*filterSize/2;
         filterPos-= counter/2;
         dst-= counter/2;
@@ -2086,7 +2086,7 @@
 #endif /* HAVE_MMX */
 }
       // *** horizontal scale Y line to temp buffer
-static inline void RENAME(hyscale)(SwsContext *c, uint16_t *dst, long dstWidth, uint8_t *src, int srcW, int xInc,
+static inline void RENAME(hyscale)(SwsContext *c, uint16_t *dst, x86_reg dstWidth, uint8_t *src, int srcW, int xInc,
                                    int flags, int canMMX2BeUsed, int16_t *hLumFilter,
                                    int16_t *hLumFilterPos, int hLumFilterSize, void *funnyYCode,
                                    int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter,
@@ -2249,7 +2249,7 @@
         else
         {
 #endif /* HAVE_MMX2 */
-        long xInc_shr16 = xInc >> 16;
+        x86_reg xInc_shr16 = xInc >> 16;
         uint16_t xInc_mask = xInc & 0xffff;
         //NO MMX just normal asm ...
         __asm__ volatile(
@@ -2321,7 +2321,7 @@
     }
 }
 
-inline static void RENAME(hcscale)(SwsContext *c, uint16_t *dst, long dstWidth, uint8_t *src1, uint8_t *src2,
+inline static void RENAME(hcscale)(SwsContext *c, uint16_t *dst, x86_reg dstWidth, uint8_t *src1, uint8_t *src2,
                                    int srcW, int xInc, int flags, int canMMX2BeUsed, int16_t *hChrFilter,
                                    int16_t *hChrFilterPos, int hChrFilterSize, void *funnyUVCode,
                                    int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter,
@@ -2535,7 +2535,7 @@
         else
         {
 #endif /* HAVE_MMX2 */
-            long xInc_shr16 = (long) (xInc >> 16);
+            x86_reg xInc_shr16 = (x86_reg) (xInc >> 16);
             uint16_t xInc_mask = xInc & 0xffff;
             __asm__ volatile(
             "xor %%"REG_a", %%"REG_a"               \n\t" // i
@@ -2573,9 +2573,9 @@
 /* GCC 3.3 makes MPlayer crash on IA-32 machines when using "g" operand here,
    which is needed to support GCC 4.0. */
 #if defined(ARCH_X86_64) && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
-            :: "m" (src1), "m" (dst), "g" ((long)dstWidth), "m" (xInc_shr16), "m" (xInc_mask),
+            :: "m" (src1), "m" (dst), "g" ((x86_reg)dstWidth), "m" (xInc_shr16), "m" (xInc_mask),
 #else
-            :: "m" (src1), "m" (dst), "m" ((long)dstWidth), "m" (xInc_shr16), "m" (xInc_mask),
+            :: "m" (src1), "m" (dst), "m" ((x86_reg)dstWidth), "m" (xInc_shr16), "m" (xInc_mask),
 #endif
             "r" (src2)
             : "%"REG_a, "%"REG_d, "%ecx", "%"REG_D, "%esi"
Index: yuv2rgb.c
===================================================================
--- yuv2rgb.c	(revision 27878)
+++ yuv2rgb.c	(working copy)
@@ -38,6 +38,7 @@
 #include "rgb2rgb.h"
 #include "swscale.h"
 #include "swscale_internal.h"
+#include "libavutil/x86_cpu.h"
 
 #define DITHER1XBPP // only for MMX
 
Index: yuv2rgb_template.c
===================================================================
--- yuv2rgb_template.c	(revision 27878)
+++ yuv2rgb_template.c	(working copy)
@@ -141,7 +141,7 @@
         uint8_t *py = src[0] + y*srcStride[0];
         uint8_t *pu = src[1] + (y>>1)*srcStride[1];
         uint8_t *pv = src[2] + (y>>1)*srcStride[2];
-        long index= -h_size/2;
+        x86_reg index= -h_size/2;
 
         c->blueDither= ff_dither8[y&1];
         c->greenDither= ff_dither4[y&1];
@@ -235,7 +235,7 @@
         uint8_t *py = src[0] + y*srcStride[0];
         uint8_t *pu = src[1] + (y>>1)*srcStride[1];
         uint8_t *pv = src[2] + (y>>1)*srcStride[2];
-        long index= -h_size/2;
+        x86_reg index= -h_size/2;
 
         c->blueDither= ff_dither8[y&1];
         c->greenDither= ff_dither8[y&1];
@@ -322,7 +322,7 @@
         uint8_t *py = src[0] + y*srcStride[0];
         uint8_t *pu = src[1] + (y>>1)*srcStride[1];
         uint8_t *pv = src[2] + (y>>1)*srcStride[2];
-        long index= -h_size/2;
+        x86_reg index= -h_size/2;
 
         /* This MMX assembly code deals with a SINGLE scan line at a time,
          * it converts 8 pixels in each iteration. */
@@ -468,7 +468,7 @@
         uint8_t *py = src[0] + y*srcStride[0];
         uint8_t *pu = src[1] + (y>>1)*srcStride[1];
         uint8_t *pv = src[2] + (y>>1)*srcStride[2];
-        long index= -h_size/2;
+        x86_reg index= -h_size/2;
 
         /* This MMX assembly code deals with a SINGLE scan line at a time,
          * it converts 8 pixels in each iteration. */



More information about the ffmpeg-devel mailing list