[FFmpeg-devel] [PATCH 3/5] swscale: use dsp_mask and AV_CPU_FLAG_* internally

Janne Grunau janne-ffmpeg
Tue Sep 28 00:25:01 CEST 2010


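No behaviour change for users of sws_getContext() and sws_getCachedContext():
the SWS_CPU_CAPS_* bits passed in their flags argument are still accepted and
are converted to the internal dsp_mask (AV_CPU_FLAG_*) representation.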
---
 libswscale/bfin/swscale_bfin.c   |    2 +-
 libswscale/colorspace-test.c     |   10 ++--
 libswscale/ppc/yuv2rgb_altivec.c |    2 +-
 libswscale/rgb2rgb.c             |   11 +++--
 libswscale/rgb2rgb.h             |    2 +-
 libswscale/swscale.c             |   25 ++++++------
 libswscale/swscale_template.c    |    5 +-
 libswscale/utils.c               |   78 +++++++++++++++++++++++---------------
 libswscale/x86/yuv2rgb_mmx.c     |    5 +-
 libswscale/yuv2rgb.c             |    4 +-
 10 files changed, 82 insertions(+), 62 deletions(-)

diff --git a/libswscale/bfin/swscale_bfin.c b/libswscale/bfin/swscale_bfin.c
index ce2f172..edc4afa 100644
--- a/libswscale/bfin/swscale_bfin.c
+++ b/libswscale/bfin/swscale_bfin.c
@@ -79,7 +79,7 @@ static int yuyvtoyv12_unscaled(SwsContext *c, uint8_t* src[], int srcStride[], i
 void ff_bfin_get_unscaled_swscale(SwsContext *c)
 {
     SwsFunc swScale = c->swScale;
-    if (c->flags & SWS_CPU_CAPS_BFIN)
+    if (c->dsp_mask & AV_CPU_FLAG_BFIN)
         if (c->dstFormat == PIX_FMT_YUV420P)
             if (c->srcFormat == PIX_FMT_UYVY422) {
                 av_log (NULL, AV_LOG_VERBOSE, "selecting Blackfin optimized uyvytoyv12_unscaled\n");
diff --git a/libswscale/colorspace-test.c b/libswscale/colorspace-test.c
index 5a48065..3a5ef2e 100644
--- a/libswscale/colorspace-test.c
+++ b/libswscale/colorspace-test.c
@@ -33,7 +33,7 @@
 
 #define FUNC(s,d,n) {s,d,#n,n}
 
-static int cpu_caps;
+static unsigned cpu_caps;
 
 static char *args_parse(int argc, char *argv[])
 {
@@ -42,13 +42,13 @@ static char *args_parse(int argc, char *argv[])
     while ((o = getopt(argc, argv, "m23")) != -1) {
         switch (o) {
         case 'm':
-            cpu_caps |= SWS_CPU_CAPS_MMX;
+            cpu_caps |= AV_CPU_FLAG_MMX;
             break;
         case '2':
-            cpu_caps |= SWS_CPU_CAPS_MMX2;
+            cpu_caps |= AV_CPU_FLAG_MMX2;
             break;
         case '3':
-            cpu_caps |= SWS_CPU_CAPS_3DNOW;
+            cpu_caps |= AV_CPU_FLAG_3DNOW;
             break;
         default:
             av_log(NULL, AV_LOG_ERROR, "Unknown option %c\n", o);
@@ -72,7 +72,7 @@ int main(int argc, char **argv)
     av_log(NULL, AV_LOG_INFO, "memory corruption test ...\n");
     args_parse(argc, argv);
     av_log(NULL, AV_LOG_INFO, "CPU capabilities forced to %x\n", cpu_caps);
-    sws_rgb2rgb_init(cpu_caps);
+    sws_rgb2rgb_init(0, cpu_caps);
 
     for(funcNum=0; ; funcNum++) {
         struct func_info_s {
diff --git a/libswscale/ppc/yuv2rgb_altivec.c b/libswscale/ppc/yuv2rgb_altivec.c
index bf605bc..2355ff5 100644
--- a/libswscale/ppc/yuv2rgb_altivec.c
+++ b/libswscale/ppc/yuv2rgb_altivec.c
@@ -692,7 +692,7 @@ static int altivec_uyvy_rgb32 (SwsContext *c,
 */
 SwsFunc ff_yuv2rgb_init_altivec(SwsContext *c)
 {
-    if (!(c->flags & SWS_CPU_CAPS_ALTIVEC))
+    if (!(c->dsp_mask & AV_CPU_FLAG_ALTIVEC))
         return NULL;
 
     /*
diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c
index 7226853..d1494c7 100644
--- a/libswscale/rgb2rgb.c
+++ b/libswscale/rgb2rgb.c
@@ -26,6 +26,7 @@
 #include "config.h"
 #include "libavutil/x86_cpu.h"
 #include "libavutil/bswap.h"
+#include "libavutil/cpu.h"
 #include "rgb2rgb.h"
 #include "swscale.h"
 #include "swscale_internal.h"
@@ -199,16 +200,16 @@ DECLARE_ASM_CONST(8, uint64_t, blue_15mask)  = 0x0000001f0000001fULL;
  32-bit C version, and and&add trick by Michael Niedermayer
 */
 
-void sws_rgb2rgb_init(int flags)
+void sws_rgb2rgb_init(int flags, unsigned dsp_mask)
 {
 #if HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX
-    if (flags & SWS_CPU_CAPS_SSE2)
+    if (dsp_mask & AV_CPU_FLAG_SSE2)
         rgb2rgb_init_SSE2();
-    else if (flags & SWS_CPU_CAPS_MMX2)
+    else if (dsp_mask & AV_CPU_FLAG_MMX2)
         rgb2rgb_init_MMX2();
-    else if (flags & SWS_CPU_CAPS_3DNOW)
+    else if (dsp_mask & AV_CPU_FLAG_3DNOW)
         rgb2rgb_init_3DNOW();
-    else if (flags & SWS_CPU_CAPS_MMX)
+    else if (dsp_mask & AV_CPU_FLAG_MMX)
         rgb2rgb_init_MMX();
     else
 #endif /* HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX */
diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h
index 31e21af..3b9dd33 100644
--- a/libswscale/rgb2rgb.h
+++ b/libswscale/rgb2rgb.h
@@ -166,6 +166,6 @@ extern void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const u
                             long width, long height,
                             long lumStride, long chromStride, long srcStride);
 
-void sws_rgb2rgb_init(int flags);
+void sws_rgb2rgb_init(int flags, unsigned dsp_mask);
 
 #endif /* SWSCALE_RGB2RGB_H */
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index fbfc006..97c0ade 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -65,6 +65,7 @@ untested special converters
 #include "libavutil/avutil.h"
 #include "libavutil/mathematics.h"
 #include "libavutil/bswap.h"
+#include "libavutil/cpu.h"
 #include "libavutil/pixdesc.h"
 
 #undef MOVNTQ
@@ -1257,17 +1258,17 @@ static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, long width, uin
 SwsFunc ff_getSwsFunc(SwsContext *c)
 {
 #if CONFIG_RUNTIME_CPUDETECT
-    int flags = c->flags;
+    unsigned dsp_mask = c->dsp_mask;
 
 #if ARCH_X86
     // ordered per speed fastest first
-    if (flags & SWS_CPU_CAPS_MMX2) {
+    if (dsp_mask & AV_CPU_FLAG_MMX2) {
         sws_init_swScale_MMX2(c);
         return swScale_MMX2;
-    } else if (flags & SWS_CPU_CAPS_3DNOW) {
+    } else if (dsp_mask & AV_CPU_FLAG_3DNOW) {
         sws_init_swScale_3DNow(c);
         return swScale_3DNow;
-    } else if (flags & SWS_CPU_CAPS_MMX) {
+    } else if (dsp_mask & AV_CPU_FLAG_MMX) {
         sws_init_swScale_MMX(c);
         return swScale_MMX;
     } else {
@@ -1277,7 +1278,7 @@ SwsFunc ff_getSwsFunc(SwsContext *c)
 
 #else
 #if COMPILE_ALTIVEC
-    if (flags & SWS_CPU_CAPS_ALTIVEC) {
+    if (dsp_mask & AV_CPU_FLAG_ALTIVEC) {
         sws_init_swScale_altivec(c);
         return swScale_altivec;
     } else {
@@ -1694,15 +1695,15 @@ int ff_hardcodedcpuflags(void)
 {
     int flags = 0;
 #if   COMPILE_TEMPLATE_MMX2
-    flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2;
+    flags |= AV_CPU_FLAG_MMX|AV_CPU_FLAG_MMX2;
 #elif COMPILE_TEMPLATE_AMD3DNOW
-    flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_3DNOW;
+    flags |= AV_CPU_FLAG_MMX|AV_CPU_FLAG_3DNOW;
 #elif COMPILE_TEMPLATE_MMX
-    flags |= SWS_CPU_CAPS_MMX;
+    flags |= AV_CPU_FLAG_MMX;
 #elif COMPILE_TEMPLATE_ALTIVEC
-    flags |= SWS_CPU_CAPS_ALTIVEC;
+    flags |= AV_CPU_FLAG_ALTIVEC;
 #elif ARCH_BFIN
-    flags |= SWS_CPU_CAPS_BFIN;
+    flags |= AV_CPU_FLAG_BFIN;
 #endif
     return flags;
 }
@@ -1789,7 +1790,7 @@ void ff_get_unscaled_swscale(SwsContext *c)
         c->swScale= uyvyToYuv422Wrapper;
 
 #if COMPILE_ALTIVEC
-    if ((c->flags & SWS_CPU_CAPS_ALTIVEC) &&
+    if ((c->dsp_mask & AV_CPU_FLAG_ALTIVEC) &&
         !(c->flags & SWS_BITEXACT) &&
         srcFormat == PIX_FMT_YUV420P) {
         // unscaled YV12 -> packed YUV, we want speed
@@ -1819,7 +1820,7 @@ void ff_get_unscaled_swscale(SwsContext *c)
             c->swScale= planarCopyWrapper;
     }
 #if ARCH_BFIN
-    if (flags & SWS_CPU_CAPS_BFIN)
+    if (dsp_mask & AV_CPU_FLAG_BFIN)
         ff_bfin_get_unscaled_swscale (c);
 #endif
 }
diff --git a/libswscale/swscale_template.c b/libswscale/swscale_template.c
index 42c4a8b..dfbf850 100644
--- a/libswscale/swscale_template.c
+++ b/libswscale/swscale_template.c
@@ -2567,6 +2567,7 @@ static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[],
     const int chrXInc= c->chrXInc;
     const enum PixelFormat dstFormat= c->dstFormat;
     const int flags= c->flags;
+    const unsigned dsp_mask= c->dsp_mask;
     int16_t *vLumFilterPos= c->vLumFilterPos;
     int16_t *vChrFilterPos= c->vChrFilterPos;
     int16_t *hLumFilterPos= c->hLumFilterPos;
@@ -2905,9 +2906,9 @@ static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[],
         fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
 
 #if COMPILE_TEMPLATE_MMX
-    if (flags & SWS_CPU_CAPS_MMX2 )  __asm__ volatile("sfence":::"memory");
+    if (dsp_mask & AV_CPU_FLAG_MMX2 )  __asm__ volatile("sfence":::"memory");
     /* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
-    if (flags & SWS_CPU_CAPS_3DNOW)  __asm__ volatile("femms" :::"memory");
+    if (dsp_mask & AV_CPU_FLAG_3DNOW)  __asm__ volatile("femms" :::"memory");
     else                             __asm__ volatile("emms"  :::"memory");
 #endif
     /* store changed local vars back in the context */
diff --git a/libswscale/utils.c b/libswscale/utils.c
index 24dc1fc..cf14bfb 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -172,6 +172,7 @@ static double getSplineCoeff(double a, double b, double c, double d, double dist
 
 static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc,
                       int srcW, int dstW, int filterAlign, int one, int flags,
+                      unsigned dsp_mask,
                       SwsVector *srcFilter, SwsVector *dstFilter, double param[2])
 {
     int i;
@@ -183,7 +184,7 @@ static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSi
     const int64_t fone= 1LL<<54;
     int ret= -1;
 #if ARCH_X86
-    if (flags & SWS_CPU_CAPS_MMX)
+    if (dsp_mask & AV_CPU_FLAG_MMX)
         __asm__ volatile("emms\n\t"::: "memory"); //FIXME this should not be required but it IS (even for non-MMX versions)
 #endif
 
@@ -402,7 +403,7 @@ static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSi
         if (min>minFilterSize) minFilterSize= min;
     }
 
-    if (flags & SWS_CPU_CAPS_ALTIVEC) {
+    if (dsp_mask & AV_CPU_FLAG_ALTIVEC) {
         // we can handle the special case 4,
         // so we don't want to go to the full 8
         if (minFilterSize < 5)
@@ -417,7 +418,7 @@ static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSi
             filterAlign = 1;
     }
 
-    if (flags & SWS_CPU_CAPS_MMX) {
+    if (dsp_mask & AV_CPU_FLAG_MMX) {
         // special case for unscaled vertical filtering
         if (minFilterSize == 1 && filterAlign == 2)
             filterAlign= 1;
@@ -674,6 +675,8 @@ static void getSubSampleFactors(int *h, int *v, enum PixelFormat format)
 }
 
 static int update_flags_cpu(int flags);
+static unsigned sws_cpu_caps2av_cpu_flags(int flags);
+static int strip_cpu_flags(int flags);
 
 int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int srcRange, const int table[4], int dstRange, int brightness, int contrast, int saturation)
 {
@@ -689,13 +692,14 @@ int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int srcRange
 
     c->dstFormatBpp = av_get_bits_per_pixel(&av_pix_fmt_descriptors[c->dstFormat]);
     c->srcFormatBpp = av_get_bits_per_pixel(&av_pix_fmt_descriptors[c->srcFormat]);
-    c->flags = update_flags_cpu(c->flags);
+    if (!(c->dsp_mask & AV_CPU_FLAG_FORCE))
+        c->dsp_mask = sws_cpu_caps2av_cpu_flags(update_flags_cpu(c->flags));
 
     ff_yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness, contrast, saturation);
     //FIXME factorize
 
 #if HAVE_ALTIVEC
-    if (c->flags & SWS_CPU_CAPS_ALTIVEC)
+    if (c->dsp_mask & AV_CPU_FLAG_ALTIVEC)
         ff_yuv2rgb_init_tables_altivec(c, inv_table, brightness, contrast, saturation);
 #endif
     return 0;
@@ -789,14 +793,17 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter){
     enum PixelFormat srcFormat= c->srcFormat;
     enum PixelFormat dstFormat= c->dstFormat;
 
+    if (c->dsp_mask & AV_CPU_FLAG_FORCE)
+        dsp_mask = c->dsp_mask;
+    else
     dsp_mask = c->dsp_mask = sws_cpu_caps2av_cpu_flags(update_flags_cpu(c->flags));
 
-    flags= c->flags = update_flags_cpu(c->flags);
+    flags= c->flags = strip_cpu_flags(c->flags);
 #if ARCH_X86
-    if (flags & SWS_CPU_CAPS_MMX)
+    if (dsp_mask & AV_CPU_FLAG_MMX)
         __asm__ volatile("emms\n\t"::: "memory");
 #endif
-    if (!rgb15to16) sws_rgb2rgb_init(flags);
+    if (!rgb15to16) sws_rgb2rgb_init(flags, dsp_mask);
 
     unscaled = (srcW == dstW && srcH == dstH);
 
@@ -889,7 +896,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter){
         }
     }
 
-    if (flags & SWS_CPU_CAPS_MMX2) {
+    if (dsp_mask & AV_CPU_FLAG_MMX2) {
         c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0;
         if (!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR)) {
             if (flags&SWS_PRINT_INFO)
@@ -915,7 +922,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter){
             c->chrXInc+= 20;
         }
         //we don't use the x86 asm scaler if MMX is available
-        else if (flags & SWS_CPU_CAPS_MMX) {
+        else if (dsp_mask & AV_CPU_FLAG_MMX) {
             c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20;
             c->chrXInc = ((c->chrSrcW-2)<<16)/(c->chrDstW-2) - 20;
         }
@@ -958,18 +965,20 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter){
 #endif /* ARCH_X86 && (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT) */
         {
             const int filterAlign=
-                (flags & SWS_CPU_CAPS_MMX) ? 4 :
-                (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
+                (dsp_mask & AV_CPU_FLAG_MMX) ? 4 :
+                (dsp_mask & AV_CPU_FLAG_ALTIVEC) ? 8 :
                 1;
 
             if (initFilter(&c->hLumFilter, &c->hLumFilterPos, &c->hLumFilterSize, c->lumXInc,
                            srcW      ,       dstW, filterAlign, 1<<14,
                            (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC)  : flags,
+                           dsp_mask,
                            srcFilter->lumH, dstFilter->lumH, c->param) < 0)
                 goto fail;
             if (initFilter(&c->hChrFilter, &c->hChrFilterPos, &c->hChrFilterSize, c->chrXInc,
                            c->chrSrcW, c->chrDstW, filterAlign, 1<<14,
                            (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
+                           dsp_mask,
                            srcFilter->chrH, dstFilter->chrH, c->param) < 0)
                 goto fail;
         }
@@ -978,18 +987,20 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter){
     /* precalculate vertical scaler filter coefficients */
     {
         const int filterAlign=
-            (flags & SWS_CPU_CAPS_MMX) && (flags & SWS_ACCURATE_RND) ? 2 :
-            (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
+            (dsp_mask & AV_CPU_FLAG_MMX) && (flags & SWS_ACCURATE_RND) ? 2 :
+            (dsp_mask & AV_CPU_FLAG_ALTIVEC) ? 8 :
             1;
 
         if (initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize, c->lumYInc,
                        srcH      ,        dstH, filterAlign, (1<<12),
                        (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC)  : flags,
+                       dsp_mask,
                        srcFilter->lumV, dstFilter->lumV, c->param) < 0)
             goto fail;
         if (initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, c->chrYInc,
                        c->chrSrcH, c->chrDstH, filterAlign, (1<<12),
                        (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
+                       dsp_mask,
                        srcFilter->chrV, dstFilter->chrV, c->param) < 0)
             goto fail;
 
@@ -1095,18 +1106,18 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter){
 #endif
                sws_format_name(dstFormat));
 
-        if (flags & SWS_CPU_CAPS_MMX2)
+        if (dsp_mask & AV_CPU_FLAG_MMX2)
             av_log(c, AV_LOG_INFO, "using MMX2\n");
-        else if (flags & SWS_CPU_CAPS_3DNOW)
+        else if (dsp_mask & AV_CPU_FLAG_3DNOW)
             av_log(c, AV_LOG_INFO, "using 3DNOW\n");
-        else if (flags & SWS_CPU_CAPS_MMX)
+        else if (dsp_mask & AV_CPU_FLAG_MMX)
             av_log(c, AV_LOG_INFO, "using MMX\n");
-        else if (flags & SWS_CPU_CAPS_ALTIVEC)
+        else if (dsp_mask & AV_CPU_FLAG_ALTIVEC)
             av_log(c, AV_LOG_INFO, "using AltiVec\n");
         else
             av_log(c, AV_LOG_INFO, "using C\n");
 
-        if (flags & SWS_CPU_CAPS_MMX) {
+        if (dsp_mask & AV_CPU_FLAG_MMX) {
             if (c->canMMX2BeUsed && (flags&SWS_FAST_BILINEAR))
                 av_log(c, AV_LOG_VERBOSE, "using FAST_BILINEAR MMX2 scaler for horizontal scaling\n");
             else {
@@ -1136,31 +1147,31 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter){
         }
         if (isPlanarYUV(dstFormat)) {
             if (c->vLumFilterSize==1)
-                av_log(c, AV_LOG_VERBOSE, "using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+                av_log(c, AV_LOG_VERBOSE, "using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n", (dsp_mask & AV_CPU_FLAG_MMX) ? "MMX" : "C");
             else
-                av_log(c, AV_LOG_VERBOSE, "using n-tap %s scaler for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+                av_log(c, AV_LOG_VERBOSE, "using n-tap %s scaler for vertical scaling (YV12 like)\n", (dsp_mask & AV_CPU_FLAG_MMX) ? "MMX" : "C");
         } else {
             if (c->vLumFilterSize==1 && c->vChrFilterSize==2)
                 av_log(c, AV_LOG_VERBOSE, "using 1-tap %s \"scaler\" for vertical luminance scaling (BGR)\n"
-                       "      2-tap scaler for vertical chrominance scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+                       "      2-tap scaler for vertical chrominance scaling (BGR)\n", (dsp_mask & AV_CPU_FLAG_MMX) ? "MMX" : "C");
             else if (c->vLumFilterSize==2 && c->vChrFilterSize==2)
-                av_log(c, AV_LOG_VERBOSE, "using 2-tap linear %s scaler for vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+                av_log(c, AV_LOG_VERBOSE, "using 2-tap linear %s scaler for vertical scaling (BGR)\n", (dsp_mask & AV_CPU_FLAG_MMX) ? "MMX" : "C");
             else
-                av_log(c, AV_LOG_VERBOSE, "using n-tap %s scaler for vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+                av_log(c, AV_LOG_VERBOSE, "using n-tap %s scaler for vertical scaling (BGR)\n", (dsp_mask & AV_CPU_FLAG_MMX) ? "MMX" : "C");
         }
 
         if (dstFormat==PIX_FMT_BGR24)
             av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR24 converter\n",
-                   (flags & SWS_CPU_CAPS_MMX2) ? "MMX2" : ((flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"));
+                   (dsp_mask & AV_CPU_FLAG_MMX2) ? "MMX2" : ((dsp_mask & AV_CPU_FLAG_MMX) ? "MMX" : "C"));
         else if (dstFormat==PIX_FMT_RGB32)
-            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR32 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR32 converter\n", (dsp_mask & AV_CPU_FLAG_MMX) ? "MMX" : "C");
         else if (dstFormat==PIX_FMT_BGR565)
-            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR16 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR16 converter\n", (dsp_mask & AV_CPU_FLAG_MMX) ? "MMX" : "C");
         else if (dstFormat==PIX_FMT_BGR555)
-            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR15 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR15 converter\n", (dsp_mask & AV_CPU_FLAG_MMX) ? "MMX" : "C");
         else if (dstFormat == PIX_FMT_RGB444BE || dstFormat == PIX_FMT_RGB444LE ||
                  dstFormat == PIX_FMT_BGR444BE || dstFormat == PIX_FMT_BGR444LE)
-            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR12 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR12 converter\n", (dsp_mask & AV_CPU_FLAG_MMX) ? "MMX" : "C");
 
         av_log(c, AV_LOG_VERBOSE, "%dx%d -> %dx%d\n", srcW, srcH, dstW, dstH);
         av_log(c, AV_LOG_DEBUG, "lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
@@ -1184,7 +1195,9 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat,
     if(!(c=sws_alloc_context()))
         return NULL;
 
-    c->flags= flags;
+    // set AV_CPU_FLAG_FORCE to prevent auto detection in sws_init_context()
+    c->dsp_mask = AV_CPU_FLAG_FORCE|sws_cpu_caps2av_cpu_flags(update_flags_cpu(flags));
+    c->flags= strip_cpu_flags(flags);
     c->srcW= srcW;
     c->srcH= srcH;
     c->dstW= dstW;
@@ -1572,11 +1585,13 @@ struct SwsContext *sws_getCachedContext(struct SwsContext *context,
                                         SwsFilter *srcFilter, SwsFilter *dstFilter, const double *param)
 {
     static const double default_param[2] = {SWS_PARAM_DEFAULT, SWS_PARAM_DEFAULT};
+    // set AV_CPU_FLAG_FORCE to prevent auto detection in sws_init_context()
+    unsigned dsp_mask= AV_CPU_FLAG_FORCE|sws_cpu_caps2av_cpu_flags(update_flags_cpu(flags));
 
     if (!param)
         param = default_param;
 
-    flags = update_flags_cpu(flags);
+    flags= strip_cpu_flags(flags);
 
     if (context &&
         (context->srcW      != srcW      ||
@@ -1586,6 +1601,7 @@ struct SwsContext *sws_getCachedContext(struct SwsContext *context,
          context->dstH      != dstH      ||
          context->dstFormat != dstFormat ||
          context->flags     != flags     ||
+         context->dsp_mask  != dsp_mask  ||
          context->param[0]  != param[0]  ||
          context->param[1]  != param[1])) {
         sws_freeContext(context);
diff --git a/libswscale/x86/yuv2rgb_mmx.c b/libswscale/x86/yuv2rgb_mmx.c
index 6478311..c54a0a0 100644
--- a/libswscale/x86/yuv2rgb_mmx.c
+++ b/libswscale/x86/yuv2rgb_mmx.c
@@ -34,6 +34,7 @@
 #include "libswscale/swscale.h"
 #include "libswscale/swscale_internal.h"
 #include "libavutil/x86_cpu.h"
+#include "libavutil/cpu.h"
 
 #define DITHER1XBPP // only for MMX
 
@@ -63,7 +64,7 @@ DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL;
 
 SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c)
 {
-    if (c->flags & SWS_CPU_CAPS_MMX2) {
+    if (c->dsp_mask & AV_CPU_FLAG_MMX2) {
         switch (c->dstFormat) {
         case PIX_FMT_RGB32:
             if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) {
@@ -81,7 +82,7 @@ SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c)
         case PIX_FMT_RGB555: return yuv420_rgb15_MMX2;
         }
     }
-    if (c->flags & SWS_CPU_CAPS_MMX) {
+    if (c->dsp_mask & AV_CPU_FLAG_MMX) {
         switch (c->dstFormat) {
         case PIX_FMT_RGB32:
             if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) {
diff --git a/libswscale/yuv2rgb.c b/libswscale/yuv2rgb.c
index 77d3d39..c354566 100644
--- a/libswscale/yuv2rgb.c
+++ b/libswscale/yuv2rgb.c
@@ -553,12 +553,12 @@ SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c)
     t = ff_yuv2rgb_init_mlib(c);
 #endif
 #if HAVE_ALTIVEC
-    if (c->flags & SWS_CPU_CAPS_ALTIVEC)
+    if (c->dsp_mask & AV_CPU_FLAG_ALTIVEC)
         t = ff_yuv2rgb_init_altivec(c);
 #endif
 
 #if ARCH_BFIN
-    if (c->flags & SWS_CPU_CAPS_BFIN)
+    if (c->dsp_mask & AV_CPU_FLAG_BFIN)
         t = ff_yuv2rgb_get_func_ptr_bfin(c);
 #endif
 
-- 
1.7.3




More information about the ffmpeg-devel mailing list