[MPlayer-cvslog] r38145 - in trunk/libmpcodecs: vf_eq.c vf_eq2.c

reimar subversion at mplayerhq.hu
Mon May 27 23:02:14 EEST 2019


Author: reimar
Date: Mon May 27 23:02:14 2019
New Revision: 38145

Log:
vf_eq.c, vf_eq2.c: Add SSE2 support.

The code is actually identical, it would be
even nicer if it was actually shared.

Modified:
   trunk/libmpcodecs/vf_eq.c
   trunk/libmpcodecs/vf_eq2.c

Modified: trunk/libmpcodecs/vf_eq.c
==============================================================================
--- trunk/libmpcodecs/vf_eq.c	Mon May 27 23:02:13 2019	(r38144)
+++ trunk/libmpcodecs/vf_eq.c	Mon May 27 23:02:14 2019	(r38145)
@@ -104,6 +104,45 @@ static void process_MMX(unsigned char *d
 }
 #endif
 
+#if HAVE_EMMINTRIN_H
+#include <emmintrin.h>
+
+/* SSE2 brightness/contrast: dest = clip(((src * gain) >> 12) + offset) over a w x h plane. */
+ATTR_TARGET_SSE2
+static void process_SSE2(unsigned char *dest, int dstride, unsigned char *src, int sstride,
+                    int w, int h, int brightness, int contrast)
+{
+    /* gain carries 12 fractional bits (256*16 = 4096 corresponds to contrast 0) */
+    const int gain = ((contrast+100)*256*16)/100;
+    const int offset = ((brightness+100)*511)/200-128 - gain/32;
+    const __m128i vgain = _mm_set1_epi16(gain);
+    const __m128i voffset = _mm_set1_epi16(offset);
+    const __m128i vzero = _mm_setzero_si128();
+    for (; h--; src += sstride, dest += dstride) {
+        int x = 0;
+        /* vector part: 16 pixels per iteration, unaligned loads/stores */
+        while (x < w - 15) {
+            const __m128i pix = _mm_loadu_si128((const __m128i *)(src + x));
+            /* zero-extend bytes to words; pre-shift by 4: mulhi keeps the top 16 bits, a net >>12 */
+            __m128i lo = _mm_slli_epi16(_mm_unpacklo_epi8(pix, vzero), 4);
+            __m128i hi = _mm_slli_epi16(_mm_unpackhi_epi8(pix, vzero), 4);
+            lo = _mm_add_epi16(_mm_mulhi_epu16(lo, vgain), voffset);
+            hi = _mm_add_epi16(_mm_mulhi_epu16(hi, vgain), voffset);
+            /* pack back to bytes with unsigned saturation */
+            _mm_storeu_si128((__m128i *)(dest + x), _mm_packus_epi16(lo, hi));
+            x += 16;
+        }
+        /* scalar part: whatever is left of the row */
+        while (x < w) {
+            const int v = ((src[x] * gain)>>12) + offset;
+            /* bit 8 or 9 set means v left 0..255; (-v)>>31 then supplies the clamp value */
+            dest[x] = (v & 768) ? (-v)>>31 : v;
+            x++;
+        }
+    }
+}
+#endif
+
 static void process_C(unsigned char *dest, int dstride, unsigned char *src, int sstride,
                     int w, int h, int brightness, int contrast)
 {
@@ -230,6 +269,9 @@ static int vf_open(vf_instance_t *vf, ch
 #if HAVE_MMX_INLINE
         if(gCpuCaps.hasMMX) process = process_MMX;
 #endif
+#if HAVE_EMMINTRIN_H
+        if(gCpuCaps.hasSSE2) process = process_SSE2;
+#endif
 
         return 1;
 }

Modified: trunk/libmpcodecs/vf_eq2.c
==============================================================================
--- trunk/libmpcodecs/vf_eq2.c	Mon May 27 23:02:13 2019	(r38144)
+++ trunk/libmpcodecs/vf_eq2.c	Mon May 27 23:02:14 2019	(r38145)
@@ -188,6 +188,45 @@ void affine_1d_MMX (eq2_param_t *par, un
 }
 #endif
 
+#if HAVE_EMMINTRIN_H
+#include <emmintrin.h>
+
+/* SSE2 affine map dst = clip(((src * contrast) >> 12) + brightness) over a w x h 8-bit plane. */
+ATTR_TARGET_SSE2
+static void affine_1d_SSE2(eq2_param_t *par, unsigned char *dst, unsigned char *src,
+  unsigned w, unsigned h, unsigned dstride, unsigned sstride)
+{
+    int scaled_contrast = par->c * 256 * 16; /* contrast with 12 fractional bits */
+    int scaled_brightness = ((par->b+1.0)*511)/2-128 - scaled_contrast/32;
+    __m128i mmcontrast = _mm_set1_epi16(scaled_contrast);
+    __m128i mmbrightness = _mm_set1_epi16(scaled_brightness);
+    __m128i zero = _mm_setzero_si128();
+    while (h--) {
+        unsigned i;
+        /* "i + 15 < w", not "i < w - 15": with unsigned w the latter wraps for w < 15 */
+        for (i = 0; i + 15 < w; i += 16) {
+            __m128i mmsrc = _mm_loadu_si128((const __m128i *)(src + i));
+            /* zero-extend to 16 bit; pre-shift by 4 so mulhi (top 16 bits) is a net >>12 */
+            __m128i srclo = _mm_slli_epi16(_mm_unpacklo_epi8(mmsrc, zero), 4);
+            __m128i srchi = _mm_slli_epi16(_mm_unpackhi_epi8(mmsrc, zero), 4);
+            /* signed multiply so a negative contrast matches the scalar tail below */
+            srclo = _mm_mulhi_epi16(srclo, mmcontrast);
+            srchi = _mm_mulhi_epi16(srchi, mmcontrast);
+            srclo = _mm_add_epi16(srclo, mmbrightness);
+            srchi = _mm_add_epi16(srchi, mmbrightness);
+            _mm_storeu_si128((__m128i *)(dst + i), _mm_packus_epi16(srclo, srchi)); /* saturating pack */
+        }
+        for (; i < w; i++) { /* scalar tail for the remaining w % 16 pixels */
+            int pel = ((src[i] * scaled_contrast)>>12) + scaled_brightness;
+            if(pel&768) pel = (-pel)>>31; /* bit 8 or 9 set: outside 0..255, clamp */
+            dst[i] = pel;
+        }
+        src += sstride;
+        dst += dstride;
+    }
+}
+#endif
+
 static
 void apply_lut (eq2_param_t *par, unsigned char *dst, unsigned char *src,
   unsigned w, unsigned h, unsigned dstride, unsigned sstride)
@@ -289,6 +328,11 @@ void check_values (eq2_param_t *par)
   if ((par->c == 1.0) && (par->b == 0.0) && (par->g == 1.0)) {
     par->adjust = NULL;
   }
+#if HAVE_EMMINTRIN_H
+  else if (par->g == 1.0 && gCpuCaps.hasSSE2) {
+    par->adjust = &affine_1d_SSE2;
+  }
+#endif
 #if HAVE_MMX_INLINE
   else if (par->g == 1.0 && gCpuCaps.hasMMX) {
     par->adjust = &affine_1d_MMX;


More information about the MPlayer-cvslog mailing list