[MPlayer-dev-eng] [PATCH 3/3] vf_eq.c, vf_eq2.c: Add SSE2 support.
Reimar Döffinger
Reimar.Doeffinger at gmx.de
Tue May 21 22:32:51 EEST 2019
The code added to the two files is identical; it would be
even nicer if it were actually shared.
---
libmpcodecs/vf_eq.c | 42 ++++++++++++++++++++++++++++++++++++++++++
libmpcodecs/vf_eq2.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 86 insertions(+)
diff --git a/libmpcodecs/vf_eq.c b/libmpcodecs/vf_eq.c
index 1ee3a5970..eff657290 100644
--- a/libmpcodecs/vf_eq.c
+++ b/libmpcodecs/vf_eq.c
@@ -104,6 +104,45 @@ static void process_MMX(unsigned char *dest, int dstride, unsigned char *src, in
}
#endif
+#if HAVE_EMMINTRIN_H
+#include <emmintrin.h>
+
+ATTR_TARGET_SSE2 /* process_SSE2: apply a contrast gain and brightness offset to every byte of a w x h region, 16 bytes per SSE2 step. */
+static void process_SSE2(unsigned char *dest, int dstride, unsigned char *src, int sstride,
+ int w, int h, int brightness, int contrast)
+{
+ int scaled_contrast = ((contrast+100)*256*16)/100; /* fixed-point gain: contrast == 0 gives 4096, i.e. unity under the >>12 used below */
+ int scaled_brightness = ((brightness+100)*511)/200-128 - scaled_contrast/32; /* additive offset applied after the multiply */
+ __m128i mmcontrast = _mm_set1_epi16(scaled_contrast); /* gain broadcast to all eight 16-bit lanes */
+ __m128i mmbrightness = _mm_set1_epi16(scaled_brightness); /* offset broadcast to all eight 16-bit lanes */
+ __m128i zero = _mm_setzero_si128();
+ while (h--) { /* one iteration per row */
+ int i;
+ for (i = 0; i < w - 15; i += 16) /* vector part: runs only while 16 whole bytes remain; w is int, so w < 15 simply skips it */
+ {
+ __m128i mmsrc = _mm_loadu_si128((const __m128i *)(src + i)); /* unaligned load of 16 source bytes */
+ __m128i srclo = _mm_unpacklo_epi8(mmsrc, zero); /* zero-extend bytes 0..7 to 16 bits */
+ __m128i srchi = _mm_unpackhi_epi8(mmsrc, zero); /* zero-extend bytes 8..15 to 16 bits */
+ srclo = _mm_slli_epi16(srclo, 4); /* pre-shift by 4 so the high-half multiply (>>16) nets >>12 */
+ srchi = _mm_slli_epi16(srchi, 4);
+ srclo = _mm_mulhi_epu16(srclo, mmcontrast); /* unsigned high half: (src * scaled_contrast) >> 12 for non-negative scaled_contrast */
+ srchi = _mm_mulhi_epu16(srchi, mmcontrast);
+ srclo = _mm_add_epi16(srclo, mmbrightness); /* add the brightness offset */
+ srchi = _mm_add_epi16(srchi, mmbrightness);
+ _mm_storeu_si128((__m128i *)(dest + i), _mm_packus_epi16(srclo, srchi)); /* pack with unsigned saturation to 0..255, unaligned store */
+ }
+ for (; i < w; i++) /* scalar tail for the bytes the vector loop did not cover */
+ {
+ int pel = ((src[i] * scaled_contrast)>>12) + scaled_brightness;
+ if(pel&768) pel = (-pel)>>31; /* bit 8 or 9 set means out of 0..255: negative -> 0, too large -> -1 (255 as a byte); relies on arithmetic >> */
+ dest[i] = pel;
+ }
+ src += sstride; /* advance both pointers to the next row */
+ dest += dstride;
+ }
+}
+#endif
+
static void process_C(unsigned char *dest, int dstride, unsigned char *src, int sstride,
int w, int h, int brightness, int contrast)
{
@@ -230,6 +269,9 @@ static int vf_open(vf_instance_t *vf, char *args)
#if HAVE_MMX_INLINE
if(gCpuCaps.hasMMX) process = process_MMX;
#endif
+#if HAVE_EMMINTRIN_H
+ if(gCpuCaps.hasSSE2) process = process_SSE2;
+#endif
return 1;
}
diff --git a/libmpcodecs/vf_eq2.c b/libmpcodecs/vf_eq2.c
index e02a59c88..d32e23bd3 100644
--- a/libmpcodecs/vf_eq2.c
+++ b/libmpcodecs/vf_eq2.c
@@ -188,6 +188,45 @@ void affine_1d_MMX (eq2_param_t *par, unsigned char *dst, unsigned char *src,
}
#endif
+#if HAVE_EMMINTRIN_H
+#include <emmintrin.h>
+
+ATTR_TARGET_SSE2 /* affine_1d_SSE2: apply the contrast gain par->c and brightness offset par->b to every byte of a w x h region, 16 bytes per SSE2 step. */
+static void affine_1d_SSE2(eq2_param_t *par, unsigned char *dst, unsigned char *src,
+ unsigned w, unsigned h, unsigned dstride, unsigned sstride)
+{
+ int scaled_contrast = par->c * 256 * 16; /* fixed-point gain: par->c == 1 gives 4096, i.e. unity under the >>12 used below */
+ int scaled_brightness = ((par->b+1.0)*511)/2-128 - scaled_contrast/32; /* additive offset applied after the multiply */
+ __m128i mmcontrast = _mm_set1_epi16(scaled_contrast); /* gain broadcast to all eight 16-bit lanes */
+ __m128i mmbrightness = _mm_set1_epi16(scaled_brightness); /* offset broadcast to all eight 16-bit lanes */
+ __m128i zero = _mm_setzero_si128();
+ while (h--) { /* one iteration per row */
+ unsigned i; /* unsigned like w, so the loop comparisons involve no signed/unsigned conversion */
+ for (i = 0; i + 15 < w; i += 16) /* w is unsigned: the former "w - 15" wrapped for w < 15 and overran the row */
+ {
+ __m128i mmsrc = _mm_loadu_si128((const __m128i *)(src + i)); /* unaligned load of 16 source bytes */
+ __m128i srclo = _mm_unpacklo_epi8(mmsrc, zero); /* zero-extend bytes 0..7 to 16 bits */
+ __m128i srchi = _mm_unpackhi_epi8(mmsrc, zero); /* zero-extend bytes 8..15 to 16 bits */
+ srclo = _mm_slli_epi16(srclo, 4); /* pre-shift by 4 so the high-half multiply (>>16) nets >>12 */
+ srchi = _mm_slli_epi16(srchi, 4);
+ srclo = _mm_mulhi_epu16(srclo, mmcontrast); /* NOTE(review): unsigned multiply matches the scalar tail only for scaled_contrast >= 0 - confirm par->c cannot be negative */
+ srchi = _mm_mulhi_epu16(srchi, mmcontrast);
+ srclo = _mm_add_epi16(srclo, mmbrightness); /* add the brightness offset */
+ srchi = _mm_add_epi16(srchi, mmbrightness);
+ _mm_storeu_si128((__m128i *)(dst + i), _mm_packus_epi16(srclo, srchi)); /* pack with unsigned saturation to 0..255, unaligned store */
+ }
+ for (; i < w; i++) /* scalar tail for the bytes the vector loop did not cover */
+ {
+ int pel = ((src[i] * scaled_contrast)>>12) + scaled_brightness;
+ if(pel&768) pel = (-pel)>>31; /* bit 8 or 9 set means out of 0..255: negative -> 0, too large -> -1 (255 as a byte); relies on arithmetic >> */
+ dst[i] = pel;
+ }
+ src += sstride; /* advance both pointers to the next row */
+ dst += dstride;
+ }
+}
+#endif
+
static
void apply_lut (eq2_param_t *par, unsigned char *dst, unsigned char *src,
unsigned w, unsigned h, unsigned dstride, unsigned sstride)
@@ -289,6 +328,11 @@ void check_values (eq2_param_t *par)
if ((par->c == 1.0) && (par->b == 0.0) && (par->g == 1.0)) {
par->adjust = NULL;
}
+#if HAVE_EMMINTRIN_H
+ else if (par->g == 1.0 && gCpuCaps.hasSSE2) {
+ par->adjust = &affine_1d_SSE2;
+ }
+#endif
#if HAVE_MMX_INLINE
else if (par->g == 1.0 && gCpuCaps.hasMMX) {
par->adjust = &affine_1d_MMX;
--
2.20.1
More information about the MPlayer-dev-eng
mailing list