[MPlayer-cvslog] r38145 - in trunk/libmpcodecs: vf_eq.c vf_eq2.c
reimar
subversion at mplayerhq.hu
Mon May 27 23:02:14 EEST 2019
Author: reimar
Date: Mon May 27 23:02:14 2019
New Revision: 38145
Log:
vf_eq.c, vf_eq2.c: Add SSE2 support.
The code in the two files is actually identical; it would be
even nicer if it were actually shared.
Modified:
trunk/libmpcodecs/vf_eq.c
trunk/libmpcodecs/vf_eq2.c
Modified: trunk/libmpcodecs/vf_eq.c
==============================================================================
--- trunk/libmpcodecs/vf_eq.c Mon May 27 23:02:13 2019 (r38144)
+++ trunk/libmpcodecs/vf_eq.c Mon May 27 23:02:14 2019 (r38145)
@@ -104,6 +104,70 @@ static void process_MMX(unsigned char *d
}
#endif
+#if HAVE_EMMINTRIN_H
+#include <emmintrin.h>
+
+/*
+ * SSE2 brightness/contrast pass over a plane of 8-bit samples.
+ *
+ * Each output byte is ((src * gain) >> 12) + offset clamped to 0..255,
+ * with gain and offset derived from contrast and brightness below.
+ * Sixteen pixels are handled per vector iteration; a scalar loop covers
+ * whatever is left at the end of each row.
+ *
+ * dest, dstride: output plane and the byte step between its rows
+ * src, sstride:  input plane and the byte step between its rows
+ * w, h:          number of pixels per row and number of rows
+ * brightness, contrast: NOTE(review): presumably both in -100..100, which
+ *                keeps gain within 16 bits - confirm against the caller.
+ */
+ATTR_TARGET_SSE2
+static void process_SSE2(unsigned char *dest, int dstride, unsigned char *src, int sstride,
+                         int w, int h, int brightness, int contrast)
+{
+    /* Gain in 1/4096 units: contrast == 0 gives 4096, i.e. 1.0. */
+    const int gain = ((contrast + 100) * 256 * 16) / 100;
+    /* gain / 32 equals 128 * gain / 4096, i.e. the gain applied to 128. */
+    const int offset = ((brightness + 100) * 511) / 200 - 128 - gain / 32;
+    const __m128i vgain = _mm_set1_epi16(gain);
+    const __m128i voffset = _mm_set1_epi16(offset);
+    const __m128i vzero = _mm_setzero_si128();
+
+    for (; h != 0; h--) {
+        int x = 0;
+
+        /* Vector body: runs while at least 16 pixels remain in the row. */
+        while (w - x >= 16) {
+            const __m128i px = _mm_loadu_si128((const __m128i *)(src + x));
+            /* Widen to 16 bits and pre-shift by 4 so that the high half
+             * of the 16x16 product equals (pixel * gain) >> 12. */
+            __m128i lo = _mm_slli_epi16(_mm_unpacklo_epi8(px, vzero), 4);
+            __m128i hi = _mm_slli_epi16(_mm_unpackhi_epi8(px, vzero), 4);
+
+            lo = _mm_add_epi16(_mm_mulhi_epu16(lo, vgain), voffset);
+            hi = _mm_add_epi16(_mm_mulhi_epu16(hi, vgain), voffset);
+            /* packus saturates the signed 16-bit sums to 0..255. */
+            _mm_storeu_si128((__m128i *)(dest + x), _mm_packus_epi16(lo, hi));
+            x += 16;
+        }
+
+        /* Scalar tail for the pixels the vector loop did not cover. */
+        while (x < w) {
+            const int pel = ((src[x] * gain) >> 12) + offset;
+
+            /* A set bit 8 or 9 is taken to mean pel left 0..255; (-pel) >> 31
+             * then gives 0 for negative pel and -1 (stored as 255) for
+             * too-large pel, relying on an arithmetic right shift. */
+            dest[x] = (pel & 768) ? (-pel) >> 31 : pel;
+            x++;
+        }
+
+        src += sstride;
+        dest += dstride;
+    }
+}
+#endif
+
static void process_C(unsigned char *dest, int dstride, unsigned char *src, int sstride,
int w, int h, int brightness, int contrast)
{
@@ -230,6 +269,9 @@ static int vf_open(vf_instance_t *vf, ch
#if HAVE_MMX_INLINE
if(gCpuCaps.hasMMX) process = process_MMX;
#endif
+#if HAVE_EMMINTRIN_H
+ if(gCpuCaps.hasSSE2) process = process_SSE2;
+#endif
return 1;
}
Modified: trunk/libmpcodecs/vf_eq2.c
==============================================================================
--- trunk/libmpcodecs/vf_eq2.c Mon May 27 23:02:13 2019 (r38144)
+++ trunk/libmpcodecs/vf_eq2.c Mon May 27 23:02:14 2019 (r38145)
@@ -188,6 +188,75 @@ void affine_1d_MMX (eq2_param_t *par, un
}
#endif
+#if HAVE_EMMINTRIN_H
+#include <emmintrin.h>
+
+/*
+ * SSE2 contrast/brightness adjustment of one plane of 8-bit samples.
+ *
+ * Each output byte is ((src * scaled_contrast) >> 12) + scaled_brightness
+ * clamped to 0..255. Sixteen pixels are handled per vector iteration; a
+ * scalar loop covers whatever is left at the end of each row, which also
+ * makes rows narrower than 16 pixels work.
+ *
+ * par:  supplies the contrast factor par->c and the brightness par->b
+ * dst:  output plane, rows dstride bytes apart
+ * src:  input plane, rows sstride bytes apart
+ * w, h: number of pixels per row and number of rows
+ *
+ * NOTE(review): _mm_set1_epi16 keeps only the low 16 bits of
+ * scaled_contrast and _mm_mulhi_epu16 treats them as unsigned, while the
+ * scalar tail multiplies as signed int. The two paths therefore presumably
+ * only agree for 0 <= par->c < 16 - confirm the range against the callers.
+ */
+ATTR_TARGET_SSE2
+static void affine_1d_SSE2(eq2_param_t *par, unsigned char *dst, unsigned char *src,
+                           unsigned w, unsigned h, unsigned dstride, unsigned sstride)
+{
+    /* Contrast as a gain in 1/4096 units: par->c == 1 gives 4096. */
+    int scaled_contrast = par->c * 256 * 16;
+    /* scaled_contrast / 32 equals 128 * scaled_contrast / 4096. */
+    int scaled_brightness = ((par->b+1.0)*511)/2-128 - scaled_contrast/32;
+    __m128i mmcontrast = _mm_set1_epi16(scaled_contrast);
+    __m128i mmbrightness = _mm_set1_epi16(scaled_brightness);
+    __m128i zero = _mm_setzero_si128();
+    while (h--) {
+        unsigned i;
+        /* Written as "w - i >= 16" rather than "i < w - 15": w is unsigned,
+         * so w - 15 would wrap to a huge value for w < 15 and the vector
+         * loop would run past the end of the row. i never exceeds w
+         * here, so w - i cannot wrap. */
+        for (i = 0; w - i >= 16; i += 16)
+        {
+            __m128i mmsrc = _mm_loadu_si128((const __m128i *)(src + i));
+            __m128i srclo = _mm_unpacklo_epi8(mmsrc, zero);
+            __m128i srchi = _mm_unpackhi_epi8(mmsrc, zero);
+            /* Pre-shift by 4 so that the high half of the 16x16 product
+             * is (pixel * scaled_contrast) >> 12. */
+            srclo = _mm_slli_epi16(srclo, 4);
+            srchi = _mm_slli_epi16(srchi, 4);
+            srclo = _mm_mulhi_epu16(srclo, mmcontrast);
+            srchi = _mm_mulhi_epu16(srchi, mmcontrast);
+            srclo = _mm_add_epi16(srclo, mmbrightness);
+            srchi = _mm_add_epi16(srchi, mmbrightness);
+            /* packus saturates the signed 16-bit sums to 0..255. */
+            _mm_storeu_si128((__m128i *)(dst + i), _mm_packus_epi16(srclo, srchi));
+        }
+        for (; i < w; i++)
+        {
+            int pel = ((src[i] * scaled_contrast)>>12) + scaled_brightness;
+            /* A set bit 8 or 9 is taken to mean pel left 0..255; (-pel) >> 31
+             * then gives 0 for negative pel and -1 (stored as 255) for
+             * too-large pel, relying on an arithmetic right shift. */
+            if(pel&768) pel = (-pel)>>31;
+            dst[i] = pel;
+        }
+        src += sstride;
+        dst += dstride;
+    }
+}
+#endif
+
static
void apply_lut (eq2_param_t *par, unsigned char *dst, unsigned char *src,
unsigned w, unsigned h, unsigned dstride, unsigned sstride)
@@ -289,6 +328,11 @@ void check_values (eq2_param_t *par)
if ((par->c == 1.0) && (par->b == 0.0) && (par->g == 1.0)) {
par->adjust = NULL;
}
+#if HAVE_EMMINTRIN_H
+ else if (par->g == 1.0 && gCpuCaps.hasSSE2) {
+ par->adjust = &affine_1d_SSE2;
+ }
+#endif
#if HAVE_MMX_INLINE
else if (par->g == 1.0 && gCpuCaps.hasMMX) {
par->adjust = &affine_1d_MMX;
More information about the MPlayer-cvslog
mailing list