[MPlayer-cvslog] r38153 - trunk/libvo/vo_vdpau.c

reimar subversion at mplayerhq.hu
Sun Nov 17 18:32:17 EET 2019


Author: reimar
Date: Sun Nov 17 18:32:16 2019
New Revision: 38153

Log:
vo_vdpau: SSE2 optimization of OSD code.

Modified:
   trunk/libvo/vo_vdpau.c

Modified: trunk/libvo/vo_vdpau.c
==============================================================================
--- trunk/libvo/vo_vdpau.c	Fri Sep 27 10:29:07 2019	(r38152)
+++ trunk/libvo/vo_vdpau.c	Sun Nov 17 18:32:16 2019	(r38153)
@@ -824,6 +824,38 @@ static void check_events(void)
     }
 }
 
+#if HAVE_EMMINTRIN_H
+#include <emmintrin.h>
+
+#include "cpudetect.h"
+
+ATTR_TARGET_SSE2
+static void copyosd_SSE2(int w, int h, unsigned char *src, unsigned char *srca, int stride)
+{ // Interleave w x h bytes of src with the negated bytes of srca into index_data as I, A, I, A, ...
+    unsigned char *dst = index_data; // output cursor; rows are written back to back, 2*w bytes each
+    __m128i zero = _mm_setzero_si128(); // all-zero vector, used below to negate alpha as 0 - a
+    while (h--) { // one pass per input row
+        int i;
+        for (i = 0; i < w - 15; i += 16) // vector path: runs while at least 16 pixels remain in the row
+        {
+            __m128i mmsrc = _mm_loadu_si128((const __m128i *)(src + i)); // unaligned load of 16 src bytes
+            __m128i mmsrca = _mm_loadu_si128((const __m128i *)(srca + i)); // unaligned load of 16 srca bytes
+            mmsrca = _mm_sub_epi8(zero, mmsrca); // per-byte wrapping 0 - a, same result as the scalar -srca[i] stored to a byte
+            _mm_storeu_si128((__m128i *)(dst + 0), _mm_unpacklo_epi8(mmsrc, mmsrca)); // pixels 0..7 as (src, -srca) byte pairs
+            _mm_storeu_si128((__m128i *)(dst + 16), _mm_unpackhi_epi8(mmsrc, mmsrca)); // pixels 8..15 as (src, -srca) byte pairs
+            dst += 32; // 16 pixels consumed -> 32 output bytes produced
+        }
+        for (; i < w; i++) // scalar tail: the remaining (fewer than 16) pixels of the row
+        {
+            *dst++ = src  [i];
+            *dst++ = -srca[i];
+        }
+        src += stride; // step both inputs to the next row; stride is the distance between row starts
+        srca += stride;
+    }
+}
+#endif
+
 static void draw_osd_I8A8(int x0,int y0, int w,int h, unsigned char *src,
                           unsigned char *srca, int stride)
 {
@@ -845,11 +877,18 @@ static void draw_osd_I8A8(int x0,int y0,
     }
 
     // index_data creation, component order - I, A, I, A, .....
-    for (i = 0; i < h; i++)
-        for (j = 0; j < w; j++) {
-            index_data[i*2*w + j*2]     =  src [i*stride + j];
-            index_data[i*2*w + j*2 + 1] = -srca[i*stride + j];
-        }
+#if HAVE_EMMINTRIN_H
+    if (gCpuCaps.hasSSE2) copyosd_SSE2(w, h, src, srca, stride);
+    else
+#endif
+    {
+        unsigned char *dst = index_data;
+        for (i = 0; i < h; i++)
+            for (j = 0; j < w; j++) {
+                *dst++ = src [i*stride + j];
+                *dst++ = -srca[i*stride + j];
+            }
+    }
 
     output_indexed_rect_vid.x0 = x0;
     output_indexed_rect_vid.y0 = y0;


More information about the MPlayer-cvslog mailing list