[FFmpeg-cvslog] swscale/x86/yuv2rgb: Detemplatize
Ramiro Polla
git at videolan.org
Wed Jul 10 13:27:00 EEST 2024
ffmpeg | branch: master | Ramiro Polla <ramiro.polla at gmail.com> | Thu Jul 4 11:40:18 2024 +0200| [ac6263945ae802605635c99534d23c3c681a0f34] | committer: Ramiro Polla
swscale/x86/yuv2rgb: Detemplatize
Every function in yuv2rgb_template.c is compiled exactly once (for the
SSSE3 variant only), so drop the RENAME() template and move the functions directly into yuv2rgb.c.
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ac6263945ae802605635c99534d23c3c681a0f34
---
libswscale/x86/yuv2rgb.c | 167 ++++++++++++++++++++++++++++++++-
libswscale/x86/yuv2rgb_template.c | 188 --------------------------------------
2 files changed, 162 insertions(+), 193 deletions(-)
diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c
index ddc7cca2c8..68e903c6ad 100644
--- a/libswscale/x86/yuv2rgb.c
+++ b/libswscale/x86/yuv2rgb.c
@@ -1,7 +1,8 @@
/*
* software YUV to RGB converter
*
- * Copyright (C) 2009 Konstantin Shishkov
+ * Copyright (C) 2001-2007 Michael Niedermayer
+ * Copyright (C) 2009-2010 Konstantin Shishkov
*
* MMX/MMXEXT template stuff (needed for fast movntq support),
* 1,4,8bpp support and context / deglobalize stuff
@@ -39,10 +40,166 @@
#if HAVE_X86ASM
-//SSSE3 versions
-#undef RENAME
-#define RENAME(a) a ## _ssse3
-#include "yuv2rgb_template.c"
+#define YUV2RGB_LOOP(depth) \
+ h_size = (c->dstW + 7) & ~7; \
+ if (h_size * depth > FFABS(dstStride[0])) \
+ h_size -= 8; \
+ \
+ vshift = c->srcFormat != AV_PIX_FMT_YUV422P; \
+ \
+ for (y = 0; y < srcSliceH; y++) { \
+ uint8_t *image = dst[0] + (y + srcSliceY) * dstStride[0]; \
+ const uint8_t *py = src[0] + y * srcStride[0]; \
+ const uint8_t *pu = src[1] + (y >> vshift) * srcStride[1]; \
+ const uint8_t *pv = src[2] + (y >> vshift) * srcStride[2]; \
+ x86_reg index = -h_size / 2; \
+
+extern void ff_yuv_420_rgb24_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index,
+ const uint8_t *pv_index, const uint64_t *pointer_c_dither,
+ const uint8_t *py_2index);
+extern void ff_yuv_420_bgr24_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index,
+ const uint8_t *pv_index, const uint64_t *pointer_c_dither,
+ const uint8_t *py_2index);
+
+extern void ff_yuv_420_rgb15_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index,
+ const uint8_t *pv_index, const uint64_t *pointer_c_dither,
+ const uint8_t *py_2index);
+extern void ff_yuv_420_rgb16_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index,
+ const uint8_t *pv_index, const uint64_t *pointer_c_dither,
+ const uint8_t *py_2index);
+extern void ff_yuv_420_rgb32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index,
+ const uint8_t *pv_index, const uint64_t *pointer_c_dither,
+ const uint8_t *py_2index);
+extern void ff_yuv_420_bgr32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index,
+ const uint8_t *pv_index, const uint64_t *pointer_c_dither,
+ const uint8_t *py_2index);
+extern void ff_yuva_420_rgb32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index,
+ const uint8_t *pv_index, const uint64_t *pointer_c_dither,
+ const uint8_t *py_2index, const uint8_t *pa_2index);
+extern void ff_yuva_420_bgr32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index,
+ const uint8_t *pv_index, const uint64_t *pointer_c_dither,
+ const uint8_t *py_2index, const uint8_t *pa_2index);
+
+static inline int yuv420_rgb15_ssse3(SwsContext *c, const uint8_t *src[],
+ int srcStride[],
+ int srcSliceY, int srcSliceH,
+ uint8_t *dst[], int dstStride[])
+{
+ int y, h_size, vshift;
+
+ YUV2RGB_LOOP(2)
+
+ c->blueDither = ff_dither8[y & 1];
+ c->greenDither = ff_dither8[y & 1];
+ c->redDither = ff_dither8[(y + 1) & 1];
+
+ ff_yuv_420_rgb15_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
+ }
+ return srcSliceH;
+}
+
+static inline int yuv420_rgb16_ssse3(SwsContext *c, const uint8_t *src[],
+ int srcStride[],
+ int srcSliceY, int srcSliceH,
+ uint8_t *dst[], int dstStride[])
+{
+ int y, h_size, vshift;
+
+ YUV2RGB_LOOP(2)
+
+ c->blueDither = ff_dither8[y & 1];
+ c->greenDither = ff_dither4[y & 1];
+ c->redDither = ff_dither8[(y + 1) & 1];
+
+ ff_yuv_420_rgb16_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
+ }
+ return srcSliceH;
+}
+
+static inline int yuv420_rgb32_ssse3(SwsContext *c, const uint8_t *src[],
+ int srcStride[],
+ int srcSliceY, int srcSliceH,
+ uint8_t *dst[], int dstStride[])
+{
+ int y, h_size, vshift;
+
+ YUV2RGB_LOOP(4)
+
+ ff_yuv_420_rgb32_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
+ }
+ return srcSliceH;
+}
+
+static inline int yuv420_bgr32_ssse3(SwsContext *c, const uint8_t *src[],
+ int srcStride[],
+ int srcSliceY, int srcSliceH,
+ uint8_t *dst[], int dstStride[])
+{
+ int y, h_size, vshift;
+
+ YUV2RGB_LOOP(4)
+
+ ff_yuv_420_bgr32_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
+ }
+ return srcSliceH;
+}
+
+static inline int yuva420_rgb32_ssse3(SwsContext *c, const uint8_t *src[],
+ int srcStride[],
+ int srcSliceY, int srcSliceH,
+ uint8_t *dst[], int dstStride[])
+{
+ int y, h_size, vshift;
+ YUV2RGB_LOOP(4)
+
+ const uint8_t *pa = src[3] + y * srcStride[3];
+ ff_yuva_420_rgb32_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index, pa - 2 * index);
+ }
+ return srcSliceH;
+}
+
+static inline int yuva420_bgr32_ssse3(SwsContext *c, const uint8_t *src[],
+ int srcStride[],
+ int srcSliceY, int srcSliceH,
+ uint8_t *dst[], int dstStride[])
+{
+ int y, h_size, vshift;
+
+ YUV2RGB_LOOP(4)
+
+ const uint8_t *pa = src[3] + y * srcStride[3];
+ ff_yuva_420_bgr32_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index, pa - 2 * index);
+ }
+ return srcSliceH;
+}
+
+static inline int yuv420_rgb24_ssse3(SwsContext *c, const uint8_t *src[],
+ int srcStride[],
+ int srcSliceY, int srcSliceH,
+ uint8_t *dst[], int dstStride[])
+{
+ int y, h_size, vshift;
+
+ YUV2RGB_LOOP(3)
+
+ ff_yuv_420_rgb24_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
+ }
+ return srcSliceH;
+}
+
+static inline int yuv420_bgr24_ssse3(SwsContext *c, const uint8_t *src[],
+ int srcStride[],
+ int srcSliceY, int srcSliceH,
+ uint8_t *dst[], int dstStride[])
+{
+ int y, h_size, vshift;
+
+ YUV2RGB_LOOP(3)
+
+ ff_yuv_420_bgr24_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
+ }
+ return srcSliceH;
+}
#endif /* HAVE_X86ASM */
diff --git a/libswscale/x86/yuv2rgb_template.c b/libswscale/x86/yuv2rgb_template.c
deleted file mode 100644
index abaf80eec2..0000000000
--- a/libswscale/x86/yuv2rgb_template.c
+++ /dev/null
@@ -1,188 +0,0 @@
-/*
- * software YUV to RGB converter
- *
- * Copyright (C) 2001-2007 Michael Niedermayer
- * (c) 2010 Konstantin Shishkov
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <stdint.h>
-
-#include "libavutil/x86/asm.h"
-#include "libswscale/swscale_internal.h"
-
-#define YUV2RGB_LOOP(depth) \
- h_size = (c->dstW + 7) & ~7; \
- if (h_size * depth > FFABS(dstStride[0])) \
- h_size -= 8; \
- \
- vshift = c->srcFormat != AV_PIX_FMT_YUV422P; \
- \
- for (y = 0; y < srcSliceH; y++) { \
- uint8_t *image = dst[0] + (y + srcSliceY) * dstStride[0]; \
- const uint8_t *py = src[0] + y * srcStride[0]; \
- const uint8_t *pu = src[1] + (y >> vshift) * srcStride[1]; \
- const uint8_t *pv = src[2] + (y >> vshift) * srcStride[2]; \
- x86_reg index = -h_size / 2; \
-
-extern void RENAME(ff_yuv_420_rgb24)(x86_reg index, uint8_t *image, const uint8_t *pu_index,
- const uint8_t *pv_index, const uint64_t *pointer_c_dither,
- const uint8_t *py_2index);
-extern void RENAME(ff_yuv_420_bgr24)(x86_reg index, uint8_t *image, const uint8_t *pu_index,
- const uint8_t *pv_index, const uint64_t *pointer_c_dither,
- const uint8_t *py_2index);
-
-extern void RENAME(ff_yuv_420_rgb15)(x86_reg index, uint8_t *image, const uint8_t *pu_index,
- const uint8_t *pv_index, const uint64_t *pointer_c_dither,
- const uint8_t *py_2index);
-extern void RENAME(ff_yuv_420_rgb16)(x86_reg index, uint8_t *image, const uint8_t *pu_index,
- const uint8_t *pv_index, const uint64_t *pointer_c_dither,
- const uint8_t *py_2index);
-extern void RENAME(ff_yuv_420_rgb32)(x86_reg index, uint8_t *image, const uint8_t *pu_index,
- const uint8_t *pv_index, const uint64_t *pointer_c_dither,
- const uint8_t *py_2index);
-extern void RENAME(ff_yuv_420_bgr32)(x86_reg index, uint8_t *image, const uint8_t *pu_index,
- const uint8_t *pv_index, const uint64_t *pointer_c_dither,
- const uint8_t *py_2index);
-extern void RENAME(ff_yuva_420_rgb32)(x86_reg index, uint8_t *image, const uint8_t *pu_index,
- const uint8_t *pv_index, const uint64_t *pointer_c_dither,
- const uint8_t *py_2index, const uint8_t *pa_2index);
-extern void RENAME(ff_yuva_420_bgr32)(x86_reg index, uint8_t *image, const uint8_t *pu_index,
- const uint8_t *pv_index, const uint64_t *pointer_c_dither,
- const uint8_t *py_2index, const uint8_t *pa_2index);
-
-static inline int RENAME(yuv420_rgb15)(SwsContext *c, const uint8_t *src[],
- int srcStride[],
- int srcSliceY, int srcSliceH,
- uint8_t *dst[], int dstStride[])
-{
- int y, h_size, vshift;
-
- YUV2RGB_LOOP(2)
-
- c->blueDither = ff_dither8[y & 1];
- c->greenDither = ff_dither8[y & 1];
- c->redDither = ff_dither8[(y + 1) & 1];
-
- RENAME(ff_yuv_420_rgb15)(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
- }
- return srcSliceH;
-}
-
-static inline int RENAME(yuv420_rgb16)(SwsContext *c, const uint8_t *src[],
- int srcStride[],
- int srcSliceY, int srcSliceH,
- uint8_t *dst[], int dstStride[])
-{
- int y, h_size, vshift;
-
- YUV2RGB_LOOP(2)
-
- c->blueDither = ff_dither8[y & 1];
- c->greenDither = ff_dither4[y & 1];
- c->redDither = ff_dither8[(y + 1) & 1];
-
- RENAME(ff_yuv_420_rgb16)(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
- }
- return srcSliceH;
-}
-
-static inline int RENAME(yuv420_rgb32)(SwsContext *c, const uint8_t *src[],
- int srcStride[],
- int srcSliceY, int srcSliceH,
- uint8_t *dst[], int dstStride[])
-{
- int y, h_size, vshift;
-
- YUV2RGB_LOOP(4)
-
- RENAME(ff_yuv_420_rgb32)(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
- }
- return srcSliceH;
-}
-
-static inline int RENAME(yuv420_bgr32)(SwsContext *c, const uint8_t *src[],
- int srcStride[],
- int srcSliceY, int srcSliceH,
- uint8_t *dst[], int dstStride[])
-{
- int y, h_size, vshift;
-
- YUV2RGB_LOOP(4)
-
- RENAME(ff_yuv_420_bgr32)(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
- }
- return srcSliceH;
-}
-
-static inline int RENAME(yuva420_rgb32)(SwsContext *c, const uint8_t *src[],
- int srcStride[],
- int srcSliceY, int srcSliceH,
- uint8_t *dst[], int dstStride[])
-{
- int y, h_size, vshift;
- YUV2RGB_LOOP(4)
-
- const uint8_t *pa = src[3] + y * srcStride[3];
- RENAME(ff_yuva_420_rgb32)(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index, pa - 2 * index);
- }
- return srcSliceH;
-}
-
-static inline int RENAME(yuva420_bgr32)(SwsContext *c, const uint8_t *src[],
- int srcStride[],
- int srcSliceY, int srcSliceH,
- uint8_t *dst[], int dstStride[])
-{
- int y, h_size, vshift;
-
- YUV2RGB_LOOP(4)
-
- const uint8_t *pa = src[3] + y * srcStride[3];
- RENAME(ff_yuva_420_bgr32)(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index, pa - 2 * index);
- }
- return srcSliceH;
-}
-
-static inline int RENAME(yuv420_rgb24)(SwsContext *c, const uint8_t *src[],
- int srcStride[],
- int srcSliceY, int srcSliceH,
- uint8_t *dst[], int dstStride[])
-{
- int y, h_size, vshift;
-
- YUV2RGB_LOOP(3)
-
- RENAME(ff_yuv_420_rgb24)(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
- }
- return srcSliceH;
-}
-
-static inline int RENAME(yuv420_bgr24)(SwsContext *c, const uint8_t *src[],
- int srcStride[],
- int srcSliceY, int srcSliceH,
- uint8_t *dst[], int dstStride[])
-{
- int y, h_size, vshift;
-
- YUV2RGB_LOOP(3)
-
- RENAME(ff_yuv_420_bgr24)(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
- }
- return srcSliceH;
-}
More information about the ffmpeg-cvslog
mailing list