[MPlayer-dev-eng] [PATCH 7/9] libswscale: Optimize yuv2rgb conversion for avr32
Hans-Christian Egtvedt
hans-christian.egtvedt at atmel.com
Mon Feb 16 17:16:56 CET 2009
Implemented by Ronny Pedersen.
Signed-off-by: Hans-Christian Egtvedt <hans-christian.egtvedt at atmel.com>
---
libswscale/Makefile | 1 +
libswscale/pico-avr32.h | 134 +++++++++++++
libswscale/swscale_internal.h | 3 +
libswscale/yuv2rgb.c | 11 +-
libswscale/yuv2rgb_avr32.c | 413 +++++++++++++++++++++++++++++++++++++++++
5 files changed, 561 insertions(+), 1 deletions(-)
create mode 100644 libswscale/pico-avr32.h
create mode 100644 libswscale/yuv2rgb_avr32.c
diff --git a/libswscale/Makefile b/libswscale/Makefile
index a959661..cd4da7f 100644
--- a/libswscale/Makefile
+++ b/libswscale/Makefile
@@ -7,6 +7,7 @@ HEADERS = swscale.h
OBJS = rgb2rgb.o swscale.o swscale_avoption.o
+OBJS-$(ARCH_AVR32) += yuv2rgb_avr32.o
OBJS-$(ARCH_BFIN) += internal_bfin.o swscale_bfin.o yuv2rgb_bfin.o
OBJS-$(CONFIG_GPL) += yuv2rgb.o
OBJS-$(CONFIG_MLIB) += yuv2rgb_mlib.o
diff --git a/libswscale/pico-avr32.h b/libswscale/pico-avr32.h
new file mode 100644
index 0000000..2df5c2e
--- /dev/null
+++ b/libswscale/pico-avr32.h
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2007-2009 Atmel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * 3. The name of ATMEL may not be used to endorse or promote products
+ * derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY ATMEL ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE EXPRESSLY AND SPECIFICALLY DISCLAIMED. IN NO EVENT SHALL ATMEL
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
+ * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+#ifndef __PICO_H__
+#define __PICO_H__
+
+/* Coprocessor Number: passed as the first argument of the coprocessor
+ * builtins and pasted into the "cpN" operand of the ldcm/stcm asm below. */
+#define PICO_CPNO 1
+
+/* Pixel Coprocessor Register file: symbolic coprocessor register names.
+ * These are stringified into the register list of the PICO_LDCM_* and
+ * PICO_STCM_* macros, so they must remain plain macro tokens. */
+#define PICO_REGVECT_INPIX2 cr0
+#define PICO_REGVECT_INPIX1 cr1
+#define PICO_REGVECT_INPIX0 cr2
+#define PICO_REGVECT_OUTPIX2 cr3
+#define PICO_REGVECT_OUTPIX1 cr4
+#define PICO_REGVECT_OUTPIX0 cr5
+#define PICO_REGVECT_COEFF0_A cr6
+#define PICO_REGVECT_COEFF0_B cr7
+#define PICO_REGVECT_COEFF1_A cr8
+#define PICO_REGVECT_COEFF1_B cr9
+#define PICO_REGVECT_COEFF2_A cr10
+#define PICO_REGVECT_COEFF2_B cr11
+#define PICO_REGVECT_VMU0_OUT cr12
+#define PICO_REGVECT_VMU1_OUT cr13
+#define PICO_REGVECT_VMU2_OUT cr14
+#define PICO_REGVECT_CONFIG cr15
+
+/* The same registers as numeric indices (crN -> N), for use as the
+ * pico_reg argument of PICO_PUT_W/D and PICO_GET_W/D. */
+#define PICO_INPIX2 0
+#define PICO_INPIX1 1
+#define PICO_INPIX0 2
+#define PICO_OUTPIX2 3
+#define PICO_OUTPIX1 4
+#define PICO_OUTPIX0 5
+#define PICO_COEFF0_A 6
+#define PICO_COEFF0_B 7
+#define PICO_COEFF1_A 8
+#define PICO_COEFF1_B 9
+#define PICO_COEFF2_A 10
+#define PICO_COEFF2_B 11
+#define PICO_VMU0_OUT 12
+#define PICO_VMU1_OUT 13
+#define PICO_VMU2_OUT 14
+#define PICO_CONFIG 15
+
+/* Config Register: bit position of each field (used as a shift count when
+ * building the value written to PICO_CONFIG) and, where given, its width
+ * in bits. */
+#define PICO_COEFF_FRAC_BITS 0
+#define PICO_COEFF_FRAC_BITS_WIDTH 4
+#define PICO_OFFSET_FRAC_BITS 4
+#define PICO_OFFSET_FRAC_BITS_WIDTH 4
+#define PICO_INPUT_MODE 8
+#define PICO_INPUT_MODE_WIDTH 2
+#define PICO_OUTPUT_MODE 10
+
+/* Values for the PICO_INPUT_MODE field. */
+#define PICO_TRANSFORMATION_MODE 0
+#define PICO_HOR_FILTER_MODE 1
+#define PICO_VERT_FILTER_MODE 2
+
+/* Values for the PICO_OUTPUT_MODE field. */
+#define PICO_PLANAR_MODE 1
+#define PICO_PACKED_MODE 0
+
+/* Bits in coefficients */
+#define PICO_COEFF_BITS 12
+
+/* Operation bits; presumably OR-ed into the "op" argument of PICO_OP()
+ * (TODO confirm against the PICO documentation). */
+#define PICO_USE_ACC (1 << 2)
+#define PICO_SINGLE_VECTOR (1 << 3)
+
+/* Two-level stringification: arguments such as PICO_CPNO or a list of
+ * PICO_REGVECT_* names are macro-expanded before being turned into a
+ * string that is pasted into the asm template. */
+#define __str(x...) #x
+#define __xstr(x...) __str(x)
+
+/*
+ * NOTE: the statement-like macros below keep their historical trailing ';'
+ * so that existing call sites remain valid. As a consequence they must not
+ * be used as the unbraced body of an if/else.
+ */
+
+/* Move a word (_W) or doubleword (_D) between a CPU register and the PICO
+ * register with index pico_reg (PICO_INPIX0, PICO_CONFIG, ...). */
+#define PICO_PUT_W(pico_reg, x) \
+    __builtin_mvrc_w(PICO_CPNO, pico_reg, x);
+#define PICO_GET_W(pico_reg) \
+    __builtin_mvcr_w(PICO_CPNO, pico_reg)
+
+#define PICO_PUT_D(pico_reg, x) \
+    __builtin_mvrc_d(PICO_CPNO, pico_reg, x);
+#define PICO_GET_D(pico_reg) \
+    __builtin_mvcr_d(PICO_CPNO, pico_reg)
+
+/*
+ * Store the listed PICO registers (PICO_REGVECT_* names) to memory at ptr.
+ * The asm writes memory that is not described by any operand, hence the
+ * "memory" clobber: without it the compiler may cache or reorder accesses
+ * to the destination buffer around the store.
+ */
+#define PICO_STCM_W(ptr, pico_regs...) \
+    __asm__ volatile ("stcm.w\tcp" __xstr(PICO_CPNO) ", %0," __xstr(pico_regs) :: "r"(ptr) : "memory");
+#define PICO_STCM_D(ptr, pico_regs...) \
+    __asm__ volatile ("stcm.d\tcp" __xstr(PICO_CPNO) ", %0," __xstr(pico_regs) :: "r"(ptr) : "memory");
+
+/* As above, using the "--ptr" addressing form; ptr is a read-write operand
+ * and is updated by the instruction. */
+#define PICO_STCM_W_DEC(ptr, pico_regs...) \
+    __asm__ volatile ("stcm.w\tcp" __xstr(PICO_CPNO) ", --%0," __xstr(pico_regs) : "+r"(ptr) :: "memory");
+#define PICO_STCM_D_DEC(ptr, pico_regs...) \
+    __asm__ volatile ("stcm.d\tcp" __xstr(PICO_CPNO) ", --%0," __xstr(pico_regs) : "+r"(ptr) :: "memory");
+
+/*
+ * Load the listed PICO registers from memory at ptr. The "memory" clobber
+ * makes the compiler flush pending stores to the source object before the
+ * coprocessor reads it.
+ */
+#define PICO_LDCM_W(ptr, pico_regs...) \
+    __asm__ volatile ("ldcm.w\tcp" __xstr(PICO_CPNO) ", %0," __xstr(pico_regs) :: "r"(ptr) : "memory");
+#define PICO_LDCM_D(ptr, pico_regs...) \
+    __asm__ volatile ("ldcm.d\tcp" __xstr(PICO_CPNO) ", %0," __xstr(pico_regs) :: "r"(ptr) : "memory");
+
+/* As above, using the "ptr++" addressing form; ptr is a read-write operand
+ * and is updated by the instruction. */
+#define PICO_LDCM_W_INC(ptr, pico_regs...) \
+    __asm__ volatile ("ldcm.w\tcp" __xstr(PICO_CPNO) ", %0++," __xstr(pico_regs) : "+r"(ptr) :: "memory");
+#define PICO_LDCM_D_INC(ptr, pico_regs...) \
+    __asm__ volatile ("ldcm.d\tcp" __xstr(PICO_CPNO) ", %0++," __xstr(pico_regs) : "+r"(ptr) :: "memory");
+
+/* Issue one coprocessor operation. op and dst_addr are OR-ed into the last
+ * builtin argument; both are parenthesized so that expression arguments
+ * (e.g. a ?: or a shift) combine as the caller intended. */
+#define PICO_OP(op, dst_addr, addr0, addr1, addr2) \
+    __builtin_cop(PICO_CPNO, addr0, addr1, addr2, (op) | (dst_addr));
+
+#endif /* __PICO_H__ */
+
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index cf15742..c317a68 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -222,6 +222,9 @@ void altivec_yuv2packedX (SwsContext *c,
int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
uint8_t *dest, int dstW, int dstY);
+/**
+ * Select an AVR32-specific YUV to packed 24-bit RGB conversion function.
+ * @return a converter when c->dstFormat is PIX_FMT_BGR24 or PIX_FMT_RGB24,
+ *         NULL for every other destination format
+ */
+SwsFunc yuv2rgb_init_avr32 (SwsContext *c);
+/**
+ * Compute the fixed-point colour-conversion coefficients used by the AVR32
+ * PICO converter from the given colourspace parameters.
+ * @return 0
+ */
+int yuv2rgb_c_init_tables_avr32 (SwsContext *c, const int inv_table[4], int fullRange, int brightness, int contrast, int saturation);
+
const char *sws_format_name(int format);
//FIXME replace this with something faster
diff --git a/libswscale/yuv2rgb.c b/libswscale/yuv2rgb.c
index fe90a04..a8bec94 100644
--- a/libswscale/yuv2rgb.c
+++ b/libswscale/yuv2rgb.c
@@ -520,7 +520,12 @@ SwsFunc sws_yuv2rgb_get_func_ptr (SwsContext *c)
if (t) return t;
}
#endif
-
+#if ARCH_AVR32
+    /* Value test, like the ARCH_BFIN check below: a bare #ifdef would also
+     * be true when ARCH_AVR32 is defined as 0. */
+    {
+        SwsFunc t = yuv2rgb_init_avr32(c);
+        if (t) return t;
+    }
+#endif
#if ARCH_BFIN
if (c->flags & SWS_CPU_CAPS_BFIN)
{
@@ -613,6 +618,10 @@ int sws_yuv2rgb_c_init_tables (SwsContext *c, const int inv_table[4], int fullRa
//printf("%lld %lld %lld %lld %lld\n", cy, crv, cbu, cgu, cgv);
oy -= 256*brightness;
+#if ARCH_AVR32
+    /* Value test rather than #ifdef, so a definition of ARCH_AVR32 as 0
+     * keeps this call out of non-AVR32 builds. */
+    yuv2rgb_c_init_tables_avr32 (c, inv_table, fullRange, brightness, contrast, saturation);
+#endif
+
for (i = 0; i < 1024; i++) {
int j;
diff --git a/libswscale/yuv2rgb_avr32.c b/libswscale/yuv2rgb_avr32.c
new file mode 100644
index 0000000..7283e6f
--- /dev/null
+++ b/libswscale/yuv2rgb_avr32.c
@@ -0,0 +1,413 @@
+/*
+ * Copyright (c) 2007 Atmel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * 3. The name of ATMEL may not be used to endorse or promote products
+ * derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY ATMEL ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE EXPRESSLY AND SPECIFICALLY DISCLAIMED. IN NO EVENT SHALL ATMEL
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
+ * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+#include <libavutil/avutil.h>
+
+#include "swscale.h"
+#include "swscale_internal.h"
+#include "pico-avr32.h"
+
+/*
+ * Look up the chroma-dependent table pointers for one U/V sample pair.
+ *
+ * Expects the following names in the calling scope: c (SwsContext *),
+ * U and V (32-bit words holding four chroma bytes each) and r, g, b
+ * (outputs). uv_part is a string ("t", "u", "l" or "b" at the call sites)
+ * pasted after "Ri:" in the indexed load; presumably it selects which byte
+ * of U/V is used as the table index (TODO confirm against the AVR32
+ * instruction set reference).
+ *
+ * Result: r = table_rV[V], g = table_gU[U] + table_gV[V], b = table_bU[U].
+ * The macro expands to a complete statement including the trailing ';'.
+ */
+#define RGB(uv_part) \
+ __asm__ volatile ( \
+ "ld.w\t%0, %3[%7:" uv_part " << 2]\n\t" /* tmp = c->table_gV[V] */ \
+ "ld.w\t%1, %4[%8:" uv_part " << 2]\n\t" /* g = c->table_gU[U] */ \
+ "ld.w\t%2, %5[%8:" uv_part " << 2]\n\t" /* b = c->table_bU[U] */ \
+ "add\t%1, %0\n\t" /* g += tmp */\
+ "ld.w\t%0, %6[%7:" uv_part " << 2]" /* r = c->table_rV[V] */ \
+ : "=&r" (r), "=&r" (g), "=&r" (b) \
+ : "r" (&c->table_gV[0]), "r" (&c->table_gU[0]),"r" (&c->table_bU[0]), \
+ "r" (&c->table_rV[0]), "r" (V), "r" (U));
+
+/*
+ * Convert two horizontally adjacent luma samples into two packed 24-bit
+ * pixels, using the byte lookup tables r, g and b selected by RGB().
+ *
+ *   dst  output row pointer; bytes dst[6*idx .. 6*idx + 5] are written
+ *   src  luma row pointer;   bytes src[2*idx] and src[2*idx + 1] are read
+ *   y    scratch lvalue that receives the luma sample
+ *   idx  compile-time constant selecting the pixel pair within the block
+ *   r0,g0,b0 / r1,g1,b1
+ *        string literals giving the byte offset (within the 6 output bytes)
+ *        of each channel of the first / second pixel
+ *
+ * Expects r, g, b and tmp in the calling scope. The instruction order is
+ * the same for every channel layout; only the store offsets differ. The
+ * asm stores to dst, which is not an output operand, so it carries a
+ * "memory" clobber.
+ */
+#undef YUV2PACKED24
+#define YUV2PACKED24(dst, src, y, idx, r0, g0, b0, r1, g1, b1) \
+    { int tmp2; __asm__ volatile ( \
+        "ld.ub\t%0, %3[2*%8]\n\t"            /* y    = src[2*idx]      */ \
+        "ld.ub\t%1, %4[%0]\n\t"              /* tmp  = r[y]            */ \
+        "ld.ub\t%2, %5[%0]\n\t"              /* tmp2 = g[y]            */ \
+        "st.b\t%7[6*%8 + " r0 "], %1\n\t"    /* dst[6*idx + r0] = tmp  */ \
+        "st.b\t%7[6*%8 + " g0 "], %2\n\t"    /* dst[6*idx + g0] = tmp2 */ \
+        "ld.ub\t%1, %6[%0]\n\t"              /* tmp  = b[y]            */ \
+        "ld.ub\t%0, %3[2*%8 + 1]\n\t"        /* y    = src[2*idx + 1]  */ \
+        "st.b\t%7[6*%8 + " b0 "], %1\n\t"    /* dst[6*idx + b0] = tmp  */ \
+        "ld.ub\t%1, %4[%0]\n\t"              /* tmp  = r[y]            */ \
+        "ld.ub\t%2, %5[%0]\n\t"              /* tmp2 = g[y]            */ \
+        "st.b\t%7[6*%8 + " r1 "], %1\n\t"    /* dst[6*idx + r1] = tmp  */ \
+        "ld.ub\t%1, %6[%0]\n\t"              /* tmp  = b[y]            */ \
+        "st.b\t%7[6*%8 + " g1 "], %2\n\t"    /* dst[6*idx + g1] = tmp2 */ \
+        "st.b\t%7[6*%8 + " b1 "], %1"        /* dst[6*idx + b1] = tmp  */ \
+        : "=&r" (y), "=&r" (tmp), "=&r" (tmp2) \
+        : "r" (src), "r" (r), "r" (g), "r" (b), "r" (dst), "i" (idx) \
+        : "memory"); }
+
+/* Byte order R,G,B,R,G,B. The "1" and "2" variants are identical; both
+ * names are kept because the conversion loops use them alternately. */
+#undef YUV2RGB1
+#define YUV2RGB1(dst, src, y, idx) \
+    YUV2PACKED24(dst, src, y, idx, "0", "1", "2", "3", "4", "5")
+
+#undef YUV2RGB2
+#define YUV2RGB2(dst, src, y, idx) \
+    YUV2PACKED24(dst, src, y, idx, "0", "1", "2", "3", "4", "5")
+
+
+/* Byte order B,G,R,B,G,R. */
+#undef YUV2BGR1
+#define YUV2BGR1(dst, src, y, idx) \
+    YUV2PACKED24(dst, src, y, idx, "2", "1", "0", "5", "4", "3")
+
+#undef YUV2BGR2
+#define YUV2BGR2(dst, src, y, idx) \
+    YUV2PACKED24(dst, src, y, idx, "2", "1", "0", "5", "4", "3")
+
+/**
+ * Convert a slice of planar YUV to packed BGR24 using the lookup tables
+ * stored in the context (table_rV, table_gU, table_gV, table_bU).
+ *
+ * Two output rows are produced per outer iteration and share one chroma
+ * row. For PIX_FMT_YUV422P input the chroma strides are doubled in place
+ * (the caller's srcStride[] is modified), so that every second chroma row
+ * is used. Each inner iteration converts 8 pixels per row and runs
+ * c->dstW >> 3 times, so pixels beyond the last multiple of 8 are not
+ * written.
+ *
+ * NOTE(review): the chroma planes are read through uint32_t pointers; this
+ * presumably relies on word-aligned chroma rows -- confirm.
+ * NOTE(review): this function has external linkage but no prototype is
+ * visible here, unlike its static siblings -- confirm whether it is
+ * referenced from another file.
+ *
+ * @return srcSliceH
+ */
+int yuv2bgr24_avr32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+                    int srcSliceH, uint8_t* dst[], int dstStride[]){
+    int y;
+
+    if(c->srcFormat == PIX_FMT_YUV422P){
+        srcStride[1] *= 2;
+        srcStride[2] *= 2;
+    }
+
+    for(y=0; y<srcSliceH; y+=2){
+        uint8_t *dst_1= (uint8_t*)(dst[0] + (y+srcSliceY  )*dstStride[0]);
+        uint8_t *dst_2= (uint8_t*)(dst[0] + (y+srcSliceY+1)*dstStride[0]);
+        /* Set by RGB(); the YUV2BGR* macros index them per luma byte with
+         * byte loads, so they are byte tables (same type as in
+         * yuv2rgb24_avr32). */
+        uint8_t *r, *g, *b;
+        uint8_t *py_1= src[0] + y*srcStride[0];
+        uint8_t *py_2= py_1 + srcStride[0];
+        uint8_t *pu= src[1] + (y>>1)*srcStride[1];
+        uint8_t *pv= src[2] + (y>>1)*srcStride[2];
+        unsigned int h_size= c->dstW>>3;
+        while (h_size--) {
+            /* tmp is scratch storage used by the YUV2BGR* macros. */
+            uint32_t U, V, Y1, Y2, tmp;
+            /* Four chroma samples at once; RGB() picks one byte per call. */
+            U = ((uint32_t*)pu)[0];
+            V = ((uint32_t*)pv)[0];
+
+            /* Each chroma sample serves two pixels on each of the two rows. */
+            RGB("t")
+            YUV2BGR1(dst_1, py_1, Y1, 0)
+            YUV2BGR1(dst_2, py_2, Y2, 0)
+
+            RGB("u")
+            YUV2BGR2(dst_1, py_1, Y1, 1)
+            YUV2BGR2(dst_2, py_2, Y2, 1)
+
+            RGB("l")
+            YUV2BGR1(dst_1, py_1, Y1, 2)
+            YUV2BGR1(dst_2, py_2, Y2, 2)
+
+            RGB("b")
+            YUV2BGR2(dst_1, py_1, Y1, 3)
+            YUV2BGR2(dst_2, py_2, Y2, 3)
+
+            /* Advance by 4 chroma samples, 8 luma samples, 8 * 3 output bytes. */
+            pu += 4;
+            pv += 4;
+            py_1 += 8;
+            py_2 += 8;
+            dst_1 += 24;
+            dst_2 += 24;
+        }
+    }
+    return srcSliceH;
+}
+
+
+
+/**
+ * Table-driven conversion of a planar YUV slice to packed RGB24.
+ *
+ * Rows are handled in pairs that share one chroma row; for
+ * PIX_FMT_YUV422P the chroma strides in srcStride[] are doubled in place.
+ * The inner loop converts blocks of 8 pixels per row, c->dstW >> 3 times.
+ *
+ * @return srcSliceH
+ */
+static int yuv2rgb24_avr32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+                           int srcSliceH, uint8_t* dst[], int dstStride[]){
+    int row;
+
+    if (c->srcFormat == PIX_FMT_YUV422P) {
+        srcStride[1] = srcStride[1] * 2;
+        srcStride[2] = srcStride[2] * 2;
+    }
+
+    for (row = 0; row < srcSliceH; row += 2) {
+        uint8_t *luma_top = src[0] + row*srcStride[0];
+        uint8_t *luma_bot = luma_top + srcStride[0];
+        uint8_t *cb       = src[1] + (row>>1)*srcStride[1];
+        uint8_t *cr       = src[2] + (row>>1)*srcStride[2];
+        uint8_t *out_top  = (uint8_t*)(dst[0] + (row+srcSliceY  )*dstStride[0]);
+        uint8_t *out_bot  = (uint8_t*)(dst[0] + (row+srcSliceY+1)*dstStride[0]);
+        /* r, g, b, U, V and tmp are referenced by name inside the RGB()
+         * and YUV2RGB*() macros and therefore keep these exact names. */
+        uint8_t *r, *g, *b;
+        unsigned int blocks;
+
+        for (blocks = c->dstW>>3; blocks != 0; blocks--) {
+            uint32_t U, V, Ya, Yb, tmp;
+
+            /* Fetch four U and four V samples in one word each. */
+            U = *(uint32_t*)cb;
+            V = *(uint32_t*)cr;
+
+            RGB("t")
+            YUV2RGB1(out_top, luma_top, Ya, 0)
+            YUV2RGB1(out_bot, luma_bot, Yb, 0)
+
+            RGB("u")
+            YUV2RGB2(out_top, luma_top, Ya, 1)
+            YUV2RGB2(out_bot, luma_bot, Yb, 1)
+
+            RGB("l")
+            YUV2RGB1(out_top, luma_top, Ya, 2)
+            YUV2RGB1(out_bot, luma_bot, Yb, 2)
+
+            RGB("b")
+            YUV2RGB2(out_top, luma_top, Ya, 3)
+            YUV2RGB2(out_bot, luma_bot, Yb, 3)
+
+            /* Next block: 4 chroma bytes, 8 luma bytes, 24 output bytes. */
+            cb       += 4;
+            cr       += 4;
+            luma_top += 8;
+            luma_bot += 8;
+            out_top  += 24;
+            out_bot  += 24;
+        }
+    }
+    return srcSliceH;
+}
+
+/* Right shift by bits with rounding to nearest: (x + 2^(bits-1)) >> bits.
+ * bits must be at least 1. Every use of an argument is parenthesized so
+ * that expression arguments are evaluated as a unit. */
+#define SCALE(x, bits) (((x) + (1 << ((bits) - 1))) >> (bits))
+/* Number of fractional bits of the coefficients and of the offsets handed
+ * to the PICO; the same values are written to its config register. */
+#define COEFF_FRAC_BITS 9
+#define OFFSET_FRAC_BITS 2
+
+/*
+ * Coefficients used in the pico.
+ *
+ * Filled in by yuv2rgb_c_init_tables_avr32() and loaded as one block into
+ * the COEFF0_A .. COEFF2_B registers by PICO_LDCM_D() in
+ * yuv2bgr24_avr32_pico(). The member order is therefore presumably dictated
+ * by the coprocessor register layout and should not be rearranged
+ * (TODO confirm against the PICO documentation).
+ *
+ * For each output component N: coeffN_0, coeffN_1 and coeffN_2 are the
+ * gains applied to Y, U and V, and coeffN_3 is the constant offset.
+ *
+ * NOTE(review): this is a single file-scope object, so every SwsContext
+ * shares the most recently computed coefficient set.
+ */
+static struct {
+ short coeff2_2;
+ short coeff2_3;
+ short coeff2_0;
+ short coeff2_1;
+ short coeff1_2;
+ short coeff1_3;
+ short coeff1_0;
+ short coeff1_1;
+ short coeff0_2;
+ short coeff0_3;
+ short coeff0_0;
+ short coeff0_1;
+} pico_coeff;
+
+
+/**
+ * Convert a slice of planar YUV to packed 24-bit RGB/BGR with the PICO
+ * pixel coprocessor.
+ *
+ * The output channel order is not chosen here: it follows from the
+ * contents of pico_coeff, which yuv2rgb_c_init_tables_avr32() fills
+ * differently for PIX_FMT_RGB24 and for other destination formats. That is
+ * why yuv2rgb_init_avr32() returns this one function for both formats.
+ *
+ * Rows are processed in pairs sharing one chroma row; for PIX_FMT_YUV422P
+ * the chroma strides in srcStride[] are doubled in place. The inner loop
+ * runs c->dstW >> 3 times and handles 8 pixels per row each time.
+ *
+ * @return srcSliceH
+ */
+static int yuv2bgr24_avr32_pico(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+ int srcSliceH, uint8_t* dst[], int dstStride[]){
+ int y;
+
+ /* Initialize pico: load the six coefficient registers from pico_coeff,
+ * then select packed output, transformation mode and the fixed-point
+ * formats used when the coefficients were computed. */
+ PICO_LDCM_D(&pico_coeff,
+ PICO_REGVECT_COEFF0_A, PICO_REGVECT_COEFF0_B,
+ PICO_REGVECT_COEFF1_A, PICO_REGVECT_COEFF1_B,
+ PICO_REGVECT_COEFF2_A, PICO_REGVECT_COEFF2_B);
+
+ PICO_PUT_W(PICO_CONFIG,
+ (PICO_PACKED_MODE << PICO_OUTPUT_MODE
+ | PICO_TRANSFORMATION_MODE << PICO_INPUT_MODE
+ | OFFSET_FRAC_BITS << PICO_OFFSET_FRAC_BITS
+ | COEFF_FRAC_BITS << PICO_COEFF_FRAC_BITS));
+
+
+ if(c->srcFormat == PIX_FMT_YUV422P){
+ srcStride[1] *= 2;
+ srcStride[2] *= 2;
+ }
+
+ for(y=0; y<srcSliceH; y+=2){
+ uint8_t *dst_1= (uint8_t*)(dst[0] + (y+srcSliceY )*dstStride[0]);
+ uint8_t *dst_2= (uint8_t*)(dst[0] + (y+srcSliceY+1)*dstStride[0]);
+ uint8_t *py_1= src[0] + y*srcStride[0];
+ uint8_t *py_2= py_1 + srcStride[0];
+ uint8_t *pu= src[1] + (y>>1)*srcStride[1];
+ uint8_t *pv= src[2] + (y>>1)*srcStride[2];
+ unsigned int h_size= c->dstW>>3;
+ /* The planes are fed to the coprocessor one 32-bit word (four samples)
+ * at a time. NOTE(review): presumably relies on word-aligned rows --
+ * confirm. */
+ int *py_1_int = (int *)py_1;
+ int *py_2_int = (int *)py_2;
+ int *pu_int = (int *)pu;
+ int *pv_int = (int *)pv;
+ while (h_size--) {
+ /* Top row, first four pixels: four luma, four U and four V samples. */
+ PICO_PUT_W(PICO_INPIX0, *py_1_int++);
+ PICO_PUT_W(PICO_INPIX1, *pu_int++);
+ PICO_PUT_W(PICO_INPIX2, *pv_int++);
+ /* One operation per output pixel. The last three arguments presumably
+ * address the Y, U and V bytes within INPIX0..2; each U/V byte is used
+ * for two consecutive pixels (TODO confirm against the PICO manual). */
+ PICO_OP(0, 0, 0, 4, 8);
+ PICO_OP(0, 1, 1, 4, 8);
+ PICO_OP(0, 2, 2, 5, 9);
+ PICO_OP(0, 3, 3, 5, 9);
+ /* The next luma word is written before the results are stored. The
+ * statement order is kept exactly as written because the coprocessor
+ * sequence is order-dependent. */
+ PICO_PUT_W(PICO_INPIX0, *py_1_int++);
+ /* Three output words = 12 bytes = four 24-bit pixels. */
+ PICO_STCM_W(dst_1, PICO_REGVECT_OUTPIX2, PICO_REGVECT_OUTPIX1, PICO_REGVECT_OUTPIX0);
+ /* Top row, pixels 4..7: second half of the same chroma words. */
+ PICO_OP(0, 0, 0, 6, 10);
+ PICO_OP(0, 1, 1, 6, 10);
+ PICO_OP(0, 2, 2, 7, 11);
+ PICO_OP(0, 3, 3, 7, 11);
+ PICO_PUT_W(PICO_INPIX0, *py_2_int++);
+ PICO_STCM_W(dst_1 + 12, PICO_REGVECT_OUTPIX2, PICO_REGVECT_OUTPIX1, PICO_REGVECT_OUTPIX0);
+
+ /* Bottom row: same chroma words, luma taken from the second row. */
+ PICO_OP(0, 0, 0, 4, 8);
+ PICO_OP(0, 1, 1, 4, 8);
+ PICO_OP(0, 2, 2, 5, 9);
+ PICO_OP(0, 3, 3, 5, 9);
+ PICO_PUT_W(PICO_INPIX0, *py_2_int++);
+ PICO_STCM_W(dst_2, PICO_REGVECT_OUTPIX2, PICO_REGVECT_OUTPIX1, PICO_REGVECT_OUTPIX0);
+ PICO_OP(0, 0, 0, 6, 10);
+ PICO_OP(0, 1, 1, 6, 10);
+ PICO_OP(0, 2, 2, 7, 11);
+ PICO_OP(0, 3, 3, 7, 11);
+ PICO_STCM_W(dst_2 + 12, PICO_REGVECT_OUTPIX2, PICO_REGVECT_OUTPIX1, PICO_REGVECT_OUTPIX0);
+
+ /* 8 pixels * 3 bytes written on each row. */
+ dst_1 += 24;
+ dst_2 += 24;
+ }
+ }
+ return srcSliceH;
+}
+
+/* Run-time switch selecting the PICO converter; defined outside this file. */
+extern int avr32_use_pico;
+
+/**
+ * Pick the AVR32 converter for the destination format of the context.
+ *
+ * With avr32_use_pico set, both BGR24 and RGB24 use yuv2bgr24_avr32_pico():
+ * the channel order of that converter is decided by the coefficient set
+ * computed in yuv2rgb_c_init_tables_avr32(), not by the function itself.
+ *
+ * @return the selected converter, or NULL when the destination format is
+ *         neither PIX_FMT_BGR24 nor PIX_FMT_RGB24
+ */
+SwsFunc yuv2rgb_init_avr32 (SwsContext *c){
+    if (c->dstFormat == PIX_FMT_BGR24) {
+        if (!avr32_use_pico) {
+            av_log(c, AV_LOG_INFO, "AVR32 BGR24: Using optimized color space conversion\n");
+            return yuv2bgr24_avr32;
+        }
+        av_log(c, AV_LOG_INFO, "AVR32 BGR24: Using PICO for color space conversion\n");
+        return yuv2bgr24_avr32_pico;
+    }
+
+    if (c->dstFormat == PIX_FMT_RGB24) {
+        if (!avr32_use_pico) {
+            av_log(c, AV_LOG_INFO, "AVR32 RGB24: Using optimized color space conversion\n");
+            return yuv2rgb24_avr32;
+        }
+        av_log(c, AV_LOG_INFO, "AVR32 RGB24: Using PICO for color space conversion\n");
+        return yuv2bgr24_avr32_pico;
+    }
+
+    return NULL;
+}
+
+
+/**
+ * Compute the PICO colour-conversion coefficients (pico_coeff).
+ *
+ * @param c          context; only c->dstFormat is read, to decide which
+ *                   coefficient rows receive the R and the B equations
+ * @param inv_table  chroma coefficients in 16.16 fixed point:
+ *                   [0] V->R, [1] U->B, [2] U->G, [3] V->G (the two green
+ *                   terms are negated here)
+ * @param fullRange  non-zero: luma is used as is; zero: luma is rescaled by
+ *                   255/219 and offset by 16
+ * @param brightness luma offset, 16.16 fixed point
+ * @param contrast   gain applied to all coefficients, 16.16 fixed point
+ * @param saturation additional gain on the chroma coefficients, 16.16
+ * @return 0
+ *
+ * The luma offset oy (range offset minus brightness) is folded into the
+ * constant term of every output component. For limited-range input with
+ * zero brightness this equals the previously hard-coded 16*cy; unlike the
+ * hard-coded value it is also correct for full-range input and honours the
+ * brightness setting.
+ */
+int yuv2rgb_c_init_tables_avr32 (SwsContext *c, const int inv_table[4], int fullRange, int brightness, int contrast, int saturation){
+    const int isRgb = (c->dstFormat == PIX_FMT_RGB24);
+
+    int64_t crv =  inv_table[0];
+    int64_t cbu =  inv_table[1];
+    int64_t cgu = -inv_table[2];
+    int64_t cgv = -inv_table[3];
+    int64_t cy  = 1<<16;
+    int64_t oy  = 0;
+    int64_t y_off;
+    short y_gain, r_gain_v, b_gain_u, r_off, b_off;
+
+    if(!fullRange){
+        cy = (cy*255) / 219;
+        oy = 16<<16;
+    }
+
+    cy  = (cy *contrast             )>>16;
+    crv = (crv*contrast * saturation)>>32;
+    cbu = (cbu*contrast * saturation)>>32;
+    cgu = (cgu*contrast * saturation)>>32;
+    cgv = (cgv*contrast * saturation)>>32;
+
+    oy -= 256*brightness;
+
+    /* Luma offset scaled by the luma gain, in the same 16.16 format as the
+     * 128*chroma terms below: (16<<16)*cy >> 16 == 16*cy by default. */
+    y_off = (cy*oy) >> 16;
+
+    y_gain = SCALE(cy, (16 - COEFF_FRAC_BITS));
+
+    /* Green always lives in coefficient row 1. The chroma samples are
+     * centred on 128, hence the -128*coefficient terms in the offsets; the
+     * final (1 << (OFFSET_FRAC_BITS-1)) adds 0.5 for rounding. */
+    pico_coeff.coeff1_0 = y_gain;                                 /* G <- Y */
+    pico_coeff.coeff1_1 = SCALE(cgu, (16 - COEFF_FRAC_BITS));     /* G <- U */
+    pico_coeff.coeff1_2 = SCALE(cgv, (16 - COEFF_FRAC_BITS));     /* G <- V */
+    pico_coeff.coeff1_3 = (SCALE((-128*cgu - 128*cgv - y_off), (16 - OFFSET_FRAC_BITS))
+                           + /*0.5*/(1 << (OFFSET_FRAC_BITS-1))); /* G offset */
+
+    r_gain_v = SCALE(crv, (16 - COEFF_FRAC_BITS));
+    r_off    = (SCALE((-128*crv - y_off), (16 - OFFSET_FRAC_BITS))
+                + /*0.5*/(1 << (OFFSET_FRAC_BITS-1)));
+    b_gain_u = SCALE(cbu, (16 - COEFF_FRAC_BITS));
+    b_off    = (SCALE((-128*cbu - y_off), (16 - OFFSET_FRAC_BITS))
+                + /*0.5*/(1 << (OFFSET_FRAC_BITS-1)));
+
+    /* Red and blue swap between rows 0 and 2 depending on the byte order
+     * of the destination format. */
+    if ( isRgb ){
+        pico_coeff.coeff0_0 = y_gain;    /* R <- Y */
+        pico_coeff.coeff0_1 = 0;         /* R <- U */
+        pico_coeff.coeff0_2 = r_gain_v;  /* R <- V */
+        pico_coeff.coeff0_3 = r_off;     /* R offset */
+
+        pico_coeff.coeff2_0 = y_gain;    /* B <- Y */
+        pico_coeff.coeff2_1 = b_gain_u;  /* B <- U */
+        pico_coeff.coeff2_2 = 0;         /* B <- V */
+        pico_coeff.coeff2_3 = b_off;     /* B offset */
+    } else {
+        pico_coeff.coeff2_0 = y_gain;    /* R <- Y */
+        pico_coeff.coeff2_1 = 0;         /* R <- U */
+        pico_coeff.coeff2_2 = r_gain_v;  /* R <- V */
+        pico_coeff.coeff2_3 = r_off;     /* R offset */
+
+        pico_coeff.coeff0_0 = y_gain;    /* B <- Y */
+        pico_coeff.coeff0_1 = b_gain_u;  /* B <- U */
+        pico_coeff.coeff0_2 = 0;         /* B <- V */
+        pico_coeff.coeff0_3 = b_off;     /* B offset */
+    }
+
+    return 0;
+}
+
+#undef RGB
+
--
1.5.6.3
More information about the MPlayer-dev-eng
mailing list