[FFmpeg-cvslog] swscale: [LA] Optimize swscale funcs in input.c
Shiyou Yin
git at videolan.org
Fri Apr 12 00:57:07 EEST 2024
ffmpeg | branch: master | Shiyou Yin <yinshiyou-hf at loongson.cn> | Sat Mar 16 11:03:33 2024 +0800| [2a7d622ddd0394f20de06b5f1da2f3c3cbc90f6f] | committer: Michael Niedermayer
swscale: [LA] Optimize swscale funcs in input.c
Optimized 7 funcs with LSX and LASX:
1. yuy2ToUV_c
2. yvy2ToUV_c
3. uyvyToUV_c
4. nv12ToUV_c
5. nv21ToUV_c
6. abgrToA_c
7. rgbaToA_c
Reviewed-by: colleague of Shiyou Yin
Signed-off-by: Michael Niedermayer <michael at niedermayer.cc>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=2a7d622ddd0394f20de06b5f1da2f3c3cbc90f6f
---
libswscale/loongarch/Makefile | 1 +
libswscale/loongarch/input.S | 495 ++++++++++++++++++++++++++
libswscale/loongarch/input_lasx.c | 43 +++
libswscale/loongarch/input_lsx.c | 65 ++++
libswscale/loongarch/swscale_init_loongarch.c | 20 +-
libswscale/loongarch/swscale_loongarch.h | 46 +++
6 files changed, 652 insertions(+), 18 deletions(-)
diff --git a/libswscale/loongarch/Makefile b/libswscale/loongarch/Makefile
index c35ba309a4..7ba11d492e 100644
--- a/libswscale/loongarch/Makefile
+++ b/libswscale/loongarch/Makefile
@@ -9,4 +9,5 @@ LSX-OBJS-$(CONFIG_SWSCALE) += loongarch/swscale.o \
loongarch/input.o \
loongarch/output.o \
loongarch/output_lsx.o \
+ loongarch/input_lsx.o \
loongarch/yuv2rgb_lsx.o
diff --git a/libswscale/loongarch/input.S b/libswscale/loongarch/input.S
index d01f7384b1..717592b004 100644
--- a/libswscale/loongarch/input.S
+++ b/libswscale/loongarch/input.S
@@ -283,3 +283,498 @@ function planar_rgb_to_uv_lsx
ld.d s3, sp, 16
addi.d sp, sp, 24
endfunc
+
+/*
+ * void yuy2ToUV_lsx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+ * const uint8_t *src2, int width, uint32_t *unused, void *opq)
+ */
+function yuy2ToUV_lsx /* Split the chroma of packed YUYV (Y U Y V) into dstU (a0) and dstV (a1); a3 = src1, a5 = width in chroma samples. */
+ andi t0, a5, 7 /* t0 = width % 8, left for the scalar tail */
+ srli.d a5, a5, 3 /* a5 = number of 8-sample vector passes */
+ beqz a5, 2f
+1:
+ vld vr0, a3, 1 /* load from byte 1 so the even bytes are chroma: U V U V ... */
+ vld vr1, a3, 17 /* reads through a3+32, one byte past the 32 consumed this pass */
+ addi.d a5, a5, -1
+ addi.d a3, a3, 32 /* 8 chroma pairs = 32 source bytes */
+ vpickev.b vr2, vr1, vr0 /* drop the luma bytes: vr2 = U0 V0 ... U7 V7 */
+ vpickev.b vr0, vr2, vr2 /* even bytes -> U0..U7 */
+ vpickod.b vr1, vr2, vr2 /* odd bytes -> V0..V7 */
+ fst.d f0, a0, 0 /* low 8 bytes of vr0 -> dstU */
+ fst.d f1, a1, 0 /* low 8 bytes of vr1 -> dstV */
+ addi.d a0, a0, 8
+ addi.d a1, a1, 8
+ bnez a5, 1b
+2:
+ beqz t0, 4f
+3: /* scalar tail: one U/V pair per pass */
+ ld.b t1, a3, 1 /* U */
+ ld.b t2, a3, 3 /* V */
+ addi.d a3, a3, 4
+ addi.d t0, t0, -1
+ st.b t1, a0, 0
+ st.b t2, a1, 0
+ addi.d a0, a0, 1
+ addi.d a1, a1, 1
+ bnez t0, 3b
+4:
+endfunc
+
+function yuy2ToUV_lasx /* 256-bit variant of the YUYV chroma split: 16 U/V pairs per vector pass; a0 = dstU, a1 = dstV, a3 = src1, a5 = width. */
+ andi t0, a5, 15 /* t0 = width % 16, left for the scalar tail */
+ srli.d a5, a5, 4 /* a5 = number of 16-sample vector passes */
+ beqz a5, 2f
+1:
+ xvld xr0, a3, 1 /* load from byte 1 so the even bytes are chroma: U V U V ... */
+ xvld xr1, a3, 33 /* reads through a3+64, one byte past the 64 consumed this pass */
+ addi.d a5, a5, -1
+ addi.d a3, a3, 64 /* 16 chroma pairs = 64 source bytes */
+ xvpickev.b xr2, xr1, xr0 /* drop luma; operates per 128-bit lane, so the 64-bit groups come out interleaved */
+ xvpermi.d xr2, xr2, 0xd8 /* swap the two middle 64-bit elements -> U0 V0 ... U15 V15 in order */
+ xvpickev.b xr0, xr2, xr2 /* U bytes, 8 per lane */
+ xvpermi.d xr0, xr0, 0xd8 /* gather U0..U15 into the low 128 bits */
+ xvpickod.b xr1, xr2, xr2 /* V bytes, 8 per lane */
+ xvpermi.d xr1, xr1, 0xd8 /* gather V0..V15 into the low 128 bits */
+ vst vr0, a0, 0 /* low 128 bits of xr0: 16 U bytes */
+ vst vr1, a1, 0 /* low 128 bits of xr1: 16 V bytes */
+ addi.d a0, a0, 16
+ addi.d a1, a1, 16
+ bnez a5, 1b
+2:
+ beqz t0, 4f
+3: /* scalar tail: one U/V pair per pass */
+ ld.b t1, a3, 1 /* U */
+ ld.b t2, a3, 3 /* V */
+ addi.d a3, a3, 4
+ addi.d t0, t0, -1
+ st.b t1, a0, 0
+ st.b t2, a1, 0
+ addi.d a0, a0, 1
+ addi.d a1, a1, 1
+ bnez t0, 3b
+4:
+endfunc
+
+/*
+ * void yvy2ToUV_lsx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+ * const uint8_t *src2, int width, uint32_t *unused, void *opq)
+ */
+function yvy2ToUV_lsx /* Split the chroma of packed YVYU (Y V Y U): same data flow as yuy2ToUV_lsx with the two destinations swapped. */
+ andi t0, a5, 7 /* t0 = width % 8, left for the scalar tail */
+ srli.d a5, a5, 3 /* a5 = number of 8-sample vector passes */
+ beqz a5, 2f
+1:
+ vld vr0, a3, 1 /* load from byte 1 so the even bytes are chroma: V U V U ... */
+ vld vr1, a3, 17 /* reads through a3+32, one byte past the 32 consumed this pass */
+ addi.d a5, a5, -1
+ addi.d a3, a3, 32
+ vpickev.b vr2, vr1, vr0 /* drop the luma bytes: vr2 = V0 U0 ... V7 U7 */
+ vpickev.b vr0, vr2, vr2 /* even bytes -> V0..V7 */
+ vpickod.b vr1, vr2, vr2 /* odd bytes -> U0..U7 */
+ fst.d f0, a1, 0 /* V -> dstV (a1) */
+ fst.d f1, a0, 0 /* U -> dstU (a0) */
+ addi.d a0, a0, 8
+ addi.d a1, a1, 8
+ bnez a5, 1b
+2:
+ beqz t0, 4f
+3: /* scalar tail: one V/U pair per pass */
+ ld.b t1, a3, 1 /* V */
+ ld.b t2, a3, 3 /* U */
+ addi.d a3, a3, 4
+ addi.d t0, t0, -1
+ st.b t1, a1, 0
+ st.b t2, a0, 0
+ addi.d a0, a0, 1
+ addi.d a1, a1, 1
+ bnez t0, 3b
+4:
+endfunc
+
+function yvy2ToUV_lasx /* 256-bit variant of the YVYU chroma split: 16 V/U pairs per vector pass; a0 = dstU, a1 = dstV, a3 = src1, a5 = width. */
+ andi t0, a5, 15 /* t0 = width % 16, left for the scalar tail */
+ srli.d a5, a5, 4 /* a5 = number of 16-sample vector passes */
+ beqz a5, 2f
+1:
+ xvld xr0, a3, 1 /* load from byte 1 so the even bytes are chroma: V U V U ... */
+ xvld xr1, a3, 33 /* reads through a3+64, one byte past the 64 consumed this pass */
+ addi.d a5, a5, -1
+ addi.d a3, a3, 64
+ xvpickev.b xr2, xr1, xr0 /* drop luma; operates per 128-bit lane, so the 64-bit groups come out interleaved */
+ xvpermi.d xr2, xr2, 0xd8 /* swap the two middle 64-bit elements -> V0 U0 ... V15 U15 in order */
+ xvpickev.b xr0, xr2, xr2 /* V bytes, 8 per lane */
+ xvpermi.d xr0, xr0, 0xd8 /* gather V0..V15 into the low 128 bits */
+ xvpickod.b xr1, xr2, xr2 /* U bytes, 8 per lane */
+ xvpermi.d xr1, xr1, 0xd8 /* gather U0..U15 into the low 128 bits */
+ vst vr0, a1, 0 /* V -> dstV (a1) */
+ vst vr1, a0, 0 /* U -> dstU (a0) */
+ addi.d a0, a0, 16
+ addi.d a1, a1, 16
+ bnez a5, 1b
+2:
+ beqz t0, 4f
+3: /* scalar tail: one V/U pair per pass */
+ ld.b t1, a3, 1 /* V */
+ ld.b t2, a3, 3 /* U */
+ addi.d a3, a3, 4
+ addi.d t0, t0, -1
+ st.b t1, a1, 0
+ st.b t2, a0, 0
+ addi.d a0, a0, 1
+ addi.d a1, a1, 1
+ bnez t0, 3b
+4:
+endfunc
+
+/*
+ * void uyvyToUV_lsx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+ * const uint8_t *src2, int width, uint32_t *unused, void *opq)
+ */
+function uyvyToUV_lsx /* Split the chroma of packed UYVY (U Y V Y) into dstU (a0) and dstV (a1); a3 = src1, a5 = width in chroma samples. */
+ andi t0, a5, 7 /* t0 = width % 8, left for the scalar tail */
+ srli.d a5, a5, 3 /* a5 = number of 8-sample vector passes */
+ beqz a5, 2f
+1:
+ vld vr0, a3, 0 /* chroma sits on the even bytes: U Y V Y ... */
+ vld vr1, a3, 16
+ addi.d a5, a5, -1
+ addi.d a3, a3, 32 /* 8 chroma pairs = 32 source bytes */
+ vpickev.b vr2, vr1, vr0 /* drop the luma bytes: vr2 = U0 V0 ... U7 V7 */
+ vpickev.b vr0, vr2, vr2 /* even bytes -> U0..U7 */
+ vpickod.b vr1, vr2, vr2 /* odd bytes -> V0..V7 */
+ fst.d f0, a0, 0 /* low 8 bytes of vr0 -> dstU */
+ fst.d f1, a1, 0 /* low 8 bytes of vr1 -> dstV */
+ addi.d a0, a0, 8
+ addi.d a1, a1, 8
+ bnez a5, 1b
+2:
+ beqz t0, 4f
+3: /* scalar tail: one U/V pair per pass */
+ ld.b t1, a3, 0 /* U is byte 0 of each 4-byte group (bytes 1 and 3 are luma) */
+ ld.b t2, a3, 2 /* V is byte 2 */
+ addi.d a3, a3, 4
+ addi.d t0, t0, -1
+ st.b t1, a0, 0
+ st.b t2, a1, 0
+ addi.d a0, a0, 1
+ addi.d a1, a1, 1
+ bnez t0, 3b
+4:
+endfunc
+
+function uyvyToUV_lasx /* 256-bit variant of the UYVY chroma split: 16 U/V pairs per vector pass; a0 = dstU, a1 = dstV, a3 = src1, a5 = width. */
+ andi t0, a5, 15 /* t0 = width % 16, left for the scalar tail */
+ srli.d a5, a5, 4 /* a5 = number of 16-sample vector passes */
+ beqz a5, 2f
+1:
+ xvld xr0, a3, 0 /* chroma sits on the even bytes: U Y V Y ... */
+ xvld xr1, a3, 32
+ addi.d a5, a5, -1
+ addi.d a3, a3, 64 /* 16 chroma pairs = 64 source bytes */
+ xvpickev.b xr2, xr1, xr0 /* drop luma; operates per 128-bit lane, so the 64-bit groups come out interleaved */
+ xvpermi.d xr2, xr2, 0xd8 /* swap the two middle 64-bit elements -> U0 V0 ... U15 V15 in order */
+ xvpickev.b xr0, xr2, xr2 /* U bytes, 8 per lane */
+ xvpermi.d xr0, xr0, 0xd8 /* gather U0..U15 into the low 128 bits */
+ xvpickod.b xr1, xr2, xr2 /* V bytes, 8 per lane */
+ xvpermi.d xr1, xr1, 0xd8 /* gather V0..V15 into the low 128 bits */
+ vst vr0, a0, 0 /* low 128 bits of xr0: 16 U bytes */
+ vst vr1, a1, 0 /* low 128 bits of xr1: 16 V bytes */
+ addi.d a0, a0, 16
+ addi.d a1, a1, 16
+ bnez a5, 1b
+2:
+ beqz t0, 4f
+3: /* scalar tail: one U/V pair per pass */
+ ld.b t1, a3, 0 /* U is byte 0 of each 4-byte group (bytes 1 and 3 are luma) */
+ ld.b t2, a3, 2 /* V is byte 2 */
+ addi.d a3, a3, 4
+ addi.d t0, t0, -1
+ st.b t1, a0, 0
+ st.b t2, a1, 0
+ addi.d a0, a0, 1
+ addi.d a1, a1, 1
+ bnez t0, 3b
+4:
+endfunc
+
+/*
+ * void nv12ToUV_lsx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+ * const uint8_t *src2, int width, uint32_t *unused, void *opq)
+ */
+function nv12ToUV_lsx /* De-interleave byte pairs from src1 (a3): even bytes -> dstU (a0), odd bytes -> dstV (a1); a5 = width in pairs. */
+ andi t0, a5, 15 /* t0 = width % 16, left for the scalar tail */
+ srli.d a5, a5, 4 /* a5 = number of 16-pair vector passes */
+ beqz a5, 2f
+1:
+ vld vr0, a3, 0
+ vld vr1, a3, 16
+ addi.d a5, a5, -1
+ addi.d a3, a3, 32 /* 16 pairs = 32 source bytes */
+ vpickev.b vr2, vr1, vr0 /* even bytes of both vectors: 16 U samples */
+ vpickod.b vr3, vr1, vr0 /* odd bytes of both vectors: 16 V samples */
+ vst vr2, a0, 0
+ vst vr3, a1, 0
+ addi.d a0, a0, 16
+ addi.d a1, a1, 16
+ bnez a5, 1b
+2:
+ beqz t0, 4f
+3: /* scalar tail: one pair per pass */
+ ld.b t1, a3, 0 /* first byte of the pair */
+ ld.b t2, a3, 1 /* second byte of the pair */
+ addi.d a3, a3, 2
+ addi.d t0, t0, -1
+ st.b t1, a0, 0
+ st.b t2, a1, 0
+ addi.d a0, a0, 1
+ addi.d a1, a1, 1
+ bnez t0, 3b
+4:
+endfunc
+
+function nv12ToUV_lasx /* 256-bit variant: de-interleave 32 byte pairs per vector pass; even bytes -> dstU (a0), odd bytes -> dstV (a1). */
+ andi t0, a5, 31 /* t0 = width % 32, left for the scalar tail */
+ srli.d a5, a5, 5 /* a5 = number of 32-pair vector passes */
+ beqz a5, 2f
+1:
+ xvld xr0, a3, 0
+ xvld xr1, a3, 32
+ addi.d a5, a5, -1
+ addi.d a3, a3, 64 /* 32 pairs = 64 source bytes */
+ xvpickev.b xr2, xr1, xr0 /* even bytes, picked per 128-bit lane */
+ xvpickod.b xr3, xr1, xr0 /* odd bytes, picked per 128-bit lane */
+ xvpermi.d xr2, xr2, 0xd8 /* swap the two middle 64-bit elements to restore source order */
+ xvpermi.d xr3, xr3, 0xd8
+ xvst xr2, a0, 0
+ xvst xr3, a1, 0
+ addi.d a0, a0, 32
+ addi.d a1, a1, 32
+ bnez a5, 1b
+2:
+ beqz t0, 4f
+3: /* scalar tail: one pair per pass */
+ ld.b t1, a3, 0 /* first byte of the pair */
+ ld.b t2, a3, 1 /* second byte of the pair */
+ addi.d a3, a3, 2
+ addi.d t0, t0, -1
+ st.b t1, a0, 0
+ st.b t2, a1, 0
+ addi.d a0, a0, 1
+ addi.d a1, a1, 1
+ bnez t0, 3b
+4:
+endfunc
+
+/*
+ * void nv21ToUV_lsx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+ * const uint8_t *src2, int width, uint32_t *unused, void *opq)
+ */
+function nv21ToUV_lsx /* Same de-interleave as nv12ToUV_lsx with the destinations swapped: even bytes -> dstV (a1), odd bytes -> dstU (a0). */
+ andi t0, a5, 15 /* t0 = width % 16, left for the scalar tail */
+ srli.d a5, a5, 4 /* a5 = number of 16-pair vector passes */
+ beqz a5, 2f
+1:
+ vld vr0, a3, 0
+ vld vr1, a3, 16
+ addi.d a5, a5, -1
+ addi.d a3, a3, 32 /* 16 pairs = 32 source bytes */
+ vpickev.b vr2, vr1, vr0 /* even bytes of both vectors */
+ vpickod.b vr3, vr1, vr0 /* odd bytes of both vectors */
+ vst vr2, a1, 0 /* even bytes -> dstV */
+ vst vr3, a0, 0 /* odd bytes -> dstU */
+ addi.d a0, a0, 16
+ addi.d a1, a1, 16
+ bnez a5, 1b
+2:
+ beqz t0, 4f
+3: /* scalar tail: one pair per pass */
+ ld.b t1, a3, 0 /* first byte of the pair -> dstV */
+ ld.b t2, a3, 1 /* second byte of the pair -> dstU */
+ addi.d a3, a3, 2
+ addi.d t0, t0, -1
+ st.b t1, a1, 0
+ st.b t2, a0, 0
+ addi.d a0, a0, 1
+ addi.d a1, a1, 1
+ bnez t0, 3b
+4:
+endfunc
+
+function nv21ToUV_lasx /* 256-bit variant: de-interleave 32 byte pairs per vector pass; even bytes -> dstV (a1), odd bytes -> dstU (a0). */
+ andi t0, a5, 31 /* t0 = width % 32, left for the scalar tail */
+ srli.d a5, a5, 5 /* a5 = number of 32-pair vector passes */
+ beqz a5, 2f
+1:
+ xvld xr0, a3, 0
+ xvld xr1, a3, 32
+ addi.d a5, a5, -1
+ addi.d a3, a3, 64 /* 32 pairs = 64 source bytes */
+ xvpickev.b xr2, xr1, xr0 /* even bytes, picked per 128-bit lane */
+ xvpickod.b xr3, xr1, xr0 /* odd bytes, picked per 128-bit lane */
+ xvpermi.d xr2, xr2, 0xd8 /* swap the two middle 64-bit elements to restore source order */
+ xvpermi.d xr3, xr3, 0xd8
+ xvst xr2, a1, 0 /* even bytes -> dstV */
+ xvst xr3, a0, 0 /* odd bytes -> dstU */
+ addi.d a0, a0, 32
+ addi.d a1, a1, 32
+ bnez a5, 1b
+2:
+ beqz t0, 4f
+3: /* scalar tail: one pair per pass */
+ ld.b t1, a3, 0 /* first byte of the pair -> dstV */
+ ld.b t2, a3, 1 /* second byte of the pair -> dstU */
+ addi.d a3, a3, 2
+ addi.d t0, t0, -1
+ st.b t1, a1, 0
+ st.b t2, a0, 0
+ addi.d a0, a0, 1
+ addi.d a1, a1, 1
+ bnez t0, 3b
+4:
+endfunc
+
+/*
+ *void abgrToA_lsx(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1,
+ * const uint8_t *unused2, int width, uint32_t *unused, void *opq)
+ */
+function abgrToA_lsx /* Extract alpha from alpha-first 4-byte pixels (byte 0) and store (a << 6) | (a >> 2) as 16-bit values; a0 = dst, a1 = src, a4 = width. */
+ andi t0, a4, 7 /* t0 = width % 8, left for the scalar tail */
+ srli.d a4, a4, 3 /* a4 = number of 8-pixel vector passes */
+ vxor.v vr0, vr0, vr0 /* vr0 = 0, used to zero-extend bytes to halfwords */
+ beqz a4, 2f
+1:
+ vld vr1, a1, 0
+ vld vr2, a1, 16
+ addi.d a4, a4, -1
+ addi.d a1, a1, 32 /* 8 pixels = 32 source bytes */
+ vpickev.b vr3, vr2, vr1 /* even bytes: bytes 0 and 2 of every pixel */
+ vpackev.b vr3, vr0, vr3 /* keep byte 0 of every pixel, interleaved with zeros -> 8 x u16 alpha */
+ vslli.h vr1, vr3, 6
+ vsrli.h vr2, vr3, 2
+ vor.v vr3, vr2, vr1 /* (a << 6) | (a >> 2) */
+ vst vr3, a0, 0
+ addi.d a0, a0, 16 /* 8 x 16-bit outputs */
+ bnez a4, 1b
+2:
+ beqz t0, 4f
+3: /* scalar tail: one pixel per pass */
+ ld.b t1, a1, 0 /* alpha is byte 0 of the pixel, matching the vector path above */
+ addi.d t0, t0, -1
+ addi.d a1, a1, 4
+ andi t1, t1, 0xff /* undo the sign extension done by ld.b */
+ slli.w t2, t1, 6
+ srli.w t3, t1, 2
+ or t1, t2, t3
+ st.h t1, a0, 0
+ addi.d a0, a0, 2
+ bnez t0, 3b
+4:
+endfunc
+
+function abgrToA_lasx /* 256-bit variant: alpha (byte 0 of each 4-byte pixel) -> 16-bit (a << 6) | (a >> 2), 16 pixels per vector pass; a0 = dst, a1 = src, a4 = width. */
+ andi t0, a4, 15 /* t0 = width % 16, left for the scalar tail */
+ srli.d a4, a4, 4 /* a4 = number of 16-pixel vector passes */
+ xvxor.v xr0, xr0, xr0 /* xr0 = 0, used to zero-extend bytes to halfwords */
+ beqz a4, 2f
+1:
+ xvld xr1, a1, 0
+ xvld xr2, a1, 32
+ addi.d a4, a4, -1
+ addi.d a1, a1, 64 /* 16 pixels = 64 source bytes */
+ xvpickev.b xr3, xr2, xr1 /* even bytes (bytes 0 and 2 of every pixel), picked per 128-bit lane */
+ xvpermi.d xr3, xr3, 0xd8 /* swap the two middle 64-bit elements to restore pixel order */
+ xvpackev.b xr3, xr0, xr3 /* keep byte 0 of every pixel, interleaved with zeros -> 16 x u16 alpha */
+ xvslli.h xr1, xr3, 6
+ xvsrli.h xr2, xr3, 2
+ xvor.v xr3, xr2, xr1 /* (a << 6) | (a >> 2) */
+ xvst xr3, a0, 0
+ addi.d a0, a0, 32 /* 16 x 16-bit outputs */
+ bnez a4, 1b
+2:
+ beqz t0, 4f
+3: /* scalar tail: one pixel per pass */
+ ld.b t1, a1, 0 /* alpha is byte 0 of the pixel, matching the vector path above */
+ addi.d t0, t0, -1
+ addi.d a1, a1, 4
+ andi t1, t1, 0xff /* undo the sign extension done by ld.b */
+ slli.w t2, t1, 6
+ srli.w t3, t1, 2
+ or t1, t2, t3
+ st.h t1, a0, 0
+ addi.d a0, a0, 2
+ bnez t0, 3b
+4:
+endfunc
+
+/*
+ *void rgbaToA_lsx(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1,
+ * const uint8_t *unused2, int width, uint32_t *unused, void *opq)
+ */
+function rgbaToA_lsx /* Extract alpha from alpha-last 4-byte pixels (byte 3) and store (a << 6) | (a >> 2) as 16-bit values; a0 = dst, a1 = src, a4 = width. */
+ andi t0, a4, 7 /* t0 = width % 8, left for the scalar tail */
+ srli.d a4, a4, 3 /* a4 = number of 8-pixel vector passes */
+ vxor.v vr0, vr0, vr0 /* vr0 = 0, used to zero-extend bytes to halfwords */
+ beqz a4, 2f
+1:
+ vld vr1, a1, 3 /* load from byte 3 so alpha lands on every fourth byte starting at 0 */
+ vld vr2, a1, 19 /* reads through a1+34, three bytes past the 32 consumed this pass */
+ addi.d a4, a4, -1
+ addi.d a1, a1, 32 /* 8 pixels = 32 source bytes */
+ vpickev.b vr3, vr2, vr1 /* even bytes: alpha alternating with a colour byte */
+ vpackev.b vr3, vr0, vr3 /* keep the alpha bytes, interleaved with zeros -> 8 x u16 alpha */
+ vslli.h vr1, vr3, 6
+ vsrli.h vr2, vr3, 2
+ vor.v vr3, vr2, vr1 /* (a << 6) | (a >> 2) */
+ vst vr3, a0, 0
+ addi.d a0, a0, 16 /* 8 x 16-bit outputs */
+ bnez a4, 1b
+2:
+ beqz t0, 4f
+3: /* scalar tail: one pixel per pass */
+ ld.b t1, a1, 3 /* alpha is byte 3 of the pixel */
+ addi.d t0, t0, -1
+ addi.d a1, a1, 4
+ andi t1, t1, 0xff /* undo the sign extension done by ld.b */
+ slli.w t2, t1, 6
+ srli.w t3, t1, 2
+ or t1, t2, t3
+ st.h t1, a0, 0
+ addi.d a0, a0, 2
+ bnez t0, 3b
+4:
+endfunc
+
+function rgbaToA_lasx /* 256-bit variant: alpha (byte 3 of each 4-byte pixel) -> 16-bit (a << 6) | (a >> 2), 16 pixels per vector pass; a0 = dst, a1 = src, a4 = width. */
+ andi t0, a4, 15 /* t0 = width % 16, left for the scalar tail */
+ srli.d a4, a4, 4 /* a4 = number of 16-pixel vector passes */
+ xvxor.v xr0, xr0, xr0 /* xr0 = 0, used to zero-extend bytes to halfwords */
+ beqz a4, 2f
+1:
+ xvld xr1, a1, 3 /* load from byte 3 so alpha lands on every fourth byte starting at 0 */
+ xvld xr2, a1, 35 /* reads through a1+66, three bytes past the 64 consumed this pass */
+ addi.d a4, a4, -1
+ addi.d a1, a1, 64 /* 16 pixels = 64 source bytes */
+ xvpickev.b xr3, xr2, xr1 /* even bytes (alpha alternating with a colour byte), picked per 128-bit lane */
+ xvpermi.d xr3, xr3, 0xd8 /* swap the two middle 64-bit elements to restore pixel order */
+ xvpackev.b xr3, xr0, xr3 /* keep the alpha bytes, interleaved with zeros -> 16 x u16 alpha */
+ xvslli.h xr1, xr3, 6
+ xvsrli.h xr2, xr3, 2
+ xvor.v xr3, xr2, xr1 /* (a << 6) | (a >> 2) */
+ xvst xr3, a0, 0
+ addi.d a0, a0, 32 /* 16 x 16-bit outputs */
+ bnez a4, 1b
+2:
+ beqz t0, 4f
+3: /* scalar tail: one pixel per pass */
+ ld.b t1, a1, 3 /* alpha is byte 3 of the pixel */
+ addi.d t0, t0, -1
+ addi.d a1, a1, 4
+ andi t1, t1, 0xff /* undo the sign extension done by ld.b */
+ slli.w t2, t1, 6
+ srli.w t3, t1, 2
+ or t1, t2, t3
+ st.h t1, a0, 0
+ addi.d a0, a0, 2
+ bnez t0, 3b
+4:
+endfunc
diff --git a/libswscale/loongarch/input_lasx.c b/libswscale/loongarch/input_lasx.c
index 4830072eaf..0f1d954880 100644
--- a/libswscale/loongarch/input_lasx.c
+++ b/libswscale/loongarch/input_lasx.c
@@ -200,3 +200,46 @@ void planar_rgb_to_y_lasx(uint8_t *_dst, const uint8_t *src[4], int width,
dst[i] = (tem_ry * r + tem_gy * g + tem_by * b + set) >> shift;
}
}
+
+av_cold void ff_sws_init_input_lasx(SwsContext *c)
+{
+ /* Install the LASX input readers that match c->srcFormat; other formats keep the callbacks already set in c. */
+ switch (c->srcFormat) {
+ case AV_PIX_FMT_YUYV422:
+ c->chrToYV12 = yuy2ToUV_lasx;
+ break;
+ case AV_PIX_FMT_YVYU422:
+ c->chrToYV12 = yvy2ToUV_lasx;
+ break;
+ case AV_PIX_FMT_UYVY422:
+ c->chrToYV12 = uyvyToUV_lasx;
+ break;
+ case AV_PIX_FMT_NV12:
+ case AV_PIX_FMT_NV16:
+ case AV_PIX_FMT_NV24:
+ c->chrToYV12 = nv12ToUV_lasx;
+ break;
+ case AV_PIX_FMT_NV21:
+ case AV_PIX_FMT_NV42:
+ c->chrToYV12 = nv21ToUV_lasx;
+ break;
+ case AV_PIX_FMT_GBRAP:
+ case AV_PIX_FMT_GBRP:
+ c->readChrPlanar = planar_rgb_to_uv_lasx;
+ c->readLumPlanar = planar_rgb_to_y_lasx; /* was set by the switch this patch removes from swscale_init_loongarch.c */
+ break;
+ }
+
+ if (c->needAlpha) {
+ switch (c->srcFormat) {
+ case AV_PIX_FMT_BGRA:
+ case AV_PIX_FMT_RGBA:
+ c->alpToYV12 = rgbaToA_lasx; /* alpha is the last byte of each pixel */
+ break;
+ case AV_PIX_FMT_ABGR:
+ case AV_PIX_FMT_ARGB:
+ c->alpToYV12 = abgrToA_lasx; /* alpha is the first byte of each pixel */
+ break;
+ }
+ }
+}
diff --git a/libswscale/loongarch/input_lsx.c b/libswscale/loongarch/input_lsx.c
new file mode 100644
index 0000000000..1bb04457bb
--- /dev/null
+++ b/libswscale/loongarch/input_lsx.c
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2024 Loongson Technology Corporation Limited
+ * Contributed by Shiyou Yin<yinshiyou-hf at loongson.cn>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "swscale_loongarch.h"
+
+av_cold void ff_sws_init_input_lsx(SwsContext *c)
+{
+ /* Install the LSX input readers that match c->srcFormat; other formats keep the callbacks already set in c. */
+ switch (c->srcFormat) {
+ case AV_PIX_FMT_YUYV422:
+ c->chrToYV12 = yuy2ToUV_lsx;
+ break;
+ case AV_PIX_FMT_YVYU422:
+ c->chrToYV12 = yvy2ToUV_lsx;
+ break;
+ case AV_PIX_FMT_UYVY422:
+ c->chrToYV12 = uyvyToUV_lsx;
+ break;
+ case AV_PIX_FMT_NV12:
+ case AV_PIX_FMT_NV16:
+ case AV_PIX_FMT_NV24:
+ c->chrToYV12 = nv12ToUV_lsx;
+ break;
+ case AV_PIX_FMT_NV21:
+ case AV_PIX_FMT_NV42:
+ c->chrToYV12 = nv21ToUV_lsx;
+ break;
+ case AV_PIX_FMT_GBRAP:
+ case AV_PIX_FMT_GBRP:
+ c->readChrPlanar = planar_rgb_to_uv_lsx;
+ c->readLumPlanar = planar_rgb_to_y_lsx; /* was set by the switch this patch removes from swscale_init_loongarch.c */
+ break;
+ }
+
+ if (c->needAlpha) {
+ switch (c->srcFormat) {
+ case AV_PIX_FMT_BGRA:
+ case AV_PIX_FMT_RGBA:
+ c->alpToYV12 = rgbaToA_lsx; /* alpha is the last byte of each pixel */
+ break;
+ case AV_PIX_FMT_ABGR:
+ case AV_PIX_FMT_ARGB:
+ c->alpToYV12 = abgrToA_lsx; /* alpha is the first byte of each pixel */
+ break;
+ }
+ }
+}
diff --git a/libswscale/loongarch/swscale_init_loongarch.c b/libswscale/loongarch/swscale_init_loongarch.c
index 04d2553fa4..3a5a7ee856 100644
--- a/libswscale/loongarch/swscale_init_loongarch.c
+++ b/libswscale/loongarch/swscale_init_loongarch.c
@@ -63,6 +63,7 @@ av_cold void ff_sws_init_swscale_loongarch(SwsContext *c)
ff_sws_init_output_lsx(c, &c->yuv2plane1, &c->yuv2planeX,
&c->yuv2nv12cX, &c->yuv2packed1,
&c->yuv2packed2, &c->yuv2packedX, &c->yuv2anyX);
+ ff_sws_init_input_lsx(c);
if (c->srcBpc == 8) {
if (c->dstBpc <= 14) {
c->hyScale = c->hcScale = ff_hscale_8_to_15_lsx;
@@ -73,21 +74,13 @@ av_cold void ff_sws_init_swscale_loongarch(SwsContext *c)
c->hyScale = c->hcScale = c->dstBpc > 14 ? ff_hscale_16_to_19_lsx
: ff_hscale_16_to_15_lsx;
}
- switch (c->srcFormat) {
- case AV_PIX_FMT_GBRAP:
- case AV_PIX_FMT_GBRP:
- {
- c->readChrPlanar = planar_rgb_to_uv_lsx;
- c->readLumPlanar = planar_rgb_to_y_lsx;
- }
- break;
- }
}
#if HAVE_LASX
if (have_lasx(cpu_flags)) {
ff_sws_init_output_lasx(c, &c->yuv2plane1, &c->yuv2planeX,
&c->yuv2nv12cX, &c->yuv2packed1,
&c->yuv2packed2, &c->yuv2packedX, &c->yuv2anyX);
+ ff_sws_init_input_lasx(c);
if (c->srcBpc == 8) {
if (c->dstBpc <= 14) {
c->hyScale = c->hcScale = ff_hscale_8_to_15_lasx;
@@ -98,15 +91,6 @@ av_cold void ff_sws_init_swscale_loongarch(SwsContext *c)
c->hyScale = c->hcScale = c->dstBpc > 14 ? ff_hscale_16_to_19_lasx
: ff_hscale_16_to_15_lasx;
}
- switch (c->srcFormat) {
- case AV_PIX_FMT_GBRAP:
- case AV_PIX_FMT_GBRP:
- {
- c->readChrPlanar = planar_rgb_to_uv_lasx;
- c->readLumPlanar = planar_rgb_to_y_lasx;
- }
- break;
- }
}
#endif // #if HAVE_LASX
ff_sws_init_range_convert_loongarch(c);
diff --git a/libswscale/loongarch/swscale_loongarch.h b/libswscale/loongarch/swscale_loongarch.h
index ea93881f8e..07c91bc25c 100644
--- a/libswscale/loongarch/swscale_loongarch.h
+++ b/libswscale/loongarch/swscale_loongarch.h
@@ -68,6 +68,29 @@ void yuv2planeX_8_lsx(const int16_t *filter, int filterSize,
void yuv2plane1_8_lsx(const int16_t *src, uint8_t *dest, int dstW,
const uint8_t *dither, int offset);
+void yuy2ToUV_lsx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+ const uint8_t *src2, int width, uint32_t *unused, void *opq); /* packed YUYV422: src1 bytes 1/3 of each group -> dstU/dstV */
+
+void yvy2ToUV_lsx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+ const uint8_t *src2, int width, uint32_t *unused, void *opq); /* packed YVYU422: src1 bytes 3/1 of each group -> dstU/dstV */
+
+void uyvyToUV_lsx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+ const uint8_t *src2, int width, uint32_t *unused, void *opq); /* packed UYVY422 chroma from src1 -> dstU/dstV */
+
+void nv12ToUV_lsx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+ const uint8_t *src2, int width, uint32_t *unused, void *opq); /* interleaved pairs in src1: even bytes -> dstU, odd -> dstV */
+
+void nv21ToUV_lsx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+ const uint8_t *src2, int width, uint32_t *unused, void *opq); /* interleaved pairs in src1: even bytes -> dstV, odd -> dstU */
+
+void abgrToA_lsx(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1,
+ const uint8_t *unused2, int width, uint32_t *unused, void *opq); /* alpha-first 4-byte pixels -> 16-bit alpha in _dst */
+
+void rgbaToA_lsx(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1,
+ const uint8_t *unused2, int width, uint32_t *unused, void *opq); /* alpha-last 4-byte pixels -> 16-bit alpha in _dst */
+
+av_cold void ff_sws_init_input_lsx(SwsContext *c); /* selects the LSX readers above based on c->srcFormat */
+
av_cold void ff_sws_init_output_lsx(SwsContext *c,
yuv2planar1_fn *yuv2plane1,
yuv2planarX_fn *yuv2planeX,
@@ -152,6 +175,29 @@ void yuv2planeX_8_lasx(const int16_t *filter, int filterSize,
void yuv2plane1_8_lasx(const int16_t *src, uint8_t *dest, int dstW,
const uint8_t *dither, int offset);
+void yuy2ToUV_lasx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+ const uint8_t *src2, int width, uint32_t *unused, void *opq); /* packed YUYV422: src1 bytes 1/3 of each group -> dstU/dstV */
+
+void yvy2ToUV_lasx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+ const uint8_t *src2, int width, uint32_t *unused, void *opq); /* packed YVYU422: src1 bytes 3/1 of each group -> dstU/dstV */
+
+void uyvyToUV_lasx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+ const uint8_t *src2, int width, uint32_t *unused, void *opq); /* packed UYVY422 chroma from src1 -> dstU/dstV */
+
+void nv12ToUV_lasx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+ const uint8_t *src2, int width, uint32_t *unused, void *opq); /* interleaved pairs in src1: even bytes -> dstU, odd -> dstV */
+
+void nv21ToUV_lasx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+ const uint8_t *src2, int width, uint32_t *unused, void *opq); /* interleaved pairs in src1: even bytes -> dstV, odd -> dstU */
+
+void abgrToA_lasx(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1,
+ const uint8_t *unused2, int width, uint32_t *unused, void *opq); /* alpha-first 4-byte pixels -> 16-bit alpha in _dst */
+
+void rgbaToA_lasx(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1,
+ const uint8_t *unused2, int width, uint32_t *unused, void *opq); /* alpha-last 4-byte pixels -> 16-bit alpha in _dst */
+
+av_cold void ff_sws_init_input_lasx(SwsContext *c); /* selects the LASX readers above based on c->srcFormat */
+
av_cold void ff_sws_init_output_lasx(SwsContext *c,
yuv2planar1_fn *yuv2plane1,
yuv2planarX_fn *yuv2planeX,
More information about the ffmpeg-cvslog
mailing list