[FFmpeg-cvslog] arm: fmtconvert: Split armv6 fmtconvert code off from vfp code
Diego Biurrun
git at videolan.org
Thu Aug 29 16:15:49 CEST 2013
ffmpeg | branch: master | Diego Biurrun <diego at biurrun.de> | Fri Aug 23 20:01:36 2013 +0200| [f0389eb777b1ab4291329d4f709098cdfa7384dc] | committer: Diego Biurrun
arm: fmtconvert: Split armv6 fmtconvert code off from vfp code
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f0389eb777b1ab4291329d4f709098cdfa7384dc
---
libavcodec/arm/Makefile | 4 +-
libavcodec/arm/fmtconvert_init_arm.c | 9 ++--
libavcodec/arm/fmtconvert_vfp.S | 57 ------------------------
libavcodec/arm/fmtconvert_vfp_armv6.S | 78 +++++++++++++++++++++++++++++++++
4 files changed, 85 insertions(+), 63 deletions(-)
diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
index 0e5e53a..201f897 100644
--- a/libavcodec/arm/Makefile
+++ b/libavcodec/arm/Makefile
@@ -52,11 +52,13 @@ ARMV6-OBJS-$(CONFIG_VP8_DECODER) += arm/vp8_armv6.o \
arm/vp8dsp_init_armv6.o \
arm/vp8dsp_armv6.o
+VFP-OBJS += arm/fmtconvert_vfp.o
+
VFP-OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_vfp.o \
arm/synth_filter_vfp.o
VFP-OBJS-$(CONFIG_FFT) += arm/fft_vfp.o
VFP-OBJS-$(CONFIG_MDCT) += arm/mdct_vfp.o
-VFP-OBJS-$(HAVE_ARMV6) += arm/fmtconvert_vfp.o
+VFP-OBJS-$(HAVE_ARMV6) += arm/fmtconvert_vfp_armv6.o
NEON-OBJS += arm/fmtconvert_neon.o
diff --git a/libavcodec/arm/fmtconvert_init_arm.c b/libavcodec/arm/fmtconvert_init_arm.c
index eb66eb8..7c5bd91 100644
--- a/libavcodec/arm/fmtconvert_init_arm.c
+++ b/libavcodec/arm/fmtconvert_init_arm.c
@@ -43,16 +43,15 @@ av_cold void ff_fmt_convert_init_arm(FmtConvertContext *c, AVCodecContext *avctx
{
int cpu_flags = av_get_cpu_flags();
- if (have_vfp(cpu_flags) && have_armv6(cpu_flags)) {
+ if (have_vfp(cpu_flags)) {
if (!have_vfpv3(cpu_flags)) {
- // These functions don't use anything armv6 specific in themselves,
- // but ff_float_to_int16_vfp which is in the same assembly source
- // file does, thus the whole file requires armv6 to be built.
c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_vfp;
c->int32_to_float_fmul_array8 = ff_int32_to_float_fmul_array8_vfp;
}
- c->float_to_int16 = ff_float_to_int16_vfp;
+ if (have_armv6(cpu_flags)) {
+ c->float_to_int16 = ff_float_to_int16_vfp;
+ }
}
if (have_neon(cpu_flags)) {
diff --git a/libavcodec/arm/fmtconvert_vfp.S b/libavcodec/arm/fmtconvert_vfp.S
index d841802..4e43f42 100644
--- a/libavcodec/arm/fmtconvert_vfp.S
+++ b/libavcodec/arm/fmtconvert_vfp.S
@@ -1,5 +1,4 @@
/*
- * Copyright (c) 2008 Siarhei Siamashka <ssvb at users.sourceforge.net>
* Copyright (c) 2013 RISC OS Open Ltd <bavison at riscosopen.org>
*
* This file is part of Libav.
@@ -23,62 +22,6 @@
#include "libavutil/arm/asm.S"
/**
- * ARM VFP optimized float to int16 conversion.
- * Assume that len is a positive number and is multiple of 8, destination
- * buffer is at least 4 bytes aligned (8 bytes alignment is better for
- * performance), little-endian byte sex.
- */
-@ void ff_float_to_int16_vfp(int16_t *dst, const float *src, int len)
-function ff_float_to_int16_vfp, export=1
- push {r4-r8,lr}
- vpush {d8-d11}
- vldmia r1!, {s16-s23}
- vcvt.s32.f32 s0, s16
- vcvt.s32.f32 s1, s17
- vcvt.s32.f32 s2, s18
- vcvt.s32.f32 s3, s19
- vcvt.s32.f32 s4, s20
- vcvt.s32.f32 s5, s21
- vcvt.s32.f32 s6, s22
- vcvt.s32.f32 s7, s23
-1:
- subs r2, r2, #8
- vmov r3, r4, s0, s1
- vmov r5, r6, s2, s3
- vmov r7, r8, s4, s5
- vmov ip, lr, s6, s7
- it gt
- vldmiagt r1!, {s16-s23}
- ssat r4, #16, r4
- ssat r3, #16, r3
- ssat r6, #16, r6
- ssat r5, #16, r5
- pkhbt r3, r3, r4, lsl #16
- pkhbt r4, r5, r6, lsl #16
- itttt gt
- vcvtgt.s32.f32 s0, s16
- vcvtgt.s32.f32 s1, s17
- vcvtgt.s32.f32 s2, s18
- vcvtgt.s32.f32 s3, s19
- itttt gt
- vcvtgt.s32.f32 s4, s20
- vcvtgt.s32.f32 s5, s21
- vcvtgt.s32.f32 s6, s22
- vcvtgt.s32.f32 s7, s23
- ssat r8, #16, r8
- ssat r7, #16, r7
- ssat lr, #16, lr
- ssat ip, #16, ip
- pkhbt r5, r7, r8, lsl #16
- pkhbt r6, ip, lr, lsl #16
- stmia r0!, {r3-r6}
- bgt 1b
-
- vpop {d8-d11}
- pop {r4-r8,pc}
-endfunc
-
-/**
* ARM VFP optimised int32 to float conversion.
* Assume len is a multiple of 8, destination buffer is at least 4 bytes aligned
* (16 bytes alignment is best for BCM2835), little-endian.
diff --git a/libavcodec/arm/fmtconvert_vfp_armv6.S b/libavcodec/arm/fmtconvert_vfp_armv6.S
new file mode 100644
index 0000000..fb12de1
--- /dev/null
+++ b/libavcodec/arm/fmtconvert_vfp_armv6.S
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2008 Siarhei Siamashka <ssvb at users.sourceforge.net>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/arm/asm.S"
+
+/**
+ * ARM VFP optimized float to int16 conversion.
+ * Assume that len is a positive number and is multiple of 8, destination
+ * buffer is at least 4 bytes aligned (8 bytes alignment is better for
+ * performance), little-endian byte sex.
+ */
+@ void ff_float_to_int16_vfp(int16_t *dst, const float *src, int len)
+function ff_float_to_int16_vfp, export=1
+ push {r4-r8,lr}
+ vpush {d8-d11}
+ vldmia r1!, {s16-s23}
+ vcvt.s32.f32 s0, s16
+ vcvt.s32.f32 s1, s17
+ vcvt.s32.f32 s2, s18
+ vcvt.s32.f32 s3, s19
+ vcvt.s32.f32 s4, s20
+ vcvt.s32.f32 s5, s21
+ vcvt.s32.f32 s6, s22
+ vcvt.s32.f32 s7, s23
+1:
+ subs r2, r2, #8
+ vmov r3, r4, s0, s1
+ vmov r5, r6, s2, s3
+ vmov r7, r8, s4, s5
+ vmov ip, lr, s6, s7
+ it gt
+ vldmiagt r1!, {s16-s23}
+ ssat r4, #16, r4
+ ssat r3, #16, r3
+ ssat r6, #16, r6
+ ssat r5, #16, r5
+ pkhbt r3, r3, r4, lsl #16
+ pkhbt r4, r5, r6, lsl #16
+ itttt gt
+ vcvtgt.s32.f32 s0, s16
+ vcvtgt.s32.f32 s1, s17
+ vcvtgt.s32.f32 s2, s18
+ vcvtgt.s32.f32 s3, s19
+ itttt gt
+ vcvtgt.s32.f32 s4, s20
+ vcvtgt.s32.f32 s5, s21
+ vcvtgt.s32.f32 s6, s22
+ vcvtgt.s32.f32 s7, s23
+ ssat r8, #16, r8
+ ssat r7, #16, r7
+ ssat lr, #16, lr
+ ssat ip, #16, ip
+ pkhbt r5, r7, r8, lsl #16
+ pkhbt r6, ip, lr, lsl #16
+ stmia r0!, {r3-r6}
+ bgt 1b
+
+ vpop {d8-d11}
+ pop {r4-r8,pc}
+endfunc
More information about the ffmpeg-cvslog mailing list