[FFmpeg-cvslog] ARM: make some NEON macros reusable
Janne Grunau
git at videolan.org
Sat Dec 3 03:13:09 CET 2011
ffmpeg | branch: master | Janne Grunau <janne-libav at jannau.net> | Tue Nov 29 13:38:10 2011 +0000| [a760f530bba6d21484c611de67d072fdab56e08e] | committer: Mans Rullgard
ARM: make some NEON macros reusable
Signed-off-by: Mans Rullgard <mans at mansr.com>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=a760f530bba6d21484c611de67d072fdab56e08e
---
libavcodec/arm/h264dsp_neon.S | 41 +---------------------------
libavcodec/arm/neon.S | 59 +++++++++++++++++++++++++++++++++++++++++
libavcodec/arm/vp8dsp_neon.S | 26 +++--------------
3 files changed, 65 insertions(+), 61 deletions(-)
diff --git a/libavcodec/arm/h264dsp_neon.S b/libavcodec/arm/h264dsp_neon.S
index 5156538..1e97908 100644
--- a/libavcodec/arm/h264dsp_neon.S
+++ b/libavcodec/arm/h264dsp_neon.S
@@ -19,46 +19,7 @@
*/
#include "asm.S"
-
- .macro transpose_8x8 r0 r1 r2 r3 r4 r5 r6 r7
- vtrn.32 \r0, \r4
- vtrn.32 \r1, \r5
- vtrn.32 \r2, \r6
- vtrn.32 \r3, \r7
- vtrn.16 \r0, \r2
- vtrn.16 \r1, \r3
- vtrn.16 \r4, \r6
- vtrn.16 \r5, \r7
- vtrn.8 \r0, \r1
- vtrn.8 \r2, \r3
- vtrn.8 \r4, \r5
- vtrn.8 \r6, \r7
- .endm
-
- .macro transpose_4x4 r0 r1 r2 r3
- vtrn.16 \r0, \r2
- vtrn.16 \r1, \r3
- vtrn.8 \r0, \r1
- vtrn.8 \r2, \r3
- .endm
-
- .macro swap4 r0 r1 r2 r3 r4 r5 r6 r7
- vswp \r0, \r4
- vswp \r1, \r5
- vswp \r2, \r6
- vswp \r3, \r7
- .endm
-
- .macro transpose16_4x4 r0 r1 r2 r3 r4 r5 r6 r7
- vtrn.32 \r0, \r2
- vtrn.32 \r1, \r3
- vtrn.32 \r4, \r6
- vtrn.32 \r5, \r7
- vtrn.16 \r0, \r1
- vtrn.16 \r2, \r3
- vtrn.16 \r4, \r5
- vtrn.16 \r6, \r7
- .endm
+#include "neon.S"
/* chroma_mc8(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */
.macro h264_chroma_mc8 type
diff --git a/libavcodec/arm/neon.S b/libavcodec/arm/neon.S
new file mode 100644
index 0000000..716a607
--- /dev/null
+++ b/libavcodec/arm/neon.S
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2008 Mans Rullgard <mans at mansr.com>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+.macro transpose_8x8 r0, r1, r2, r3, r4, r5, r6, r7
+ vtrn.32 \r0, \r4
+ vtrn.32 \r1, \r5
+ vtrn.32 \r2, \r6
+ vtrn.32 \r3, \r7
+ vtrn.16 \r0, \r2
+ vtrn.16 \r1, \r3
+ vtrn.16 \r4, \r6
+ vtrn.16 \r5, \r7
+ vtrn.8 \r0, \r1
+ vtrn.8 \r2, \r3
+ vtrn.8 \r4, \r5
+ vtrn.8 \r6, \r7
+.endm
+
+.macro transpose_4x4 r0, r1, r2, r3
+ vtrn.16 \r0, \r2
+ vtrn.16 \r1, \r3
+ vtrn.8 \r0, \r1
+ vtrn.8 \r2, \r3
+.endm
+
+.macro swap4 r0, r1, r2, r3, r4, r5, r6, r7
+ vswp \r0, \r4
+ vswp \r1, \r5
+ vswp \r2, \r6
+ vswp \r3, \r7
+.endm
+
+.macro transpose16_4x4 r0, r1, r2, r3, r4, r5, r6, r7
+ vtrn.32 \r0, \r2
+ vtrn.32 \r1, \r3
+ vtrn.32 \r4, \r6
+ vtrn.32 \r5, \r7
+ vtrn.16 \r0, \r1
+ vtrn.16 \r2, \r3
+ vtrn.16 \r4, \r5
+ vtrn.16 \r6, \r7
+.endm
diff --git a/libavcodec/arm/vp8dsp_neon.S b/libavcodec/arm/vp8dsp_neon.S
index 1b9f24e..4ff53ad 100644
--- a/libavcodec/arm/vp8dsp_neon.S
+++ b/libavcodec/arm/vp8dsp_neon.S
@@ -22,6 +22,7 @@
*/
#include "asm.S"
+#include "neon.S"
function ff_vp8_luma_dc_wht_neon, export=1
vld1.16 {q0-q1}, [r1,:128]
@@ -454,23 +455,6 @@ endfunc
.endif
.endm
-.macro transpose8x16matrix
- vtrn.32 q0, q4
- vtrn.32 q1, q5
- vtrn.32 q2, q6
- vtrn.32 q3, q7
-
- vtrn.16 q0, q2
- vtrn.16 q1, q3
- vtrn.16 q4, q6
- vtrn.16 q5, q7
-
- vtrn.8 q0, q1
- vtrn.8 q2, q3
- vtrn.8 q4, q5
- vtrn.8 q6, q7
-.endm
-
.macro vp8_v_loop_filter16 name, inner=0, simple=0
function ff_vp8_v_loop_filter16\name\()_neon, export=1
vpush {q4-q7}
@@ -605,7 +589,7 @@ function ff_vp8_h_loop_filter16\name\()_neon, export=1
vld1.8 {d13}, [r0], r1
vld1.8 {d15}, [r0], r1
- transpose8x16matrix
+ transpose_8x8 q0, q1, q2, q3, q4, q5, q6, q7
vdup.8 q14, r2 @ flim_E
.if !\simple
@@ -616,7 +600,7 @@ function ff_vp8_h_loop_filter16\name\()_neon, export=1
sub r0, r0, r1, lsl #4 @ backup 16 rows
- transpose8x16matrix
+ transpose_8x8 q0, q1, q2, q3, q4, q5, q6, q7
@ Store pixels:
vst1.8 {d0}, [r0], r1
@@ -670,7 +654,7 @@ function ff_vp8_h_loop_filter8uv\name\()_neon, export=1
vld1.8 {d14}, [r0], r2
vld1.8 {d15}, [r1], r2
- transpose8x16matrix
+ transpose_8x8 q0, q1, q2, q3, q4, q5, q6, q7
vdup.8 q14, r3 @ flim_E
vdup.8 q15, r12 @ flim_I
@@ -681,7 +665,7 @@ function ff_vp8_h_loop_filter8uv\name\()_neon, export=1
sub r0, r0, r2, lsl #3 @ backup u 8 rows
sub r1, r1, r2, lsl #3 @ backup v 8 rows
- transpose8x16matrix
+ transpose_8x8 q0, q1, q2, q3, q4, q5, q6, q7
@ Store pixels:
vst1.8 {d0}, [r0], r2
More information about the ffmpeg-cvslog
mailing list