[FFmpeg-devel] [PATCH 07/10] swscale/arm/yuv2rgb: macro-ify
Matthieu Bouron
matthieu.bouron at gmail.com
Fri Mar 25 23:46:02 CET 2016
From: Matthieu Bouron <matthieu.bouron at stupeflix.com>
---
libswscale/arm/yuv2rgb_neon.S | 115 ++++++++++++++----------------------------
1 file changed, 39 insertions(+), 76 deletions(-)
diff --git a/libswscale/arm/yuv2rgb_neon.S b/libswscale/arm/yuv2rgb_neon.S
index 8abb986..f77f534 100644
--- a/libswscale/arm/yuv2rgb_neon.S
+++ b/libswscale/arm/yuv2rgb_neon.S
@@ -105,7 +105,7 @@
compute_16px r2, d14, d15, \ofmt
.endm
-.macro load_args_nvx
+.macro load_args_nv12
push {r4-r12, lr}
vpush {q4-q7}
ldr r4, [sp, #104] @ r4 = srcY
@@ -122,6 +122,10 @@
sub r7, r7, r0 @ r7 = linesizeC - width (paddingC)
.endm
+.macro load_args_nv21 @ nv21 argument loading is identical to nv12, so it just expands that macro
+ load_args_nv12
+.endm
+
.macro load_args_yuv420p
push {r4-r12, lr}
vpush {q4-q7}
@@ -146,113 +150,72 @@
load_args_yuv420p
.endm
-.macro declare_func ifmt ofmt
-function ff_\ifmt\()_to_\ofmt\()_neon, export=1
-
-.ifc \ifmt,nv12
- load_args_nvx
-.endif
-
-.ifc \ifmt,nv21
- load_args_nvx
-.endif
-
-.ifc \ifmt,yuv420p
- load_args_yuv420p
-.endif
-
-
-.ifc \ifmt,yuv422p
- load_args_yuv422p
-.endif
-
-1:
- mov r8, r0 @ r8 = width
-2:
- pld [r6, #64*3]
- pld [r4, #64*3]
-
- vmov.i8 d10, #128
-
-.ifc \ifmt,nv12
+.macro load_chroma_nv12 @ de-interleaves chroma from [r6] (post-incremented); expects d10 = 128; results in q14/q15
vld2.8 {d2, d3}, [r6]! @ q1: interleaved chroma line
vsubl.u8 q14, d2, d10 @ q14 = U - 128
vsubl.u8 q15, d3, d10 @ q15 = V - 128
+.endm
- process_1l_16px \ofmt
-.endif
-
-.ifc \ifmt,nv21
+.macro load_chroma_nv21 @ same load as the nv12 variant, but d3 feeds q14 and d2 feeds q15 (lanes swapped)
vld2.8 {d2, d3}, [r6]! @ q1: interleaved chroma line
vsubl.u8 q14, d3, d10 @ q14 = U - 128
vsubl.u8 q15, d2, d10 @ q15 = V - 128
+.endm
- process_1l_16px \ofmt
-.endif
-
-.ifc \ifmt,yuv420p
- pld [r10, #64*3]
-
- vld1.8 d2, [r6]! @ d2: chroma red line
- vld1.8 d3, [r10]! @ d3: chroma blue line
- vsubl.u8 q14, d2, d10 @ q14 = U - 128
- vsubl.u8 q15, d3, d10 @ q15 = V - 128
-
- process_1l_16px \ofmt
-.endif
-
-.ifc \ifmt,yuv422p
+.macro load_chroma_yuv420p @ planar chroma: d2 loaded from [r6], d3 from [r10] (both post-incremented); expects d10 = 128
pld [r10, #64*3]
vld1.8 d2, [r6]! @ d2: chroma red line
vld1.8 d3, [r10]! @ d3: chroma blue line
vsubl.u8 q14, d2, d10 @ q14 = U - 128
vsubl.u8 q15, d3, d10 @ q15 = V - 128
+.endm
- process_1l_16px \ofmt
-.endif
-
- subs r8, r8, #16 @ width -= 16
- bgt 2b
-
- add r2, r2, r3 @ dst += padding
- add r4, r4, r5 @ srcY += paddingY
-
-.ifc \ifmt,nv12
- tst r1, #1
- subeq r6, r6, r0 @ if (height % 2 == 0) paddingU -= width
- addne r6, r7 @ else paddingU += linesizeU - width
-
- subs r1, r1, #1 @ height -= 1
-.endif
+.macro load_chroma_yuv422p @ per-iteration chroma load is the same as yuv420p, so it expands that macro
+ load_chroma_yuv420p
+.endm
-.ifc \ifmt,nv21
+.macro increment_nv12 @ end-of-row chroma step on r6: rewind by r0 when r1 is even, add r7 when r1 is odd
tst r1, #1
subeq r6, r6, r0 @ if (height % 2 == 0) paddingU -= width
addne r6, r7 @ else paddingU += linesizeU - width
+.endm
- subs r1, r1, #1 @ height -= 1
-.endif
+.macro increment_nv21 @ end-of-row chroma step is identical to nv12, so it just expands that macro
+ increment_nv12
+.endm
-.ifc \ifmt,yuv420p
+.macro increment_yuv420p @ end-of-row step for both chroma pointers: r6 uses r7, r10 uses r12; rewind is r0 >> 1 when r1 is even
tst r1, #1
subeq r6, r6, r0, lsr #1 @ if (height % 2 == 0) paddingU -= (width / 2)
addne r6, r7 @ else paddingU += linesizeU - (width / 2)
subeq r10, r10, r0, lsr #1 @ if (height % 2 == 0) paddingU -= (width / 2)
addne r10, r12 @ else paddingV = linesizeV - (width / 2)
+.endm
- subs r1, r1, #1 @ height -= 1
-.endif
-
-.ifc \ifmt,yuv422p
+.macro increment_yuv422p @ unconditional end-of-row step: both chroma pointers advance on every row (no parity test on r1)
add r6, r6, r7 @ srcU += paddingU
add r10,r10,r12 @ srcV += paddingV
+.endm
+.macro declare_func ifmt ofmt
+function ff_\ifmt\()_to_\ofmt\()_neon, export=1
+ load_args_\ifmt
+1:
+ mov r8, r0 @ r8 = width
+2:
+ pld [r6, #64*3]
+ pld [r4, #64*3]
+ vmov.i8 d10, #128
+ load_chroma_\ifmt
+ process_1l_16px \ofmt
+ subs r8, r8, #16 @ width -= 16
+ bgt 2b
+ add r2, r2, r3 @ dst += padding
+ add r4, r4, r5 @ srcY += paddingY
+ increment_\ifmt
subs r1, r1, #1 @ height -= 1
-.endif
-
bgt 1b
-
vpop {q4-q7}
pop {r4-r12, lr}
mov pc, lr
--
2.7.4
More information about the ffmpeg-devel
mailing list