[FFmpeg-devel] [PATCH 07/10] swscale/arm/yuv2rgb: macro-ify

Matthieu Bouron matthieu.bouron at gmail.com
Fri Mar 25 23:46:02 CET 2016


From: Matthieu Bouron <matthieu.bouron at stupeflix.com>

---
 libswscale/arm/yuv2rgb_neon.S | 115 ++++++++++++++----------------------------
 1 file changed, 39 insertions(+), 76 deletions(-)

diff --git a/libswscale/arm/yuv2rgb_neon.S b/libswscale/arm/yuv2rgb_neon.S
index 8abb986..f77f534 100644
--- a/libswscale/arm/yuv2rgb_neon.S
+++ b/libswscale/arm/yuv2rgb_neon.S
@@ -105,7 +105,7 @@
     compute_16px        r2, d14, d15, \ofmt
 .endm
 
-.macro load_args_nvx
+.macro load_args_nv12
     push                {r4-r12, lr}
     vpush               {q4-q7}
     ldr                 r4, [sp, #104]                                 @ r4  = srcY
@@ -122,6 +122,10 @@
     sub                 r7, r7, r0                                     @ r7 = linesizeC - width     (paddingC)
 .endm
 
+.macro load_args_nv21
+    load_args_nv12                                                     @ nv21 reuses the nv12 argument loading unchanged
+.endm
+
 .macro load_args_yuv420p
     push                {r4-r12, lr}
     vpush               {q4-q7}
@@ -146,113 +150,72 @@
     load_args_yuv420p
 .endm
 
-.macro declare_func ifmt ofmt
-function ff_\ifmt\()_to_\ofmt\()_neon, export=1
-
-.ifc \ifmt,nv12
-    load_args_nvx
-.endif
-
-.ifc \ifmt,nv21
-    load_args_nvx
-.endif
-
-.ifc \ifmt,yuv420p
-    load_args_yuv420p
-.endif
-
-
-.ifc \ifmt,yuv422p
-    load_args_yuv422p
-.endif
-
-1:
-    mov                 r8, r0                                         @ r8 = width
-2:
-    pld [r6, #64*3]
-    pld [r4, #64*3]
-
-    vmov.i8             d10, #128
-
-.ifc \ifmt,nv12
+.macro load_chroma_nv12                                                @ de-interleave 16 chroma bytes from [r6]: U in d2, V in d3; q14/q15 = U/V - 128 (d10 must hold 128)
     vld2.8              {d2, d3}, [r6]!                                @ q1: interleaved chroma line
     vsubl.u8            q14, d2, d10                                   @ q14 = U - 128
     vsubl.u8            q15, d3, d10                                   @ q15 = V - 128
+.endm
 
-    process_1l_16px \ofmt
-.endif
-
-.ifc \ifmt,nv21
+.macro load_chroma_nv21                                                @ same load as load_chroma_nv12 but with swapped roles: V is taken from d2, U from d3 (d10 must hold 128)
     vld2.8              {d2, d3}, [r6]!                                @ q1: interleaved chroma line
     vsubl.u8            q14, d3, d10                                   @ q14 = U - 128
     vsubl.u8            q15, d2, d10                                   @ q15 = V - 128
+.endm
 
-    process_1l_16px \ofmt
-.endif
-
-.ifc \ifmt,yuv420p
-    pld [r10, #64*3]
-
-    vld1.8              d2, [r6]!                                      @ d2: chroma red line
-    vld1.8              d3, [r10]!                                     @ d3: chroma blue line
-    vsubl.u8            q14, d2, d10                                   @ q14 = U - 128
-    vsubl.u8            q15, d3, d10                                   @ q15 = V - 128
-
-    process_1l_16px \ofmt
-.endif
-
-.ifc \ifmt,yuv422p
+.macro load_chroma_yuv420p                                             @ load 8 bytes from [r6] (used as U) and 8 from [r10] (used as V); q14/q15 = U/V - 128 (d10 must hold 128)
     pld [r10, #64*3]
 
     vld1.8              d2, [r6]!                                      @ d2: chroma red line
     vld1.8              d3, [r10]!                                     @ d3: chroma blue line
     vsubl.u8            q14, d2, d10                                   @ q14 = U - 128
     vsubl.u8            q15, d3, d10                                   @ q15 = V - 128
+.endm
 
-    process_1l_16px \ofmt
-.endif
-
-    subs                r8, r8, #16                                    @ width -= 16
-    bgt                 2b
-
-    add                 r2, r2, r3                                     @ dst   += padding
-    add                 r4, r4, r5                                     @ srcY  += paddingY
-
-.ifc \ifmt,nv12
-    tst                 r1, #1
-    subeq               r6, r6, r0                                     @ if (height % 2 == 0) paddingU -= width
-    addne               r6, r7                                         @ else                 paddingU += linesizeU - width
-
-    subs                r1, r1, #1                                     @ height -= 1
-.endif
+.macro load_chroma_yuv422p
+    load_chroma_yuv420p                                                @ per-iteration chroma load is identical to yuv420p (planar U at r6, V at r10)
+.endm
 
-.ifc \ifmt,nv21
+.macro increment_nv12                                                  @ r1 even: rewind r6 by width (r0) so the chroma line is read again; r1 odd: add padding r7 to reach the next chroma line
     tst                 r1, #1
     subeq               r6, r6, r0                                     @ if (height % 2 == 0) paddingU -= width
     addne               r6, r7                                         @ else                 paddingU += linesizeU - width
+.endm
 
-    subs                r1, r1, #1                                     @ height -= 1
-.endif
+.macro increment_nv21
+    increment_nv12                                                     @ chroma pointer stepping is the same as for nv12
+.endm
 
-.ifc \ifmt,yuv420p
+.macro increment_yuv420p                                               @ r1 even: rewind r6 (U) and r10 (V) by width/2 to reuse the chroma lines; r1 odd: add paddings r7 / r12 to reach the next ones
     tst                 r1, #1
     subeq               r6, r6, r0, lsr #1                             @ if (height % 2 == 0) paddingU -= (width / 2)
     addne               r6, r7                                         @ else                 paddingU += linesizeU - (width / 2)
     subeq               r10, r10, r0, lsr #1                           @ if (height % 2 == 0) paddingU -= (width / 2)
     addne               r10, r12                                       @ else                 paddingV = linesizeV - (width / 2)
+.endm
 
-    subs                r1, r1, #1                                     @ height -= 1
-.endif
-
-.ifc \ifmt,yuv422p
+.macro increment_yuv422p                                               @ chroma advances on every row: unconditionally skip the U and V paddings held in r7 / r12
     add                 r6, r6, r7                                     @ srcU  += paddingU
     add                 r10,r10,r12                                    @ srcV  += paddingV
+.endm
 
+.macro declare_func ifmt ofmt
+function ff_\ifmt\()_to_\ofmt\()_neon, export=1
+    load_args_\ifmt
+1:
+    mov                 r8, r0                                         @ r8 = width
+2:
+    pld [r6, #64*3]
+    pld [r4, #64*3]
+    vmov.i8             d10, #128
+    load_chroma_\ifmt
+    process_1l_16px \ofmt
+    subs                r8, r8, #16                                    @ width -= 16
+    bgt                 2b
+    add                 r2, r2, r3                                     @ dst   += padding
+    add                 r4, r4, r5                                     @ srcY  += paddingY
+    increment_\ifmt
     subs                r1, r1, #1                                     @ height -= 1
-.endif
-
     bgt                 1b
-
     vpop                {q4-q7}
     pop                 {r4-r12, lr}
     mov                 pc, lr
-- 
2.7.4



More information about the ffmpeg-devel mailing list