[FFmpeg-cvslog] ARM: clean up NEON put/avg_pixels macros
Mans Rullgard
git at videolan.org
Fri Dec 2 01:00:55 CET 2011
ffmpeg | branch: master | Mans Rullgard <mans at mansr.com> | Thu Dec 1 05:04:32 2011 +0000| [94267ddfb25ccb6ebd9f22249894586279499aea] | committer: Mans Rullgard
ARM: clean up NEON put/avg_pixels macros
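Although this adds a few lines, the macro calls are less convoluted. Every pixels* macro now takes the same rnd/avg parameters, and pixfunc defines temporary avg/shrn helper macros that select the rounding (vrhadd.u8, vrshrn.u16) or non-rounding (vhadd.u8, vshrn.u16) instruction, instead of instruction mnemonics being passed in as macro arguments.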
Signed-off-by: Mans Rullgard <mans at mansr.com>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=94267ddfb25ccb6ebd9f22249894586279499aea
---
libavcodec/arm/dsputil_neon.S | 153 +++++++++++++++++++++++------------------
1 files changed, 85 insertions(+), 68 deletions(-)
diff --git a/libavcodec/arm/dsputil_neon.S b/libavcodec/arm/dsputil_neon.S
index 3396913..6f0a6e9 100644
--- a/libavcodec/arm/dsputil_neon.S
+++ b/libavcodec/arm/dsputil_neon.S
@@ -40,10 +40,10 @@ function ff_clear_blocks_neon, export=1
bx lr
endfunc
- .macro pixels16 avg=0
-.if \avg
+.macro pixels16 rnd=1, avg=0
+ .if \avg
mov ip, r0
-.endif
+ .endif
1: vld1.64 {d0, d1}, [r1], r2
vld1.64 {d2, d3}, [r1], r2
vld1.64 {d4, d5}, [r1], r2
@@ -52,7 +52,7 @@ endfunc
pld [r1]
pld [r1, r2]
pld [r1, r2, lsl #1]
-.if \avg
+ .if \avg
vld1.64 {d16,d17}, [ip,:128], r2
vrhadd.u8 q0, q0, q8
vld1.64 {d18,d19}, [ip,:128], r2
@@ -61,7 +61,7 @@ endfunc
vrhadd.u8 q2, q2, q10
vld1.64 {d22,d23}, [ip,:128], r2
vrhadd.u8 q3, q3, q11
-.endif
+ .endif
subs r3, r3, #4
vst1.64 {d0, d1}, [r0,:128], r2
vst1.64 {d2, d3}, [r0,:128], r2
@@ -69,31 +69,31 @@ endfunc
vst1.64 {d6, d7}, [r0,:128], r2
bne 1b
bx lr
- .endm
+.endm
- .macro pixels16_x2 vhadd=vrhadd.u8
+.macro pixels16_x2 rnd=1, avg=0
1: vld1.64 {d0-d2}, [r1], r2
vld1.64 {d4-d6}, [r1], r2
pld [r1]
pld [r1, r2]
subs r3, r3, #2
vext.8 q1, q0, q1, #1
- \vhadd q0, q0, q1
+ avg q0, q0, q1
vext.8 q3, q2, q3, #1
- \vhadd q2, q2, q3
+ avg q2, q2, q3
vst1.64 {d0, d1}, [r0,:128], r2
vst1.64 {d4, d5}, [r0,:128], r2
bne 1b
bx lr
- .endm
+.endm
- .macro pixels16_y2 vhadd=vrhadd.u8
+.macro pixels16_y2 rnd=1, avg=0
vld1.64 {d0, d1}, [r1], r2
vld1.64 {d2, d3}, [r1], r2
1: subs r3, r3, #2
- \vhadd q2, q0, q1
+ avg q2, q0, q1
vld1.64 {d0, d1}, [r1], r2
- \vhadd q3, q0, q1
+ avg q3, q0, q1
vld1.64 {d2, d3}, [r1], r2
pld [r1]
pld [r1, r2]
@@ -101,14 +101,14 @@ endfunc
vst1.64 {d6, d7}, [r0,:128], r2
bne 1b
bx lr
- .endm
+.endm
- .macro pixels16_xy2 vshrn=vrshrn.u16 no_rnd=0
+.macro pixels16_xy2 rnd=1, avg=0
vld1.64 {d0-d2}, [r1], r2
vld1.64 {d4-d6}, [r1], r2
-.if \no_rnd
+ .ifeq \rnd
vmov.i16 q13, #1
-.endif
+ .endif
pld [r1]
pld [r1, r2]
vext.8 q1, q0, q1, #1
@@ -121,40 +121,40 @@ endfunc
vld1.64 {d0-d2}, [r1], r2
vadd.u16 q12, q8, q9
pld [r1]
-.if \no_rnd
+ .ifeq \rnd
vadd.u16 q12, q12, q13
-.endif
+ .endif
vext.8 q15, q0, q1, #1
vadd.u16 q1 , q10, q11
- \vshrn d28, q12, #2
-.if \no_rnd
+ shrn d28, q12, #2
+ .ifeq \rnd
vadd.u16 q1, q1, q13
-.endif
- \vshrn d29, q1, #2
+ .endif
+ shrn d29, q1, #2
vaddl.u8 q8, d0, d30
vld1.64 {d2-d4}, [r1], r2
vaddl.u8 q10, d1, d31
vst1.64 {d28,d29}, [r0,:128], r2
vadd.u16 q12, q8, q9
pld [r1, r2]
-.if \no_rnd
+ .ifeq \rnd
vadd.u16 q12, q12, q13
-.endif
+ .endif
vext.8 q2, q1, q2, #1
vadd.u16 q0, q10, q11
- \vshrn d30, q12, #2
-.if \no_rnd
+ shrn d30, q12, #2
+ .ifeq \rnd
vadd.u16 q0, q0, q13
-.endif
- \vshrn d31, q0, #2
+ .endif
+ shrn d31, q0, #2
vaddl.u8 q9, d2, d4
vaddl.u8 q11, d3, d5
vst1.64 {d30,d31}, [r0,:128], r2
bgt 1b
bx lr
- .endm
+.endm
- .macro pixels8 avg=0
+.macro pixels8 rnd=1, avg=0
1: vld1.64 {d0}, [r1], r2
vld1.64 {d1}, [r1], r2
vld1.64 {d2}, [r1], r2
@@ -163,7 +163,7 @@ endfunc
pld [r1]
pld [r1, r2]
pld [r1, r2, lsl #1]
-.if \avg
+ .if \avg
vld1.64 {d4}, [r0,:64], r2
vrhadd.u8 d0, d0, d4
vld1.64 {d5}, [r0,:64], r2
@@ -173,7 +173,7 @@ endfunc
vld1.64 {d7}, [r0,:64], r2
vrhadd.u8 d3, d3, d7
sub r0, r0, r2, lsl #2
-.endif
+ .endif
subs r3, r3, #4
vst1.64 {d0}, [r0,:64], r2
vst1.64 {d1}, [r0,:64], r2
@@ -181,9 +181,9 @@ endfunc
vst1.64 {d3}, [r0,:64], r2
bne 1b
bx lr
- .endm
+.endm
- .macro pixels8_x2 vhadd=vrhadd.u8
+.macro pixels8_x2 rnd=1, avg=0
1: vld1.64 {d0, d1}, [r1], r2
vext.8 d1, d0, d1, #1
vld1.64 {d2, d3}, [r1], r2
@@ -192,20 +192,20 @@ endfunc
pld [r1, r2]
subs r3, r3, #2
vswp d1, d2
- \vhadd q0, q0, q1
+ avg q0, q0, q1
vst1.64 {d0}, [r0,:64], r2
vst1.64 {d1}, [r0,:64], r2
bne 1b
bx lr
- .endm
+.endm
- .macro pixels8_y2 vhadd=vrhadd.u8
+.macro pixels8_y2 rnd=1, avg=0
vld1.64 {d0}, [r1], r2
vld1.64 {d1}, [r1], r2
1: subs r3, r3, #2
- \vhadd d4, d0, d1
+ avg d4, d0, d1
vld1.64 {d0}, [r1], r2
- \vhadd d5, d0, d1
+ avg d5, d0, d1
vld1.64 {d1}, [r1], r2
pld [r1]
pld [r1, r2]
@@ -213,14 +213,14 @@ endfunc
vst1.64 {d5}, [r0,:64], r2
bne 1b
bx lr
- .endm
+.endm
- .macro pixels8_xy2 vshrn=vrshrn.u16 no_rnd=0
+.macro pixels8_xy2 rnd=1, avg=0
vld1.64 {d0, d1}, [r1], r2
vld1.64 {d2, d3}, [r1], r2
-.if \no_rnd
+ .ifeq \rnd
vmov.i16 q11, #1
-.endif
+ .endif
pld [r1]
pld [r1, r2]
vext.8 d4, d0, d1, #1
@@ -232,66 +232,83 @@ endfunc
pld [r1]
vadd.u16 q10, q8, q9
vext.8 d4, d0, d1, #1
-.if \no_rnd
+ .ifeq \rnd
vadd.u16 q10, q10, q11
-.endif
+ .endif
vaddl.u8 q8, d0, d4
- \vshrn d5, q10, #2
+ shrn d5, q10, #2
vld1.64 {d2, d3}, [r1], r2
vadd.u16 q10, q8, q9
pld [r1, r2]
-.if \no_rnd
+ .ifeq \rnd
vadd.u16 q10, q10, q11
-.endif
+ .endif
vst1.64 {d5}, [r0,:64], r2
- \vshrn d7, q10, #2
+ shrn d7, q10, #2
vext.8 d6, d2, d3, #1
vaddl.u8 q9, d2, d6
vst1.64 {d7}, [r0,:64], r2
bgt 1b
bx lr
- .endm
-
- .macro pixfunc pfx name suf rnd_op args:vararg
+.endm
+
+.macro pixfunc pfx, name, suf, rnd=1, avg=0
+ .if \rnd
+ .macro avg rd, rn, rm
+ vrhadd.u8 \rd, \rn, \rm
+ .endm
+ .macro shrn rd, rn, rm
+ vrshrn.u16 \rd, \rn, \rm
+ .endm
+ .else
+ .macro avg rd, rn, rm
+ vhadd.u8 \rd, \rn, \rm
+ .endm
+ .macro shrn rd, rn, rm
+ vshrn.u16 \rd, \rn, \rm
+ .endm
+ .endif
function ff_\pfx\name\suf\()_neon, export=1
- \name \rnd_op \args
+ \name \rnd, \avg
endfunc
- .endm
+ .purgem avg
+ .purgem shrn
+.endm
- .macro pixfunc2 pfx name args:vararg
- pixfunc \pfx \name
- pixfunc \pfx \name \args
- .endm
+.macro pixfunc2 pfx, name, avg=0
+ pixfunc \pfx, \name, rnd=1, avg=\avg
+ pixfunc \pfx, \name, _no_rnd, rnd=0, avg=\avg
+.endm
function ff_put_h264_qpel16_mc00_neon, export=1
mov r3, #16
endfunc
- pixfunc put_ pixels16
- pixfunc2 put_ pixels16_x2, _no_rnd, vhadd.u8
- pixfunc2 put_ pixels16_y2, _no_rnd, vhadd.u8
- pixfunc2 put_ pixels16_xy2, _no_rnd, vshrn.u16, 1
+ pixfunc put_, pixels16, avg=0
+ pixfunc2 put_, pixels16_x2, avg=0
+ pixfunc2 put_, pixels16_y2, avg=0
+ pixfunc2 put_, pixels16_xy2, avg=0
function ff_avg_h264_qpel16_mc00_neon, export=1
mov r3, #16
endfunc
- pixfunc avg_ pixels16,, 1
+ pixfunc avg_, pixels16, avg=1
function ff_put_h264_qpel8_mc00_neon, export=1
mov r3, #8
endfunc
- pixfunc put_ pixels8
- pixfunc2 put_ pixels8_x2, _no_rnd, vhadd.u8
- pixfunc2 put_ pixels8_y2, _no_rnd, vhadd.u8
- pixfunc2 put_ pixels8_xy2, _no_rnd, vshrn.u16, 1
+ pixfunc put_, pixels8, avg=0
+ pixfunc2 put_, pixels8_x2, avg=0
+ pixfunc2 put_, pixels8_y2, avg=0
+ pixfunc2 put_, pixels8_xy2, avg=0
function ff_avg_h264_qpel8_mc00_neon, export=1
mov r3, #8
endfunc
- pixfunc avg_ pixels8,, 1
+ pixfunc avg_, pixels8, avg=1
function ff_put_pixels_clamped_neon, export=1
vld1.64 {d16-d19}, [r0,:128]!
More information about the ffmpeg-cvslog mailing list