[FFmpeg-cvslog] arm: vp9itxfm: Simplify the stack alignment code

Janne Grunau git at videolan.org
Sat Jan 14 22:36:14 EET 2017


ffmpeg | branch: master | Janne Grunau <janne-libav at jannau.net> | Tue Jan 10 00:15:09 2017 +0200| [a71cd8439fd32fd83b7a9b9ac8d6f861846770c7] | committer: Michael Niedermayer

arm: vp9itxfm: Simplify the stack alignment code

This is one instruction fewer for thumb, and leaves only 1/2
arm/thumb-specific instructions (one for arm, two for thumb).

This is cherrypicked from libav commit
e5b0fc170f85b00f7dd0ac514918fb5c95253d39.

Signed-off-by: Michael Niedermayer <michael at niedermayer.cc>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=a71cd8439fd32fd83b7a9b9ac8d6f861846770c7
---

 libavcodec/arm/vp9itxfm_neon.S | 28 ++++++++++++----------------
 1 file changed, 12 insertions(+), 16 deletions(-)

diff --git a/libavcodec/arm/vp9itxfm_neon.S b/libavcodec/arm/vp9itxfm_neon.S
index 06470a3..d7a2654 100644
--- a/libavcodec/arm/vp9itxfm_neon.S
+++ b/libavcodec/arm/vp9itxfm_neon.S
@@ -791,15 +791,13 @@ function ff_vp9_\txfm1\()_\txfm2\()_16x16_add_neon, export=1
 .ifnc \txfm1\()_\txfm2,idct_idct
         vpush           {q4-q7}
 .endif
-        mov             r7,  sp
 
         @ Align the stack, allocate a temp buffer
-T       mov             r12, sp
-T       bic             r12, r12, #15
-T       sub             r12, r12, #512
-T       mov             sp,  r12
-A       bic             sp,  sp,  #15
-A       sub             sp,  sp,  #512
+T       mov             r7,  sp
+T       and             r7,  r7,  #15
+A       and             r7,  sp,  #15
+        add             r7,  r7,  #512
+        sub             sp,  sp,  r7
 
         mov             r4,  r0
         mov             r5,  r1
@@ -828,7 +826,7 @@ A       sub             sp,  sp,  #512
         bl              \txfm2\()16_1d_4x16_pass2_neon
 .endr
 
-        mov             sp,  r7
+        add             sp,  sp,  r7
 .ifnc \txfm1\()_\txfm2,idct_idct
         vpop            {q4-q7}
 .endif
@@ -1117,15 +1115,13 @@ function ff_vp9_idct_idct_32x32_add_neon, export=1
         beq             idct32x32_dc_add_neon
         push            {r4-r7,lr}
         vpush           {q4-q7}
-        mov             r7,  sp
 
         @ Align the stack, allocate a temp buffer
-T       mov             r12, sp
-T       bic             r12, r12, #15
-T       sub             r12, r12, #2048
-T       mov             sp,  r12
-A       bic             sp,  sp,  #15
-A       sub             sp,  sp,  #2048
+T       mov             r7,  sp
+T       and             r7,  r7,  #15
+A       and             r7,  sp,  #15
+        add             r7,  r7,  #2048
+        sub             sp,  sp,  r7
 
         mov             r4,  r0
         mov             r5,  r1
@@ -1143,7 +1139,7 @@ A       sub             sp,  sp,  #2048
         bl              idct32_1d_4x32_pass2_neon
 .endr
 
-        mov             sp,  r7
+        add             sp,  sp,  r7
         vpop            {q4-q7}
         pop             {r4-r7,pc}
 endfunc



More information about the ffmpeg-cvslog mailing list