[FFmpeg-cvslog] arm/aarch64: vp9itxfm: Skip loading the min_eob pointer when it won't be used

Martin Storsjö git at videolan.org
Sun Mar 19 23:01:59 EET 2017


ffmpeg | branch: master | Martin Storsjö <martin at martin.st> | Sun Feb 26 22:13:10 2017 +0200| [70317b25aa35c0907720e4d2b7686408588c07aa] | committer: Martin Storsjö

arm/aarch64: vp9itxfm: Skip loading the min_eob pointer when it won't be used

In the half/quarter cases where we don't use the min_eob array, defer
loading the pointer until we know it will be needed.

This is cherry-picked from libav commit
3a0d5e206d24d41d87a25ba16a79b2ea04c39d4c.

Signed-off-by: Martin Storsjö <martin at martin.st>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=70317b25aa35c0907720e4d2b7686408588c07aa
---

 libavcodec/aarch64/vp9itxfm_neon.S | 3 ++-
 libavcodec/arm/vp9itxfm_neon.S     | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/libavcodec/aarch64/vp9itxfm_neon.S b/libavcodec/aarch64/vp9itxfm_neon.S
index 2c3c002..3e5da08 100644
--- a/libavcodec/aarch64/vp9itxfm_neon.S
+++ b/libavcodec/aarch64/vp9itxfm_neon.S
@@ -1483,7 +1483,6 @@ function ff_vp9_idct_idct_32x32_add_neon, export=1
         b.eq            idct32x32_dc_add_neon
 
         movrel          x10, idct_coeffs
-        movrel          x12, min_eob_idct_idct_32, 2
 
         mov             x15, x30
 
@@ -1508,6 +1507,8 @@ function ff_vp9_idct_idct_32x32_add_neon, export=1
         cmp             w3,  #135
         b.le            idct32x32_half_add_neon
 
+        movrel          x12, min_eob_idct_idct_32, 2
+
 .irp i, 0, 8, 16, 24
         add             x0,  sp,  #(\i*64)
 .if \i > 0
diff --git a/libavcodec/arm/vp9itxfm_neon.S b/libavcodec/arm/vp9itxfm_neon.S
index adc9896..6d4d765 100644
--- a/libavcodec/arm/vp9itxfm_neon.S
+++ b/libavcodec/arm/vp9itxfm_neon.S
@@ -889,8 +889,6 @@ function ff_vp9_\txfm1\()_\txfm2\()_16x16_add_neon, export=1
         push            {r4-r8,lr}
 .ifnc \txfm1\()_\txfm2,idct_idct
         vpush           {q4-q7}
-.else
-        movrel          r8,  min_eob_idct_idct_16 + 2
 .endif
 
         @ Align the stack, allocate a temp buffer
@@ -914,6 +912,8 @@ A       and             r7,  sp,  #15
         ble             idct16x16_quarter_add_neon
         cmp             r3,  #38
         ble             idct16x16_half_add_neon
+
+        movrel          r8,  min_eob_idct_idct_16 + 2
 .endif
 
 .irp i, 0, 4, 8, 12



More information about the ffmpeg-cvslog mailing list