[FFmpeg-devel] [PATCH] sws/aarch64: restore hscale for a limited subset of filter sizes

Clément Bœsch u at pkh.me
Mon Apr 4 10:35:16 CEST 2016


From: Clément Bœsch <clement at stupeflix.com>

---
I still need to add a variant for filter sizes that are a multiple of 4 (i.e. (filterSize & 3) == 0)... I don't know whether it can go any smaller than that.
---
 libswscale/aarch64/hscale.S  | 2 +-
 libswscale/aarch64/swscale.c | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/libswscale/aarch64/hscale.S b/libswscale/aarch64/hscale.S
index c32394c..e6bd365 100644
--- a/libswscale/aarch64/hscale.S
+++ b/libswscale/aarch64/hscale.S
@@ -20,7 +20,7 @@
 
 #include "libavutil/aarch64/asm.S"
 
-function ff_hscale_8_to_15_neon, export=1
+function ff_hscale_8_to_15_X8_neon, export=1
         add                 x10, x4, w6, UXTW #1        // filter2 = filter + filterSize*2 (x2 because int16)
 1:      ldr                 w8, [x5], #4                // filterPos[0]
         ldr                 w9, [x5], #4                // filterPos[1]
diff --git a/libswscale/aarch64/swscale.c b/libswscale/aarch64/swscale.c
index ebf76a5..f38effe 100644
--- a/libswscale/aarch64/swscale.c
+++ b/libswscale/aarch64/swscale.c
@@ -21,7 +21,7 @@
 #include "libswscale/swscale_internal.h"
 #include "libavutil/aarch64/cpu.h"
 
-void ff_hscale_8_to_15_neon(SwsContext *c, int16_t *dst, int dstW,
+void ff_hscale_8_to_15_X8_neon(SwsContext *c, int16_t *dst, int dstW,
                             const uint8_t *src, const int16_t *filter,
                             const int32_t *filterPos, int filterSize);
 
@@ -31,7 +31,8 @@ av_cold void ff_sws_init_swscale_aarch64(SwsContext *c)
 
     if (have_neon(cpu_flags)) {
         if (c->srcBpc == 8 && c->dstBpc <= 14) {
-            //c->hyScale = c->hcScale = ff_hscale_8_to_15_neon;
+            if (c->hLumFilterSize & 7) c->hyScale = ff_hscale_8_to_15_X8_neon;
+            if (c->hChrFilterSize & 7) c->hcScale = ff_hscale_8_to_15_X8_neon;
         }
     }
 }
-- 
2.7.4



More information about the ffmpeg-devel mailing list