[FFmpeg-devel] [PATCH] sws/aarch64: restore hscale for a limited subset of filter sizes
Clément Bœsch
u at pkh.me
Mon Apr 4 10:35:16 CEST 2016
From: Clément Bœsch <clement at stupeflix.com>
---
I still need to add a "& 3" (multiple-of-4) version now... I don't know whether the supported filter size step can be made any smaller than that.
---
libswscale/aarch64/hscale.S | 2 +-
libswscale/aarch64/swscale.c | 5 +++--
2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/libswscale/aarch64/hscale.S b/libswscale/aarch64/hscale.S
index c32394c..e6bd365 100644
--- a/libswscale/aarch64/hscale.S
+++ b/libswscale/aarch64/hscale.S
@@ -20,7 +20,7 @@
#include "libavutil/aarch64/asm.S"
-function ff_hscale_8_to_15_neon, export=1
+function ff_hscale_8_to_15_X8_neon, export=1
add x10, x4, w6, UXTW #1 // filter2 = filter + filterSize*2 (x2 because int16)
1: ldr w8, [x5], #4 // filterPos[0]
ldr w9, [x5], #4 // filterPos[1]
diff --git a/libswscale/aarch64/swscale.c b/libswscale/aarch64/swscale.c
index ebf76a5..f38effe 100644
--- a/libswscale/aarch64/swscale.c
+++ b/libswscale/aarch64/swscale.c
@@ -21,7 +21,7 @@
#include "libswscale/swscale_internal.h"
#include "libavutil/aarch64/cpu.h"
-void ff_hscale_8_to_15_neon(SwsContext *c, int16_t *dst, int dstW,
+void ff_hscale_8_to_15_X8_neon(SwsContext *c, int16_t *dst, int dstW,
const uint8_t *src, const int16_t *filter,
const int32_t *filterPos, int filterSize);
@@ -31,7 +31,8 @@ av_cold void ff_sws_init_swscale_aarch64(SwsContext *c)
if (have_neon(cpu_flags)) {
if (c->srcBpc == 8 && c->dstBpc <= 14) {
- //c->hyScale = c->hcScale = ff_hscale_8_to_15_neon;
+ if (c->hLumFilterSize & 7) c->hyScale = ff_hscale_8_to_15_X8_neon;
+ if (c->hChrFilterSize & 7) c->hcScale = ff_hscale_8_to_15_X8_neon;
}
}
}
--
2.7.4
More information about the ffmpeg-devel mailing list