[FFmpeg-devel] [PATCH 2/3] x86: hevc/sao: aligned source buffers
James Almer
jamrial at gmail.com
Sun Feb 1 20:00:10 CET 2015
From: Christophe Gisquet <christophe.gisquet at gmail.com>
Useful for at least the band filter, for which:
- Band filter call only:
32 64
Before: 16556 54015
After: 16497 52355
- Whole case:
32 64
Before: 37031 103008
After: 32045 93952
---
libavcodec/hevc.c | 6 +++---
libavcodec/x86/hevc_sao.asm | 28 ++++++++++++++--------------
2 files changed, 17 insertions(+), 17 deletions(-)
diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c
index 7db32f7..f24cd8f 100644
--- a/libavcodec/hevc.c
+++ b/libavcodec/hevc.c
@@ -284,12 +284,12 @@ static int get_buffer_sao(HEVCContext *s, AVFrame *frame, const HEVCSPS *sps)
{
int ret, i;
- frame->width = s->avctx->coded_width + 2;
- frame->height = s->avctx->coded_height + 2;
+ frame->width = FFALIGN(s->avctx->coded_width + 2, FF_INPUT_BUFFER_PADDING_SIZE);
+ frame->height = s->avctx->coded_height + 3;
if ((ret = ff_get_buffer(s->avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0)
return ret;
for (i = 0; frame->data[i]; i++) {
- int offset = frame->linesize[i] + (1 << sps->pixel_shift);
+ int offset = frame->linesize[i] + FF_INPUT_BUFFER_PADDING_SIZE;
frame->data[i] += offset;
}
frame->width = s->avctx->coded_width;
diff --git a/libavcodec/x86/hevc_sao.asm b/libavcodec/x86/hevc_sao.asm
index 7f36fd0..4c11730 100644
--- a/libavcodec/x86/hevc_sao.asm
+++ b/libavcodec/x86/hevc_sao.asm
@@ -104,26 +104,26 @@ align 16
%assign i 0
%rep %2
- movu m13, [srcq + i]
+ mova m13, [srcq + i]
punpcklbw m8, m13, m14
HEVC_SAO_BAND_FILTER_COMPUTE 8, m9, m8
punpckhbw m13, m14
HEVC_SAO_BAND_FILTER_COMPUTE 8, m9, m13
packuswb m8, m13
- movu [dstq + i], m8
+ mova [dstq + i], m8
%assign i i+mmsize
%endrep
%if %1 == 48
INIT_XMM cpuname
- movu m13, [srcq + i]
+ mova m13, [srcq + i]
punpcklbw m8, m13, m14
HEVC_SAO_BAND_FILTER_COMPUTE 8, m9, m8
punpckhbw m13, m14
HEVC_SAO_BAND_FILTER_COMPUTE 8, m9, m13
packuswb m8, m13
- movu [dstq + i], m8
+ mova [dstq + i], m8
%assign i i+16
%endif ; %1 == 48
@@ -143,37 +143,37 @@ cglobal hevc_sao_band_filter_%2_%1, 6, 6, 15, dst, src, dststride, srcstride, of
align 16
.loop
%if %2 == 8
- movu m8, [srcq]
+ mova m8, [srcq]
HEVC_SAO_BAND_FILTER_COMPUTE %1, m9, m8
CLIPW m8, m14, m13
- movu [dstq], m8
+ mova [dstq], m8
%endif
%assign i 0
%rep %3
- movu m8, [srcq + i]
+ mova m8, [srcq + i]
HEVC_SAO_BAND_FILTER_COMPUTE %1, m9, m8
CLIPW m8, m14, m13
- movu [dstq + i], m8
+ mova [dstq + i], m8
- movu m9, [srcq + i + mmsize]
+ mova m9, [srcq + i + mmsize]
HEVC_SAO_BAND_FILTER_COMPUTE %1, m8, m9
CLIPW m9, m14, m13
- movu [dstq + i + mmsize], m9
+ mova [dstq + i + mmsize], m9
%assign i i+mmsize*2
%endrep
%if %2 == 48
INIT_XMM cpuname
- movu m8, [srcq + i]
+ mova m8, [srcq + i]
HEVC_SAO_BAND_FILTER_COMPUTE %1, m9, m8
CLIPW m8, m14, m13
- movu [dstq + i], m8
+ mova [dstq + i], m8
- movu m9, [srcq + i + mmsize]
+ mova m9, [srcq + i + mmsize]
HEVC_SAO_BAND_FILTER_COMPUTE %1, m8, m9
CLIPW m9, m14, m13
- movu [dstq + i + mmsize], m9
+ mova [dstq + i + mmsize], m9
%assign i i+32
%endif ; %1 == 48
--
2.2.2
More information about the ffmpeg-devel
mailing list