[FFmpeg-devel] [PATCH 2/3] x86: hevc/sao: aligned source buffers

James Almer jamrial at gmail.com
Sun Feb 1 20:00:10 CET 2015


From: Christophe Gisquet <christophe.gisquet at gmail.com>

Useful for at least the band filter, for which the timings are:
- Band filter call only:
           32      64
Before:  16556    54015
After:   16497    52355
- Whole case:
           32      64
Before:  37031   103008
After:   32045    93952
---
 libavcodec/hevc.c           |  6 +++---
 libavcodec/x86/hevc_sao.asm | 28 ++++++++++++++--------------
 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c
index 7db32f7..f24cd8f 100644
--- a/libavcodec/hevc.c
+++ b/libavcodec/hevc.c
@@ -284,12 +284,12 @@ static int get_buffer_sao(HEVCContext *s, AVFrame *frame, const HEVCSPS *sps)
 {
     int ret, i;
 
-    frame->width  = s->avctx->coded_width  + 2;
-    frame->height = s->avctx->coded_height + 2;
+    frame->width  = FFALIGN(s->avctx->coded_width + 2, FF_INPUT_BUFFER_PADDING_SIZE);
+    frame->height = s->avctx->coded_height + 3;
     if ((ret = ff_get_buffer(s->avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0)
         return ret;
     for (i = 0; frame->data[i]; i++) {
-        int offset = frame->linesize[i] + (1 << sps->pixel_shift);
+        int offset = frame->linesize[i] + FF_INPUT_BUFFER_PADDING_SIZE;
         frame->data[i] += offset;
     }
     frame->width  = s->avctx->coded_width;
diff --git a/libavcodec/x86/hevc_sao.asm b/libavcodec/x86/hevc_sao.asm
index 7f36fd0..4c11730 100644
--- a/libavcodec/x86/hevc_sao.asm
+++ b/libavcodec/x86/hevc_sao.asm
@@ -104,26 +104,26 @@ align 16
 
 %assign i 0
 %rep %2
-    movu             m13, [srcq + i]
+    mova             m13, [srcq + i]
     punpcklbw         m8, m13, m14
     HEVC_SAO_BAND_FILTER_COMPUTE 8, m9,  m8
     punpckhbw        m13, m14
     HEVC_SAO_BAND_FILTER_COMPUTE 8, m9, m13
     packuswb          m8, m13
-    movu      [dstq + i], m8
+    mova      [dstq + i], m8
 %assign i i+mmsize
 %endrep
 
 %if %1 == 48
 INIT_XMM cpuname
 
-    movu             m13, [srcq + i]
+    mova             m13, [srcq + i]
     punpcklbw         m8, m13, m14
     HEVC_SAO_BAND_FILTER_COMPUTE 8, m9,  m8
     punpckhbw        m13, m14
     HEVC_SAO_BAND_FILTER_COMPUTE 8, m9, m13
     packuswb          m8, m13
-    movu      [dstq + i], m8
+    mova      [dstq + i], m8
 %assign i i+16
 %endif ; %1 == 48
 
@@ -143,37 +143,37 @@ cglobal hevc_sao_band_filter_%2_%1, 6, 6, 15, dst, src, dststride, srcstride, of
 align 16
 .loop
 %if %2 == 8
-    movu              m8, [srcq]
+    mova              m8, [srcq]
     HEVC_SAO_BAND_FILTER_COMPUTE %1, m9, m8
     CLIPW             m8, m14, m13
-    movu          [dstq], m8
+    mova          [dstq], m8
 %endif
 
 %assign i 0
 %rep %3
-    movu              m8, [srcq + i]
+    mova              m8, [srcq + i]
     HEVC_SAO_BAND_FILTER_COMPUTE %1, m9, m8
     CLIPW             m8, m14, m13
-    movu      [dstq + i], m8
+    mova      [dstq + i], m8
 
-    movu              m9, [srcq + i + mmsize]
+    mova              m9, [srcq + i + mmsize]
     HEVC_SAO_BAND_FILTER_COMPUTE %1, m8, m9
     CLIPW             m9, m14, m13
-    movu      [dstq + i + mmsize], m9
+    mova      [dstq + i + mmsize], m9
 %assign i i+mmsize*2
 %endrep
 
 %if %2 == 48
 INIT_XMM cpuname
-    movu              m8, [srcq + i]
+    mova              m8, [srcq + i]
     HEVC_SAO_BAND_FILTER_COMPUTE %1, m9, m8
     CLIPW             m8, m14, m13
-    movu      [dstq + i], m8
+    mova      [dstq + i], m8
 
-    movu              m9, [srcq + i + mmsize]
+    mova              m9, [srcq + i + mmsize]
     HEVC_SAO_BAND_FILTER_COMPUTE %1, m8, m9
     CLIPW             m9, m14, m13
-    movu      [dstq + i + mmsize], m9
+    mova      [dstq + i + mmsize], m9
 %assign i i+32
 %endif ; %1 == 48
 
-- 
2.2.2



More information about the ffmpeg-devel mailing list