[FFmpeg-devel] [PATCH 2/2] x86/hevc_deblock: load less data in hevc_h_loop_filter_luma_8
James Almer
jamrial at gmail.com
Mon Jul 28 21:17:35 CEST 2014
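Reading 8 bytes per row is enough: the loaded registers are unpacked right away with punpcklbw against a zeroed register, which only uses the low 8 bytes, so the upper half of each 16-byte movdqu load was never needed. Use movq instead.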
Signed-off-by: James Almer <jamrial at gmail.com>
---
libavcodec/x86/hevc_deblock.asm | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/libavcodec/x86/hevc_deblock.asm b/libavcodec/x86/hevc_deblock.asm
index f7cd031..ecebd36 100644
--- a/libavcodec/x86/hevc_deblock.asm
+++ b/libavcodec/x86/hevc_deblock.asm
@@ -799,14 +799,14 @@ cglobal hevc_h_loop_filter_luma_8, 4, 15, 16, pix, stride, beta, tc, count, pix0
mov pix0q, pixq
sub pix0q, src3strideq
sub pix0q, strideq
- movdqu m0, [pix0q]; p3
- movdqu m1, [pix0q + strideq]; p2
- movdqu m2, [pix0q + 2 * strideq]; p1
- movdqu m3, [pix0q + src3strideq]; p0
- movdqu m4, [pixq]; q0
- movdqu m5, [pixq + strideq]; q1
- movdqu m6, [pixq + 2 * strideq]; q2
- movdqu m7, [pixq + src3strideq]; q3
+ movq m0, [pix0q]; p3
+ movq m1, [pix0q + strideq]; p2
+ movq m2, [pix0q + 2 * strideq]; p1
+ movq m3, [pix0q + src3strideq]; p0
+ movq m4, [pixq]; q0
+ movq m5, [pixq + strideq]; q1
+ movq m6, [pixq + 2 * strideq]; q2
+ movq m7, [pixq + src3strideq]; q3
pxor m8, m8
punpcklbw m0, m8
punpcklbw m1, m8
--
1.8.5.5
More information about the ffmpeg-devel
mailing list