[FFmpeg-cvslog] x86: hevc_mc: load less data in epel filters

Christophe Gisquet git at videolan.org
Sun Jul 27 19:15:48 CEST 2014


ffmpeg | branch: master | Christophe Gisquet <christophe.gisquet at gmail.com> | Fri Jul 25 15:08:49 2014 +0200| [81943a10b5007825892bac4582659fa7f74c4025] | committer: Michael Niedermayer

x86: hevc_mc: load less data in epel filters

Before:
5679 decicycles in epel_bi, 2059976 runs, 37176 skips
3468 decicycles in epel_uni, 1040886 runs, 7690 skips

After:
5323 decicycles in epel_bi, 2059493 runs, 37659 skips
3262 decicycles in epel_uni, 1040871 runs, 7705 skips

Signed-off-by: Michael Niedermayer <michaelni at gmx.at>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=81943a10b5007825892bac4582659fa7f74c4025
---

 libavcodec/x86/hevc_mc.asm |   22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/libavcodec/x86/hevc_mc.asm b/libavcodec/x86/hevc_mc.asm
index 545f556..9cfebb8 100644
--- a/libavcodec/x86/hevc_mc.asm
+++ b/libavcodec/x86/hevc_mc.asm
@@ -176,15 +176,23 @@ QPEL_TABLE 12, 4, w, sse4
 %else
     %define rfilterq %2
 %endif
-    movdqu            m0, [rfilterq ]            ;load 128bit of x
+%if (%1 == 8 && %4 <= 4)
+%define %%load movd
+%elif (%1 == 8 && %4 <= 8) || (%1 > 8 && %4 <= 4)
+%define %%load movq
+%else
+%define %%load movdqu
+%endif
+
+    %%load            m0, [rfilterq ]
 %ifnum %3
-    movdqu            m1, [rfilterq+  %3]        ;load 128bit of x+stride
-    movdqu            m2, [rfilterq+2*%3]        ;load 128bit of x+2*stride
-    movdqu            m3, [rfilterq+3*%3]        ;load 128bit of x+3*stride
+    %%load            m1, [rfilterq+  %3]
+    %%load            m2, [rfilterq+2*%3]
+    %%load            m3, [rfilterq+3*%3]
 %else
-    movdqu            m1, [rfilterq+  %3q]       ;load 128bit of x+stride
-    movdqu            m2, [rfilterq+2*%3q]       ;load 128bit of x+2*stride
-    movdqu            m3, [rfilterq+r3srcq]      ;load 128bit of x+2*stride
+    %%load            m1, [rfilterq+  %3q]
+    %%load            m2, [rfilterq+2*%3q]
+    %%load            m3, [rfilterq+r3srcq]
 %endif
 
 %if %1 == 8



More information about the ffmpeg-cvslog mailing list