[FFmpeg-devel] [PATCH 3/6] x86: hevc_mc: clean EPEL_FILTER macro

Christophe Gisquet christophe.gisquet at gmail.com
Sun Jun 1 16:12:59 CEST 2014


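The EPEL_FILTER macro consumes an extra register (rfilter) for no benefit.
Derive the filter table address from the filter index register instead, and
always load the filter coefficients into m4/m5.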
---
 libavcodec/x86/hevc_mc.asm | 49 ++++++++++++++++++++++++----------------------
 1 file changed, 26 insertions(+), 23 deletions(-)

diff --git a/libavcodec/x86/hevc_mc.asm b/libavcodec/x86/hevc_mc.asm
index 5cf37d0..b28dea3 100644
--- a/libavcodec/x86/hevc_mc.asm
+++ b/libavcodec/x86/hevc_mc.asm
@@ -112,21 +112,17 @@ QPEL_TABLE 10, 4, w, sse4
 %endif
 %endmacro
 
-%macro EPEL_FILTER 2-4                            ; bit depth, filter index
-%ifdef PIC
-    lea         rfilterq, [hevc_epel_filters_sse4_%1]
-%else
-    %define rfilterq hevc_epel_filters_sse4_%1
-%endif
+%macro EPEL_FILTER 2                           ; bit depth, filter index
     sub              %2q, 1
     shl              %2q, 5                      ; multiply by 32
-%if %0 == 2
-    movdqa           m14, [rfilterq + %2q]        ; get 2 first values of filters
-    movdqa           m15, [rfilterq + %2q+16]     ; get 2 last values of filters
+%ifdef PIC
+    lea              %2q, [%2q + hevc_epel_filters_sse4_%1]
+%define FILTER  %2q
 %else
-    movdqa           %3, [rfilterq + %2q]        ; get 2 first values of filters
-    movdqa           %4, [rfilterq + %2q+16]     ; get 2 last values of filters
+%define FILTER  %2q + hevc_epel_filters_sse4_%1
 %endif
+    mova              m4, [FILTER]        ; get 2 first values of filters
+    mova              m5, [FILTER+16]     ; get 2 last values of filters
 %endmacro
 
 %macro EPEL_HV_FILTER 1
@@ -554,9 +550,10 @@ cglobal hevc_put_hevc_bi_pel_pixels%1_%2, 7, 7, 6, dst, dststride, src, srcstrid
 
 
 %macro HEVC_PUT_HEVC_EPEL 2
-cglobal hevc_put_hevc_epel_h%1_%2, 6, 7, 6, dst, dststride, src, srcstride, height, mx, rfilter
+cglobal hevc_put_hevc_epel_h%1_%2, 6, 6, 6, dst, dststride, src, srcstride, height, mx
 %assign %%stride ((%2 + 7)/8)
-    EPEL_FILTER       %2, mx, m4, m5
+    EPEL_FILTER       %2, mx
+%define rfilterq mxq
 .loop
     EPEL_LOAD         %2, srcq-%%stride, %%stride, %1
     EPEL_COMPUTE      %2, %1, m4, m5
@@ -564,10 +561,11 @@ cglobal hevc_put_hevc_epel_h%1_%2, 6, 7, 6, dst, dststride, src, srcstride, heig
     LOOP_END         dst, dststride, src, srcstride
     RET
 
-cglobal hevc_put_hevc_uni_epel_h%1_%2, 6, 7, 7, dst, dststride, src, srcstride, height, mx, rfilter
+cglobal hevc_put_hevc_uni_epel_h%1_%2, 6, 7, 7, dst, dststride, src, srcstride, height, mx
 %assign %%stride ((%2 + 7)/8)
     movdqa            m6, [pw_%2]
-    EPEL_FILTER       %2, mx, m4, m5
+    EPEL_FILTER       %2, mx
+%define rfilterq mxq
 .loop
     EPEL_LOAD         %2, srcq-%%stride, %%stride, %1
     EPEL_COMPUTE      %2, %1, m4, m5
@@ -579,9 +577,10 @@ cglobal hevc_put_hevc_uni_epel_h%1_%2, 6, 7, 7, dst, dststride, src, srcstride,
     jnz               .loop                      ; height loop
     RET
 
-cglobal hevc_put_hevc_bi_epel_h%1_%2, 8, 9, 7, dst, dststride, src, srcstride, src2, src2stride,height, mx, rfilter
+cglobal hevc_put_hevc_bi_epel_h%1_%2, 8, 8, 7, dst, dststride, src, srcstride, src2, src2stride,height, mx
     movdqa            m6, [pw_bi_%2]
-    EPEL_FILTER       %2, mx, m4, m5
+    EPEL_FILTER       %2, mx
+%define rfilterq mxq
 .loop
     EPEL_LOAD         %2, srcq-%%stride, %%stride, %1
     EPEL_COMPUTE      %2, %1, m4, m5
@@ -602,10 +601,12 @@ cglobal hevc_put_hevc_bi_epel_h%1_%2, 8, 9, 7, dst, dststride, src, srcstride, s
 ;                      int16_t* mcbuffer)
 ; ******************************
 
-cglobal hevc_put_hevc_epel_v%1_%2, 7, 8, 6, dst, dststride, src, srcstride, height, r3src, my, rfilter
+cglobal hevc_put_hevc_epel_v%1_%2, 5, 7, 6, dst, dststride, src, srcstride, height, r3src, my
+    mov              myd, mym
+    EPEL_FILTER       %2, my
     lea           r3srcq, [srcstrideq*3]
     sub             srcq, srcstrideq
-    EPEL_FILTER       %2, my, m4, m5
+%define rfilterq   myq
 .loop
     EPEL_LOAD         %2, srcq, srcstride, %1
     EPEL_COMPUTE      %2, %1, m4, m5
@@ -613,11 +614,12 @@ cglobal hevc_put_hevc_epel_v%1_%2, 7, 8, 6, dst, dststride, src, srcstride, heig
     LOOP_END          dst, dststride, src, srcstride
     RET
 
-cglobal hevc_put_hevc_uni_epel_v%1_%2, 7, 8, 7, dst, dststride, src, srcstride, height, r3src, my, rfilter
+cglobal hevc_put_hevc_uni_epel_v%1_%2, 5, 7, 7, dst, dststride, src, srcstride, height, r3src, my
+    mov              myd, mym
+    EPEL_FILTER       %2, my
     lea           r3srcq, [srcstrideq*3]
     movdqa            m6, [pw_%2]
     sub             srcq, srcstrideq
-    EPEL_FILTER       %2, my, m4, m5
 .loop
     EPEL_LOAD         %2, srcq, srcstride, %1
     EPEL_COMPUTE      %2, %1, m4, m5
@@ -630,11 +632,12 @@ cglobal hevc_put_hevc_uni_epel_v%1_%2, 7, 8, 7, dst, dststride, src, srcstride,
     RET
 
 
-cglobal hevc_put_hevc_bi_epel_v%1_%2, 9, 10, 7, dst, dststride, src, srcstride, src2, src2stride,height, r3src, my, rfilter
+cglobal hevc_put_hevc_bi_epel_v%1_%2, 8, 9, 7, dst, dststride, src, srcstride, src2, src2stride,height, r3src, my
+    mov              myd, mym
+    EPEL_FILTER       %2, my
     lea           r3srcq, [srcstrideq*3]
     movdqa            m6, [pw_bi_%2]
     sub             srcq, srcstrideq
-    EPEL_FILTER       %2, my, m4, m5
 .loop
     EPEL_LOAD         %2, srcq, srcstride, %1
     EPEL_COMPUTE      %2, %1, m4, m5
-- 
1.8.0.msysgit.0



More information about the ffmpeg-devel mailing list