[FFmpeg-devel] [PATCH 3/5] x86: hevc_mc: save 1 gpr in epel filter loading
Christophe Gisquet
christophe.gisquet at gmail.com
Sat Feb 7 19:49:38 CET 2015
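The 3*stride value stored in r3src is not needed until after the filter
coefficients have been loaded, so its computation can be moved later and
r3src can first serve as the temporary register holding the filter table
address. Use r3src instead of a dedicated gpr (rfilter) when possible.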
---
libavcodec/x86/hevc_mc.asm | 65 ++++++++++++++++++++++------------------------
1 file changed, 31 insertions(+), 34 deletions(-)
diff --git a/libavcodec/x86/hevc_mc.asm b/libavcodec/x86/hevc_mc.asm
index aab69dd..74e08d4 100644
--- a/libavcodec/x86/hevc_mc.asm
+++ b/libavcodec/x86/hevc_mc.asm
@@ -136,20 +136,22 @@ QPEL_TABLE 10, 8, w, avx2
%endmacro
-%macro EPEL_FILTER 2-4 ; bit depth, filter index
+%macro EPEL_FILTER 5 ; bit depth, filter index, xmma, xmmb, gprtmp
%if cpuflag(avx2)
%assign %%offset 32
%ifdef PIC
- lea rfilterq, [hevc_epel_filters_avx2_%1]
+ lea %5q, [hevc_epel_filters_avx2_%1]
+ %define FILTER %5q
%else
- %define rfilterq hevc_epel_filters_avx2_%1
+ %define FILTER hevc_epel_filters_avx2_%1
%endif
%else
%assign %%offset 16
%ifdef PIC
- lea rfilterq, [hevc_epel_filters_sse4_%1]
+ lea %5q, [hevc_epel_filters_sse4_%1]
+ %define FILTER %5q
%else
- %define rfilterq hevc_epel_filters_sse4_%1
+ %define FILTER hevc_epel_filters_sse4_%1
%endif
%endif ;cpuflag(avx2)
sub %2q, 1
@@ -158,13 +160,8 @@ QPEL_TABLE 10, 8, w, avx2
%else
shl %2q, 5 ; multiply by 32
%endif
-%if %0 == 2
- mova m14, [rfilterq + %2q] ; get 2 first values of filters
- mova m15, [rfilterq + %2q+%%offset] ; get 2 last values of filters
-%else
- mova %3, [rfilterq + %2q] ; get 2 first values of filters
- mova %4, [rfilterq + %2q+%%offset] ; get 2 last values of filters
-%endif
+ mova %3, [FILTER + %2q] ; get 2 first values of filters
+ mova %4, [FILTER + %2q+%%offset] ; get 2 last values of filters
%endmacro
%macro EPEL_HV_FILTER 1
@@ -179,7 +176,7 @@ QPEL_TABLE 10, 8, w, avx2
%endif
%ifdef PIC
- lea rfilterq, [%%table]
+ lea r3srcq, [%%table]
%else
%define rfilterq %%table
%endif
@@ -187,9 +184,8 @@ QPEL_TABLE 10, 8, w, avx2
sub myq, 1
shl mxq, %%shift ; multiply by 32
shl myq, %%shift ; multiply by 32
- mova m14, [rfilterq + mxq] ; get 2 first values of filters
- mova m15, [rfilterq + mxq+%%offset] ; get 2 last values of filters
- lea r3srcq, [srcstrideq*3]
+ mova m14, [r3srcq + mxq] ; get 2 first values of filters
+ mova m15, [r3srcq + mxq+%%offset] ; get 2 last values of filters
%if cpuflag(avx2)
%define %%table hevc_epel_filters_avx2_10
@@ -197,12 +193,13 @@ QPEL_TABLE 10, 8, w, avx2
%define %%table hevc_epel_filters_sse4_10
%endif
%ifdef PIC
- lea rfilterq, [%%table]
+ lea r3srcq, [%%table]
%else
%define rfilterq %%table
%endif
- mova m12, [rfilterq + myq] ; get 2 first values of filters
- mova m13, [rfilterq + myq+%%offset] ; get 2 last values of filters
+ mova m12, [r3srcq + myq] ; get 2 first values of filters
+ mova m13, [r3srcq + myq+%%offset] ; get 2 last values of filters
+ lea r3srcq, [srcstrideq*3]
%endmacro
%macro QPEL_FILTER 2
@@ -739,7 +736,7 @@ cglobal hevc_put_hevc_bi_pel_pixels%1_%2, 6, 6, 6, dst, dststride, src, srcstrid
%macro HEVC_PUT_HEVC_EPEL 2
cglobal hevc_put_hevc_epel_h%1_%2, 5, 6, 11, dst, src, srcstride, height, mx, rfilter
%assign %%stride ((%2 + 7)/8)
- EPEL_FILTER %2, mx, m4, m5
+ EPEL_FILTER %2, mx, m4, m5, rfilter
.loop
EPEL_LOAD %2, srcq-%%stride, %%stride, %1
EPEL_COMPUTE %2, %1, m4, m5, 1
@@ -750,7 +747,7 @@ cglobal hevc_put_hevc_epel_h%1_%2, 5, 6, 11, dst, src, srcstride, height, mx, rf
cglobal hevc_put_hevc_uni_epel_h%1_%2, 6, 7, 11, dst, dststride, src, srcstride, height, mx, rfilter
%assign %%stride ((%2 + 7)/8)
movdqa m6, [pw_%2]
- EPEL_FILTER %2, mx, m4, m5
+ EPEL_FILTER %2, mx, m4, m5, rfilter
.loop
EPEL_LOAD %2, srcq-%%stride, %%stride, %1
EPEL_COMPUTE %2, %1, m4, m5
@@ -764,7 +761,7 @@ cglobal hevc_put_hevc_uni_epel_h%1_%2, 6, 7, 11, dst, dststride, src, srcstride,
cglobal hevc_put_hevc_bi_epel_h%1_%2, 7, 8, 11, dst, dststride, src, srcstride, src2, height, mx, rfilter
movdqa m6, [pw_bi_%2]
- EPEL_FILTER %2, mx, m4, m5
+ EPEL_FILTER %2, mx, m4, m5, rfilter
.loop
EPEL_LOAD %2, srcq-%%stride, %%stride, %1
EPEL_COMPUTE %2, %1, m4, m5, 1
@@ -784,11 +781,11 @@ cglobal hevc_put_hevc_bi_epel_h%1_%2, 7, 8, 11, dst, dststride, src, srcstride,
; int height, int mx, int my, int width)
; ******************************
-cglobal hevc_put_hevc_epel_v%1_%2, 4, 7, 11, dst, src, srcstride, height, r3src, my, rfilter
+cglobal hevc_put_hevc_epel_v%1_%2, 4, 6, 11, dst, src, srcstride, height, r3src, my
movifnidn myd, mym
- lea r3srcq, [srcstrideq*3]
sub srcq, srcstrideq
- EPEL_FILTER %2, my, m4, m5
+ EPEL_FILTER %2, my, m4, m5, r3src
+ lea r3srcq, [srcstrideq*3]
.loop
EPEL_LOAD %2, srcq, srcstride, %1
EPEL_COMPUTE %2, %1, m4, m5, 1
@@ -796,12 +793,12 @@ cglobal hevc_put_hevc_epel_v%1_%2, 4, 7, 11, dst, src, srcstride, height, r3src,
LOOP_END dst, src, srcstride
RET
-cglobal hevc_put_hevc_uni_epel_v%1_%2, 5, 8, 11, dst, dststride, src, srcstride, height, r3src, my, rfilter
+cglobal hevc_put_hevc_uni_epel_v%1_%2, 5, 7, 11, dst, dststride, src, srcstride, height, r3src, my
movifnidn myd, mym
- lea r3srcq, [srcstrideq*3]
movdqa m6, [pw_%2]
sub srcq, srcstrideq
- EPEL_FILTER %2, my, m4, m5
+ EPEL_FILTER %2, my, m4, m5, r3src
+ lea r3srcq, [srcstrideq*3]
.loop
EPEL_LOAD %2, srcq, srcstride, %1
EPEL_COMPUTE %2, %1, m4, m5
@@ -814,12 +811,12 @@ cglobal hevc_put_hevc_uni_epel_v%1_%2, 5, 8, 11, dst, dststride, src, srcstride,
RET
-cglobal hevc_put_hevc_bi_epel_v%1_%2, 6, 9, 11, dst, dststride, src, srcstride, src2, height, r3src, my, rfilter
+cglobal hevc_put_hevc_bi_epel_v%1_%2, 6, 8, 11, dst, dststride, src, srcstride, src2, height, r3src, my
movifnidn myd, mym
- lea r3srcq, [srcstrideq*3]
movdqa m6, [pw_bi_%2]
sub srcq, srcstrideq
- EPEL_FILTER %2, my, m4, m5
+ EPEL_FILTER %2, my, m4, m5, r3src
+ lea r3srcq, [srcstrideq*3]
.loop
EPEL_LOAD %2, srcq, srcstride, %1
EPEL_COMPUTE %2, %1, m4, m5, 1
@@ -842,7 +839,7 @@ cglobal hevc_put_hevc_bi_epel_v%1_%2, 6, 9, 11, dst, dststride, src, srcstride,
; ******************************
%macro HEVC_PUT_HEVC_EPEL_HV 2
-cglobal hevc_put_hevc_epel_hv%1_%2, 6, 8, 16 , dst, src, srcstride, height, mx, my, r3src, rfilter
+cglobal hevc_put_hevc_epel_hv%1_%2, 6, 7, 16 , dst, src, srcstride, height, mx, my, r3src
%assign %%stride ((%2 + 7)/8)
sub srcq, srcstrideq
EPEL_HV_FILTER %2
@@ -909,7 +906,7 @@ cglobal hevc_put_hevc_epel_hv%1_%2, 6, 8, 16 , dst, src, srcstride, height, mx,
LOOP_END dst, src, srcstride
RET
-cglobal hevc_put_hevc_uni_epel_hv%1_%2, 7, 9, 16 , dst, dststride, src, srcstride, height, mx, my, r3src, rfilter
+cglobal hevc_put_hevc_uni_epel_hv%1_%2, 7, 8, 16 , dst, dststride, src, srcstride, height, mx, my, r3src
%assign %%stride ((%2 + 7)/8)
sub srcq, srcstrideq
EPEL_HV_FILTER %2
@@ -973,7 +970,7 @@ cglobal hevc_put_hevc_uni_epel_hv%1_%2, 7, 9, 16 , dst, dststride, src, srcstrid
jnz .loop ; height loop
RET
-cglobal hevc_put_hevc_bi_epel_hv%1_%2, 8, 10, 16, dst, dststride, src, srcstride, src2, height, mx, my, r3src, rfilter
+cglobal hevc_put_hevc_bi_epel_hv%1_%2, 8, 9, 16, dst, dststride, src, srcstride, src2, height, mx, my, r3src
%assign %%stride ((%2 + 7)/8)
sub srcq, srcstrideq
EPEL_HV_FILTER %2
--
1.9.2.msysgit.0
More information about the ffmpeg-devel mailing list