[FFmpeg-cvslog] x86/vp9lpf: add a preload system in FILTER_UPDATE.
Clément Bœsch
git at videolan.org
Mon Jan 27 22:43:04 CET 2014
ffmpeg | branch: master | Clément Bœsch <u at pkh.me> | Sat Jan 25 19:04:56 2014 +0100| [822385d77598948169ba97996c69abe519988bde] | committer: Clément Bœsch
x86/vp9lpf: add a preload system in FILTER_UPDATE.
Allow some macro refactoring in filter14().
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=822385d77598948169ba97996c69abe519988bde
---
libavcodec/x86/vp9lpf.asm | 47 +++++++++++++++++++--------------------------
1 file changed, 20 insertions(+), 27 deletions(-)
diff --git a/libavcodec/x86/vp9lpf.asm b/libavcodec/x86/vp9lpf.asm
index a174b73..08e69ef 100644
--- a/libavcodec/x86/vp9lpf.asm
+++ b/libavcodec/x86/vp9lpf.asm
@@ -93,11 +93,16 @@ SECTION .text
mova %5, %1
%endmacro
-%macro FILTER_UPDATE 11-12 ; tmp1, tmp2, cacheL, cacheH, dstp, -, -, +, +, rshift, [source]
+%macro FILTER_UPDATE 11-14 ; tmp1, tmp2, cacheL, cacheH, dstp, -, -, +, +, rshift, [source], [preload reg + value]
+%if %0 == 13 ; no source + preload
+ mova %12, %13
+%elif %0 == 14 ; source + preload
+ mova %13, %14
+%endif
FILTER_SUBx2_ADDx2 %1, l, %3, %6, %7, %8, %9, %10
FILTER_SUBx2_ADDx2 %2, h, %4, %6, %7, %8, %9, %10
packuswb %1, %2
-%if %0 == 12
+%if %0 == 12 || %0 == 14
MASK_APPLY %1, %12, %11, %2
%else
MASK_APPLY %1, %5, %11, %2
@@ -537,31 +542,19 @@ SECTION .text
mova m8, [P5]
mova m9, [P4]
FILTER_INIT m4, m5, m6, m7, [P6], 14, m1, m3
- FILTER_UPDATE m6, m7, m4, m5, [P5], m2, m3, m8, m13, 4, m1, m8 ; [p5] -p7 -p6 +p5 +q1
- mova m13, [Q2]
- FILTER_UPDATE m4, m5, m6, m7, [P4], m2, m8, m9, m13, 4, m1, m9 ; [p4] -p7 -p5 +p4 +q2
- mova m13, [Q3]
- FILTER_UPDATE m6, m7, m4, m5, [P3], m2, m9, m14, m13, 4, m1, m14 ; [p3] -p7 -p4 +p3 +q3
- mova m13, [Q4]
- FILTER_UPDATE m4, m5, m6, m7, [P2], m2, m14, m15, m13, 4, m1 ; [p2] -p7 -p3 +p2 +q4
- mova m13, [Q5]
- FILTER_UPDATE m6, m7, m4, m5, [P1], m2, m15, m10, m13, 4, m1 ; [p1] -p7 -p2 +p1 +q5
- mova m13, [Q6]
- FILTER_UPDATE m4, m5, m6, m7, [P0], m2, m10, m11, m13, 4, m1 ; [p0] -p7 -p1 +p0 +q6
- mova m13, [Q7]
- FILTER_UPDATE m6, m7, m4, m5, [Q0], m2, m11, m12, m13, 4, m1 ; [q0] -p7 -p0 +q0 +q7
- mova m2, [Q1]
- FILTER_UPDATE m4, m5, m6, m7, [Q1], m3, m12, m2, m13, 4, m1 ; [q1] -p6 -q0 +q1 +q7
- mova m3, [Q2]
- FILTER_UPDATE m6, m7, m4, m5, [Q2], m8, m2, m3, m13, 4, m1 ; [q2] -p5 -q1 +q2 +q7
- mova m8, [Q3]
- FILTER_UPDATE m4, m5, m6, m7, [Q3], m9, m3, m8, m13, 4, m1, m8 ; [q3] -p4 -q2 +q3 +q7
- mova m9, [Q4]
- FILTER_UPDATE m6, m7, m4, m5, [Q4], m14, m8, m9, m13, 4, m1, m9 ; [q4] -p3 -q3 +q4 +q7
- mova m14, [Q5]
- FILTER_UPDATE m4, m5, m6, m7, [Q5], m15, m9, m14, m13, 4, m1, m14 ; [q5] -p2 -q4 +q5 +q7
- mova m15, [Q6]
- FILTER_UPDATE m6, m7, m4, m5, [Q6], m10, m14, m15, m13, 4, m1, m15 ; [q6] -p1 -q5 +q6 +q7
+ FILTER_UPDATE m6, m7, m4, m5, [P5], m2, m3, m8, m13, 4, m1, m8 ; [p5] -p7 -p6 +p5 +q1
+ FILTER_UPDATE m4, m5, m6, m7, [P4], m2, m8, m9, m13, 4, m1, m9, m13, [Q2] ; [p4] -p7 -p5 +p4 +q2
+ FILTER_UPDATE m6, m7, m4, m5, [P3], m2, m9, m14, m13, 4, m1, m14, m13, [Q3] ; [p3] -p7 -p4 +p3 +q3
+ FILTER_UPDATE m4, m5, m6, m7, [P2], m2, m14, m15, m13, 4, m1, m13, [Q4] ; [p2] -p7 -p3 +p2 +q4
+ FILTER_UPDATE m6, m7, m4, m5, [P1], m2, m15, m10, m13, 4, m1, m13, [Q5] ; [p1] -p7 -p2 +p1 +q5
+ FILTER_UPDATE m4, m5, m6, m7, [P0], m2, m10, m11, m13, 4, m1, m13, [Q6] ; [p0] -p7 -p1 +p0 +q6
+ FILTER_UPDATE m6, m7, m4, m5, [Q0], m2, m11, m12, m13, 4, m1, m13, [Q7] ; [q0] -p7 -p0 +q0 +q7
+ FILTER_UPDATE m4, m5, m6, m7, [Q1], m3, m12, m2, m13, 4, m1, m2, [Q1] ; [q1] -p6 -q0 +q1 +q7
+ FILTER_UPDATE m6, m7, m4, m5, [Q2], m8, m2, m3, m13, 4, m1, m3, [Q2] ; [q2] -p5 -q1 +q2 +q7
+ FILTER_UPDATE m4, m5, m6, m7, [Q3], m9, m3, m8, m13, 4, m1, m8, m8, [Q3] ; [q3] -p4 -q2 +q3 +q7
+ FILTER_UPDATE m6, m7, m4, m5, [Q4], m14, m8, m9, m13, 4, m1, m9, m9, [Q4] ; [q4] -p3 -q3 +q4 +q7
+ FILTER_UPDATE m4, m5, m6, m7, [Q5], m15, m9, m14, m13, 4, m1, m14, m14, [Q5] ; [q5] -p2 -q4 +q5 +q7
+ FILTER_UPDATE m6, m7, m4, m5, [Q6], m10, m14, m15, m13, 4, m1, m15, m15, [Q6] ; [q6] -p1 -q5 +q6 +q7
%ifidn %1, h
mova m0, [P7]
More information about the ffmpeg-cvslog mailing list