[FFmpeg-cvslog] Merge commit '07e1f99a1bb41d1a615676140eefc85cf69fa793'
Clément Bœsch
git at videolan.org
Mon Mar 20 19:38:40 EET 2017
ffmpeg | branch: master | Clément Bœsch <u at pkh.me> | Mon Mar 20 18:38:07 2017 +0100| [3898e346b33515897d6ea83369f39e9d10a419bb] | committer: Clément Bœsch
Merge commit '07e1f99a1bb41d1a615676140eefc85cf69fa793'
* commit '07e1f99a1bb41d1a615676140eefc85cf69fa793':
x86util: Document SBUTTERFLY macro
Merged-by: Clément Bœsch <u at pkh.me>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=3898e346b33515897d6ea83369f39e9d10a419bb
---
libavutil/x86/x86util.asm | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm
index c50ddc6..de7d2d1 100644
--- a/libavutil/x86/x86util.asm
+++ b/libavutil/x86/x86util.asm
@@ -34,6 +34,16 @@
[base], [base + stride], [base + 2*stride], [base3], \
[base3 + stride], [base3 + 2*stride], [base3 + stride3], [base3 + stride*4]
+; Interleave low src0 with low src1 and store in src0,
+; interleave high src0 with high src1 and store in src1.
+; %1 - types
+; %2 - index of the register with src0
+; %3 - index of the register with src1
+; %4 - index of the register for intermediate results
+; example for %1 - wd: input: src0: x0 x1 x2 x3 z0 z1 z2 z3
+;                             src1: y0 y1 y2 y3 q0 q1 q2 q3
+;                     output: src0: x0 y0 x1 y1 x2 y2 x3 y3
+;                             src1: z0 q0 z1 q1 z2 q2 z3 q3
%macro SBUTTERFLY 4
%ifidn %1, dqqq
vperm2i128 m%4, m%2, m%3, q0301
======================================================================
diff --cc libavutil/x86/x86util.asm
index c50ddc6,bba958e..de7d2d1
--- a/libavutil/x86/x86util.asm
+++ b/libavutil/x86/x86util.asm
@@@ -29,16 -29,18 +29,26 @@@
%include "libavutil/x86/x86inc.asm"
+; expands to [base],...,[base+7*stride]
+%define PASS8ROWS(base, base3, stride, stride3) \
+ [base], [base + stride], [base + 2*stride], [base3], \
+ [base3 + stride], [base3 + 2*stride], [base3 + stride3], [base3 + stride*4]
+
+ ; Interleave low src0 with low src1 and store in src0,
+ ; interleave high src0 with high src1 and store in src1.
+ ; %1 - types
+ ; %2 - index of the register with src0
+ ; %3 - index of the register with src1
+ ; %4 - index of the register for intermediate results
+ ; example for %1 - wd: input: src0: x0 x1 x2 x3 z0 z1 z2 z3
+ ;                             src1: y0 y1 y2 y3 q0 q1 q2 q3
+ ;                     output: src0: x0 y0 x1 y1 x2 y2 x3 y3
+ ;                             src1: z0 q0 z1 q1 z2 q2 z3 q3
%macro SBUTTERFLY 4
-%if avx_enabled == 0
+%ifidn %1, dqqq
+ vperm2i128 m%4, m%2, m%3, q0301
+ vinserti128 m%2, m%2, xm%3, 1
+%elif avx_enabled == 0
mova m%4, m%2
punpckl%1 m%2, m%3
punpckh%1 m%4, m%3
More information about the ffmpeg-cvslog
mailing list