[FFmpeg-cvslog] Merge commit '07e1f99a1bb41d1a615676140eefc85cf69fa793'

Clément Bœsch git at videolan.org
Mon Mar 20 19:38:40 EET 2017


ffmpeg | branch: master | Clément Bœsch <u at pkh.me> | Mon Mar 20 18:38:07 2017 +0100| [3898e346b33515897d6ea83369f39e9d10a419bb] | committer: Clément Bœsch

Merge commit '07e1f99a1bb41d1a615676140eefc85cf69fa793'

* commit '07e1f99a1bb41d1a615676140eefc85cf69fa793':
  x86util: Document SBUTTERFLY macro

Merged-by: Clément Bœsch <u at pkh.me>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=3898e346b33515897d6ea83369f39e9d10a419bb
---

 libavutil/x86/x86util.asm | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm
index c50ddc6..de7d2d1 100644
--- a/libavutil/x86/x86util.asm
+++ b/libavutil/x86/x86util.asm
@@ -34,6 +34,16 @@
     [base],           [base  + stride],   [base  + 2*stride], [base3], \
     [base3 + stride], [base3 + 2*stride], [base3 + stride3],  [base3 + stride*4]
 
+; Interleave low src0 with low src1 and store in src0,
+; interleave high src0 with high src1 and store in src1.
+; %1 - types
+; %2 - index of the register with src0
+; %3 - index of the register with src1
+; %4 - index of the register for intermediate results
+; example for %1 - wd: input: src0: x0 x1 x2 x3 z0 z1 z2 z3
+;                             src1: y0 y1 y2 y3 q0 q1 q2 q3
+;                     output: src0: x0 y0 x1 y1 x2 y2 x3 y3
+;                             src1: z0 q0 z1 q1 z2 q2 z3 q3
 %macro SBUTTERFLY 4
 %ifidn %1, dqqq
     vperm2i128  m%4, m%2, m%3, q0301


======================================================================

diff --cc libavutil/x86/x86util.asm
index c50ddc6,bba958e..de7d2d1
--- a/libavutil/x86/x86util.asm
+++ b/libavutil/x86/x86util.asm
@@@ -29,16 -29,18 +29,26 @@@
  
  %include "libavutil/x86/x86inc.asm"
  
 +; expands to [base],...,[base+7*stride]
 +%define PASS8ROWS(base, base3, stride, stride3) \
 +    [base],           [base  + stride],   [base  + 2*stride], [base3], \
 +    [base3 + stride], [base3 + 2*stride], [base3 + stride3],  [base3 + stride*4]
 +
+ ; Interleave low src0 with low src1 and store in src0,
+ ; interleave high src0 with high src1 and store in src1.
+ ; %1 - types
+ ; %2 - index of the register with src0
+ ; %3 - index of the register with src1
+ ; %4 - index of the register for intermediate results
+ ; example for %1 - wd: input: src0: x0 x1 x2 x3 z0 z1 z2 z3
+ ;                             src1: y0 y1 y2 y3 q0 q1 q2 q3
+ ;                     output: src0: x0 y0 x1 y1 x2 y2 x3 y3
+ ;                             src1: z0 q0 z1 q1 z2 q2 z3 q3
  %macro SBUTTERFLY 4
 -%if avx_enabled == 0
 +%ifidn %1, dqqq
 +    vperm2i128  m%4, m%2, m%3, q0301
 +    vinserti128 m%2, m%2, xm%3, 1
 +%elif avx_enabled == 0
      mova      m%4, m%2
      punpckl%1 m%2, m%3
      punpckh%1 m%4, m%3



More information about the ffmpeg-cvslog mailing list