[FFmpeg-cvslog] Merge commit 'cdb1665f70def544ddab3e3ed3763ef99c8b3873'
Derek Buitenhuis
git at videolan.org
Sun Apr 24 13:51:52 CEST 2016
ffmpeg | branch: master | Derek Buitenhuis <derek.buitenhuis at gmail.com> | Sun Apr 24 12:51:34 2016 +0100| [87b8e9500874930667ac966ea2fabdd6222ef6e0] | committer: Derek Buitenhuis
Merge commit 'cdb1665f70def544ddab3e3ed3763ef99c8b3873'
* commit 'cdb1665f70def544ddab3e3ed3763ef99c8b3873':
aarch64: Make transpose_4x4H do a regular transpose
Merged-by: Derek Buitenhuis <derek.buitenhuis at gmail.com>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=87b8e9500874930667ac966ea2fabdd6222ef6e0
---
libavcodec/aarch64/h264idct_neon.S | 24 ++++++++++++------------
libavcodec/aarch64/neon.S | 12 ++++++------
2 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/libavcodec/aarch64/h264idct_neon.S b/libavcodec/aarch64/h264idct_neon.S
index 91f1e77..fa414f7 100644
--- a/libavcodec/aarch64/h264idct_neon.S
+++ b/libavcodec/aarch64/h264idct_neon.S
@@ -33,25 +33,25 @@ function ff_h264_idct_add_neon, export=1
sshr v17.4H, v3.4H, #1
st1 {v30.8H}, [x1], #16
sub v5.4H, v0.4H, v2.4H
- add v6.4H, v1.4H, v17.4H
- sub v7.4H, v16.4H, v3.4H
- add v0.4H, v4.4H, v6.4H
- add v1.4H, v5.4H, v7.4H
- sub v3.4H, v4.4H, v6.4H
- sub v2.4H, v5.4H, v7.4H
+ sub v6.4H, v16.4H, v3.4H
+ add v7.4H, v1.4H, v17.4H
+ add v0.4H, v4.4H, v7.4H
+ add v1.4H, v5.4H, v6.4H
+ sub v2.4H, v5.4H, v6.4H
+ sub v3.4H, v4.4H, v7.4H
transpose_4x4H v0, v1, v2, v3, v4, v5, v6, v7
- add v4.4H, v0.4H, v3.4H
+ add v4.4H, v0.4H, v2.4H
ld1 {v18.S}[0], [x0], x2
- sshr v16.4H, v2.4H, #1
+ sshr v16.4H, v3.4H, #1
sshr v17.4H, v1.4H, #1
- ld1 {v19.S}[1], [x0], x2
- sub v5.4H, v0.4H, v3.4H
ld1 {v18.S}[1], [x0], x2
+ sub v5.4H, v0.4H, v2.4H
+ ld1 {v19.S}[1], [x0], x2
add v6.4H, v16.4H, v1.4H
ins v4.D[1], v5.D[0]
- sub v7.4H, v2.4H, v17.4H
+ sub v7.4H, v17.4H, v3.4H
ld1 {v19.S}[0], [x0], x2
ins v6.D[1], v7.D[0]
sub x0, x0, x2, lsl #2
@@ -68,8 +68,8 @@ function ff_h264_idct_add_neon, export=1
sqxtun v1.8B, v1.8H
st1 {v0.S}[0], [x0], x2
- st1 {v1.S}[1], [x0], x2
st1 {v0.S}[1], [x0], x2
+ st1 {v1.S}[1], [x0], x2
st1 {v1.S}[0], [x0], x2
sub x1, x1, #32
diff --git a/libavcodec/aarch64/neon.S b/libavcodec/aarch64/neon.S
index a227cbd..0fddbec 100644
--- a/libavcodec/aarch64/neon.S
+++ b/libavcodec/aarch64/neon.S
@@ -107,12 +107,12 @@
.macro transpose_4x4H r0, r1, r2, r3, r4, r5, r6, r7
trn1 \r4\().4H, \r0\().4H, \r1\().4H
trn2 \r5\().4H, \r0\().4H, \r1\().4H
- trn1 \r7\().4H, \r2\().4H, \r3\().4H
- trn2 \r6\().4H, \r2\().4H, \r3\().4H
- trn1 \r0\().2S, \r4\().2S, \r7\().2S
- trn2 \r3\().2S, \r4\().2S, \r7\().2S
- trn1 \r1\().2S, \r5\().2S, \r6\().2S
- trn2 \r2\().2S, \r5\().2S, \r6\().2S
+ trn1 \r6\().4H, \r2\().4H, \r3\().4H
+ trn2 \r7\().4H, \r2\().4H, \r3\().4H
+ trn1 \r0\().2S, \r4\().2S, \r6\().2S
+ trn2 \r2\().2S, \r4\().2S, \r6\().2S
+ trn1 \r1\().2S, \r5\().2S, \r7\().2S
+ trn2 \r3\().2S, \r5\().2S, \r7\().2S
.endm
.macro transpose_8x8H r0, r1, r2, r3, r4, r5, r6, r7, r8, r9
======================================================================
More information about the ffmpeg-cvslog mailing list