[FFmpeg-cvslog] avcodec/arm64: fix inverted register order in transpose_4x4H

Janne Grunau git at videolan.org
Sat Dec 19 03:59:41 CET 2015


ffmpeg | branch: master | Janne Grunau <janne-libav at jannau.net> | Fri Dec 18 11:27:05 2015 +0100| [2dba0407fdb855bbe44c888232d58ddb2fd3a412] | committer: Michael Niedermayer

avcodec/arm64: fix inverted register order in transpose_4x4H

Also fix the related register order issue in ff_h264_idct_add_neon by swapping the v2/v3 destination registers to match the corrected macro.

Found-by: zjh8890 <243186085 at qq.com>

Signed-off-by: Michael Niedermayer <michael at niedermayer.cc>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=2dba0407fdb855bbe44c888232d58ddb2fd3a412
---

 libavcodec/aarch64/h264idct_neon.S |    4 ++--
 libavcodec/aarch64/neon.S          |    4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/libavcodec/aarch64/h264idct_neon.S b/libavcodec/aarch64/h264idct_neon.S
index 04b5a47..91f1e77 100644
--- a/libavcodec/aarch64/h264idct_neon.S
+++ b/libavcodec/aarch64/h264idct_neon.S
@@ -37,8 +37,8 @@ function ff_h264_idct_add_neon, export=1
         sub             v7.4H,  v16.4H, v3.4H
         add             v0.4H,  v4.4H,  v6.4H
         add             v1.4H,  v5.4H,  v7.4H
-        sub             v2.4H,  v4.4H,  v6.4H
-        sub             v3.4H,  v5.4H,  v7.4H
+        sub             v3.4H,  v4.4H,  v6.4H
+        sub             v2.4H,  v5.4H,  v7.4H
 
         transpose_4x4H  v0, v1, v2, v3, v4, v5, v6, v7
 
diff --git a/libavcodec/aarch64/neon.S b/libavcodec/aarch64/neon.S
index 619aec6..a227cbd 100644
--- a/libavcodec/aarch64/neon.S
+++ b/libavcodec/aarch64/neon.S
@@ -107,8 +107,8 @@
 .macro  transpose_4x4H  r0, r1, r2, r3, r4, r5, r6, r7
         trn1            \r4\().4H,  \r0\().4H,  \r1\().4H
         trn2            \r5\().4H,  \r0\().4H,  \r1\().4H
-        trn1            \r7\().4H,  \r3\().4H,  \r2\().4H
-        trn2            \r6\().4H,  \r3\().4H,  \r2\().4H
+        trn1            \r7\().4H,  \r2\().4H,  \r3\().4H
+        trn2            \r6\().4H,  \r2\().4H,  \r3\().4H
         trn1            \r0\().2S,  \r4\().2S,  \r7\().2S
         trn2            \r3\().2S,  \r4\().2S,  \r7\().2S
         trn1            \r1\().2S,  \r5\().2S,  \r6\().2S



More information about the ffmpeg-cvslog mailing list