[FFmpeg-cvslog] libavcodec: aarch64: Don't clobber v8 in the h%4 case in ff_pix_abs16_xy2_neon
Martin Storsjö
git at videolan.org
Sat Jul 16 17:33:48 EEST 2022
ffmpeg | branch: master | Martin Storsjö <martin at martin.st> | Tue Jul 12 23:51:36 2022 +0300| [02e7853fd94aa78a5f0990ee0105a291172a5eab] | committer: Martin Storsjö
libavcodec: aarch64: Don't clobber v8 in the h%4 case in ff_pix_abs16_xy2_neon
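v8 is callee-saved under AAPCS64 (the low 64 bits of v8-v15 must be preserved across calls), so use the volatile scratch register v7 for the temporary instead.
Checkasm doesn't currently test this codepath.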
Signed-off-by: Martin Storsjö <martin at martin.st>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=02e7853fd94aa78a5f0990ee0105a291172a5eab
---
libavcodec/aarch64/me_cmp_neon.S | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/libavcodec/aarch64/me_cmp_neon.S b/libavcodec/aarch64/me_cmp_neon.S
index e49d049fc2..31db3793d9 100644
--- a/libavcodec/aarch64/me_cmp_neon.S
+++ b/libavcodec/aarch64/me_cmp_neon.S
@@ -189,11 +189,11 @@ function ff_pix_abs16_xy2_neon, export=1
urshr v16.8h, v16.8h, #2 // shift right by 2 0..7 (rounding shift right)
urshr v17.8h, v17.8h, #2 // shift right by 2 8..15
- uxtl2 v8.8h, v1.16b // 8->16 bits pix1 8..15
+ uxtl2 v7.8h, v1.16b // 8->16 bits pix1 8..15
uxtl v1.8h, v1.8b // 8->16 bits pix1 0..7
uabd v6.8h, v1.8h, v16.8h // absolute difference 0..7
- uaba v6.8h, v8.8h, v17.8h // absolute difference accumulate 8..15
+ uaba v6.8h, v7.8h, v17.8h // absolute difference accumulate 8..15
mov v2.16b, v18.16b // pix3 -> pix2
mov v3.16b, v19.16b // pix3+1 -> pix2+1
uaddlv s6, v6.8h // add up accumulator in v6
More information about the ffmpeg-cvslog mailing list