[FFmpeg-cvslog] aarch64: me_cmp: Avoid using the non-unrolled codepath for the minimum unroll size
Martin Storsjö
git at videolan.org
Thu Sep 29 10:33:04 EEST 2022
ffmpeg | branch: master | Martin Storsjö <martin at martin.st> | Wed Sep 28 11:55:51 2022 +0300| [8089fe072e4552348a215d9fb4a0545ccf830763] | committer: Martin Storsjö
aarch64: me_cmp: Avoid using the non-unrolled codepath for the minimum unroll size
Signed-off-by: Martin Storsjö <martin at martin.st>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=8089fe072e4552348a215d9fb4a0545ccf830763
---
libavcodec/aarch64/me_cmp_neon.S | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/libavcodec/aarch64/me_cmp_neon.S b/libavcodec/aarch64/me_cmp_neon.S
index 832a7cb22d..c710358ab7 100644
--- a/libavcodec/aarch64/me_cmp_neon.S
+++ b/libavcodec/aarch64/me_cmp_neon.S
@@ -471,7 +471,7 @@ function sse8_neon, export=1
movi v21.4s, #0
movi v20.4s, #0
cmp w4, #4
- b.le 2f
+ b.lt 2f
// make 4 iterations at once
1:
@@ -534,7 +534,7 @@ function sse4_neon, export=1
movi v16.4s, #0 // clear the result accumulator
cmp w4, #4
- b.le 2f
+ b.lt 2f
// make 4 iterations at once
1:
@@ -663,7 +663,7 @@ function vsse16_neon, export=1
cmp w4, #3 // check if we can make 3 iterations at once
usubl v31.8h, v0.8b, v1.8b // Signed difference of pix1[0] - pix2[0], first iteration
usubl2 v30.8h, v0.16b, v1.16b // Signed difference of pix1[0] - pix2[0], first iteration
- b.le 2f
+ b.lt 2f
1:
More information about the ffmpeg-cvslog mailing list