[FFmpeg-cvslog] aarch64: hevc_idct: Fix overflows in idct_dc
Martin Storsjö
git at videolan.org
Sat May 22 23:13:13 EEST 2021
ffmpeg | branch: release/4.4 | Martin Storsjö <martin at martin.st> | Mon May 17 12:48:03 2021 +0300| [c813f5e3436b5ba40b105cdaaaa7b1184baabde7] | committer: Martin Storsjö
aarch64: hevc_idct: Fix overflows in idct_dc
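The previous implementation added the rounding constant with a plain
16-bit vector add before the arithmetic shift, and that addition could
overflow for large DC coefficients. Use rounding shifts (srshr) instead.
This is marginally slower, but correct for all input values.
The previous implementation failed with certain input seeds, e.g.
"checkasm --test=hevc_idct 98".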
Signed-off-by: Martin Storsjö <martin at martin.st>
(cherry picked from commit f27e3ccf06ee19935d160164ca4a02f28cfc2a27)
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=c813f5e3436b5ba40b105cdaaaa7b1184baabde7
---
libavcodec/aarch64/hevcdsp_idct_neon.S | 11 +++++------
1 file changed, 5 insertions(+), 6 deletions(-)
diff --git a/libavcodec/aarch64/hevcdsp_idct_neon.S b/libavcodec/aarch64/hevcdsp_idct_neon.S
index 28c11e632c..0869431294 100644
--- a/libavcodec/aarch64/hevcdsp_idct_neon.S
+++ b/libavcodec/aarch64/hevcdsp_idct_neon.S
@@ -573,14 +573,13 @@ idct_16x16 10
// void ff_hevc_idct_NxN_dc_DEPTH_neon(int16_t *coeffs)
.macro idct_dc size, bitdepth
function ff_hevc_idct_\size\()x\size\()_dc_\bitdepth\()_neon, export=1
- movi v1.8h, #((1 << (14 - \bitdepth))+1)
ld1r {v4.8h}, [x0]
- add v4.8h, v4.8h, v1.8h
- sshr v0.8h, v4.8h, #(15 - \bitdepth)
- sshr v1.8h, v4.8h, #(15 - \bitdepth)
+ srshr v4.8h, v4.8h, #1
+ srshr v0.8h, v4.8h, #(14 - \bitdepth)
+ srshr v1.8h, v4.8h, #(14 - \bitdepth)
.if \size > 4
- sshr v2.8h, v4.8h, #(15 - \bitdepth)
- sshr v3.8h, v4.8h, #(15 - \bitdepth)
+ srshr v2.8h, v4.8h, #(14 - \bitdepth)
+ srshr v3.8h, v4.8h, #(14 - \bitdepth)
.if \size > 16 /* dc 32x32 */
mov x2, #4
1:
More information about the ffmpeg-cvslog mailing list