[FFmpeg-cvslog] aarch64: hevc_idct: Fix overflows in idct_dc
Martin Storsjö
git at videolan.org
Sat May 22 00:10:45 EEST 2021
ffmpeg | branch: master | Martin Storsjö <martin at martin.st> | Mon May 17 12:48:03 2021 +0300| [f27e3ccf06ee19935d160164ca4a02f28cfc2a27] | committer: Martin Storsjö
aarch64: hevc_idct: Fix overflows in idct_dc
This is marginally slower, but correct for all input values.
The previous implementation failed with certain input seeds, e.g.
"checkasm --test=hevc_idct 98".
Signed-off-by: Martin Storsjö <martin at martin.st>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f27e3ccf06ee19935d160164ca4a02f28cfc2a27
---
libavcodec/aarch64/hevcdsp_idct_neon.S | 11 +++++------
1 file changed, 5 insertions(+), 6 deletions(-)
diff --git a/libavcodec/aarch64/hevcdsp_idct_neon.S b/libavcodec/aarch64/hevcdsp_idct_neon.S
index 28c11e632c..0869431294 100644
--- a/libavcodec/aarch64/hevcdsp_idct_neon.S
+++ b/libavcodec/aarch64/hevcdsp_idct_neon.S
@@ -573,14 +573,13 @@ idct_16x16 10
// void ff_hevc_idct_NxN_dc_DEPTH_neon(int16_t *coeffs)
.macro idct_dc size, bitdepth
function ff_hevc_idct_\size\()x\size\()_dc_\bitdepth\()_neon, export=1
- movi v1.8h, #((1 << (14 - \bitdepth))+1)
ld1r {v4.8h}, [x0]
- add v4.8h, v4.8h, v1.8h
- sshr v0.8h, v4.8h, #(15 - \bitdepth)
- sshr v1.8h, v4.8h, #(15 - \bitdepth)
+ srshr v4.8h, v4.8h, #1
+ srshr v0.8h, v4.8h, #(14 - \bitdepth)
+ srshr v1.8h, v4.8h, #(14 - \bitdepth)
.if \size > 4
- sshr v2.8h, v4.8h, #(15 - \bitdepth)
- sshr v3.8h, v4.8h, #(15 - \bitdepth)
+ srshr v2.8h, v4.8h, #(14 - \bitdepth)
+ srshr v3.8h, v4.8h, #(14 - \bitdepth)
.if \size > 16 /* dc 32x32 */
mov x2, #4
1:
More information about the ffmpeg-cvslog mailing list