[FFmpeg-cvslog] lavc/vc1dsp: match C block layout in inv_trans_4x8_rvv

Rémi Denis-Courmont git at videolan.org
Tue Jun 11 17:15:21 EEST 2024


ffmpeg | branch: master | Rémi Denis-Courmont <remi at remlab.net> | Mon Jun 10 20:29:56 2024 +0300| [b6f37ffba71fa26b6176eb964cadcb442a115a54] | committer: Rémi Denis-Courmont

lavc/vc1dsp: match C block layout in inv_trans_4x8_rvv

Although checkasm does not verify this, the decoder requires that the
transform update the input block exactly as the C code does.

This fixes vc1-ism, vc1_ilaced_twomv, vc1_sa00040, vc1_sa10091,
vc1_sa10143, vc1_sa20021, vc1test_smm0005 and wmv3-drm-dec tests.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=b6f37ffba71fa26b6176eb964cadcb442a115a54
---

 libavcodec/riscv/vc1dsp_rvv.S | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/libavcodec/riscv/vc1dsp_rvv.S b/libavcodec/riscv/vc1dsp_rvv.S
index c4517d54f5..860b0cc5b1 100644
--- a/libavcodec/riscv/vc1dsp_rvv.S
+++ b/libavcodec/riscv/vc1dsp_rvv.S
@@ -303,15 +303,24 @@ func ff_vc1_inv_trans_4x8_rvv, zve32x
         vlsseg4e16.v v0, (a2), a3
         li           t1, 3
         jal          t0, ff_vc1_inv_trans_4_rvv
+        vssseg4e16.v v0, (a2), a3
+        vsetivli     zero, 4, e16, mf2, ta, ma
         addi         t1, a2, 1 * 8 * 2
-        vse16.v      v0, (a2)
+        vle16.v      v0, (a2)
         addi         t2, a2, 2 * 8 * 2
-        vse16.v      v1, (t1)
+        vle16.v      v1, (t1)
         addi         t3, a2, 3 * 8 * 2
-        vse16.v      v2, (t2)
-        vse16.v      v3, (t3)
-        vsetivli     zero, 4, e16, mf2, ta, ma
-        vlseg8e16.v  v0, (a2)
+        vle16.v      v2, (t2)
+        addi         t4, a2, 4 * 8 * 2
+        vle16.v      v3, (t3)
+        addi         t5, a2, 5 * 8 * 2
+        vle16.v      v4, (t4)
+        addi         t6, a2, 6 * 8 * 2
+        vle16.v      v5, (t5)
+        addi         t1, a2, 7 * 8 * 2
+        vle16.v      v6, (t6)
+        vle16.v      v7, (t1)
+
         jal          t0, ff_vc1_inv_trans_8_rvv
         vadd.vi      v4, v4, 1
         add          t0, a1, a0



More information about the ffmpeg-cvslog mailing list