[FFmpeg-devel] [PATCH 1/1] lavu/riscv: count bytes rather than words for bswap32

Rémi Denis-Courmont remi at remlab.net
Sat Jul 27 15:51:58 EEST 2024


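Passing the length in bytes rather than in 32-bit words lets the macro
compute its end pointers with a plain add instead of sh2add.
This removes the dependency on Zba at essentially zero cost.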
---
 libavcodec/riscv/bswapdsp_init.c |  8 ++++----
 libavcodec/riscv/bswapdsp_rvb.S  |  3 ++-
 libavutil/riscv/bswap_rvb.S      | 10 +++++-----
 libswscale/riscv/rgb2rgb.c       |  2 +-
 libswscale/riscv/rgb2rgb_rvb.S   |  3 +--
 5 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/libavcodec/riscv/bswapdsp_init.c b/libavcodec/riscv/bswapdsp_init.c
index d490c434e7..c13387c232 100644
--- a/libavcodec/riscv/bswapdsp_init.c
+++ b/libavcodec/riscv/bswapdsp_init.c
@@ -33,15 +33,15 @@ av_cold void ff_bswapdsp_init_riscv(BswapDSPContext *c)
 #if HAVE_RV
     int flags = av_get_cpu_flags();
 
-    if (flags & AV_CPU_FLAG_RVB_ADDR) {
 #if (__riscv_xlen >= 64)
-        if (flags & AV_CPU_FLAG_RVB_BASIC)
-            c->bswap_buf = ff_bswap32_buf_rvb;
+    if (flags & AV_CPU_FLAG_RVB_BASIC)
+        c->bswap_buf = ff_bswap32_buf_rvb;
 #endif
 #if HAVE_RVV
+    if (flags & AV_CPU_FLAG_RVB_ADDR) {
         if (flags & AV_CPU_FLAG_RVV_I32)
             c->bswap16_buf = ff_bswap16_buf_rvv;
-#endif
     }
 #endif
+#endif
 }
diff --git a/libavcodec/riscv/bswapdsp_rvb.S b/libavcodec/riscv/bswapdsp_rvb.S
index 52b6cd0d7b..8c7c791fe1 100644
--- a/libavcodec/riscv/bswapdsp_rvb.S
+++ b/libavcodec/riscv/bswapdsp_rvb.S
@@ -23,8 +23,9 @@
 #include "libavutil/riscv/bswap_rvb.S"
 
 #if (__riscv_xlen >= 64)
-func ff_bswap32_buf_rvb, zba, zbb
+func ff_bswap32_buf_rvb, zbb
         lpad    0
+        slli    a2, a2, 2
         bswap32_rvb a0, a1, a2
 endfunc
 #endif
diff --git a/libavutil/riscv/bswap_rvb.S b/libavutil/riscv/bswap_rvb.S
index 3ff53ccb56..bc07779c44 100644
--- a/libavutil/riscv/bswap_rvb.S
+++ b/libavutil/riscv/bswap_rvb.S
@@ -20,21 +20,21 @@
 
 #if (__riscv_xlen >= 64)
         .macro  bswap32_rvb out, in, count
-        andi    t0, \count, 1
+        andi    t0, \count, 4
         beqz    t0, 1f
         /* Align input to 64-bit */
         lwu     t0, (\in)
         addi    \out, \out, 4
         rev8    t0, t0
-        addi    \count, \count, -1
+        addi    \count, \count, -4
         srli    t0, t0, __riscv_xlen - 32
         addi    \in, \in, 4
         sw      t0, -4(\out)
 1:
-        andi    t3, \count, -2
-        sh2add  \count, \count, \out
+        andi    t3, \count, -8
+        add     \count, \count, \out
         beqz    t3, 3f
-        sh2add  t3, t3, \out
+        add     t3, t3, \out
 2:      /* 2 elements (64 bits) at a time on a 64-bit boundary */
         ld      t0,  (\in)
         addi    \out, \out, 8
diff --git a/libswscale/riscv/rgb2rgb.c b/libswscale/riscv/rgb2rgb.c
index e751e11075..fb527e2bbe 100644
--- a/libswscale/riscv/rgb2rgb.c
+++ b/libswscale/riscv/rgb2rgb.c
@@ -46,7 +46,7 @@ av_cold void rgb2rgb_init_riscv(void)
     int flags = av_get_cpu_flags();
 
 #if (__riscv_xlen == 64)
-    if ((flags & AV_CPU_FLAG_RVB_BASIC) && (flags & AV_CPU_FLAG_RVB_ADDR))
+    if (flags & AV_CPU_FLAG_RVB_BASIC)
         shuffle_bytes_3210 = ff_shuffle_bytes_3210_rvb;
 #endif
 #if HAVE_RVV
diff --git a/libswscale/riscv/rgb2rgb_rvb.S b/libswscale/riscv/rgb2rgb_rvb.S
index af9ce2d215..d441308249 100644
--- a/libswscale/riscv/rgb2rgb_rvb.S
+++ b/libswscale/riscv/rgb2rgb_rvb.S
@@ -23,9 +23,8 @@
 #include "libavutil/riscv/bswap_rvb.S"
 
 #if (__riscv_xlen >= 64)
-func ff_shuffle_bytes_3210_rvb, zba, zbb
+func ff_shuffle_bytes_3210_rvb, zbb
         lpad    0
-        srli    a2, a2, 2
         bswap32_rvb a1, a0, a2
 endfunc
 #endif
-- 
2.45.2



More information about the ffmpeg-devel mailing list