[FFmpeg-cvslog] checkasm: Test more h264 idct variants

Martin Storsjö git at videolan.org
Sat Nov 11 20:22:52 EET 2017


ffmpeg | branch: master | Martin Storsjö <martin at martin.st> | Tue Aug 29 23:23:12 2017 +0300| [516c479172755c63063180b0c0953b68b670cdbd] | committer: Martin Storsjö

checkasm: Test more h264 idct variants

Signed-off-by: Martin Storsjö <martin at martin.st>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=516c479172755c63063180b0c0953b68b670cdbd
---

 tests/checkasm/h264dsp.c | 90 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 89 insertions(+), 1 deletion(-)

diff --git a/tests/checkasm/h264dsp.c b/tests/checkasm/h264dsp.c
index c9ddd52a7f..f355a72a74 100644
--- a/tests/checkasm/h264dsp.c
+++ b/tests/checkasm/h264dsp.c
@@ -22,6 +22,7 @@
 #include "checkasm.h"
 #include "libavcodec/avcodec.h"
 #include "libavcodec/h264dsp.h"
+#include "libavcodec/h264data.h"
 #include "libavutil/common.h"
 #include "libavutil/internal.h"
 #include "libavutil/intreadwrite.h"
@@ -223,10 +224,97 @@ static void check_idct(void)
             }
         }
     }
-    report("idct");
+}
+
+static void check_idct_multiple(void) /* tests h264_idct_add16, h264_idct_add16intra and h264_idct8_add4 against their reference versions */
+{
+    LOCAL_ALIGNED_16(uint8_t, dst_full,  [16 * 16 * 2]); /* 16-pixel-wide input picture, assembled block by block below */
+    LOCAL_ALIGNED_16(int16_t, coef_full, [16 * 16 * 2]); /* input coefficients for every block of the picture */
+    LOCAL_ALIGNED_16(uint8_t, dst0,  [16 * 16 * 2]); /* picture handed to call_ref */
+    LOCAL_ALIGNED_16(uint8_t, dst1,  [16 * 16 * 2]); /* picture handed to call_new / bench_new */
+    LOCAL_ALIGNED_16(int16_t, coef0, [16 * 16 * 2]); /* coefficients handed to call_ref */
+    LOCAL_ALIGNED_16(int16_t, coef1, [16 * 16 * 2]); /* coefficients handed to call_new / bench_new */
+    LOCAL_ALIGNED_16(uint8_t, nnzc,  [15 * 8]); /* per-block counts, passed as the last argument of the tested functions */
+    H264DSPContext h;
+    int bit_depth, i, y, func;
+    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]);
+
+    for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
+        ff_h264dsp_init(&h, bit_depth, 1);
+        for (func = 0; func < 3; func++) { /* one pass per function pointer under test */
+            void (*idct)(uint8_t *, const int *, int16_t *, int, const uint8_t[]) = NULL;
+            const char *name;
+            int sz = 4, intra = 0; /* defaults: 4x4 blocks, non-intra handling of nnz */
+            int block_offset[16] = { 0 };
+            switch (func) {
+            case 0:
+                idct = h.h264_idct_add16;
+                name = "h264_idct_add16";
+                break;
+            case 1:
+                idct = h.h264_idct_add16intra;
+                name = "h264_idct_add16intra";
+                intra = 1;
+                break;
+            case 2:
+                idct = h.h264_idct8_add4;
+                name = "h264_idct8_add4";
+                sz = 8;
+                break;
+            }
+            memset(nnzc, 0, 15 * 8); /* entries not written in the loop below stay 0 */
+            memset(coef_full, 0, 16 * 16 * SIZEOF_COEF); /* coefficients not copied in below stay 0 */
+            for (i = 0; i < 16 * 16; i += sz * sz) { /* i counts coefficients: one iteration per sz x sz block */
+                uint8_t src[8 * 8 * 2];
+                uint8_t dst[8 * 8 * 2];
+                int16_t coef[8 * 8 * 2];
+                int index = i / sz;
+                int block_y = (index / 16) * sz; /* top pixel row of this block */
+                int block_x = index % 16; /* left pixel column of this block */
+                int offset = (block_y * 16 + block_x) * SIZEOF_PIXEL; /* position of the block's top-left pixel inside dst_full */
+                int nnz = rnd() % 3; /* selects one of three cases; a value above 1 becomes sz * sz below */
+
+                randomize_buffers(); /* NOTE(review): macro defined outside this hunk; presumably fills the local src/dst/coef - confirm */
+                if (sz == 4)
+                    dct4x4(coef, bit_depth);
+                else
+                    dct8x8(coef, bit_depth);
+
+                for (y = 0; y < sz; y++)
+                    memcpy(&dst_full[offset + y * 16 * SIZEOF_PIXEL],
+                           &dst[PIXEL_STRIDE * y], sz * SIZEOF_PIXEL); /* place row y of the small block into the 16-wide picture */
+
+                if (nnz > 1)
+                    nnz = sz * sz; /* full block: keep every coefficient */
+                memcpy(&coef_full[i * SIZEOF_COEF/sizeof(coef[0])],
+                       coef, nnz * SIZEOF_COEF); /* only the first nnz coefficients are kept; the rest of the block remains zero */
+
+                if (intra && nnz == 1)
+                    nnz = 0; /* intra variant: the single copied coefficient stays in coef_full but nnzc records 0 */
+
+                nnzc[scan8[i / 16]] = nnz; /* i / 16 is 0..15 for sz == 4 and 0, 4, 8, 12 for sz == 8 */
+                block_offset[i / 16] = offset;
+            }
+
+            if (check_func(idct, "%s_%dbpp", name, bit_depth)) {
+                memcpy(coef0, coef_full, 16 * 16 * SIZEOF_COEF); /* ref and new each start from identical private copies */
+                memcpy(coef1, coef_full, 16 * 16 * SIZEOF_COEF);
+                memcpy(dst0, dst_full, 16 * 16 * SIZEOF_PIXEL);
+                memcpy(dst1, dst_full, 16 * 16 * SIZEOF_PIXEL);
+                call_ref(dst0, block_offset, coef0, 16 * SIZEOF_PIXEL, nnzc);
+                call_new(dst1, block_offset, coef1, 16 * SIZEOF_PIXEL, nnzc);
+                if (memcmp(dst0, dst1, 16 * 16 * SIZEOF_PIXEL) ||
+                    memcmp(coef0, coef1, 16 * 16 * SIZEOF_COEF)) /* the coefficient buffers must match as well as the pixels */
+                    fail();
+                bench_new(dst1, block_offset, coef1, 16 * SIZEOF_PIXEL, nnzc);
+            }
+        }
+    }
 }
 
 void checkasm_check_h264dsp(void) /* runs the h264dsp IDCT checks in this file */
 {
     check_idct();
+    check_idct_multiple();
+    report("idct"); /* called once here, after both IDCT checks, instead of inside check_idct() */
 }



More information about the ffmpeg-cvslog mailing list