[FFmpeg-devel] [PATCH 3/9] simple_idct10: improve precision

Christophe Gisquet christophe.gisquet at gmail.com
Sun Oct 11 16:06:07 CEST 2015


The overall mean squared error (omse) reported by dct-test drops from
0.03060703 (which fails the test) to 0.01663750.
---
 libavcodec/simple_idct.c                      |  9 ++++--
 libavcodec/simple_idct_template.c             | 45 ++++++++++++++++++---------
 tests/ref/fate/dnxhr-444                      |  2 +-
 tests/ref/vsynth/vsynth1-dnxhd-720p-10bit     |  2 +-
 tests/ref/vsynth/vsynth2-dnxhd-720p-10bit     |  2 +-
 tests/ref/vsynth/vsynth_lena-dnxhd-720p-10bit |  2 +-
 6 files changed, 42 insertions(+), 20 deletions(-)

diff --git a/libavcodec/simple_idct.c b/libavcodec/simple_idct.c
index eeb6279..4d6d20d 100644
--- a/libavcodec/simple_idct.c
+++ b/libavcodec/simple_idct.c
@@ -36,6 +36,11 @@
 
 #define BIT_DEPTH 10
 #include "simple_idct_template.c"
+
+#define EXTRA_SHIFT  2
+#include "simple_idct_template.c"
+
+#undef EXTRA_SHIFT
 #undef BIT_DEPTH
 
 #define BIT_DEPTH 12
@@ -230,10 +235,10 @@ void ff_prores_idct(int16_t *block, const int16_t *qmat)
         block[i] *= qmat[i];
 
     for (i = 0; i < 8; i++)
-        idctRowCondDC_10(block + i*8, 2);
+        idctRowCondDC_extrashift_10(block + i*8, 2);
 
     for (i = 0; i < 8; i++) {
         block[i] += 8192;
-        idctSparseCol_10(block + i);
+        idctSparseCol_extrashift_10(block + i);
     }
 }
diff --git a/libavcodec/simple_idct_template.c b/libavcodec/simple_idct_template.c
index 789db8d..0585679 100644
--- a/libavcodec/simple_idct_template.c
+++ b/libavcodec/simple_idct_template.c
@@ -66,19 +66,26 @@
 
 #elif BIT_DEPTH == 10 || BIT_DEPTH == 12
 
-#if BIT_DEPTH == 10
-#define W1 (22725*4)  // 90901
-#define W2 (21407*4) //  85627
-#define W3 (19265*4) //  77062
-#define W4 (16384*4) //  65535
-#define W5 (12873*4) //  51491
-#define W6 ( 8867*4) //  35468
-#define W7 ( 4520*4) //  18081
-
-#define ROW_SHIFT 15
-#define COL_SHIFT 20
-#define DC_SHIFT 1
-#else
+# if BIT_DEPTH == 10
+#define W1 22725 // 90901
+#define W2 21407 //  85627
+#define W3 19265 //  77062
+#define W4 16384 //  65535
+#define W5 12873 //  51491
+#define W6  8867 //  35468
+#define W7  4520 //  18081
+
+#   ifdef EXTRA_SHIFT
+#define ROW_SHIFT 13
+#define COL_SHIFT 18
+#define DC_SHIFT  1
+#   else
+#define ROW_SHIFT 12
+#define COL_SHIFT 19
+#define DC_SHIFT  2
+#   endif
+
+# else
 #define W1 45451
 #define W2 42813
 #define W3 38531
@@ -90,7 +97,7 @@
 #define ROW_SHIFT 16
 #define COL_SHIFT 17
 #define DC_SHIFT -1
-#endif
+# endif
 
 #define MUL(a, b)    ((a) * (b))
 #define MAC(a, b, c) ((a) += (b) * (c))
@@ -101,7 +108,11 @@
 
 #endif
 
+#ifdef EXTRA_SHIFT
+static inline void FUNC(idctRowCondDC_extrashift)(int16_t *row, int extra_shift)
+#else
 static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift)
+#endif
 {
     int a0, a1, a2, a3, b0, b1, b2, b3;
 
@@ -236,6 +247,9 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift)
         }                                               \
     } while (0)
 
+#ifdef EXTRA_SHIFT
+static inline void FUNC(idctSparseCol_extrashift)(int16_t *col)
+#else
 static inline void FUNC(idctSparseColPut)(pixel *dest, int line_size,
                                           int16_t *col)
 {
@@ -285,6 +299,7 @@ static inline void FUNC(idctSparseColAdd)(pixel *dest, int line_size,
 }
 
 static inline void FUNC(idctSparseCol)(int16_t *col)
+#endif
 {
     int a0, a1, a2, a3, b0, b1, b2, b3;
 
@@ -300,6 +315,7 @@ static inline void FUNC(idctSparseCol)(int16_t *col)
     col[56] = ((a0 - b0) >> COL_SHIFT);
 }
 
+#ifndef EXTRA_SHIFT
 void FUNC(ff_simple_idct_put)(uint8_t *dest_, int line_size, int16_t *block)
 {
     pixel *dest = (pixel *)dest_;
@@ -338,3 +354,4 @@ void FUNC(ff_simple_idct)(int16_t *block)
     for (i = 0; i < 8; i++)
         FUNC(idctSparseCol)(block + i);
 }
+#endif
diff --git a/tests/ref/fate/dnxhr-444 b/tests/ref/fate/dnxhr-444
index 743067d..f9e73c3 100644
--- a/tests/ref/fate/dnxhr-444
+++ b/tests/ref/fate/dnxhr-444
@@ -1,2 +1,2 @@
 #tb 0: 1/24
-0,          0,          0,        1,  9665280, 0x238a023e
+0,          0,          0,        1,  9665280, 0x19ef4057
diff --git a/tests/ref/vsynth/vsynth1-dnxhd-720p-10bit b/tests/ref/vsynth/vsynth1-dnxhd-720p-10bit
index ab58807..dc808f3 100644
--- a/tests/ref/vsynth/vsynth1-dnxhd-720p-10bit
+++ b/tests/ref/vsynth/vsynth1-dnxhd-720p-10bit
@@ -1,4 +1,4 @@
 f8c4b7aa165a80df2485d526161290a3 *tests/data/fate/vsynth1-dnxhd-720p-10bit.dnxhd
 2293760 tests/data/fate/vsynth1-dnxhd-720p-10bit.dnxhd
-3cc84f9e8d2e704475b410de27dd9951 *tests/data/fate/vsynth1-dnxhd-720p-10bit.out.rawvideo
+87f1f0e074466facd3a9922ecc8311db *tests/data/fate/vsynth1-dnxhd-720p-10bit.out.rawvideo
 stddev:    6.23 PSNR: 32.23 MAXDIFF:   64 bytes:  7603200/   760320
diff --git a/tests/ref/vsynth/vsynth2-dnxhd-720p-10bit b/tests/ref/vsynth/vsynth2-dnxhd-720p-10bit
index 5c21985..0d2068d 100644
--- a/tests/ref/vsynth/vsynth2-dnxhd-720p-10bit
+++ b/tests/ref/vsynth/vsynth2-dnxhd-720p-10bit
@@ -1,4 +1,4 @@
 e49cb87f69acc809aee55d64990c84a9 *tests/data/fate/vsynth2-dnxhd-720p-10bit.dnxhd
 2293760 tests/data/fate/vsynth2-dnxhd-720p-10bit.dnxhd
-a98c4b69d4d036089a455e147d6922a7 *tests/data/fate/vsynth2-dnxhd-720p-10bit.out.rawvideo
+1e6e1ef90e5c9b16a80acc17fde596ff *tests/data/fate/vsynth2-dnxhd-720p-10bit.out.rawvideo
 stddev:    1.54 PSNR: 44.36 MAXDIFF:   31 bytes:  7603200/   760320
diff --git a/tests/ref/vsynth/vsynth_lena-dnxhd-720p-10bit b/tests/ref/vsynth/vsynth_lena-dnxhd-720p-10bit
index 1dcadd8..b9c9e03 100644
--- a/tests/ref/vsynth/vsynth_lena-dnxhd-720p-10bit
+++ b/tests/ref/vsynth/vsynth_lena-dnxhd-720p-10bit
@@ -1,4 +1,4 @@
 e96fc4a7d994b9369c50da32fd325822 *tests/data/fate/vsynth_lena-dnxhd-720p-10bit.dnxhd
 2293760 tests/data/fate/vsynth_lena-dnxhd-720p-10bit.dnxhd
-2b497215c57558910a605ff8c78430d9 *tests/data/fate/vsynth_lena-dnxhd-720p-10bit.out.rawvideo
+0e9fcec94aeff70bac5dec02cf2391bc *tests/data/fate/vsynth_lena-dnxhd-720p-10bit.out.rawvideo
 stddev:    1.33 PSNR: 45.61 MAXDIFF:   22 bytes:  7603200/   760320
-- 
2.6.0



More information about the ffmpeg-devel mailing list