[FFmpeg-devel] [PATCH v5] avcodec/jpeg2000: Fix FF_DWT97_INT to pass the conformance testing defined in ISO/IEC 15444-4
Osamu Watanabe
owatanab at es.takushoku-u.ac.jp
Thu Sep 12 11:51:58 EEST 2024
Fix the integer version of the inverse 9-7 DWT
(FF_DWT97_INT, https://trac.ffmpeg.org/ticket/10123), which is the variant used
when `-flags +bitexact` is set.
I went through the code path for the integer DWT 9-7 transform and improved its
precision to match the conformance codestreams.
As a result, the encoded codestream is slightly larger for a given Q value.
For example, `-flags +bitexact -i lena.pnm -q: 20 -format j2k -y tmp.j2c`
produces a 13K file on HEAD and a 19K file with this patch.
This commit also updates the source and reference files for the affected FATE tests.
Signed-off-by: Osamu Watanabe <owatanab at es.takushoku-u.ac.jp>
---
libavcodec/jpeg2000.c | 6 ++--
libavcodec/jpeg2000dec.c | 2 +-
libavcodec/jpeg2000dwt.c | 43 ++++++++++++++----------
libavcodec/tests/jpeg2000dwt.c | 5 +++
tests/ref/fate/j2k-dwt | 40 +++++++++++-----------
tests/ref/fate/jpeg2000-dcinema | 4 +--
tests/ref/fate/jpeg2000dec-p0_04 | 2 +-
tests/ref/fate/jpeg2000dec-p0_05 | 2 +-
tests/ref/fate/jpeg2000dec-p0_09 | 2 +-
tests/ref/vsynth/vsynth1-jpeg2000-97 | 8 ++---
tests/ref/vsynth/vsynth2-jpeg2000-97 | 8 ++---
tests/ref/vsynth/vsynth3-jpeg2000-97 | 8 ++---
tests/ref/vsynth/vsynth_lena-jpeg2000-97 | 8 ++---
13 files changed, 74 insertions(+), 64 deletions(-)
diff --git a/libavcodec/jpeg2000.c b/libavcodec/jpeg2000.c
index d6ffb02319..f1a7d55ae1 100644
--- a/libavcodec/jpeg2000.c
+++ b/libavcodec/jpeg2000.c
@@ -260,9 +260,7 @@ static void init_band_stepsize(AVCodecContext *avctx,
band->f_stepsize *= F_LFTG_X * F_LFTG_X * 4;
break;
}
- if (codsty->transform == FF_DWT97) {
- band->f_stepsize *= pow(F_LFTG_K, 2*(codsty->nreslevels2decode - reslevelno) + lband - 2);
- }
+ band->f_stepsize *= pow(F_LFTG_K, 2*(codsty->nreslevels2decode - reslevelno) + lband - 2);
}
if (band->f_stepsize > (INT_MAX >> 15)) {
@@ -270,7 +268,7 @@ static void init_band_stepsize(AVCodecContext *avctx,
av_log(avctx, AV_LOG_ERROR, "stepsize out of range\n");
}
- band->i_stepsize = band->f_stepsize * (1 << 15);
+ band->i_stepsize = lrint(band->f_stepsize * (1 << 15) + 0.5f);
/* FIXME: In OpenJPEG code stepsize = stepsize * 0.5. Why?
* If not set output of entropic decoder is not correct. */
diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c
index 2e09b279dc..f68e41ed6a 100644
--- a/libavcodec/jpeg2000dec.c
+++ b/libavcodec/jpeg2000dec.c
@@ -2136,7 +2136,7 @@ static void dequantization_int_97(int x, int y, Jpeg2000Cblk *cblk,
int32_t *datap = &comp->i_data[(comp->coord[0][1] - comp->coord[0][0]) * (y + j) + x];
int *src = t1->data + j*t1->stride;
for (i = 0; i < w; ++i)
- datap[i] = (src[i] * (int64_t)band->i_stepsize + (1<<15)) >> 16;
+ datap[i] = (int32_t)(src[i] * (int64_t)band->i_stepsize + (1 << 14)) >> 15;
}
}
diff --git a/libavcodec/jpeg2000dwt.c b/libavcodec/jpeg2000dwt.c
index 34e33553f7..d24f15d39b 100644
--- a/libavcodec/jpeg2000dwt.c
+++ b/libavcodec/jpeg2000dwt.c
@@ -39,12 +39,12 @@
/* Lifting parameters in integer format.
* Computed as param = (float param) * (1 << 16) */
-#define I_LFTG_ALPHA 103949ll
-#define I_LFTG_BETA 3472ll
-#define I_LFTG_GAMMA 57862ll
-#define I_LFTG_DELTA 29066ll
-#define I_LFTG_K 80621ll
-#define I_LFTG_X 53274ll
+#define I_LFTG_ALPHA_PRIME 38413ll // = 103949 - 65536, (= alpha - 1.0)
+#define I_LFTG_BETA 3472ll
+#define I_LFTG_GAMMA 57862ll
+#define I_LFTG_DELTA 29066ll
+#define I_LFTG_K 80621ll
+#define I_LFTG_X 53274ll
#define I_PRESHIFT 8
static inline void extend53(int *p, int i0, int i1)
@@ -234,8 +234,11 @@ static void sd_1d97_int(int *p, int i0, int i1)
extend97_int(p, i0, i1);
i0++; i1++;
- for (i = (i0>>1) - 2; i < (i1>>1) + 1; i++)
- p[2 * i + 1] -= (I_LFTG_ALPHA * (p[2 * i] + p[2 * i + 2]) + (1 << 15)) >> 16;
+ for (i = (i0>>1) - 2; i < (i1>>1) + 1; i++) {
+ const int64_t sum = p[2 * i] + p[2 * i + 2];
+ p[2 * i + 1] -= sum;
+ p[2 * i + 1] -= (I_LFTG_ALPHA_PRIME * sum + (1 << 15)) >> 16;
+ }
for (i = (i0>>1) - 1; i < (i1>>1) + 1; i++)
p[2 * i] -= (I_LFTG_BETA * (p[2 * i - 1] + p[2 * i + 1]) + (1 << 15)) >> 16;
for (i = (i0>>1) - 1; i < (i1>>1); i++)
@@ -276,7 +279,7 @@ static void dwt_encode97_int(DWTContext *s, int *t)
// copy back and deinterleave
for (i = mv; i < lv; i+=2, j++)
- t[w*j + lp] = ((l[i] * I_LFTG_X) + (1 << 15)) >> 16;
+ t[w*j + lp] = l[i];
for (i = 1-mv; i < lv; i+=2, j++)
t[w*j + lp] = l[i];
}
@@ -293,7 +296,7 @@ static void dwt_encode97_int(DWTContext *s, int *t)
// copy back and deinterleave
for (i = mh; i < lh; i+=2, j++)
- t[w*lp + j] = ((l[i] * I_LFTG_X) + (1 << 15)) >> 16;
+ t[w*lp + j] = l[i];
for (i = 1-mh; i < lh; i+=2, j++)
t[w*lp + j] = l[i];
}
@@ -301,7 +304,7 @@ static void dwt_encode97_int(DWTContext *s, int *t)
}
for (i = 0; i < w * h; i++)
- t[i] = (t[i] + ((1<<I_PRESHIFT)>>1)) >> I_PRESHIFT;
+ t[i] = (t[i] + ((1<<(I_PRESHIFT))>>1)) >> (I_PRESHIFT);
}
static void sr_1d53(unsigned *p, int i0, int i1)
@@ -471,8 +474,11 @@ static void sr_1d97_int(int32_t *p, int i0, int i1)
for (i = (i0 >> 1); i < (i1 >> 1) + 1; i++)
p[2 * i] += (I_LFTG_BETA * (p[2 * i - 1] + (int64_t)p[2 * i + 1]) + (1 << 15)) >> 16;
/* step 6 */
- for (i = (i0 >> 1); i < (i1 >> 1); i++)
- p[2 * i + 1] += (I_LFTG_ALPHA * (p[2 * i] + (int64_t)p[2 * i + 2]) + (1 << 15)) >> 16;
+ for (i = (i0 >> 1); i < (i1 >> 1); i++) {
+ const int64_t sum = p[2 * i] + (int64_t) p[2 * i + 2];
+ p[2 * i + 1] += sum;
+ p[2 * i + 1] += (I_LFTG_ALPHA_PRIME * sum + (1 << 15)) >> 16;
+ }
}
static void dwt_decode97_int(DWTContext *s, int32_t *t)
@@ -500,9 +506,9 @@ static void dwt_decode97_int(DWTContext *s, int32_t *t)
l = line + mh;
for (lp = 0; lp < lv; lp++) {
int i, j = 0;
- // rescale with interleaving
+ // interleaving
for (i = mh; i < lh; i += 2, j++)
- l[i] = ((data[w * lp + j] * I_LFTG_K) + (1 << 15)) >> 16;
+ l[i] = data[w * lp + j];
for (i = 1 - mh; i < lh; i += 2, j++)
l[i] = data[w * lp + j];
@@ -516,9 +522,9 @@ static void dwt_decode97_int(DWTContext *s, int32_t *t)
l = line + mv;
for (lp = 0; lp < lh; lp++) {
int i, j = 0;
- // rescale with interleaving
+ // interleaving
for (i = mv; i < lv; i += 2, j++)
- l[i] = ((data[w * j + lp] * I_LFTG_K) + (1 << 15)) >> 16;
+ l[i] = data[w * j + lp];
for (i = 1 - mv; i < lv; i += 2, j++)
l[i] = data[w * j + lp];
@@ -530,7 +536,8 @@ static void dwt_decode97_int(DWTContext *s, int32_t *t)
}
for (i = 0; i < w * h; i++)
- data[i] = (data[i] + ((1LL<<I_PRESHIFT)>>1)) >> I_PRESHIFT;
+ // We shift down by `I_PRESHIFT + 1` because the input coefficients `datap[]` were shifted down to 1 bit above from the binary point.
+ data[i] = (int32_t)(data[i] + ((1LL<<(I_PRESHIFT + 1))>>1)) >> (I_PRESHIFT + 1);
}
int ff_jpeg2000_dwt_init(DWTContext *s, int border[2][2],
diff --git a/libavcodec/tests/jpeg2000dwt.c b/libavcodec/tests/jpeg2000dwt.c
index 520ecc05a3..c4aa34282c 100644
--- a/libavcodec/tests/jpeg2000dwt.c
+++ b/libavcodec/tests/jpeg2000dwt.c
@@ -46,6 +46,11 @@ static int test_dwt(int *array, int *ref, int border[2][2], int decomp_levels, i
fprintf(stderr, "ff_dwt_encode failed\n");
return 1;
}
+ if (type == FF_DWT97_INT) {
+ // pre-scaling to simulate dequantization which places the binary point at 1 bit above from LSB
+ for (j = 0; j< s->linelen[decomp_levels-1][0] * s->linelen[decomp_levels-1][1]; j++)
+ array[j] <<= 1;
+ }
ret = ff_dwt_decode(s, array);
if (ret < 0) {
fprintf(stderr, "ff_dwt_encode failed\n");
diff --git a/tests/ref/fate/j2k-dwt b/tests/ref/fate/j2k-dwt
index 42415f00f9..5c40a2f1f5 100644
--- a/tests/ref/fate/j2k-dwt
+++ b/tests/ref/fate/j2k-dwt
@@ -1,60 +1,60 @@
5/3i, decomp:15 border 151 170 140 183 milli-err2: 0
-9/7i, decomp:15 border 151 170 140 183 milli-err2: 544
+9/7i, decomp:15 border 151 170 140 183 milli-err2: 110
9/7f, decomp:15 border 151 170 140 183 err2: 0.000
5/3i, decomp:21 border 173 201 81 189 milli-err2: 0
-9/7i, decomp:21 border 173 201 81 189 milli-err2: 592
+9/7i, decomp:21 border 173 201 81 189 milli-err2: 109
9/7f, decomp:21 border 173 201 81 189 err2: 0.000
5/3i, decomp:22 border 213 227 76 245 milli-err2: 0
-9/7i, decomp:22 border 213 227 76 245 milli-err2: 533
+9/7i, decomp:22 border 213 227 76 245 milli-err2: 116
9/7f, decomp:22 border 213 227 76 245 err2: 0.000
5/3i, decomp:13 border 134 157 184 203 milli-err2: 0
-9/7i, decomp:13 border 134 157 184 203 milli-err2: 535
+9/7i, decomp:13 border 134 157 184 203 milli-err2: 107
9/7f, decomp:13 border 134 157 184 203 err2: 0.000
5/3i, decomp: 1 border 204 237 6 106 milli-err2: 0
-9/7i, decomp: 1 border 204 237 6 106 milli-err2: 219
+9/7i, decomp: 1 border 204 237 6 106 milli-err2: 96
9/7f, decomp: 1 border 204 237 6 106 err2: 0.000
5/3i, decomp:28 border 76 211 13 210 milli-err2: 0
-9/7i, decomp:28 border 76 211 13 210 milli-err2: 791
+9/7i, decomp:28 border 76 211 13 210 milli-err2: 118
9/7f, decomp:28 border 76 211 13 210 err2: 0.000
5/3i, decomp:21 border 76 99 43 123 milli-err2: 0
-9/7i, decomp:21 border 76 99 43 123 milli-err2: 686
+9/7i, decomp:21 border 76 99 43 123 milli-err2: 106
9/7f, decomp:21 border 76 99 43 123 err2: 0.000
5/3i, decomp:15 border 192 243 174 204 milli-err2: 0
-9/7i, decomp:15 border 192 243 174 204 milli-err2: 476
+9/7i, decomp:15 border 192 243 174 204 milli-err2: 122
9/7f, decomp:15 border 192 243 174 204 err2: 0.000
5/3i, decomp:21 border 17 68 93 204 milli-err2: 0
-9/7i, decomp:21 border 17 68 93 204 milli-err2: 633
+9/7i, decomp:21 border 17 68 93 204 milli-err2: 125
9/7f, decomp:21 border 17 68 93 204 err2: 0.000
5/3i, decomp:11 border 142 168 82 174 milli-err2: 0
-9/7i, decomp:11 border 142 168 82 174 milli-err2: 696
+9/7i, decomp:11 border 142 168 82 174 milli-err2: 114
9/7f, decomp:11 border 142 168 82 174 err2: 0.000
5/3i, decomp:23 border 142 209 171 235 milli-err2: 0
-9/7i, decomp:23 border 142 209 171 235 milli-err2: 626
+9/7i, decomp:23 border 142 209 171 235 milli-err2: 120
9/7f, decomp:23 border 142 209 171 235 err2: 0.000
5/3i, decomp:30 border 37 185 79 245 milli-err2: 0
-9/7i, decomp:30 border 37 185 79 245 milli-err2: 953
+9/7i, decomp:30 border 37 185 79 245 milli-err2: 115
9/7f, decomp:30 border 37 185 79 245 err2: 0.000
5/3i, decomp: 5 border 129 236 30 243 milli-err2: 0
-9/7i, decomp: 5 border 129 236 30 243 milli-err2: 620
+9/7i, decomp: 5 border 129 236 30 243 milli-err2: 117
9/7f, decomp: 5 border 129 236 30 243 err2: 0.000
5/3i, decomp:10 border 5 160 146 247 milli-err2: 0
-9/7i, decomp:10 border 5 160 146 247 milli-err2: 797
+9/7i, decomp:10 border 5 160 146 247 milli-err2: 117
9/7f, decomp:10 border 5 160 146 247 err2: 0.000
5/3i, decomp: 5 border 104 162 6 47 milli-err2: 0
-9/7i, decomp: 5 border 104 162 6 47 milli-err2: 603
+9/7i, decomp: 5 border 104 162 6 47 milli-err2: 119
9/7f, decomp: 5 border 104 162 6 47 err2: 0.000
5/3i, decomp:24 border 78 250 102 218 milli-err2: 0
-9/7i, decomp:24 border 78 250 102 218 milli-err2: 836
+9/7i, decomp:24 border 78 250 102 218 milli-err2: 113
9/7f, decomp:24 border 78 250 102 218 err2: 0.000
5/3i, decomp:28 border 86 98 56 79 milli-err2: 0
-9/7i, decomp:28 border 86 98 56 79 milli-err2: 597
+9/7i, decomp:28 border 86 98 56 79 milli-err2: 115
9/7f, decomp:28 border 86 98 56 79 err2: 0.000
5/3i, decomp: 6 border 95 238 197 214 milli-err2: 0
-9/7i, decomp: 6 border 95 238 197 214 milli-err2: 478
+9/7i, decomp: 6 border 95 238 197 214 milli-err2: 115
9/7f, decomp: 6 border 95 238 197 214 err2: 0.000
5/3i, decomp:17 border 77 169 93 165 milli-err2: 0
-9/7i, decomp:17 border 77 169 93 165 milli-err2: 616
+9/7i, decomp:17 border 77 169 93 165 milli-err2: 123
9/7f, decomp:17 border 77 169 93 165 err2: 0.000
5/3i, decomp:22 border 178 187 7 119 milli-err2: 0
-9/7i, decomp:22 border 178 187 7 119 milli-err2: 392
+9/7i, decomp:22 border 178 187 7 119 milli-err2: 95
9/7f, decomp:22 border 178 187 7 119 err2: 0.000
diff --git a/tests/ref/fate/jpeg2000-dcinema b/tests/ref/fate/jpeg2000-dcinema
index cdf8cd4fc6..217b8c8377 100644
--- a/tests/ref/fate/jpeg2000-dcinema
+++ b/tests/ref/fate/jpeg2000-dcinema
@@ -3,5 +3,5 @@
#codec_id 0: rawvideo
#dimensions 0: 1920x1080
#sar 0: 1/1
-0, 0, 0, 1, 12441600, 0xfcf6a127
-0, 1, 1, 1, 12441600, 0x577b6a64
+0, 0, 0, 1, 12441600, 0x9c79568e
+0, 1, 1, 1, 12441600, 0xd96342dd
diff --git a/tests/ref/fate/jpeg2000dec-p0_04 b/tests/ref/fate/jpeg2000dec-p0_04
index 5de7880c44..c293084a50 100644
--- a/tests/ref/fate/jpeg2000dec-p0_04
+++ b/tests/ref/fate/jpeg2000dec-p0_04
@@ -3,4 +3,4 @@
#codec_id 0: rawvideo
#dimensions 0: 640x480
#sar 0: 0/1
-0, 0, 0, 1, 921600, 0x097d9665
+0, 0, 0, 1, 921600, 0x8577ffee
diff --git a/tests/ref/fate/jpeg2000dec-p0_05 b/tests/ref/fate/jpeg2000dec-p0_05
index bb215043a1..bd5cc4b77a 100644
--- a/tests/ref/fate/jpeg2000dec-p0_05
+++ b/tests/ref/fate/jpeg2000dec-p0_05
@@ -3,4 +3,4 @@
#codec_id 0: rawvideo
#dimensions 0: 1024x1024
#sar 0: 0/1
-0, 0, 0, 1, 2621440, 0x081f5048
+0, 0, 0, 1, 2621440, 0x99604189
diff --git a/tests/ref/fate/jpeg2000dec-p0_09 b/tests/ref/fate/jpeg2000dec-p0_09
index 1755e7cc7d..ff78bf9dc7 100644
--- a/tests/ref/fate/jpeg2000dec-p0_09
+++ b/tests/ref/fate/jpeg2000dec-p0_09
@@ -3,4 +3,4 @@
#codec_id 0: rawvideo
#dimensions 0: 17x37
#sar 0: 0/1
-0, 0, 0, 1, 629, 0x5c9c389d
+0, 0, 0, 1, 629, 0xf35d38d6
diff --git a/tests/ref/vsynth/vsynth1-jpeg2000-97 b/tests/ref/vsynth/vsynth1-jpeg2000-97
index c979ab5c36..9c2f7c3fa3 100644
--- a/tests/ref/vsynth/vsynth1-jpeg2000-97
+++ b/tests/ref/vsynth/vsynth1-jpeg2000-97
@@ -1,4 +1,4 @@
-5e6d32b7205d31245b0d1f015d08b515 *tests/data/fate/vsynth1-jpeg2000-97.avi
-3643886 tests/data/fate/vsynth1-jpeg2000-97.avi
-a2262f1da2f49bc196b780a6b47ec4e8 *tests/data/fate/vsynth1-jpeg2000-97.out.rawvideo
-stddev: 4.23 PSNR: 35.59 MAXDIFF: 53 bytes: 7603200/ 7603200
+8c1a0792a42c436fa503d85de52b02a8 *tests/data/fate/vsynth1-jpeg2000-97.avi
+4466600 tests/data/fate/vsynth1-jpeg2000-97.avi
+4f89d293fb5fbba785bb18188146f223 *tests/data/fate/vsynth1-jpeg2000-97.out.rawvideo
+stddev: 3.82 PSNR: 36.49 MAXDIFF: 49 bytes: 7603200/ 7603200
diff --git a/tests/ref/vsynth/vsynth2-jpeg2000-97 b/tests/ref/vsynth/vsynth2-jpeg2000-97
index 591f8b6bb3..87d15aeaa3 100644
--- a/tests/ref/vsynth/vsynth2-jpeg2000-97
+++ b/tests/ref/vsynth/vsynth2-jpeg2000-97
@@ -1,4 +1,4 @@
-aa5573136c54b1855d8d00efe2a149bd *tests/data/fate/vsynth2-jpeg2000-97.avi
-2464134 tests/data/fate/vsynth2-jpeg2000-97.avi
-1f63c8b065e847e4c63d57ce23442ea8 *tests/data/fate/vsynth2-jpeg2000-97.out.rawvideo
-stddev: 3.21 PSNR: 37.99 MAXDIFF: 26 bytes: 7603200/ 7603200
+c08f075e29f51268b09e345ebf3e439b *tests/data/fate/vsynth2-jpeg2000-97.avi
+3225618 tests/data/fate/vsynth2-jpeg2000-97.avi
+75232789766a9c4d489d786263f67cea *tests/data/fate/vsynth2-jpeg2000-97.out.rawvideo
+stddev: 2.55 PSNR: 39.97 MAXDIFF: 22 bytes: 7603200/ 7603200
diff --git a/tests/ref/vsynth/vsynth3-jpeg2000-97 b/tests/ref/vsynth/vsynth3-jpeg2000-97
index 5d9d083791..e25a71b314 100644
--- a/tests/ref/vsynth/vsynth3-jpeg2000-97
+++ b/tests/ref/vsynth/vsynth3-jpeg2000-97
@@ -1,4 +1,4 @@
-522e12684aca4262a9d613cb2db7006c *tests/data/fate/vsynth3-jpeg2000-97.avi
-85526 tests/data/fate/vsynth3-jpeg2000-97.avi
-8def36ad1413ab3a5c2af2e1af4603f9 *tests/data/fate/vsynth3-jpeg2000-97.out.rawvideo
-stddev: 4.51 PSNR: 35.04 MAXDIFF: 47 bytes: 86700/ 86700
+67d63aa91a9b06498d9a45b5df2fc8ef *tests/data/fate/vsynth3-jpeg2000-97.avi
+95634 tests/data/fate/vsynth3-jpeg2000-97.avi
+85aeb8676e40ae0f53aca76c9849598c *tests/data/fate/vsynth3-jpeg2000-97.out.rawvideo
+stddev: 4.11 PSNR: 35.84 MAXDIFF: 46 bytes: 86700/ 86700
diff --git a/tests/ref/vsynth/vsynth_lena-jpeg2000-97 b/tests/ref/vsynth/vsynth_lena-jpeg2000-97
index 0539300185..7d67ac2b6d 100644
--- a/tests/ref/vsynth/vsynth_lena-jpeg2000-97
+++ b/tests/ref/vsynth/vsynth_lena-jpeg2000-97
@@ -1,4 +1,4 @@
-80fe872c8afaad914da6ef037957d93b *tests/data/fate/vsynth_lena-jpeg2000-97.avi
-1937216 tests/data/fate/vsynth_lena-jpeg2000-97.avi
-1b97333a8dc115a5ba609b0070d89d4d *tests/data/fate/vsynth_lena-jpeg2000-97.out.rawvideo
-stddev: 2.82 PSNR: 39.10 MAXDIFF: 24 bytes: 7603200/ 7603200
+ad51111c0127e8cd78c85fbdcf61f93e *tests/data/fate/vsynth_lena-jpeg2000-97.avi
+2599870 tests/data/fate/vsynth_lena-jpeg2000-97.avi
+8f2eed1d1415f7173c1149e47dd65517 *tests/data/fate/vsynth_lena-jpeg2000-97.out.rawvideo
+stddev: 2.23 PSNR: 41.15 MAXDIFF: 20 bytes: 7603200/ 7603200
--
2.43.0
More information about the ffmpeg-devel
mailing list