Fix the integer implementation of the inverse 9-7 DWT (FF_DWT97_INT, see https://trac.ffmpeg.org/ticket/10123), which is activated with `-flags +bitexact`.
I went through the code path for the DWT 9-7 transform (integer) and improved precision to match conformance codestream. As a result, the encoded codestream size is slightly larger for a given Q value. For example, `-flags +bitexact -i lena.pnm -q: 20 -format j2k -y tmp.j2c` gives 13K (HEAD) and 19K (with this patch). This commit also updates the source and reference files for affected FATE tests. Signed-off-by: Osamu Watanabe <owata...@es.takushoku-u.ac.jp> --- libavcodec/jpeg2000.c | 6 ++-- libavcodec/jpeg2000dec.c | 2 +- libavcodec/jpeg2000dwt.c | 43 ++++++++++++++---------- libavcodec/tests/jpeg2000dwt.c | 5 +++ tests/ref/fate/j2k-dwt | 40 +++++++++++----------- tests/ref/fate/jpeg2000-dcinema | 4 +-- tests/ref/fate/jpeg2000dec-p0_04 | 2 +- tests/ref/fate/jpeg2000dec-p0_05 | 2 +- tests/ref/fate/jpeg2000dec-p0_09 | 2 +- tests/ref/vsynth/vsynth1-jpeg2000-97 | 8 ++--- tests/ref/vsynth/vsynth2-jpeg2000-97 | 8 ++--- tests/ref/vsynth/vsynth3-jpeg2000-97 | 8 ++--- tests/ref/vsynth/vsynth_lena-jpeg2000-97 | 8 ++--- 13 files changed, 74 insertions(+), 64 deletions(-) diff --git a/libavcodec/jpeg2000.c b/libavcodec/jpeg2000.c index d6ffb02319..f1a7d55ae1 100644 --- a/libavcodec/jpeg2000.c +++ b/libavcodec/jpeg2000.c @@ -260,9 +260,7 @@ static void init_band_stepsize(AVCodecContext *avctx, band->f_stepsize *= F_LFTG_X * F_LFTG_X * 4; break; } - if (codsty->transform == FF_DWT97) { - band->f_stepsize *= pow(F_LFTG_K, 2*(codsty->nreslevels2decode - reslevelno) + lband - 2); - } + band->f_stepsize *= pow(F_LFTG_K, 2*(codsty->nreslevels2decode - reslevelno) + lband - 2); } if (band->f_stepsize > (INT_MAX >> 15)) { @@ -270,7 +268,7 @@ static void init_band_stepsize(AVCodecContext *avctx, av_log(avctx, AV_LOG_ERROR, "stepsize out of range\n"); } - band->i_stepsize = band->f_stepsize * (1 << 15); + band->i_stepsize = lrint(band->f_stepsize * (1 << 15) + 0.5f); /* FIXME: In OpenJPEG code stepsize = stepsize * 0.5. Why? * If not set output of entropic decoder is not correct. 
*/ diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c index 2e09b279dc..f68e41ed6a 100644 --- a/libavcodec/jpeg2000dec.c +++ b/libavcodec/jpeg2000dec.c @@ -2136,7 +2136,7 @@ static void dequantization_int_97(int x, int y, Jpeg2000Cblk *cblk, int32_t *datap = &comp->i_data[(comp->coord[0][1] - comp->coord[0][0]) * (y + j) + x]; int *src = t1->data + j*t1->stride; for (i = 0; i < w; ++i) - datap[i] = (src[i] * (int64_t)band->i_stepsize + (1<<15)) >> 16; + datap[i] = (int32_t)(src[i] * (int64_t)band->i_stepsize + (1 << 14)) >> 15; } } diff --git a/libavcodec/jpeg2000dwt.c b/libavcodec/jpeg2000dwt.c index 34e33553f7..d24f15d39b 100644 --- a/libavcodec/jpeg2000dwt.c +++ b/libavcodec/jpeg2000dwt.c @@ -39,12 +39,12 @@ /* Lifting parameters in integer format. * Computed as param = (float param) * (1 << 16) */ -#define I_LFTG_ALPHA 103949ll -#define I_LFTG_BETA 3472ll -#define I_LFTG_GAMMA 57862ll -#define I_LFTG_DELTA 29066ll -#define I_LFTG_K 80621ll -#define I_LFTG_X 53274ll +#define I_LFTG_ALPHA_PRIME 38413ll // = 103949 - 65536, (= alpha - 1.0) +#define I_LFTG_BETA 3472ll +#define I_LFTG_GAMMA 57862ll +#define I_LFTG_DELTA 29066ll +#define I_LFTG_K 80621ll +#define I_LFTG_X 53274ll #define I_PRESHIFT 8 static inline void extend53(int *p, int i0, int i1) @@ -234,8 +234,11 @@ static void sd_1d97_int(int *p, int i0, int i1) extend97_int(p, i0, i1); i0++; i1++; - for (i = (i0>>1) - 2; i < (i1>>1) + 1; i++) - p[2 * i + 1] -= (I_LFTG_ALPHA * (p[2 * i] + p[2 * i + 2]) + (1 << 15)) >> 16; + for (i = (i0>>1) - 2; i < (i1>>1) + 1; i++) { + const int64_t sum = p[2 * i] + p[2 * i + 2]; + p[2 * i + 1] -= sum; + p[2 * i + 1] -= (I_LFTG_ALPHA_PRIME * sum + (1 << 15)) >> 16; + } for (i = (i0>>1) - 1; i < (i1>>1) + 1; i++) p[2 * i] -= (I_LFTG_BETA * (p[2 * i - 1] + p[2 * i + 1]) + (1 << 15)) >> 16; for (i = (i0>>1) - 1; i < (i1>>1); i++) @@ -276,7 +279,7 @@ static void dwt_encode97_int(DWTContext *s, int *t) // copy back and deinterleave for (i = mv; i < lv; i+=2, j++) - 
t[w*j + lp] = ((l[i] * I_LFTG_X) + (1 << 15)) >> 16; + t[w*j + lp] = l[i]; for (i = 1-mv; i < lv; i+=2, j++) t[w*j + lp] = l[i]; } @@ -293,7 +296,7 @@ static void dwt_encode97_int(DWTContext *s, int *t) // copy back and deinterleave for (i = mh; i < lh; i+=2, j++) - t[w*lp + j] = ((l[i] * I_LFTG_X) + (1 << 15)) >> 16; + t[w*lp + j] = l[i]; for (i = 1-mh; i < lh; i+=2, j++) t[w*lp + j] = l[i]; } @@ -301,7 +304,7 @@ static void dwt_encode97_int(DWTContext *s, int *t) } for (i = 0; i < w * h; i++) - t[i] = (t[i] + ((1<<I_PRESHIFT)>>1)) >> I_PRESHIFT; + t[i] = (t[i] + ((1<<(I_PRESHIFT))>>1)) >> (I_PRESHIFT); } static void sr_1d53(unsigned *p, int i0, int i1) @@ -471,8 +474,11 @@ static void sr_1d97_int(int32_t *p, int i0, int i1) for (i = (i0 >> 1); i < (i1 >> 1) + 1; i++) p[2 * i] += (I_LFTG_BETA * (p[2 * i - 1] + (int64_t)p[2 * i + 1]) + (1 << 15)) >> 16; /* step 6 */ - for (i = (i0 >> 1); i < (i1 >> 1); i++) - p[2 * i + 1] += (I_LFTG_ALPHA * (p[2 * i] + (int64_t)p[2 * i + 2]) + (1 << 15)) >> 16; + for (i = (i0 >> 1); i < (i1 >> 1); i++) { + const int64_t sum = p[2 * i] + (int64_t) p[2 * i + 2]; + p[2 * i + 1] += sum; + p[2 * i + 1] += (I_LFTG_ALPHA_PRIME * sum + (1 << 15)) >> 16; + } } static void dwt_decode97_int(DWTContext *s, int32_t *t) @@ -500,9 +506,9 @@ static void dwt_decode97_int(DWTContext *s, int32_t *t) l = line + mh; for (lp = 0; lp < lv; lp++) { int i, j = 0; - // rescale with interleaving + // interleaving for (i = mh; i < lh; i += 2, j++) - l[i] = ((data[w * lp + j] * I_LFTG_K) + (1 << 15)) >> 16; + l[i] = data[w * lp + j]; for (i = 1 - mh; i < lh; i += 2, j++) l[i] = data[w * lp + j]; @@ -516,9 +522,9 @@ static void dwt_decode97_int(DWTContext *s, int32_t *t) l = line + mv; for (lp = 0; lp < lh; lp++) { int i, j = 0; - // rescale with interleaving + // interleaving for (i = mv; i < lv; i += 2, j++) - l[i] = ((data[w * j + lp] * I_LFTG_K) + (1 << 15)) >> 16; + l[i] = data[w * j + lp]; for (i = 1 - mv; i < lv; i += 2, j++) l[i] = data[w * j + lp]; @@ 
-530,7 +536,8 @@ static void dwt_decode97_int(DWTContext *s, int32_t *t) } for (i = 0; i < w * h; i++) - data[i] = (data[i] + ((1LL<<I_PRESHIFT)>>1)) >> I_PRESHIFT; + // We shift down by `I_PRESHIFT + 1` because the input coefficients `datap[]` were shifted down to 1 bit above from the binary point. + data[i] = (int32_t)(data[i] + ((1LL<<(I_PRESHIFT + 1))>>1)) >> (I_PRESHIFT + 1); } int ff_jpeg2000_dwt_init(DWTContext *s, int border[2][2], diff --git a/libavcodec/tests/jpeg2000dwt.c b/libavcodec/tests/jpeg2000dwt.c index 520ecc05a3..c4aa34282c 100644 --- a/libavcodec/tests/jpeg2000dwt.c +++ b/libavcodec/tests/jpeg2000dwt.c @@ -46,6 +46,11 @@ static int test_dwt(int *array, int *ref, int border[2][2], int decomp_levels, i fprintf(stderr, "ff_dwt_encode failed\n"); return 1; } + if (type == FF_DWT97_INT) { + // pre-scaling to simulate dequantization which places the binary point at 1 bit above from LSB + for (j = 0; j< s->linelen[decomp_levels-1][0] * s->linelen[decomp_levels-1][1]; j++) + array[j] <<= 1; + } ret = ff_dwt_decode(s, array); if (ret < 0) { fprintf(stderr, "ff_dwt_encode failed\n"); diff --git a/tests/ref/fate/j2k-dwt b/tests/ref/fate/j2k-dwt index 42415f00f9..5c40a2f1f5 100644 --- a/tests/ref/fate/j2k-dwt +++ b/tests/ref/fate/j2k-dwt @@ -1,60 +1,60 @@ 5/3i, decomp:15 border 151 170 140 183 milli-err2: 0 -9/7i, decomp:15 border 151 170 140 183 milli-err2: 544 +9/7i, decomp:15 border 151 170 140 183 milli-err2: 110 9/7f, decomp:15 border 151 170 140 183 err2: 0.000 5/3i, decomp:21 border 173 201 81 189 milli-err2: 0 -9/7i, decomp:21 border 173 201 81 189 milli-err2: 592 +9/7i, decomp:21 border 173 201 81 189 milli-err2: 109 9/7f, decomp:21 border 173 201 81 189 err2: 0.000 5/3i, decomp:22 border 213 227 76 245 milli-err2: 0 -9/7i, decomp:22 border 213 227 76 245 milli-err2: 533 +9/7i, decomp:22 border 213 227 76 245 milli-err2: 116 9/7f, decomp:22 border 213 227 76 245 err2: 0.000 5/3i, decomp:13 border 134 157 184 203 milli-err2: 0 -9/7i, decomp:13 
border 134 157 184 203 milli-err2: 535 +9/7i, decomp:13 border 134 157 184 203 milli-err2: 107 9/7f, decomp:13 border 134 157 184 203 err2: 0.000 5/3i, decomp: 1 border 204 237 6 106 milli-err2: 0 -9/7i, decomp: 1 border 204 237 6 106 milli-err2: 219 +9/7i, decomp: 1 border 204 237 6 106 milli-err2: 96 9/7f, decomp: 1 border 204 237 6 106 err2: 0.000 5/3i, decomp:28 border 76 211 13 210 milli-err2: 0 -9/7i, decomp:28 border 76 211 13 210 milli-err2: 791 +9/7i, decomp:28 border 76 211 13 210 milli-err2: 118 9/7f, decomp:28 border 76 211 13 210 err2: 0.000 5/3i, decomp:21 border 76 99 43 123 milli-err2: 0 -9/7i, decomp:21 border 76 99 43 123 milli-err2: 686 +9/7i, decomp:21 border 76 99 43 123 milli-err2: 106 9/7f, decomp:21 border 76 99 43 123 err2: 0.000 5/3i, decomp:15 border 192 243 174 204 milli-err2: 0 -9/7i, decomp:15 border 192 243 174 204 milli-err2: 476 +9/7i, decomp:15 border 192 243 174 204 milli-err2: 122 9/7f, decomp:15 border 192 243 174 204 err2: 0.000 5/3i, decomp:21 border 17 68 93 204 milli-err2: 0 -9/7i, decomp:21 border 17 68 93 204 milli-err2: 633 +9/7i, decomp:21 border 17 68 93 204 milli-err2: 125 9/7f, decomp:21 border 17 68 93 204 err2: 0.000 5/3i, decomp:11 border 142 168 82 174 milli-err2: 0 -9/7i, decomp:11 border 142 168 82 174 milli-err2: 696 +9/7i, decomp:11 border 142 168 82 174 milli-err2: 114 9/7f, decomp:11 border 142 168 82 174 err2: 0.000 5/3i, decomp:23 border 142 209 171 235 milli-err2: 0 -9/7i, decomp:23 border 142 209 171 235 milli-err2: 626 +9/7i, decomp:23 border 142 209 171 235 milli-err2: 120 9/7f, decomp:23 border 142 209 171 235 err2: 0.000 5/3i, decomp:30 border 37 185 79 245 milli-err2: 0 -9/7i, decomp:30 border 37 185 79 245 milli-err2: 953 +9/7i, decomp:30 border 37 185 79 245 milli-err2: 115 9/7f, decomp:30 border 37 185 79 245 err2: 0.000 5/3i, decomp: 5 border 129 236 30 243 milli-err2: 0 -9/7i, decomp: 5 border 129 236 30 243 milli-err2: 620 +9/7i, decomp: 5 border 129 236 30 243 milli-err2: 117 9/7f, decomp: 5 
border 129 236 30 243 err2: 0.000 5/3i, decomp:10 border 5 160 146 247 milli-err2: 0 -9/7i, decomp:10 border 5 160 146 247 milli-err2: 797 +9/7i, decomp:10 border 5 160 146 247 milli-err2: 117 9/7f, decomp:10 border 5 160 146 247 err2: 0.000 5/3i, decomp: 5 border 104 162 6 47 milli-err2: 0 -9/7i, decomp: 5 border 104 162 6 47 milli-err2: 603 +9/7i, decomp: 5 border 104 162 6 47 milli-err2: 119 9/7f, decomp: 5 border 104 162 6 47 err2: 0.000 5/3i, decomp:24 border 78 250 102 218 milli-err2: 0 -9/7i, decomp:24 border 78 250 102 218 milli-err2: 836 +9/7i, decomp:24 border 78 250 102 218 milli-err2: 113 9/7f, decomp:24 border 78 250 102 218 err2: 0.000 5/3i, decomp:28 border 86 98 56 79 milli-err2: 0 -9/7i, decomp:28 border 86 98 56 79 milli-err2: 597 +9/7i, decomp:28 border 86 98 56 79 milli-err2: 115 9/7f, decomp:28 border 86 98 56 79 err2: 0.000 5/3i, decomp: 6 border 95 238 197 214 milli-err2: 0 -9/7i, decomp: 6 border 95 238 197 214 milli-err2: 478 +9/7i, decomp: 6 border 95 238 197 214 milli-err2: 115 9/7f, decomp: 6 border 95 238 197 214 err2: 0.000 5/3i, decomp:17 border 77 169 93 165 milli-err2: 0 -9/7i, decomp:17 border 77 169 93 165 milli-err2: 616 +9/7i, decomp:17 border 77 169 93 165 milli-err2: 123 9/7f, decomp:17 border 77 169 93 165 err2: 0.000 5/3i, decomp:22 border 178 187 7 119 milli-err2: 0 -9/7i, decomp:22 border 178 187 7 119 milli-err2: 392 +9/7i, decomp:22 border 178 187 7 119 milli-err2: 95 9/7f, decomp:22 border 178 187 7 119 err2: 0.000 diff --git a/tests/ref/fate/jpeg2000-dcinema b/tests/ref/fate/jpeg2000-dcinema index cdf8cd4fc6..217b8c8377 100644 --- a/tests/ref/fate/jpeg2000-dcinema +++ b/tests/ref/fate/jpeg2000-dcinema @@ -3,5 +3,5 @@ #codec_id 0: rawvideo #dimensions 0: 1920x1080 #sar 0: 1/1 -0, 0, 0, 1, 12441600, 0xfcf6a127 -0, 1, 1, 1, 12441600, 0x577b6a64 +0, 0, 0, 1, 12441600, 0x9c79568e +0, 1, 1, 1, 12441600, 0xd96342dd diff --git a/tests/ref/fate/jpeg2000dec-p0_04 b/tests/ref/fate/jpeg2000dec-p0_04 index 5de7880c44..c293084a50 
100644 --- a/tests/ref/fate/jpeg2000dec-p0_04 +++ b/tests/ref/fate/jpeg2000dec-p0_04 @@ -3,4 +3,4 @@ #codec_id 0: rawvideo #dimensions 0: 640x480 #sar 0: 0/1 -0, 0, 0, 1, 921600, 0x097d9665 +0, 0, 0, 1, 921600, 0x8577ffee diff --git a/tests/ref/fate/jpeg2000dec-p0_05 b/tests/ref/fate/jpeg2000dec-p0_05 index bb215043a1..bd5cc4b77a 100644 --- a/tests/ref/fate/jpeg2000dec-p0_05 +++ b/tests/ref/fate/jpeg2000dec-p0_05 @@ -3,4 +3,4 @@ #codec_id 0: rawvideo #dimensions 0: 1024x1024 #sar 0: 0/1 -0, 0, 0, 1, 2621440, 0x081f5048 +0, 0, 0, 1, 2621440, 0x99604189 diff --git a/tests/ref/fate/jpeg2000dec-p0_09 b/tests/ref/fate/jpeg2000dec-p0_09 index 1755e7cc7d..ff78bf9dc7 100644 --- a/tests/ref/fate/jpeg2000dec-p0_09 +++ b/tests/ref/fate/jpeg2000dec-p0_09 @@ -3,4 +3,4 @@ #codec_id 0: rawvideo #dimensions 0: 17x37 #sar 0: 0/1 -0, 0, 0, 1, 629, 0x5c9c389d +0, 0, 0, 1, 629, 0xf35d38d6 diff --git a/tests/ref/vsynth/vsynth1-jpeg2000-97 b/tests/ref/vsynth/vsynth1-jpeg2000-97 index c979ab5c36..9c2f7c3fa3 100644 --- a/tests/ref/vsynth/vsynth1-jpeg2000-97 +++ b/tests/ref/vsynth/vsynth1-jpeg2000-97 @@ -1,4 +1,4 @@ -5e6d32b7205d31245b0d1f015d08b515 *tests/data/fate/vsynth1-jpeg2000-97.avi -3643886 tests/data/fate/vsynth1-jpeg2000-97.avi -a2262f1da2f49bc196b780a6b47ec4e8 *tests/data/fate/vsynth1-jpeg2000-97.out.rawvideo -stddev: 4.23 PSNR: 35.59 MAXDIFF: 53 bytes: 7603200/ 7603200 +8c1a0792a42c436fa503d85de52b02a8 *tests/data/fate/vsynth1-jpeg2000-97.avi +4466600 tests/data/fate/vsynth1-jpeg2000-97.avi +4f89d293fb5fbba785bb18188146f223 *tests/data/fate/vsynth1-jpeg2000-97.out.rawvideo +stddev: 3.82 PSNR: 36.49 MAXDIFF: 49 bytes: 7603200/ 7603200 diff --git a/tests/ref/vsynth/vsynth2-jpeg2000-97 b/tests/ref/vsynth/vsynth2-jpeg2000-97 index 591f8b6bb3..87d15aeaa3 100644 --- a/tests/ref/vsynth/vsynth2-jpeg2000-97 +++ b/tests/ref/vsynth/vsynth2-jpeg2000-97 @@ -1,4 +1,4 @@ -aa5573136c54b1855d8d00efe2a149bd *tests/data/fate/vsynth2-jpeg2000-97.avi -2464134 tests/data/fate/vsynth2-jpeg2000-97.avi 
-1f63c8b065e847e4c63d57ce23442ea8 *tests/data/fate/vsynth2-jpeg2000-97.out.rawvideo -stddev: 3.21 PSNR: 37.99 MAXDIFF: 26 bytes: 7603200/ 7603200 +c08f075e29f51268b09e345ebf3e439b *tests/data/fate/vsynth2-jpeg2000-97.avi +3225618 tests/data/fate/vsynth2-jpeg2000-97.avi +75232789766a9c4d489d786263f67cea *tests/data/fate/vsynth2-jpeg2000-97.out.rawvideo +stddev: 2.55 PSNR: 39.97 MAXDIFF: 22 bytes: 7603200/ 7603200 diff --git a/tests/ref/vsynth/vsynth3-jpeg2000-97 b/tests/ref/vsynth/vsynth3-jpeg2000-97 index 5d9d083791..e25a71b314 100644 --- a/tests/ref/vsynth/vsynth3-jpeg2000-97 +++ b/tests/ref/vsynth/vsynth3-jpeg2000-97 @@ -1,4 +1,4 @@ -522e12684aca4262a9d613cb2db7006c *tests/data/fate/vsynth3-jpeg2000-97.avi -85526 tests/data/fate/vsynth3-jpeg2000-97.avi -8def36ad1413ab3a5c2af2e1af4603f9 *tests/data/fate/vsynth3-jpeg2000-97.out.rawvideo -stddev: 4.51 PSNR: 35.04 MAXDIFF: 47 bytes: 86700/ 86700 +67d63aa91a9b06498d9a45b5df2fc8ef *tests/data/fate/vsynth3-jpeg2000-97.avi +95634 tests/data/fate/vsynth3-jpeg2000-97.avi +85aeb8676e40ae0f53aca76c9849598c *tests/data/fate/vsynth3-jpeg2000-97.out.rawvideo +stddev: 4.11 PSNR: 35.84 MAXDIFF: 46 bytes: 86700/ 86700 diff --git a/tests/ref/vsynth/vsynth_lena-jpeg2000-97 b/tests/ref/vsynth/vsynth_lena-jpeg2000-97 index 0539300185..7d67ac2b6d 100644 --- a/tests/ref/vsynth/vsynth_lena-jpeg2000-97 +++ b/tests/ref/vsynth/vsynth_lena-jpeg2000-97 @@ -1,4 +1,4 @@ -80fe872c8afaad914da6ef037957d93b *tests/data/fate/vsynth_lena-jpeg2000-97.avi -1937216 tests/data/fate/vsynth_lena-jpeg2000-97.avi -1b97333a8dc115a5ba609b0070d89d4d *tests/data/fate/vsynth_lena-jpeg2000-97.out.rawvideo -stddev: 2.82 PSNR: 39.10 MAXDIFF: 24 bytes: 7603200/ 7603200 +ad51111c0127e8cd78c85fbdcf61f93e *tests/data/fate/vsynth_lena-jpeg2000-97.avi +2599870 tests/data/fate/vsynth_lena-jpeg2000-97.avi +8f2eed1d1415f7173c1149e47dd65517 *tests/data/fate/vsynth_lena-jpeg2000-97.out.rawvideo +stddev: 2.23 PSNR: 41.15 MAXDIFF: 20 bytes: 7603200/ 7603200 -- 2.43.0 
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".