Hi, Thanks for the feedback. >This is an infinite loop that can hang the decoder. Will try to get the upstream to accept a fix.
>overflows in the entropy decoding system The assertions should be able to catch them, so they should be fine. They'll trigger if there's a desync with the bitstream. I might add more in case they're not enough. >What is the point of having DaalaSharedContext in addition to DaalaBitstreamHeader? I was looking at VP9 when starting to write this and it had this structure. I'll remove the shared context one. >Third, the decoder seems to return only the first frame, repeatedly, >instead of the following frames. You did read that it can only decode I-frames (keyframes). So you need to encode your files with -k 1 to make all frames keyframes. >First, this fails building with --enable-shared, because daala_find_p_format >is not static. Fixed yesterday, but sent the same patch twice by mistake. It's been fixed, but I'd like to post a v3 of the patch to address some of the issues found. Thanks, Rostislav On 29 December 2015 at 15:55, Andreas Cadhalpun < andreas.cadhal...@googlemail.com> wrote: > On 29.12.2015 03:12, Rostislav Pehlivanov wrote: > > Hmm, kinda odd. > > I've attached a v2 of the RFC Patch which does this: > > 1. Fixed some typos. > > 2. Fixed some missing newlines at the end of daalatab* > > 3. Moves the pix_fmt finding function inside the main decoder file. > > > > Try building now. > > First, this fails building with --enable-shared, because > daala_find_p_format > is not static. > > Second, this decoder crashes, hangs and triggers lots of undefined > behavior. :-/ > > Third, the decoder seems to return only the first frame, repeatedly, > instead of the following frames. > > More details are interleaved in the diff below. > > > diff --git a/libavcodec/daala.h b/libavcodec/daala.h > > new file mode 100644 > > index 0000000..535e78f > > --- /dev/null > > +++ b/libavcodec/daala.h > > @@ -0,0 +1,78 @@ > [...] > > +typedef struct DaalaBitstreamHeader { > > + uint8_t key_frame; > > + uint8_t bipred; > > + uint8_t ref_num; > > + uint8_t act_mask; > > + uint8_t qm; > > + uint8_t haar; > > + uint8_t golden; > > + uint8_t pvq_qm[DAALA_MAX_PLANES][DAALA_QM_SIZE]; > > +} DaalaBitstreamHeader; > > + > > +typedef struct DaalaSharedContext { > > + DaalaBitstreamHeader h; > > +} DaalaSharedContext; > > What is the point of having DaalaSharedContext in addition to > DaalaBitstreamHeader? > > > diff --git a/libavcodec/daala_entropy.h b/libavcodec/daala_entropy.h > > new file mode 100644 > > index 0000000..3fdcaef > > --- /dev/null > > +++ b/libavcodec/daala_entropy.h > > @@ -0,0 +1,554 @@ > [...] > > +/* Updates the generic exponential probability model */ > > +static av_always_inline void daalaent_exp_model_update(DaalaCDF *c, int > *ex, int x, > > + int xs, int id, > int integrate) > > +{ > > + int i, xenc; > > + ent_rng *cdf = &c->cdf[id*c->y]; > > + if (cdf[15] + c->inc > 32767) { > > + for (i = 0; i < 16; i++) > > + cdf[i] = (cdf[i] >> 1) + i + 1; > > + } > > + xenc = FFMIN(15, xs); > > + for (i = xenc; i < 16; i++) > > + cdf[i] += c->inc; > > + x = FFMIN(x, 32767); > > + *ex += ((x << 16) - *ex) >> integrate; > > This subtraction can overflow. > > [...] > > +/* "+derf | It was a hack for the screen coding wavelet tools." */ > > A rather bad hack... > > > +static inline int daalaent_decode_unary(DaalaEntropy *e) > > +{ > > + int rval = 0; > > + while (!daalaent_decode_bits(e, 1)) > > This is an infinite loop that can hang the decoder. > > > + rval++; > > + return rval; > > +} > [...] > > +/* Decodes quantized coefficients from the bitsteam */ > > +static inline void daalaent_decode_laplace_vector(DaalaEntropy *e, > dctcoef *y, > > + int n, int k, dctcoef > *curr, > > + const dctcoef *means) > > +{ > > + int i, exp_q8, mean_k_q8, mean_sum_ex_q8, sum_ex = 0, kn = k, > ran_delta = 0; > > + if (k <= 1) { > > + daalaent_decode_laplace_delta(e, y, n, k, curr, means); > > + return; > > + } > > + if (!k) { > > + curr[DAALAENT_PVQ_COUNT] = DAALAENT_PVQ_NOVAL; > > + curr[DAALAENT_PVQ_COUNT_EX] = DAALAENT_PVQ_NOVAL; > > + curr[DAALAENT_PVQ_K] = 0; > > + curr[DAALAENT_PVQ_SUM_EX] = 0; > > + memset(y, 0, n*sizeof(dctcoef)); > > + return; > > + } > > + mean_k_q8 = means[DAALAENT_PVQ_K]; > > + mean_sum_ex_q8 = means[DAALAENT_PVQ_SUM_EX]; > > + if (mean_k_q8 < 1 << 23) > > + exp_q8 = 256*mean_k_q8/(1 + mean_sum_ex_q8); > > This multiplication can overflow. > > > + else > > + exp_q8 = mean_k_q8/(1 + (mean_sum_ex_q8 >> 8)); > > + for (i = 0; i < n; i++) { > > + int x, ex; > > + if (!kn) > > + break; > > + if (kn <= 1 && i != n - 1) { > > + daalaent_decode_laplace_delta(e, y + i, n - i, kn, curr, > means); > > + ran_delta = 1; > > + i = n; > > + break; > > + } > > + ex = (2*exp_q8*kn + (n - i))/(2*(n - i)); > > + if (ex > kn*256) > > + ex = kn*256; > > + sum_ex += (2*256*kn + (n - i))/(2*(n - i)); > > All these multiplications involving kn can overflow. > > > + if (i != n - 1) > > + x = daalaent_decode_laplace_pvq(e, ex, kn); > > + else > > + x = kn; > > + y[i] = x*daalaent_cphase(e, x); > > + kn -= abs(x); > > + } > > + memset(&y[i], 0, (n - i)*sizeof(dctcoef)); /* Zero the rest */ > > + if (!ran_delta) { > > + curr[DAALAENT_PVQ_COUNT] = DAALAENT_PVQ_NOVAL; > > + curr[DAALAENT_PVQ_COUNT_EX] = DAALAENT_PVQ_NOVAL; > > + } > > + curr[DAALAENT_PVQ_K] = k - kn; > > + curr[DAALAENT_PVQ_SUM_EX] = sum_ex; > > +} > > + > > +/* Expectation value is in Q16 */ > > +static inline int daalaent_decode_generic(DaalaEntropy *e, DaalaCDF *c, > int *ex, > > + int max, int integrate) > > +{ > > + int rval, lsb = 0, log_ex = daalaent_log_ex(*ex); > > + const int shift = FFMAX(0, (log_ex - 5) >> 1); > > + const int id = FFMIN(DAALAENT_MODEL_TAB - 1, log_ex); > > + const int ms = (max + (1 << shift >> 1)) >> shift; > > + int xs = (max == -1) ? 16 : FFMIN(ms + 1, 16); > > + ent_rng *cdf = &c->cdf[id*c->y]; > > + if (!max) > > + return 0; > > + if ((xs = daalaent_decode_cdf(e, cdf, xs, 0, CDF_UNSCALED)) == 15) { > > + int g = ((2*(*ex) >> 8) + (1 << shift >> 1)) >> shift; > > This multiplication can overflow, causing g to become negative, e.g. -256. > > > + ent_win decay = FFMAX(2, FFMIN(254, 256*g/(g + 256))); > > In that case this gives a division by zero crash. > > > +static inline void daalaent_decode_init(DaalaEntropy *e, const uint8_t > *buf, > > + int buf_size) > > +{ > > + e->rbuf = buf; > > + e->erbuf = buf + buf_size; > > This doesn't check if buf_size is 0. In that case e->end_window is always > 0, > causing daalaent_decode_bits to always return 0, leading to the infinite > loop > in daalaent_decode_unary. > > > + e->buf = buf; > > + e->ebuf = buf + buf_size; > > + e->err = 0; > > + e->diff = 0; > > + e->range = 32768; > > + e->count = -15; > > + e->eos_offset = 18 - DAALAENT_WSIZE; > > + e->end_window = 0; > > + e->end_window_size = 0; > > + daalaent_fillup(e); > > +} > [...] > > diff --git a/libavcodec/daala_pvq.h b/libavcodec/daala_pvq.h > > new file mode 100644 > > index 0000000..ea8a86b > > --- /dev/null > > +++ b/libavcodec/daala_pvq.h > > @@ -0,0 +1,369 @@ > [...] > > +static inline void daalapvq_synth(dctcoef *xcoeff, dctcoef *ypulse, > dctcoef *ref, > > + int n, double gr, uint8_t ref_p, > double gain, > > + double theta, const int16_t *qmatrix, > > + const int16_t *qmatrix_inv) > > +{ > > + int i, m, nn = n - ref_p, s = 0, yy = 0; > > + double scale, r[DAALAPVQ_MAX_PART_SIZE], x[DAALAPVQ_MAX_PART_SIZE]; > > + if (ref_p) { > > + for (i = 0; i < n; i++) > > + r[i] = ref[i]*qmatrix[i]*DAALA_QM_SCALE_UNIT; > > The 'ref[i]*qmatrix[i]' multiplication can overflow. > > > + } > > + m = !ref_p ? 0 : daalapvq_householder_c(r, n, gr, &s); > > + for (i = 0; i < nn; i++) > > + yy += ypulse[i]*ypulse[i]; > > + scale = !yy ? 0 : gain/sqrt(yy); > > + if (!ref_p) { > > + for (i = 0; i < n; i++) > > + xcoeff[i] = > lrint((ypulse[i]*scale)*(qmatrix_inv[i]*DAALA_QM_INV_SCALE_UNIT)); > > + } else { > > + scale *= sin(theta); > > + for (i = 0; i < m; i++) > > + x[i] = ypulse[i]*scale; > > + x[m] = -s*gain*cos(theta); > > + for (i = m; i < nn; i++) > > + x[i+1] = ypulse[i]*scale; > > + daalapvq_householder_a(x, r, n); > > + for (i = 0; i < n; i++) > > + xcoeff[i] = > lrint(x[i]*qmatrix_inv[i]*DAALA_QM_INV_SCALE_UNIT); > > + } > > +} > > + > > +static av_always_inline void daalapvq_adapt_shuffle(int *dst, int *src, > int spd, > > + int idx, int mul) > > +{ > > + if (src[idx] < 1) > > + return; > > + dst[idx+0] += (mul*src[idx+0] - dst[idx+0]) >> spd; > > This multiplication can overflow. > > [...] > > +static inline void daalapvq_decode_vector(DaalaEntropy *e, DaalaPVQ > *pvq, > > + dctcoef *out, dctcoef *ref, > > + const double beta, > > + uint8_t key_frame, int p, > > + uint8_t *skip_rest, > > + uint8_t has_err, int band_idx, > > + int qm_off, enum DaalaBsize > bsize) > > +{ > > + int i, k; > > + int qg = 0, skip = 0, itheta = (!!key_frame), has_ref = !key_frame; > > + double qcg, gain, theta = 0.0f, gr = 0.0f, gain_off = 0.0f; > > + dctcoef tmp[DAALAPVQ_MAX_PART_SIZE] = {0}; > > + > > + const int robust = has_err || key_frame; > > + const int band_len = pvq->size[band_idx]; > > + const int16_t *qmatrix = &pvq->qmatrix[qm_off]; > > + const int16_t *qmatrix_inv = &pvq->qmatrix_inv[qm_off]; > > + > > + if (!skip_rest[(band_idx + 2) % 3]) { > > + int iloc = (!!p)*DAALA_NBSIZES*DAALAPVQ_PARTITIONS_MAX + > bsize*DAALAPVQ_PARTITIONS_MAX + band_idx; > > + i = daalaent_decode_cdf_adapt(e, &pvq->pvqtheta_gain_cdf, iloc, > 8 + 7*pvq->skip[band_idx]); > > + if (!key_frame && i >= 10) > > + i++; > > + if (key_frame && i >= 8) > > + i++; > > + if (i >= 8) { > > + i -= 8; > > + skip_rest[0] = skip_rest[1] = skip_rest[2] = 1; > > + } > > + qg = i & 1; > > + itheta = (i >> 1) - 1; > > + has_ref = !(itheta == -1); > > + } > > + if (qg) { > > + int *ex = pvq->pvqgain_ex[p][bsize] + band_idx, ex_tmp = *ex; > > + DaalaCDF *mcdf = has_ref ? &pvq->pvqgain_ref_mcdf : > &pvq->pvqgain_noref_mcdf; > > + qg = 1 + daalaent_decode_generic(e, mcdf, &ex_tmp, -1, 2); > > + *ex += ((qg << 16) - *ex) >> 2; > > The left shift can overflow, also the subtraction, and the final addition, > too. > > [...] > > diff --git a/libavcodec/daaladec.c b/libavcodec/daaladec.c > > new file mode 100644 > > index 0000000..3501a6b > > --- /dev/null > > +++ b/libavcodec/daaladec.c > > @@ -0,0 +1,804 @@ > [...] > > +/* Get DC level */ > > +static void get_haar_dc_sb(DaalaContext *s, HaarGradient *g, dctcoef > *d, int x, int y, > > + uint8_t p, uint8_t lim_pass) > > +{ > > + int q, q_dc; > > + int xdec = s->fmt->dec[p][0]; > > + const int aw = s->width >> xdec; > > + const int ln = DAALA_LOG_BSIZE_MAX - xdec; > > + dctcoef dc_pred = 0, **dc_buf = s->haar_dc_buf[p]; > > + if (!s->quantizer[p]) > > + q_dc = 1; > > + else > > + q_dc = FFMAX(1, > s->quantizer[p]*s->s.h.pvq_qm[p][daala_get_qm_idx(DAALA_NBSIZES - 1, 0)] >> > 4); > > + if (x > 0 && y > 0) { > > + if (lim_pass) { /* ALERT: coeffs could change */ > > + dc_pred = 22*dc_buf[x-1][y-0] - 9*dc_buf[x-1][y-1] + > > + 15*dc_buf[x+0][y-1] + 4*dc_buf[x+1][y-1]; > > + } else { > > + dc_pred = 23*dc_buf[x-1][y-0] - 10*dc_buf[x-1][y-1] + > 19*dc_buf[x-0][y-1]; > > + } > > These two dc_pred calculations can overflow in various ways. > > > + dc_pred = (dc_pred + 16) >> 5; > > + } else { > > + dc_pred += x > 0 ? dc_buf[x-1][y-0] : 0; > > + dc_pred += y > 0 ? dc_buf[x-0][y-1] : 0; > > + } > > + q = daalaent_decode_generic(&s->e, &s->haar_dc_mcdf[p], > &s->haar_sb_ex[p], -1, 2); > > + q *= daalaent_cphase(&s->e, q); > > + q = q*q_dc + dc_pred; > > This multiplication can overflow. > > [...] > > +/* Haar block decoding and transform */ > > +static void decode_block_haar(DaalaContext *s, int x, int y, int p, > enum DaalaBsize bsize) > > +{ > > + int i, j, k, l, n = 1 << (bsize + 2); > > + const int dx = x << bsize, dy = y << bsize; > > + const int aw = s->width >> s->fmt->dec[p][0]; > > + const int boffset = (dy << 2)*aw + (dx << 2); > > + > > + dctcoef tree[4][4]; > > + dctcoef pred[DAALA_BSIZE_MAX*DAALA_BSIZE_MAX]; > > + dctcoef tpred[DAALA_BSIZE_MAX*DAALA_BSIZE_MAX]; > > + > > + daala_calc_prediction(s, pred, s->dcoef[p], dx, dy, p, bsize); > > + memcpy(tpred, pred, n*n*sizeof(dctcoef)); > > + > > + tree[0][0] = daalaent_decode_cdf_adapt(&s->e, &s->haar_bit_cdf, p, > 16); > > + if (tree[0][0] == 15) > > + tree[0][0] += daalaent_decode_unary(&s->e); > > + > > + if (tree[0][0] > 24) { > > + s->e.err = 1; > > + return; > > + } else if (tree[0][0] > 1) { > > + int tmp = daalaent_decode_bits(&s->e, tree[0][0] - 1); > > + tree[0][0] = (1 << (tree[0][0] - 1)) | tmp; > > + } > > + > > + tree[1][1] = decode_haar_coeff_tree_split(s, tree[0][0], > 3, 0); > > + tree[0][1] = decode_haar_coeff_tree_split(s, tree[0][0] - > tree[1][1], 4, 0); > > + tree[1][0] = tree[0][0] - tree[1][1] - tree[0][1]; > > + > > + decode_tree_sum(s, pred, 1, 0, tree[0][1], bsize + 2, 0); > > + decode_tree_sum(s, pred, 0, 1, tree[1][0], bsize + 2, 1); > > + decode_tree_sum(s, pred, 1, 1, tree[1][1], bsize + 2, 2); > > + > > + for (i = 0; i < n; i++) { > > + for (j = (i == 0); j < n; j++) > > + pred[i*n + j] *= daalaent_cphase(&s->e, pred[i*n + j]); > > + } > > + for (i = 0; i < 3; i++) { /* Direction */ > > + for (j = 0; j < bsize+2; j++) { /* Level */ > > + int bo = (((i + 1) >> 1) << j)*n + (((i + 1) & 1) << j); > > + int q = !s->quantizer[p] ? 1 : > s->quantizer[p]*daala_haar_qm[i == 2][j] >> 4; > > + for (k = 0; k < 1 << j; k++) > > + for (l = 0; l < 1 << j; l++) > > + pred[bo + k*n + l] = q*pred[bo + k*n + l] + > tpred[bo + k*n + l]; > > The q*pred[] multiplication can overflow. > > [...] > > diff --git a/libavcodec/daaladsp.c b/libavcodec/daaladsp.c > > new file mode 100644 > > index 0000000..ba03520 > > --- /dev/null > > +++ b/libavcodec/daaladsp.c > > @@ -0,0 +1,2123 @@ > [...] > > +static av_always_inline void idct_1D_4(pixel *x, int xstride, const > pixel y[4]) > > +{ > > + int t2h, t0 = y[0], t1 = y[1], t2 = y[2], t3 = y[3]; > > + t3 += (t1*18293 + 8192) >> 14; > > + t1 -= (t3*21407 + 16384) >> 15; > > + t3 += (t1*23013 + 16384) >> 15; > > + t2 = t0 - t2; > > These four lines can overflow. > > > + t2h = DAALA_DCT_RSHIFT(t2, 1); > > + t0 -= t2h - DAALA_DCT_RSHIFT(t3, 1); > > + t1 = t2h - t1; > > These two, as well. > > > + *(x + 0*xstride) = (pixel)t0; > > + *(x + 1*xstride) = (pixel)(t2 - t1); > > This subtraction can overflow. > > > + *(x + 2*xstride) = (pixel)t1; > > + *(x + 3*xstride) = (pixel)(t0 - t3); > > This one, too. > > > +} > > + > > +static av_always_inline void idct_1D_8(pixel *x, int xstride, const > pixel y[16]) > > +{ > > + int t1h, t4h, t6h, t0 = y[0], t1 = y[1], t2 = y[2], t3 = y[3], t4 = > y[4]; > > + int t5 = y[5], t6 = y[6], t7 = y[7]; > > + t5 -= (t3*2485 + 4096) >> 13; > > + t3 += (t5*18205 + 16384) >> 15; > > + t5 -= (t3*2485 + 4096) >> 13; > > + t7 -= (t1*3227 + 16384) >> 15; > > + t1 += (t7*6393 + 16384) >> 15; > > + t7 -= (t1*3227 + 16384) >> 15; > > + t1 += t3; > > These seven lines can overflow. > > > + t1h = DAALA_DCT_RSHIFT(t1, 1); > > + t3 = t1h - t3; > > This line, too. > > > + t5 += t7; > > + t7 = DAALA_DCT_RSHIFT(t5, 1) - t7; > > + t3 += (t5*7489 + 4096) >> 13; > > + t5 -= (t3*11585 + 8192) >> 14; > > + t3 -= (t5*19195 + 16384) >> 15; > > + t6 += (t2*21895 + 16384) >> 15; > > + t2 -= (t6*15137 + 8192) >> 14; > > + t6 += (t2*21895 + 16384) >> 15; > > + t0 += (t4*13573 + 16384) >> 15; > > + t4 -= (t0*11585 + 8192) >> 14; > > + t0 += (t4*13573 + 16384) >> 15; > > These nine lines can also overflow. > > > + t4 = t2 - t4; > > + t4h = DAALA_DCT_RSHIFT(t4, 1); > > + t2 = t4h - t2; > > + t6 = t0 - t6; > > + t6h = DAALA_DCT_RSHIFT(t6, 1); > > + t0 -= t6h; > > + t7 = t6h - t7; > > + t6 -= t7; > > + t2 += DAALA_DCT_RSHIFT(t3, 1); > > + t3 = t2 - t3; > > + t5 += t4h; > > + t4 -= t5; > > This can overflow. > > > + t0 += t1h; > > + t1 = t0 - t1; > > + *(x + 0*xstride) = (pixel)t0; > > + *(x + 1*xstride) = (pixel)t4; > > + *(x + 2*xstride) = (pixel)t2; > > + *(x + 3*xstride) = (pixel)t6; > > + *(x + 4*xstride) = (pixel)t7; > > + *(x + 5*xstride) = (pixel)t3; > > + *(x + 6*xstride) = (pixel)t5; > > + *(x + 7*xstride) = (pixel)t1; > > +} > > + > > +static av_always_inline void idct_1D_16(pixel *x, int xstride, const > pixel y[16]) > > +{ > > + int tfh, tbh, t1h, tdh, t8h, tch, tah, t2h; > > + int t0 = y[0], t1 = y[1], t2 = y[2], t3 = y[3], t4 = y[4], t5 = > y[5]; > > + int t6 = y[6], t7 = y[7], t8 = y[8], t9 = y[9], ta = y[10], tb = > y[11]; > > + int tc = y[12], td = y[13], te = y[14], tf = y[15]; > > + t1 += (tf*13573 + 16384) >> 15; > > + tf -= (t1*11585 + 8192) >> 14; > > + t1 += ((tf*13573 + 16384) >> 15)+t7; > > + td -= (t3*10947 + 8192) >> 14; > > + t3 += (td*15137 + 8192) >> 14; > > + t5 += (tb*10947 + 8192) >> 14; > > + tb -= (t5*15137 + 8192) >> 14; > > + t5 += (tb*10947 + 8192) >> 14; > > + td += t5 - ((t3*21895 + 16384) >> 15); > > These nine lines can overflow. > > > + tf = t9 - tf; > > + tb += t3; > > + tfh = DAALA_DCT_RSHIFT(tf, 1); > > + t9 -= tfh; > > + tbh = DAALA_DCT_RSHIFT(tb, 1); > > + t3 += tfh - tbh; > > + t1h = DAALA_DCT_RSHIFT(t1, 1); > > + t7 = t1h - t7 + tbh; > > + tdh = DAALA_DCT_RSHIFT(td, 1); > > + t5 += t1h - tdh; > > + t9 = tdh - t9; > > + td -= t9; > > + tf = t3 - tf; > > + t1 -= t5 + ((tf*20055 + 16384) >> 15); > > + tf += (t1*23059 + 8192) >> 14; > > + t1 -= (tf*5417 + 4096) >> 13; > > These three lines, too. > > > + tb = t7 - tb; > > + t9 += (t7*14101 + 8192) >> 14; > > This line, too. > > > + t7 += (t9*3363 + 4096) >> 13; > > + t9 -= (t7*12905 + 8192) >> 14; > > + tb -= (td*4379 + 8192) >> 14; > > + td += (tb*20435 + 8192) >> 14; > > + tb -= (td*17515 + 16384) >> 15; > > + t3 += (t5*851 + 4096) >> 13; > > + t5 += (t3*14699 + 8192) >> 14; > > + t3 -= (t5*1035 + 1024) >> 11; > > + t6 -= (ta*7335 + 16384) >> 15; > > + ta -= (t6*12873 + 8192) >> 14; > > + te += (t2*2873 + 1024) >> 11; > > + t2 += (te*9041 + 16384) >> 15; > > + t6 = DAALA_DCT_RSHIFT(t2, 1) - t6 - ((ta*8593 + 8192) >> 14); > > + te = DAALA_DCT_RSHIFT(ta, 1) - te + ((t2*2275 + 1024) >> 11); > > These thirteen lines, too. > > > + t2 -= t6; > > + ta -= te; > > + t6 -= (ta*13573 + 16384) >> 15; > > + ta += (t6*11585 + 8192) >> 14; > > + t6 -= (ta*13573 + 16384) >> 15; > > + tc += (t4*9147 + 4096) >> 13; > > + t4 -= (tc*10703 + 8192) >> 14; > > + tc += (t4*23013 + 16384) >> 15; > > These six lines, too. > > [...] > > +static av_always_inline void idct_1D_32(pixel *x, int xstride, const > pixel y[32]) > > +{ > > + int t0 = y[0], tg = y[1], t8 = y[2], to = y[3], t4 = y[4], tk > = y[5]; > > + int tc = y[6], ts = y[7], t2 = y[8], ti = y[9], ta = y[10], tq > = y[11]; > > + int t6 = y[12], tm = y[13], te = y[14], tu = y[15], t1 = y[16], th > = y[17]; > > + int t9 = y[18], tp = y[19], t5 = y[20], tl = y[21], td = y[22], tt > = y[23]; > > + int t3 = y[24], tj = y[25], tb = y[26], tr = y[27], t7 = y[28], tn > = y[29]; > > + int tf = y[30], tv = y[31]; > > + OD_IDCT_32(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, tm, > te, tu, > > + t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, > tf, tv); > > This can overflow. > > [...] > > +static av_always_inline void idct_1D_64(pixel *x, int xstride, const > pixel y[64]) > > +{ > > + int t0 = y[0], tw = y[1], tg = y[2], tM = y[3], t8 = y[4], tE > = y[5]; > > + int to = y[6], tU = y[7], t4 = y[8], tA = y[9], tk = y[10], tQ > = y[11]; > > + int tc = y[12], tI = y[13], ts = y[14], tY = y[15], t2 = y[16], ty > = y[17]; > > + int ti = y[18], tO = y[19], ta = y[20], tG = y[21], tq = y[22], tW > = y[23]; > > + int t6 = y[24], tC = y[25], tm = y[26], tS = y[27], te = y[28], tK > = y[29]; > > + int tu = y[30], t_ = y[31], t1 = y[32], tx = y[33], th = y[34], tN > = y[35]; > > + int t9 = y[36], tF = y[37], tp = y[38], tV = y[39], t5 = y[40], tB > = y[41]; > > + int tl = y[42], tR = y[43], td = y[44], tJ = y[45], tt = y[46], tZ > = y[47]; > > + int t3 = y[48], tz = y[49], tj = y[50], tP = y[51], tb = y[52], tH > = y[53]; > > + int tr = y[54], tX = y[55], t7 = y[56], tD = y[57], tn = y[58], tT > = y[59]; > > + int tf = y[60], tL = y[61], tv = y[62], t = y[63]; > > + OD_IDCT_64(t0, tw, tg, tM, t8, tE, to, tU, t4, tA, tk, tQ, tc, tI, > ts, tY, > > + t2, ty, ti, tO, ta, tG, tq, tW, t6, tC, tm, tS, te, tK, > tu, t_, t1, tx, > > + th, tN, t9, tF, tp, tV, t5, tB, tl, tR, td, tJ, tt, tZ, > t3, tz, tj, tP, > > + tb, tH, tr, tX, t7, tD, tn, tT, tf, tL, tv, t); > > This can overflow. > > +static void postfilter_4x4(pixel *dst, const pixel *src) > > +{ > > + int t[4]; > > + t[3] = src[0]-src[3]; > > + t[2] = src[1]-src[2]; > > + t[1] = src[1]-(t[2]>>1); > > These two lines can overflow. > > > + t[0] = src[0]-(t[3]>>1); > > + t[2] -= (t[3]*FILTER_PARAM_4_3+32)>>6; > > + t[3] -= (t[2]*FILTER_PARAM_4_2+32)>>6; > > + #if FILTER_PARAM_4_1 != 64 > > + t[3] = t[3]*(1 << 6)/FILTER_PARAM_4_1; > > + #endif > > + #if FILTER_PARAM_4_0 != 64 > > + t[2] = t[2]*(1 << 6)/FILTER_PARAM_4_0; > > + #endif > > These four multiplications can overflow. > [...] > > +/* Chroma from luma */ > > +static void daala_cfl_resample(uint8_t *_dst, int dstride, const > uint8_t *_src, > > + int istride, int xdec, int ydec, int bs, > int chroma_bs) > > +{ > > + int i, j; > > + const int n = 4 << bs; > > + pixel *dst = (pixel *)_dst; > > + pixel *src = (pixel *)_src; > > + if (!chroma_bs && (xdec || ydec)) { > > + if (xdec) { > > + if (ydec) { > > + daala_sf_ful_up(dst, dstride, src, istride, n, n, n); > > + for (i = 0; i < 4; i++) { > > + for (j = 0; j < 4; j++) { > > + dst[i*dstride + j] = > (daaladsp_cfl_scale[j][i]*dst[i*dstride + j] + 64) >> 7; > > The daaladsp_cfl_scale * dst multiplication can overflow, as well. > > Best regards, > Andreas > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel > _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel