This is done by actually handling the cascaded LMS data as if it were int16_t, which requires switching the computations at various locations. --- libavcodec/wmalosslessdec.c | 61 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+)
diff --git a/libavcodec/wmalosslessdec.c b/libavcodec/wmalosslessdec.c index f7f249b..3885dc1 100644 --- a/libavcodec/wmalosslessdec.c +++ b/libavcodec/wmalosslessdec.c @@ -497,15 +497,29 @@ static int decode_cdlms(WmallDecodeCtx *s) s->cdlms[c][i].bitsend = get_bitsz(&s->gb, cbits) + 2; shift_l = 32 - s->cdlms[c][i].bitsend; shift_r = 32 - s->cdlms[c][i].scaling - 2; + if (s->bits_per_sample > 16) { for (j = 0; j < s->cdlms[c][i].coefsend; j++) s->cdlms[c][i].coefs[j] = (get_bits(&s->gb, s->cdlms[c][i].bitsend) << shift_l) >> shift_r; + } else { + for (j = 0; j < s->cdlms[c][i].coefsend; j++) { + int16_t *ptr = (int16_t*)s->cdlms[c][i].coefs; + ptr[j] = (get_bits(&s->gb, s->cdlms[c][i].bitsend) << shift_l) >> shift_r; + } + } } } + if (s->bits_per_sample > 16) { for (i = 0; i < s->cdlms_ttl[c]; i++) memset(s->cdlms[c][i].coefs + s->cdlms[c][i].order, 0, WMALL_COEFF_PAD_SIZE); + } else { + for (i = 0; i < s->cdlms_ttl[c]; i++) { + int16_t *ptr = (int16_t*)s->cdlms[c][i].coefs; + memset(ptr + s->cdlms[c][i].order, 0, 2*WMALL_COEFF_PAD_SIZE); + } + } } return 0; @@ -702,6 +716,7 @@ static void lms_update(WmallDecodeCtx *s, int ich, int ilms, int input) int range = 1 << s->bits_per_sample - 1; int order = s->cdlms[ich][ilms].order; + if (s->bits_per_sample > 16) { if (recent) recent--; else { @@ -720,6 +735,26 @@ static void lms_update(WmallDecodeCtx *s, int ich, int ilms, int input) s->cdlms[ich][ilms].recent = recent; memset(s->cdlms[ich][ilms].lms_updates + recent + order, 0, sizeof(s->cdlms[ich][ilms].lms_updates) - 4*(recent+order)); + } else { + int16_t *prevvalues = s->cdlms[ich][ilms].lms_prevvalues; + int16_t *updates = s->cdlms[ich][ilms].lms_updates; + if (recent) + recent--; + else { + memcpy(prevvalues + order, prevvalues, 2 * order); + memcpy(updates + order, updates, 2 * order); + recent = order - 1; + } + + prevvalues[recent] = av_clip(input, -range, range - 1); + updates[recent] = WMASIGN(input) * s->update_speed[ich]; + + updates[recent + (order >> 
4)] >>= 2; + updates[recent + (order >> 3)] >>= 1; + s->cdlms[ich][ilms].recent = recent; + memset(updates + recent + order, 0, + sizeof(s->cdlms[ich][ilms].lms_updates) - 2*(recent+order)); + } } static void use_high_update_speed(WmallDecodeCtx *s, int ich) @@ -729,6 +764,7 @@ static void use_high_update_speed(WmallDecodeCtx *s, int ich) recent = s->cdlms[ich][ilms].recent; if (s->update_speed[ich] == 16) continue; + if (s->bits_per_sample > 16) { if (s->bV3RTM) { for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++) s->cdlms[ich][ilms].lms_updates[icoef + recent] *= 2; @@ -736,6 +772,12 @@ static void use_high_update_speed(WmallDecodeCtx *s, int ich) for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++) s->cdlms[ich][ilms].lms_updates[icoef] *= 2; } + } else { + int16_t *updates = (int16_t *)s->cdlms[ich][ilms].lms_updates; + if (s->bV3RTM) updates += recent; + for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++) + updates[icoef] *= 2; + } } s->update_speed[ich] = 16; } @@ -747,12 +789,19 @@ static void use_normal_update_speed(WmallDecodeCtx *s, int ich) recent = s->cdlms[ich][ilms].recent; if (s->update_speed[ich] == 8) continue; + if (s->bits_per_sample > 16) { if (s->bV3RTM) for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++) s->cdlms[ich][ilms].lms_updates[icoef + recent] /= 2; else for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++) s->cdlms[ich][ilms].lms_updates[icoef] /= 2; + } else { + int16_t *updates = (int16_t *)s->cdlms[ich][ilms].lms_updates; + if (s->bV3RTM) updates += recent; + for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++) + updates[icoef] /= 2; + } } s->update_speed[ich] = 8; } @@ -767,6 +816,7 @@ static void revert_cdlms(WmallDecodeCtx *s, int ch, for (icoef = coef_begin; icoef < coef_end; icoef++) { pred = 1 << (s->cdlms[ch][ilms].scaling - 1); residue = s->channel_residues[ch][icoef]; + if (s->bits_per_sample > 16) { pred += s->dsp.scalarproduct_and_madd_int32(s->cdlms[ch][ilms].coefs, 
s->cdlms[ch][ilms].lms_prevvalues + s->cdlms[ch][ilms].recent, @@ -775,6 +825,17 @@ static void revert_cdlms(WmallDecodeCtx *s, int ch, FFALIGN(s->cdlms[ch][ilms].order, WMALL_COEFF_PAD_SIZE), WMASIGN(residue)); + } else { + int16_t *coeffs = s->cdlms[ch][ilms].coefs; + int16_t *prevvalues = s->cdlms[ch][ilms].lms_prevvalues; + int16_t *updates = s->cdlms[ch][ilms].lms_updates; + pred += s->dsp.scalarproduct_and_madd_int16(coeffs, + prevvalues + s->cdlms[ch][ilms].recent, + updates + s->cdlms[ch][ilms].recent, + FFALIGN(s->cdlms[ch][ilms].order, + WMALL_COEFF_PAD_SIZE), + WMASIGN(residue)); + } input = residue + (pred >> s->cdlms[ch][ilms].scaling); lms_update(s, ch, ilms, input); s->channel_residues[ch][icoef] = input; -- 2.8.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel