This is done by handling the cascaded LMS data as if it were int16_t, which requires switching the computations between 16-bit and 32-bit variants at various locations. --- libavcodec/wmalosslessdec.c | 146 +++++++++++++++++++++++++------------------- 1 file changed, 84 insertions(+), 62 deletions(-)
diff --git a/libavcodec/wmalosslessdec.c b/libavcodec/wmalosslessdec.c index 9d56d97..f3a2217 100644 --- a/libavcodec/wmalosslessdec.c +++ b/libavcodec/wmalosslessdec.c @@ -147,9 +147,9 @@ typedef struct WmallDecodeCtx { int scaling; int coefsend; int bitsend; - DECLARE_ALIGNED(16, int32_t, coefs)[MAX_ORDER + WMALL_COEFF_PAD_SIZE/sizeof(int16_t)]; - DECLARE_ALIGNED(16, int32_t, lms_prevvalues)[MAX_ORDER * 2 + WMALL_COEFF_PAD_SIZE/sizeof(int16_t)]; - DECLARE_ALIGNED(16, int32_t, lms_updates)[MAX_ORDER * 2 + WMALL_COEFF_PAD_SIZE/sizeof(int16_t)]; + DECLARE_ALIGNED(16, int32_t, coefs)[MAX_ORDER + WMALL_COEFF_PAD_SIZE/sizeof(int32_t)]; + DECLARE_ALIGNED(16, int32_t, lms_prevvalues)[MAX_ORDER * 2 + WMALL_COEFF_PAD_SIZE/sizeof(int32_t)]; + DECLARE_ALIGNED(16, int32_t, lms_updates)[MAX_ORDER * 2 + WMALL_COEFF_PAD_SIZE/sizeof(int32_t)]; int recent; } cdlms[WMALL_MAX_CHANNELS][9]; @@ -458,6 +458,7 @@ static int decode_cdlms(WmallDecodeCtx *s) int cdlms_send_coef = get_bits1(&s->gb); for (c = 0; c < s->num_channels; c++) { + int shift = s->bits_per_sample > 16 ? 
0 : 1; s->cdlms_ttl[c] = get_bits(&s->gb, 3) + 1; for (i = 0; i < s->cdlms_ttl[c]; i++) { s->cdlms[c][i].order = (get_bits(&s->gb, 7) + 1) * 8; @@ -495,14 +496,20 @@ static int decode_cdlms(WmallDecodeCtx *s) s->cdlms[c][i].bitsend = get_bitsz(&s->gb, cbits) + 2; shift_l = 32 - s->cdlms[c][i].bitsend; shift_r = 32 - s->cdlms[c][i].scaling - 2; + if (s->bits_per_sample > 16) { for (j = 0; j < s->cdlms[c][i].coefsend; j++) s->cdlms[c][i].coefs[j] = (get_bits(&s->gb, s->cdlms[c][i].bitsend) << shift_l) >> shift_r; + } else { + int16_t *ptr = (int16_t*)s->cdlms[c][i].coefs; + for (j = 0; j < s->cdlms[c][i].coefsend; j++) + ptr[j] = (get_bits(&s->gb, s->cdlms[c][i].bitsend) << shift_l) >> shift_r; + } } } for (i = 0; i < s->cdlms_ttl[c]; i++) - memset(s->cdlms[c][i].coefs + s->cdlms[c][i].order, + memset(s->cdlms[c][i].coefs + (s->cdlms[c][i].order >> shift), 0, WMALL_COEFF_PAD_SIZE); } @@ -694,32 +701,6 @@ static void revert_mclms(WmallDecodeCtx *s, int tile_size) } } -static void lms_update(WmallDecodeCtx *s, int ich, int ilms, int input) -{ - int recent = s->cdlms[ich][ilms].recent; - int range = 1 << s->bits_per_sample - 1; - int order = s->cdlms[ich][ilms].order; - - if (recent) - recent--; - else { - memcpy(s->cdlms[ich][ilms].lms_prevvalues + order, - s->cdlms[ich][ilms].lms_prevvalues, sizeof(*s->cdlms[ich][ilms].lms_prevvalues) * order); - memcpy(s->cdlms[ich][ilms].lms_updates + order, - s->cdlms[ich][ilms].lms_updates, sizeof(*s->cdlms[ich][ilms].lms_updates) * order); - recent = order - 1; - } - - s->cdlms[ich][ilms].lms_prevvalues[recent] = av_clip(input, -range, range - 1); - s->cdlms[ich][ilms].lms_updates[recent] = WMASIGN(input) * s->update_speed[ich]; - - s->cdlms[ich][ilms].lms_updates[recent + (order >> 4)] >>= 2; - s->cdlms[ich][ilms].lms_updates[recent + (order >> 3)] >>= 1; - s->cdlms[ich][ilms].recent = recent; - memset(s->cdlms[ich][ilms].lms_updates + recent + order, 0, - sizeof(s->cdlms[ich][ilms].lms_updates) - 4*(recent+order)); -} - static 
void use_high_update_speed(WmallDecodeCtx *s, int ich) { int ilms, recent, icoef; @@ -727,12 +708,16 @@ static void use_high_update_speed(WmallDecodeCtx *s, int ich) recent = s->cdlms[ich][ilms].recent; if (s->update_speed[ich] == 16) continue; - if (s->bV3RTM) { + if (s->bits_per_sample > 16) { + int32_t *updates = s->cdlms[ich][ilms].lms_updates; + if (s->bV3RTM) updates += recent; for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++) - s->cdlms[ich][ilms].lms_updates[icoef + recent] *= 2; + updates[icoef] *= 2; } else { + int16_t *updates = (int16_t *)s->cdlms[ich][ilms].lms_updates; + if (s->bV3RTM) updates += recent; for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++) - s->cdlms[ich][ilms].lms_updates[icoef] *= 2; + updates[icoef] *= 2; } } s->update_speed[ich] = 16; @@ -745,42 +730,76 @@ static void use_normal_update_speed(WmallDecodeCtx *s, int ich) recent = s->cdlms[ich][ilms].recent; if (s->update_speed[ich] == 8) continue; - if (s->bV3RTM) + if (s->bits_per_sample > 16) { + int32_t *updates = s->cdlms[ich][ilms].lms_updates; + if (s->bV3RTM) updates += recent; for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++) - s->cdlms[ich][ilms].lms_updates[icoef + recent] /= 2; - else + updates[icoef] /= 2; + } else { + int16_t *updates = (int16_t *)s->cdlms[ich][ilms].lms_updates; + if (s->bV3RTM) updates += recent; for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++) - s->cdlms[ich][ilms].lms_updates[icoef] /= 2; + updates[icoef] /= 2; + } } s->update_speed[ich] = 8; } -static void revert_cdlms(WmallDecodeCtx *s, int ch, - int coef_begin, int coef_end) -{ - int icoef, pred, ilms, num_lms, residue, input; - - num_lms = s->cdlms_ttl[ch]; - for (ilms = num_lms - 1; ilms >= 0; ilms--) { - for (icoef = coef_begin; icoef < coef_end; icoef++) { - pred = 1 << (s->cdlms[ch][ilms].scaling - 1); - residue = s->channel_residues[ch][icoef]; - pred += s->dsp.scalarproduct_and_madd_int32(s->cdlms[ch][ilms].coefs, - s->cdlms[ch][ilms].lms_prevvalues - 
+ s->cdlms[ch][ilms].recent, - s->cdlms[ch][ilms].lms_updates - + s->cdlms[ch][ilms].recent, - FFALIGN(s->cdlms[ch][ilms].order, - WMALL_COEFF_PAD_SIZE), - WMASIGN(residue)); - input = residue + (pred >> s->cdlms[ch][ilms].scaling); - lms_update(s, ch, ilms, input); - s->channel_residues[ch][icoef] = input; - } - } - emms_c(); +#define CD_LMS(bits, ROUND) \ +static void lms_update ## bits (WmallDecodeCtx *s, int ich, int ilms, int input) \ +{ \ + int recent = s->cdlms[ich][ilms].recent; \ + int range = 1 << s->bits_per_sample - 1; \ + int order = s->cdlms[ich][ilms].order; \ + int ##bits##_t *prev = (int##bits##_t *)s->cdlms[ich][ilms].lms_prevvalues; \ + int ##bits##_t *upd = (int##bits##_t *)s->cdlms[ich][ilms].lms_updates; \ + \ + if (recent) \ + recent--; \ + else { \ + memcpy(prev + order, prev, (bits/8) * order); \ + memcpy(upd + order, upd, (bits/8) * order); \ + recent = order - 1; \ + } \ + \ + prev[recent] = av_clip(input, -range, range - 1); \ + upd[recent] = WMASIGN(input) * s->update_speed[ich]; \ + \ + upd[recent + (order >> 4)] >>= 2; \ + upd[recent + (order >> 3)] >>= 1; \ + s->cdlms[ich][ilms].recent = recent; \ + memset(upd + recent + order, 0, (bits/8)*(MAX_ORDER * 2 - recent - order)); \ +} \ + \ +static void revert_cdlms ## bits (WmallDecodeCtx *s, int ch, \ + int coef_begin, int coef_end) \ +{ \ + int icoef, pred, ilms, num_lms, residue, input; \ + \ + num_lms = s->cdlms_ttl[ch]; \ + for (ilms = num_lms - 1; ilms >= 0; ilms--) { \ + for (icoef = coef_begin; icoef < coef_end; icoef++) { \ + int##bits##_t *coeffs = (int##bits##_t *)s->cdlms[ch][ilms].coefs; \ + int##bits##_t *prevvalues = (int##bits##_t *)s->cdlms[ch][ilms].lms_prevvalues; \ + int##bits##_t *updates = (int##bits##_t *)s->cdlms[ch][ilms].lms_updates; \ + pred = 1 << (s->cdlms[ch][ilms].scaling - 1); \ + residue = s->channel_residues[ch][icoef]; \ + pred += s->dsp.scalarproduct_and_madd_int## bits (coeffs, \ + prevvalues + s->cdlms[ch][ilms].recent, \ + updates + 
s->cdlms[ch][ilms].recent, \ + FFALIGN(s->cdlms[ch][ilms].order, ROUND), \ + WMASIGN(residue)); \ + input = residue + (pred >> s->cdlms[ch][ilms].scaling); \ + lms_update ## bits(s, ch, ilms, input); \ + s->channel_residues[ch][icoef] = input; \ + } \ + } \ + if (bits <= 16) emms_c(); \ } +CD_LMS(16, WMALL_COEFF_PAD_SIZE) +CD_LMS(32, 8) + static void revert_inter_ch_decorr(WmallDecodeCtx *s, int tile_size) { if (s->num_channels != 2) @@ -953,7 +972,10 @@ static int decode_subframe(WmallDecodeCtx *s) use_high_update_speed(s, i); else use_normal_update_speed(s, i); - revert_cdlms(s, i, 0, subframe_len); + if (s->bits_per_sample > 16) + revert_cdlms32(s, i, 0, subframe_len); + else + revert_cdlms16(s, i, 0, subframe_len); } else { memset(s->channel_residues[i], 0, sizeof(**s->channel_residues) * subframe_len); } -- 2.8.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel