This commit improves intensity stereo (IS) phase detection by measuring the distortion of both phases and picking the phase with the lowest distortion. This way the phase is always chosen explicitly so that it produces the least distortion in both channels. This commit also enables the use of M/S encoding in parallel with IS. The specification says that this is allowed as long as the phase of any band marked as M/S is reversed. Previously this was disabled because of the very primitive nature of the previous phase detection algorithm, which naively took a simple majority vote over all spectral coefficients of both channels. This resulted in distortions caused by incorrect phase settings, which are now fixed. --- libavcodec/aaccoder.c | 159 ++++++++++++++++++++++++++++---------------------- 1 file changed, 89 insertions(+), 70 deletions(-)
diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c index 0441971..7872a9d 100644 --- a/libavcodec/aaccoder.c +++ b/libavcodec/aaccoder.c @@ -1214,102 +1214,121 @@ static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChanne } } -static void search_for_is(AACEncContext *s, AVCodecContext *avctx, ChannelElement *cpe) +struct is_error { + int pass; + int phase; + float error; +}; + +static struct is_error calc_encoding_err_is(AACEncContext *s, ChannelElement *cpe, + float *L34, float *R34, float ener0, + float ener1, float ener01, + int start, int g, int w, int phase) { + int i, w2; float IS[128]; - float *L34 = s->scoefs + 128*0, *R34 = s->scoefs + 128*1; float *I34 = s->scoefs + 128*2; + float dist1 = 0.0f, dist2 = 0.0f; + struct is_error is_error = {0}; + SingleChannelElement *sce0 = &cpe->ch[0]; + SingleChannelElement *sce1 = &cpe->ch[1]; + + if (cpe->ms_mode) + phase = 1 - 2 * cpe->ms_mask[w*16+g]; + + for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) { + FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g]; + FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g]; + int is_band_type, is_sf_idx = FFMAX(1, sce0->sf_idx[(w+w2)*16+g]-4); + float e01_34 = phase*pow(sqrt(ener1/ener0), 3.0/4.0); + float maxval, dist_spec_err = 0.0f; + float minthr = FFMIN(band0->threshold, band1->threshold); + for (i = 0; i < sce0->ics.swb_sizes[g]; i++) { + IS[i] = (sce0->pcoeffs[start+(w+w2)*128+i]+ + phase*sce1->pcoeffs[start+(w+w2)*128+i])* + sqrt(ener0/ener01); + } + abs_pow34_v(I34, IS, sce0->ics.swb_sizes[g]); + maxval = find_max_val(1, sce0->ics.swb_sizes[g], I34); + is_band_type = find_min_book(maxval, is_sf_idx); + dist1 += quantize_band_cost(s, sce0->coeffs + start + (w+w2)*128, L34, + sce0->ics.swb_sizes[g], + sce0->sf_idx[(w+w2)*16+g], + sce0->band_type[(w+w2)*16+g], + s->lambda / band0->threshold, INFINITY, NULL, 0); + dist1 += quantize_band_cost(s, sce1->coeffs + start + (w+w2)*128, R34, + 
sce1->ics.swb_sizes[g], + sce1->sf_idx[(w+w2)*16+g], + sce1->band_type[(w+w2)*16+g], + s->lambda / band1->threshold, INFINITY, NULL, 0); + dist2 += quantize_band_cost(s, IS, I34, sce0->ics.swb_sizes[g], + is_sf_idx, is_band_type, + s->lambda / minthr, INFINITY, NULL, 0); + for (i = 0; i < sce0->ics.swb_sizes[g]; i++) { + dist_spec_err += (L34[i] - I34[i])*(L34[i] - I34[i]); + dist_spec_err += (R34[i] - I34[i]*e01_34)*(R34[i] - I34[i]*e01_34); + } + dist_spec_err *= s->lambda / minthr; + dist2 += dist_spec_err; + } + + is_error.phase = phase; + is_error.pass = dist2 <= dist1; + is_error.error = (dist1 - dist2)*(dist1 - dist2); + + return is_error; +} + +static void search_for_is(AACEncContext *s, AVCodecContext *avctx, ChannelElement *cpe) +{ SingleChannelElement *sce0 = &cpe->ch[0]; SingleChannelElement *sce1 = &cpe->ch[1]; - int start = 0, count = 0, i, w, w2, g; + int start = 0, count = 0, w, g, w2, i; const float freq_mult = avctx->sample_rate/(1024.0f/sce0->ics.num_windows)/2.0f; - const float lambda = s->lambda; + float *L34 = s->scoefs + 128*0, *R34 = s->scoefs + 128*1; for (w = 0; w < 128; w++) if (sce1->band_type[w] >= INTENSITY_BT2) sce1->band_type[w] = 0; - if (!cpe->common_window) - return; - for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) { - start = 0; - for (g = 0; g < sce0->ics.num_swb; g++) { - if (start*freq_mult > INT_STEREO_LOW_LIMIT*(lambda/170.0f) && - cpe->ch[0].band_type[w*16+g] != NOISE_BT && !cpe->ch[0].zeroes[w*16+g] && - cpe->ch[1].band_type[w*16+g] != NOISE_BT && !cpe->ch[1].zeroes[w*16+g]) { - int phase = 0; - float ener0 = 0.0f, ener1 = 0.0f, ener01 = 0.0f; - float dist1 = 0.0f, dist2 = 0.0f; + if (!cpe->common_window) + return; + for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) { + start = 0; + for (g = 0; g < sce0->ics.num_swb; g++) { + if (start*freq_mult > INT_STEREO_LOW_LIMIT*(s->lambda/170.0f) && + cpe->ch[0].band_type[w*16+g] != NOISE_BT && !cpe->ch[0].zeroes[w*16+g] && + 
cpe->ch[1].band_type[w*16+g] != NOISE_BT && !cpe->ch[1].zeroes[w*16+g]) { + float ener0 = 0.0f, ener1 = 0.0f, ener01 = 0.0f; + struct is_error ph_err1, ph_err2, *erf; for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) { + abs_pow34_v(L34, sce0->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]); + abs_pow34_v(R34, sce1->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]); for (i = 0; i < sce0->ics.swb_sizes[g]; i++) { float coef0 = sce0->pcoeffs[start+(w+w2)*128+i]; float coef1 = sce1->pcoeffs[start+(w+w2)*128+i]; - phase += coef0*coef1 >= 0.0f ? 1 : -1; ener0 += coef0*coef0; ener1 += coef1*coef1; ener01 += (coef0 + coef1)*(coef0 + coef1); } } - if (!phase) { /* Too much phase difference between channels */ - start += sce0->ics.swb_sizes[g]; - continue; - } - phase = av_clip(phase, -1, 1); - for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) { - FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g]; - FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g]; - int is_band_type, is_sf_idx = FFMAX(1, sce0->sf_idx[(w+w2)*16+g]-4); - float e01_34 = phase*pow(sqrt(ener1/ener0), 3.0/4.0); - float maxval, dist_spec_err = 0.0f; - float minthr = FFMIN(band0->threshold, band1->threshold); - for (i = 0; i < sce0->ics.swb_sizes[g]; i++) - IS[i] = (sce0->pcoeffs[start+(w+w2)*128+i] + phase*sce1->pcoeffs[start+(w+w2)*128+i]) * sqrt(ener0/ener01); - abs_pow34_v(L34, sce0->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]); - abs_pow34_v(R34, sce1->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]); - abs_pow34_v(I34, IS, sce0->ics.swb_sizes[g]); - maxval = find_max_val(1, sce0->ics.swb_sizes[g], I34); - is_band_type = find_min_book(maxval, is_sf_idx); - dist1 += quantize_band_cost(s, sce0->coeffs + start + (w+w2)*128, - L34, - sce0->ics.swb_sizes[g], - sce0->sf_idx[(w+w2)*16+g], - sce0->band_type[(w+w2)*16+g], - lambda / band0->threshold, INFINITY, NULL, 0); - dist1 += quantize_band_cost(s, sce1->coeffs + start + (w+w2)*128, - R34, - sce1->ics.swb_sizes[g], - 
sce1->sf_idx[(w+w2)*16+g], - sce1->band_type[(w+w2)*16+g], - lambda / band1->threshold, INFINITY, NULL, 0); - dist2 += quantize_band_cost(s, IS, - I34, - sce0->ics.swb_sizes[g], - is_sf_idx, - is_band_type, - lambda / minthr, INFINITY, NULL, 0); - for (i = 0; i < sce0->ics.swb_sizes[g]; i++) { - dist_spec_err += (L34[i] - I34[i])*(L34[i] - I34[i]); - dist_spec_err += (R34[i] - I34[i]*e01_34)*(R34[i] - I34[i]*e01_34); - } - dist_spec_err *= lambda / minthr; - dist2 += dist_spec_err; - } - if (dist2 <= dist1) { + ph_err1 = calc_encoding_err_is(s, cpe, L34, R34, ener0, ener1, + ener01, start, g, w, -1); + ph_err2 = calc_encoding_err_is(s, cpe, L34, R34, ener0, ener1, + ener01, start, g, w, +1); + erf = ph_err1.error < ph_err2.error ? &ph_err1 : &ph_err2; + if (erf->pass) { cpe->is_mask[w*16+g] = 1; - cpe->ms_mask[w*16+g] = 0; cpe->ch[0].is_ener[w*16+g] = sqrt(ener0/ener01); cpe->ch[1].is_ener[w*16+g] = ener0/ener1; - if (phase) - cpe->ch[1].band_type[w*16+g] = INTENSITY_BT; - else - cpe->ch[1].band_type[w*16+g] = INTENSITY_BT2; + cpe->ch[1].band_type[w*16+g] = erf->phase ? 
INTENSITY_BT : INTENSITY_BT2; count++; } + } + start += sce0->ics.swb_sizes[g]; } - start += sce0->ics.swb_sizes[g]; } - } - cpe->is_mode = !!count; + cpe->is_mode = !!count; } static void search_for_ms(AACEncContext *s, ChannelElement *cpe) @@ -1325,7 +1344,7 @@ static void search_for_ms(AACEncContext *s, ChannelElement *cpe) for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) { start = 0; for (g = 0; g < sce0->ics.num_swb; g++) { - if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g] && !cpe->is_mask[w*16+g]) { + if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g]) { float dist1 = 0.0f, dist2 = 0.0f; for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) { FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g]; -- 2.5.0.rc2.392.g76e840b _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel