Here's an objective comparison of the difference the patch makes: Original spectrum: https://0x0.st/T7.png
Encoded without the patchset: https://0x0.st/Th.png Encoded with the patchset: https://0x0.st/TF.png Difference: https://0x0.st/TR.png <https://0x0.st/T5.png> Made by: "$ composite Encoded_clean.png Encoded_noise.png -compose difference Difference.png" On 12 April 2015 at 05:50, Rostislav Pehlivanov <atomnu...@gmail.com> wrote: > This commit enables the use of the pseudo-codebook NOISE_BT for encoding > noise values for the twoloop coder. It uses the energy values from the > psychoacoustic model to determine whether it's acceptable to use noise for > encoding and if so, determine the energy of the noise. The cost system was > modified to accept the 13th codebook (skipping the nonexistent 12). The > system was extended such that in the future it should be easy to add > support for intensity stereo coding, hence the use of arrays for the maps. > > The parameters used (such as the factor by which uplims is multiplied when > comparing and the cost returned by the BT_NOISE case) and the way energy > values are converted to scalefactor indices have not been extensively > tested, so safe values which should not break anything were used. They are > to be tweaked in the future to optimize audio quality if needed. 
> --- > libavcodec/aaccoder.c | 128 > +++++++++++++++++++++++++++++++++----------------- > 1 file changed, 86 insertions(+), 42 deletions(-) > > diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c > index 64eee32..f7662fd 100644 > --- a/libavcodec/aaccoder.c > +++ b/libavcodec/aaccoder.c > @@ -40,6 +40,9 @@ > #include "aacenc.h" > #include "aactab.h" > > +/** Total number of usable codebooks **/ > +#define CB_TOT 13 > + > /** bits needed to code codebook run value for long windows */ > static const uint8_t run_value_bits_long[64] = { > 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, > @@ -57,6 +60,10 @@ static const uint8_t * const run_value_bits[2] = { > run_value_bits_long, run_value_bits_short > }; > > +/** Map to convert values from BandCodingPath index to a codebook index > **/ > +static const uint8_t aac_cb_out_map[CB_TOT] = > {0,1,2,3,4,5,6,7,8,9,10,11,13}; > +/** Inverse map to convert from codebooks to BandCodingPath indices **/ > +static const uint8_t aac_cb_in_map[CB_TOT+1] = > {0,1,2,3,4,5,6,7,8,9,10,11,0,12}; > > /** > * Quantize one coefficient. > @@ -108,7 +115,7 @@ static av_always_inline float > quantize_and_encode_band_cost_template( > const float *scaled, int size, int > scale_idx, > int cb, const float lambda, const float > uplim, > int *bits, int BT_ZERO, int BT_UNSIGNED, > - int BT_PAIR, int BT_ESC) > + int BT_PAIR, int BT_ESC, int BT_NOISE) > { > const int q_idx = POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - > SCALE_DIV_512; > const float Q = ff_aac_pow2sf_tab [q_idx]; > @@ -119,8 +126,6 @@ static av_always_inline float > quantize_and_encode_band_cost_template( > float cost = 0; > const int dim = BT_PAIR ? 
2 : 4; > int resbits = 0; > - const int range = aac_cb_range[cb]; > - const int maxval = aac_cb_maxval[cb]; > int off; > > if (BT_ZERO) { > @@ -130,15 +135,22 @@ static av_always_inline float > quantize_and_encode_band_cost_template( > *bits = 0; > return cost * lambda; > } > + if (BT_NOISE) { > + for (i = 0; i < size; i++) > + cost += in[i]*in[i]; > + if (bits) > + *bits = 0; > + return cost * lambda; > + } > if (!scaled) { > abs_pow34_v(s->scoefs, in, size); > scaled = s->scoefs; > } > - quantize_bands(s->qcoefs, in, scaled, size, Q34, !BT_UNSIGNED, > maxval); > + quantize_bands(s->qcoefs, in, scaled, size, Q34, !BT_UNSIGNED, > aac_cb_maxval[cb]); > if (BT_UNSIGNED) { > off = 0; > } else { > - off = maxval; > + off = aac_cb_maxval[cb]; > } > for (i = 0; i < size; i += dim) { > const float *vec; > @@ -147,7 +159,7 @@ static av_always_inline float > quantize_and_encode_band_cost_template( > int curbits; > float rd = 0.0f; > for (j = 0; j < dim; j++) { > - curidx *= range; > + curidx *= aac_cb_range[cb]; > curidx += quants[j] + off; > } > curbits = ff_aac_spectral_bits[cb-1][curidx]; > @@ -207,8 +219,8 @@ static av_always_inline float > quantize_and_encode_band_cost_template( > return cost; > } > > -#define QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NAME, BT_ZERO, BT_UNSIGNED, > BT_PAIR, BT_ESC) \ > -static float quantize_and_encode_band_cost_ ## NAME( > \ > +#define QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NAME, BT_ZERO, BT_UNSIGNED, > BT_PAIR, BT_ESC, BT_NOISE) \ > +static float quantize_and_encode_band_cost_ ## NAME( > \ > struct AACEncContext *s, > \ > PutBitContext *pb, const float *in, > \ > const float *scaled, int size, int > scale_idx, \ > @@ -217,15 +229,16 @@ static float quantize_and_encode_band_cost_ ## NAME( > return quantize_and_encode_band_cost_template( > \ > s, pb, in, scaled, size, scale_idx, > \ > BT_ESC ? 
ESC_BT : cb, lambda, uplim, > bits, \ > - BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC); > \ > + BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC, > BT_NOISE); \ > } > > -QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ZERO, 1, 0, 0, 0) > -QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SQUAD, 0, 0, 0, 0) > -QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UQUAD, 0, 1, 0, 0) > -QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SPAIR, 0, 0, 1, 0) > -QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UPAIR, 0, 1, 1, 0) > -QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ESC, 0, 1, 1, 1) > +QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ZERO, 1, 0, 0, 0, 0) > +QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SQUAD, 0, 0, 0, 0, 0) > +QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UQUAD, 0, 1, 0, 0, 0) > +QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SPAIR, 0, 0, 1, 0, 0) > +QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UPAIR, 0, 1, 1, 0, 0) > +QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ESC, 0, 1, 1, 1, 0) > +QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NOISE, 0, 0, 0, 0, 1) > > static float (*const quantize_and_encode_band_cost_arr[])( > struct AACEncContext *s, > @@ -245,6 +258,8 @@ static float (*const > quantize_and_encode_band_cost_arr[])( > quantize_and_encode_band_cost_UPAIR, > quantize_and_encode_band_cost_UPAIR, > quantize_and_encode_band_cost_ESC, > + NULL, > + quantize_and_encode_band_cost_NOISE, > }; > > #define quantize_and_encode_band_cost( \ > @@ -312,7 +327,7 @@ typedef struct BandCodingPath { > static void encode_window_bands_info(AACEncContext *s, > SingleChannelElement *sce, > int win, int group_len, const float > lambda) > { > - BandCodingPath path[120][12]; > + BandCodingPath path[120][CB_TOT]; > int w, swb, cb, start, size; > int i, j; > const int max_sfb = sce->ics.max_sfb; > @@ -325,7 +340,7 @@ static void encode_window_bands_info(AACEncContext *s, > SingleChannelElement *sce > > abs_pow34_v(s->scoefs, sce->coeffs, 1024); > start = win*128; > - for (cb = 0; cb < 12; cb++) { > + for (cb = 0; cb < CB_TOT; cb++) { > path[0][cb].cost = 0.0f; > path[0][cb].prev_idx = -1; > path[0][cb].run = 0; > @@ -333,7 +348,7 @@ static void 
encode_window_bands_info(AACEncContext *s, > SingleChannelElement *sce > for (swb = 0; swb < max_sfb; swb++) { > size = sce->ics.swb_sizes[swb]; > if (sce->zeroes[win*16 + swb]) { > - for (cb = 0; cb < 12; cb++) { > + for (cb = 0; cb < CB_TOT; cb++) { > path[swb+1][cb].prev_idx = cb; > path[swb+1][cb].cost = path[swb][cb].cost; > path[swb+1][cb].run = path[swb][cb].run + 1; > @@ -343,14 +358,14 @@ static void encode_window_bands_info(AACEncContext > *s, SingleChannelElement *sce > int mincb = next_mincb; > next_minrd = INFINITY; > next_mincb = 0; > - for (cb = 0; cb < 12; cb++) { > + for (cb = 0; cb < CB_TOT; cb++) { > float cost_stay_here, cost_get_here; > float rd = 0.0f; > for (w = 0; w < group_len; w++) { > FFPsyBand *band = &s->psy.ch > [s->cur_channel].psy_bands[(win+w)*16+swb]; > rd += quantize_band_cost(s, sce->coeffs + start + > w*128, > s->scoefs + start + w*128, > size, > - sce->sf_idx[(win+w)*16+swb], > cb, > + sce->sf_idx[(win+w)*16+swb], > aac_cb_out_map[cb], > lambda / band->threshold, > INFINITY, NULL); > } > cost_stay_here = path[swb][cb].cost + rd; > @@ -379,7 +394,7 @@ static void encode_window_bands_info(AACEncContext *s, > SingleChannelElement *sce > //convert resulting path from backward-linked list > stack_len = 0; > idx = 0; > - for (cb = 1; cb < 12; cb++) > + for (cb = 1; cb < CB_TOT; cb++) > if (path[max_sfb][cb].cost < path[max_sfb][idx].cost) > idx = cb; > ppos = max_sfb; > @@ -394,12 +409,13 @@ static void encode_window_bands_info(AACEncContext > *s, SingleChannelElement *sce > //perform actual band info encoding > start = 0; > for (i = stack_len - 1; i >= 0; i--) { > - put_bits(&s->pb, 4, stackcb[i]); > + cb = aac_cb_out_map[stackcb[i]]; > + put_bits(&s->pb, 4, cb); > count = stackrun[i]; > - memset(sce->zeroes + win*16 + start, !stackcb[i], count); > + memset(sce->zeroes + win*16 + start, !cb, count); > //XXX: memset when band_type is also uint8_t > for (j = 0; j < count; j++) { > - sce->band_type[win*16 + start] = stackcb[i]; > + 
sce->band_type[win*16 + start] = cb; > start++; > } > while (count >= run_esc) { > @@ -413,7 +429,7 @@ static void encode_window_bands_info(AACEncContext *s, > SingleChannelElement *sce > static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement > *sce, > int win, int group_len, const float > lambda) > { > - BandCodingPath path[120][12]; > + BandCodingPath path[120][CB_TOT]; > int w, swb, cb, start, size; > int i, j; > const int max_sfb = sce->ics.max_sfb; > @@ -426,7 +442,7 @@ static void codebook_trellis_rate(AACEncContext *s, > SingleChannelElement *sce, > > abs_pow34_v(s->scoefs, sce->coeffs, 1024); > start = win*128; > - for (cb = 0; cb < 12; cb++) { > + for (cb = 0; cb < CB_TOT; cb++) { > path[0][cb].cost = run_bits+4; > path[0][cb].prev_idx = -1; > path[0][cb].run = 0; > @@ -450,7 +466,7 @@ static void codebook_trellis_rate(AACEncContext *s, > SingleChannelElement *sce, > } > next_minbits = path[swb+1][0].cost; > next_mincb = 0; > - for (cb = 1; cb < 12; cb++) { > + for (cb = 1; cb < CB_TOT; cb++) { > path[swb+1][cb].cost = 61450; > path[swb+1][cb].prev_idx = -1; > path[swb+1][cb].run = 0; > @@ -459,6 +475,7 @@ static void codebook_trellis_rate(AACEncContext *s, > SingleChannelElement *sce, > float minbits = next_minbits; > int mincb = next_mincb; > int startcb = sce->band_type[win*16+swb]; > + startcb = aac_cb_in_map[startcb]; > next_minbits = INFINITY; > next_mincb = 0; > for (cb = 0; cb < startcb; cb++) { > @@ -466,13 +483,20 @@ static void codebook_trellis_rate(AACEncContext *s, > SingleChannelElement *sce, > path[swb+1][cb].prev_idx = -1; > path[swb+1][cb].run = 0; > } > - for (cb = startcb; cb < 12; cb++) { > + for (cb = startcb; cb < CB_TOT; cb++) { > float cost_stay_here, cost_get_here; > float bits = 0.0f; > + if (cb == 12 && sce->band_type[win*16+swb] != NOISE_BT) { > + path[swb+1][cb].cost = 61450; > + path[swb+1][cb].prev_idx = -1; > + path[swb+1][cb].run = 0; > + continue; > + } > for (w = 0; w < group_len; w++) { > bits += 
quantize_band_cost(s, sce->coeffs + start + > w*128, > s->scoefs + start + w*128, > size, > - > sce->sf_idx[(win+w)*16+swb], cb, > + > sce->sf_idx[(win+w)*16+swb], > + aac_cb_out_map[cb], > 0, INFINITY, NULL); > } > cost_stay_here = path[swb][cb].cost + bits; > @@ -501,7 +525,7 @@ static void codebook_trellis_rate(AACEncContext *s, > SingleChannelElement *sce, > //convert resulting path from backward-linked list > stack_len = 0; > idx = 0; > - for (cb = 1; cb < 12; cb++) > + for (cb = 1; cb < CB_TOT; cb++) > if (path[max_sfb][cb].cost < path[max_sfb][idx].cost) > idx = cb; > ppos = max_sfb; > @@ -517,12 +541,13 @@ static void codebook_trellis_rate(AACEncContext *s, > SingleChannelElement *sce, > //perform actual band info encoding > start = 0; > for (i = stack_len - 1; i >= 0; i--) { > - put_bits(&s->pb, 4, stackcb[i]); > + cb = aac_cb_out_map[stackcb[i]]; > + put_bits(&s->pb, 4, cb); > count = stackrun[i]; > - memset(sce->zeroes + win*16 + start, !stackcb[i], count); > + memset(sce->zeroes + win*16 + start, !cb, count); > //XXX: memset when band_type is also uint8_t > for (j = 0; j < count; j++) { > - sce->band_type[win*16 + start] = stackcb[i]; > + sce->band_type[win*16 + start] = cb; > start++; > } > while (count >= run_esc) { > @@ -711,8 +736,9 @@ static void > search_for_quantizers_twoloop(AVCodecContext *avctx, > { > int start = 0, i, w, w2, g; > int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate / > avctx->channels * (lambda / 120.f); > - float dists[128] = { 0 }, uplims[128]; > + float dists[128] = { 0 }, uplims[128] = { 0 }, energies[128] = { 0 }; > float maxvals[128]; > + float energy_avg = 0; > int fflag, minscaler; > int its = 0; > int allz = 0; > @@ -724,32 +750,47 @@ static void > search_for_quantizers_twoloop(AVCodecContext *avctx, > //XXX: some heuristic to determine initial quantizers will reduce > search time > //determine zero bands and upper limits > for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { > + start = 0; > 
for (g = 0; g < sce->ics.num_swb; g++) { > int nz = 0; > - float uplim = 0.0f; > + float uplim = 0.0f, energy = 0.0f; > + float freq = > (w*16+g)*(avctx->sample_rate/(1024/sce->ics.num_windows)/2); > for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { > FFPsyBand *band = &s->psy.ch > [s->cur_channel].psy_bands[(w+w2)*16+g]; > uplim += band->threshold; > - if (band->energy <= band->threshold || band->threshold == > 0.0f) { > + energy += band->energy; > + if (band->threshold == 0.0f || band->energy < > band->threshold) { > sce->zeroes[(w+w2)*16+g] = 1; > continue; > } > nz = 1; > } > uplims[w*16+g] = uplim *512; > + energies[w*16+g] = log2f(2*(energy*energy)); > + energy_avg = (energies[w*16+g] + energy_avg)/2; > + if (freq > 4000.0f && energy <= uplim * 1.52f) { > + sce->band_type[w*16+g] = NOISE_BT; > + nz = 1; > + } else { /* Will be determined in the two-loop search */ > + sce->band_type[w*16+g] = 0; > + } > sce->zeroes[w*16+g] = !nz; > if (nz) > minthr = FFMIN(minthr, uplim); > allz |= nz; > + start += sce->ics.swb_sizes[g]; > } > } > for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { > for (g = 0; g < sce->ics.num_swb; g++) { > if (sce->zeroes[w*16+g]) { > sce->sf_idx[w*16+g] = SCALE_ONE_POS; > - continue; > + } else if (sce->band_type[w*16+g] == NOISE_BT) { > + float energy_norm = (energies[w*16+g]/energy_avg); > + sce->sf_idx[w*16+g] = av_clip((energy_norm*256) - 70, > -100, 155); > + } else { > + sce->sf_idx[w*16+g] = SCALE_ONE_POS + > FFMIN(log2f(uplims[w*16+g]/minthr)*4,59); > } > - sce->sf_idx[w*16+g] = SCALE_ONE_POS + > FFMIN(log2f(uplims[w*16+g]/minthr)*4,59); > } > } > > @@ -785,7 +826,8 @@ static void > search_for_quantizers_twoloop(AVCodecContext *avctx, > int cb; > float dist = 0.0f; > > - if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= > 218) { > + if (sce->zeroes[w*16+g] || sce->band_type[w*16+g] >= > NOISE_BT || > + sce->sf_idx[w*16+g] >= 218) { > start += sce->ics.swb_sizes[g]; > continue; > } > @@ -814,11 +856,11 @@ static void 
> search_for_quantizers_twoloop(AVCodecContext *avctx, > } > if (tbits > destbits) { > for (i = 0; i < 128; i++) > - if (sce->sf_idx[i] < 218 - qstep) > + if (sce->sf_idx[i] < 218 - qstep && sce->band_type[i] > < NOISE_BT) > sce->sf_idx[i] += qstep; > } else { > for (i = 0; i < 128; i++) > - if (sce->sf_idx[i] > 60 - qstep) > + if (sce->sf_idx[i] > 60 - qstep && sce->band_type[i] > < NOISE_BT) > sce->sf_idx[i] -= qstep; > } > qstep >>= 1; > @@ -831,7 +873,7 @@ static void > search_for_quantizers_twoloop(AVCodecContext *avctx, > for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) > { > for (g = 0; g < sce->ics.num_swb; g++) { > int prevsc = sce->sf_idx[w*16+g]; > - if (dists[w*16+g] > uplims[w*16+g] && sce->sf_idx[w*16+g] > > 60) { > + if (dists[w*16+g] > uplims[w*16+g] && sce->sf_idx[w*16+g] > > 60 && sce->band_type[w*16+g] < NOISE_BT) { > if (find_min_book(maxvals[w*16+g], > sce->sf_idx[w*16+g]-1)) > sce->sf_idx[w*16+g]--; > else //Try to make sure there is some energy in every > band > @@ -839,6 +881,8 @@ static void > search_for_quantizers_twoloop(AVCodecContext *avctx, > } > sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], > minscaler, minscaler + SCALE_MAX_DIFF); > sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], 219); > + if (sce->band_type[w*16+g] >= NOISE_BT) > + continue; > if (sce->sf_idx[w*16+g] != prevsc) > fflag = 1; > sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], > sce->sf_idx[w*16+g]); > -- > 2.1.4 > > _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel