bit_rate_tolerance=0 has a few problems:
- an infinite loop if frame_bits does not become small enough for any lambda
- bad quality, as lambda is never increased above its initial value
- the coefficients are not restored after lambda is adjusted
Attempt to address these: - target bitrate a bit below frame_bits cap with the usual code path - if frame_bits exceeds cap, find good lambda with a zero finding method The zero finding usually converges in 1-3 iterations. Remaining problems: - instead of the infinite loop, we now silently return the too large frame, and let the caller handle it. This is still a bug, but fixing it needs something else than playing with lambda. - it appears the resulting frame_bits depends also on some other state than s->lambda. iteration with lambda1, lambda2>lambda1, and then again with lambda1 produces different frame_bits on the two lambda1 iterations. In this case the root finding can fail, as it cannot any more return to a previous "good" lambda. The sound quality from this patch with bit_rate_tolerance=0 is improved, as it now maintains sufficient bitrate closer to the target. Encoding is also faster now that less re-encoding is done: Before: ffmpeg -i sample.flac -c aac -b:a 200k -bt:a 0 -y before.aac size= 2776KiB time=00:03:10.98 bitrate= 119.1kbits/s speed=14.4x After: ffmpeg -i sample.flac -c aac -b:a 200k -bt:a 0 -y after.aac size= 3897KiB time=00:03:10.98 bitrate= 167.1kbits/s speed=23.6x Signed-off-by: Pauli Virtanen <p...@iki.fi> --- libavcodec/aacenc.c | 153 +++++++++++++++++++++++++++++++++++--------- libavcodec/aacenc.h | 1 + 2 files changed, 124 insertions(+), 30 deletions(-) diff --git a/libavcodec/aacenc.c b/libavcodec/aacenc.c index 88037c7f87..ffa72be217 100644 --- a/libavcodec/aacenc.c +++ b/libavcodec/aacenc.c @@ -826,6 +826,58 @@ static void copy_input_samples(AACEncContext *s, const AVFrame *frame) } } +/** + * Finding zero of function f(x). + * The initial zero bracketing assumes f(x) is increasing. 
+ */ +typedef struct FindZero { + int init; ///< bitmask of whether x[0] (0x1) and x[1] (0x2) are valid + float x[2]; ///< zero of f(x) is in interval (x[0], x[1]) + float f[2]; ///< interpolation values + int i; ///< which x[i] is latest + float b; ///< bracketing multiplier +} FindZero; + +/** Return next x to evaluate f(x) at to approach the zero. */ +static float find_zero_next(FindZero *r, float x, float f) +{ + if (r->init != 0x3) { + /* Bracket the zero, assuming x > 0 and f(x) is increasing */ + r->b = FFMIN(2 + 2 * r->b, 65536.0f); + if (f < 0) { + r->x[0] = x; + r->f[0] = f; + r->init |= 0x1; + if (r->init != 0x3) + return x * r->b; + } else { + r->x[1] = x; + r->f[1] = f; + r->init |= 0x2; + if (r->init != 0x3) + return x / r->b; + } + r->i = 1; + } else { + /* Anderson-Bjoerck false position method */ + if ((f < 0) != (r->f[r->i] < 0)) { + r->i = !r->i; + } else { + float m = 1 - (float)f / r->f[r->i]; + + if (m <= 0) + m = 0.5f; + + r->f[!r->i] *= m; + } + + r->x[r->i] = x; + r->f[r->i] = f; + } + + return (r->x[0] * r->f[1] - r->x[1] * r->f[0]) / (r->f[1] - r->f[0]); +} + static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, const AVFrame *frame, int *got_packet_ptr) { @@ -839,6 +891,7 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, int ms_mode = 0, is_mode = 0, tns_mode = 0, pred_mode = 0; int chan_el_counter[4]; FFPsyWindowInfo windows[AAC_MAX_CHANNELS]; + FindZero find_lambda = { 0 }; /* add current frame to queue */ if (frame) { @@ -1100,32 +1153,58 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, */ frame_bits = put_bits_count(&s->pb); rate_bits = avctx->bit_rate * 1024 / avctx->sample_rate; - rate_bits = FFMIN(rate_bits, 6144 * s->channels - 3); + rate_bits = FFMIN(rate_bits, s->max_frame_bits); too_many_bits = FFMAX(target_bits, rate_bits); - too_many_bits = FFMIN(too_many_bits, 6144 * s->channels - 3); + too_many_bits = FFMIN(too_many_bits, s->max_frame_bits); too_few_bits = 
FFMIN(FFMAX(rate_bits - rate_bits/4, target_bits), too_many_bits); - /* When strict bit-rate control is demanded */ - if (avctx->bit_rate_tolerance == 0) { - if (rate_bits < frame_bits) { - float ratio = ((float)rate_bits) / frame_bits; - s->lambda *= FFMIN(0.9f, ratio); - continue; - } - /* reset lambda when solution is found */ - s->lambda = avctx->global_quality > 0 ? avctx->global_quality : 120; - break; - } - /* When using ABR, be strict (but only for increasing) */ too_few_bits = too_few_bits - too_few_bits/8; too_many_bits = too_many_bits + too_many_bits/2; - if ( its == 0 /* for steady-state Q-scale tracking */ - || (its < 5 && (frame_bits < too_few_bits || frame_bits > too_many_bits)) - || frame_bits >= 6144 * s->channels - 3 ) + av_log(NULL, AV_LOG_TRACE, + "%08d: frame_bits:%d max:%d lambda:%f its:%d%s\n", + s->lambda_count, frame_bits, s->max_frame_bits, s->lambda, its, + (frame_bits > s->max_frame_bits) ? " BAD" : its ? " RETRY" : ""); + + if (frame_bits >= s->max_frame_bits || find_lambda.init) { + /* Search for lambda with frame_bits == rate_bits < max_frame_bits */ + float lambda; + int value = frame_bits - rate_bits; + int value_max = s->max_frame_bits - rate_bits; + + lambda = find_zero_next(&find_lambda, s->lambda, value); + lambda = av_clipf(lambda, FLT_EPSILON, 65536.f); + + /* Close enough? */ + if (value < value_max && (value > -rate_bits / 20 || + value > too_few_bits - rate_bits || + fabsf(lambda - s->lambda) < 0.05f * fabsf(lambda))) + break; + + if (its > 10 || s->lambda == lambda) { + /* Not making enough progress, use whatever we have now. */ + if (value < value_max) + break; + + if (!(find_lambda.init & 0x1)) { + /* Could't find any lambda that gives a small enough frame. + * Give up, produce the bad frame, and reset lambda for next. + */ + s->lambda = avctx->global_quality > 0 ? 
avctx->global_quality : 120; + break; + } + + lambda = find_lambda.x[0] * 0.9f; + memset(&find_lambda, 0, sizeof(find_lambda)); + } + + s->lambda = lambda; + } else if ( its == 0 /* for steady-state Q-scale tracking */ + || (its < 5 && (frame_bits < too_few_bits || frame_bits > too_many_bits))) { float ratio = ((float)rate_bits) / frame_bits; + float prev_lambda = s->lambda; if (frame_bits >= too_few_bits && frame_bits <= too_many_bits) { /* @@ -1142,24 +1221,27 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, } s->lambda = av_clipf(s->lambda * ratio, FLT_EPSILON, 65536.f); + /* Give up if we're not making progress. */ + if (s->lambda == prev_lambda) + break; + /* Keep iterating if we must reduce and lambda is in the sky */ - if (ratio > 0.9f && ratio < 1.1f) { + if (ratio > 0.9f && ratio < 1.1f) break; - } else { - if (is_mode || ms_mode || tns_mode || pred_mode) { - for (i = 0; i < s->chan_map[0]; i++) { - // Must restore coeffs - chans = tag == TYPE_CPE ? 2 : 1; - cpe = &s->cpe[i]; - for (ch = 0; ch < chans; ch++) - memcpy(cpe->ch[ch].coeffs, cpe->ch[ch].pcoeffs, sizeof(cpe->ch[ch].coeffs)); - } - } - its++; - } } else { break; } + + if (is_mode || ms_mode || tns_mode || pred_mode) { + for (i = 0; i < s->chan_map[0]; i++) { + // Must restore coeffs + chans = tag == TYPE_CPE ? 2 : 1; + cpe = &s->cpe[i]; + for (ch = 0; ch < chans; ch++) + memcpy(cpe->ch[ch].coeffs, cpe->ch[ch].pcoeffs, sizeof(cpe->ch[ch].coeffs)); + } + } + its++; } while (1); if (s->options.ltp && s->coder->ltp_insert_new_frame) @@ -1302,6 +1384,17 @@ static av_cold int aac_encode_init(AVCodecContext *avctx) avctx->bit_rate = (int64_t)FFMIN(6144 * s->channels / 1024.0 * avctx->sample_rate, avctx->bit_rate); + /* Strict bitrate limiting (custom maximum bits per frame). + * Reduce target bitrate below the limit to avoid frequent re-encoding. 
+ */ + if (avctx->bit_rate_tolerance == 0) { + s->max_frame_bits = FFMAX(744 * s->channels - 3, + avctx->bit_rate * 1024 / avctx->sample_rate); + avctx->bit_rate = (int64_t)avctx->bit_rate * 85 / 100; + } else { + s->max_frame_bits = 6144 * s->channels - 3; + } + /* Profile and option setting */ avctx->profile = avctx->profile == AV_PROFILE_UNKNOWN ? AV_PROFILE_AAC_LOW : avctx->profile; diff --git a/libavcodec/aacenc.h b/libavcodec/aacenc.h index ae15f91e06..d3342d2f6e 100644 --- a/libavcodec/aacenc.h +++ b/libavcodec/aacenc.h @@ -228,6 +228,7 @@ typedef struct AACEncContext { int channels; ///< channel count const uint8_t *reorder_map; ///< lavc to aac reorder map const uint8_t *chan_map; ///< channel configuration map + int max_frame_bits; ///< maximum bits per frame (0=default) ChannelElement *cpe; ///< channel elements FFPsyContext psy; -- 2.46.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".