Re: [FFmpeg-devel] [PATCH 8/8] aacdec: add a decoder for AAC USAC (xHE-AAC)

Lynne via ffmpeg-devel Thu, 16 May 2024 08:00:51 -0700

On 16/05/2024 12:26, Andreas Rheinhardt wrote:

Lynne via ffmpeg-devel:

This commit adds a decoder for the frequency-domain part of USAC.


What works:
  - Mono
  - Stereo (no prediction)
  - Stereo (mid/side coding)

What doesn't:
  - Preroll decoding (every single decoder seems faulty or weird?)
  - Complex stereo prediction

Known issues:
  - Spec incompliance (noise synthesis in particular)
  - Lack of robustness
---

diff --git a/libavcodec/aac/aacdec.h b/libavcodec/aac/aacdec.h
index 20545a24d4..3e6592cf0e 100644
--- a/libavcodec/aac/aacdec.h
+++ b/libavcodec/aac/aacdec.h
@@ -42,6 +42,8 @@
  #include "libavcodec/avcodec.h"
  #include "libavcodec/mpeg4audio.h"

+#include "aacdec_ac.h"

+
  typedef struct AACDecContext AACDecContext;

/**

@@ -69,6 +71,32 @@ enum CouplingPoint {
      AFTER_IMDCT = 3,
  };

+enum AACUsacElem {

+    ID_USAC_SCE = 0,
+    ID_USAC_CPE = 1,
+    ID_USAC_LFE = 2,
+    ID_USAC_EXT = 3,
+};
+
+enum ExtensionHeaderType {
+    ID_CONFIG_EXT_FILL = 0,
+    ID_CONFIG_EXT_LOUDNESS_INFO = 2,
+    ID_CONFIG_EXT_STREAM_ID = 7,
+};
+
+enum AACUsacExtension {
+    ID_EXT_ELE_FILL,
+    ID_EXT_ELE_MPEGS,
+    ID_EXT_ELE_SAOC,
+    ID_EXT_ELE_AUDIOPREROLL,
+    ID_EXT_ELE_UNI_DRC,
+};
+
+enum AACUSACLoudnessExt {
+    UNIDRCLOUDEXT_TERM = 0x0,
+    UNIDRCLOUDEXT_EQ = 0x1,
+};
+
  // Supposed to be equal to AAC_RENAME() in case of USE_FIXED.
  #define RENAME_FIXED(name) name ## _fixed

@@ -93,6 +121,40 @@ typedef struct LongTermPrediction {

      int8_t used[MAX_LTP_LONG_SFB];
  } LongTermPrediction;

+/* Per channel core mode */

+typedef struct AACUsacElemData {
+    uint8_t core_mode;
+    uint8_t scale_factor_grouping;
+
+    /* Timewarping ratio */
+#define NUM_TW_NODES 16
+    uint8_t tw_ratio[NUM_TW_NODES];
+
+    struct {
+        uint8_t acelp_core_mode : 3;
+        uint8_t lpd_mode : 5;
+
+        uint8_t bpf_control_info : 1;
+        uint8_t core_mode_last : 1;
+        uint8_t fac_data_present : 1;
+
+        int last_lpd_mode;
+    } ldp;
+
+    struct {
+        unsigned int seed;
+        uint8_t level : 3;
+        uint8_t offset : 5;
+    } noise;
+
+    struct {
+        uint8_t gain;
+        uint32_t kv[8 /* (1024 / 16) / 8 */][8];
+    } fac;
+
+    AACArithState ac;
+} AACUsacElemData;
+
  /**
   * Individual Channel Stream
   */
@@ -145,6 +207,7 @@ typedef struct ChannelCoupling {
   */
  typedef struct SingleChannelElement {
      IndividualChannelStream ics;
+    AACUsacElemData ue;                             ///< USAC element data
      TemporalNoiseShaping tns;
      enum BandType band_type[128];                   ///< band types
      int sfo[128];                                   ///< scalefactor offsets
@@ -163,25 +226,141 @@ typedef struct SingleChannelElement {
      };
  } SingleChannelElement;

+typedef struct AACUsacStereo {

+    uint8_t common_window;
+    uint8_t common_tw;
+
+    uint8_t ms_mask_mode;
+    uint8_t config_idx;
+
+    struct {
+        uint8_t use_prev_frame;
+        uint8_t pred_dir;
+        uint8_t delta_code_time;
+        uint8_t pred_used[8][64];
+
+        AVComplexFloat pred[8][64];
+    } cplx;
+} AACUsacStereo;
+
  /**
   * channel element - generic struct for SCE/CPE/CCE/LFE
   */
  typedef struct ChannelElement {
      int present;
      // CPE specific
+    uint8_t max_sfb_ste;      ///< (USAC) Maximum of both max_sfb values
      uint8_t ms_mask[128];     ///< Set if mid/side stereo is used for each 
scalefactor window band
      // shared
      SingleChannelElement ch[2];
      // CCE specific
      ChannelCoupling coup;
+    // USAC stereo coupling data
+    AACUsacStereo us;
  } ChannelElement;

+typedef struct AACUSACLoudnessInfo {

+    uint8_t drc_set_id : 6;
+    uint8_t downmix_id : 7;
+    struct {
+        uint16_t lvl : 12;
+        uint8_t present : 1;
+    } sample_peak;
+
+    struct {
+        uint16_t lvl : 12;
+        uint8_t measurement : 4;
+        uint8_t reliability : 2;
+        uint8_t present : 1;
+    } true_peak;
+
+    uint8_t nb_measurements : 4;
+    struct {
+        uint8_t method_def : 4;
+        uint8_t method_val;
+        uint8_t measurement : 4;
+        uint8_t reliability : 2;
+    } measurements[16];
+} AACUSACLoudnessInfo;
+
+typedef struct AACUsacElemConfig {
+    enum AACUsacElem type;
+
+    uint8_t tw_mdct : 1;
+    uint8_t noise_fill : 1;
+
+    uint8_t stereo_config_index;
+
+    struct {
+        int ratio;
+
+        uint8_t harmonic_sbr : 1; /* harmonicSBR */
+        uint8_t bs_intertes : 1; /* bs_interTes */
+        uint8_t bs_pvc : 1; /* bs_pvc */
+
+        struct {
+            uint8_t start_freq; /* dflt_start_freq */
+            uint8_t stop_freq; /* dflt_stop_freq */
+
+            uint8_t freq_scale; /* dflt_freq_scale */
+            uint8_t alter_scale : 1; /* dflt_alter_scale */
+            uint8_t noise_scale; /* dflt_noise_scale */
+
+            uint8_t limiter_bands; /* dflt_limiter_bands */
+            uint8_t limiter_gains; /* dflt_limiter_gains */
+            uint8_t interpol_freq : 1; /* dflt_interpol_freq */
+            uint8_t smoothing_mode : 1; /* dflt_smoothing_mode */
+        } dflt;
+    } sbr;
+
+    struct {
+        uint8_t freq_res; /* bsFreqRes */
+        uint8_t fixed_gain; /* bsFixedGainDMX */
+        uint8_t temp_shape_config; /* bsTempShapeConfig */
+        uint8_t decorr_config; /* bsDecorrConfig */
+        uint8_t high_rate_mode : 1; /* bsHighRateMode */
+        uint8_t phase_coding : 1; /* bsPhaseCoding */
+
+        uint8_t otts_bands_phase; /* bsOttBandsPhase */
+        uint8_t residual_coding; /* bsResidualCoding */
+        uint8_t residual_bands; /* bsResidualBands */
+        uint8_t pseudo_lr : 1; /* bsPseudoLr */
+        uint8_t env_quant_mode : 1; /* bsEnvQuantMode */


Is using bitfields really worth it given that they force to use masking
for accesses?

+    } mps;
+
+    struct {
+        enum AACUsacExtension type;
+        uint8_t payload_frag;
+        uint32_t default_len;
+        uint32_t pl_data_offset;
+        uint8_t *pl_data;
+    } ext;
+} AACUsacElemConfig;
+
+typedef struct AACUSACConfig {
+    uint8_t core_sbr_frame_len_idx; /* coreSbrFrameLengthIndex */
+    uint8_t rate_idx;
+    uint16_t core_frame_len;
+    uint16_t stream_identifier;
+
+    AACUsacElemConfig elems[64];
+    int nb_elems;
+
+    struct {
+        uint8_t nb_album;
+        AACUSACLoudnessInfo album_info[64];
+        uint8_t nb_info;
+        AACUSACLoudnessInfo info[64];
+    } loudness;
+} AACUSACConfig;
+
  typedef struct OutputConfiguration {
      MPEG4AudioConfig m4ac;
      uint8_t layout_map[MAX_ELEM_ID*4][3];
      int layout_map_tags;
      AVChannelLayout ch_layout;
      enum OCStatus status;
+    AACUSACConfig usac;
  } OutputConfiguration;

/**

diff --git a/libavcodec/aac/aacdec_ac.c b/libavcodec/aac/aacdec_ac.c
new file mode 100644
index 0000000000..326d716bd3
--- /dev/null
+++ b/libavcodec/aac/aacdec_ac.c
@@ -0,0 +1,224 @@
+/*
+ * AAC definitions and structures
+ * Copyright (c) 2024 Lynne
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/aactab.h"
+#include "aacdec_ac.h"
+
+uint32_t ff_aac_ac_map_process(AACArithState *state, int reset, int N)
+{
+    float ratio;
+    if (reset) {
+        memset(state->last, 0, sizeof(state->last));
+        state->last_len = N;
+        memset(state->cur, 0, sizeof(state->cur));
+        state->cur[3] = 0;
+        state->cur[2] = 0;
+        state->cur[1] = 0;
+        state->cur[0] = 1;
+        state->last[0] = 0 << 12;
+        state->state_pre = 0;
+        return 0;
+    } else if (state->last_len != N) {
+        int i;
+        uint8_t last[512 /* 2048 / 4 */];
+        memcpy(last, state->last, sizeof(last));
+
+        ratio = state->last_len / (float)N;
+        for (i = 0; i < N/2; i++) {
+            int k = (int)(i * ratio);
+            state->last[i] = last[k];
+        }
+
+        for (; i < FF_ARRAY_ELEMS(state->last); i++)
+            state->last[i] = 0;
+
+        state->last_len = N;
+    }
+
+    memset(state->cur, 0, sizeof(state->cur));
+    state->cur[3] = 0;
+    state->cur[2] = 0;
+    state->cur[1] = 0;
+    state->cur[0] = 1;
+
+    state->state_pre = state->last[0] << 12;
+    return state->last[0] << 12;
+}
+
+extern int ec_debug;
+
+int trig = 0;
+
+uint32_t ff_aac_ac_get_context(AACArithState *state, uint32_t c, int i, int N)
+{
+    c = state->state_pre >> 8;
+    c = c + (state->last[i + 1] << 8);
+    c = (c << 4);
+    c += state->cur[1];
+
+    state->state_pre = c;
+
+    if (i > 3 &&
+        ((state->cur[3] + state->cur[2] + state->cur[1]) < 5))
+        return c + 0x10000;
+
+    return c;
+}
+
+uint32_t ff_aac_ac_get_pk(uint32_t c)
+{
+    int i_min = -1;
+    int i, j;
+    int i_max = FF_ARRAY_ELEMS(ff_aac_ac_lookup_m) - 1;
+    while ((i_max - i_min) > 1) {
+        i = i_min + ((i_max - i_min) / 2);
+        j = ff_aac_ac_hash_m[i];
+        if (c < (j >> 8))
+            i_max = i;
+        else if (c > (j >> 8))
+            i_min = i;
+        else
+            return (j & 0xFF);
+    }
+    return ff_aac_ac_lookup_m[i_max];
+}
+
+void ff_aac_ac_update_context(AACArithState *state, int idx,
+                              uint16_t a, uint16_t b)
+{
+    state->cur[0] = a + b + 1;
+    if (state->cur[0] > 0xF)
+        state->cur[0] = 0xF;
+
+    state->cur[3] = state->cur[2];
+    state->cur[2] = state->cur[1];
+    state->cur[1] = state->cur[0];
+
+    state->last[idx] = state->cur[0];
+}
+
+/* Initialize AC */
+void ff_aac_ac_init(AACArith *ac, GetBitContext *gb)
+{
+    ac->low = 0;
+    ac->high = UINT16_MAX;
+    ac->val = get_bits(gb, 16);
+}
+
+uint16_t ff_aac_ac_decode(AACArith *ac, GetBitContext *gb,
+                          const uint16_t *cdf, uint16_t cdf_len)
+{
+    int val = ac->val;
+    int low = ac->low;
+    int high = ac->high;
+
+    int rng = high - low + 1;
+    int c = ((((int)(val - low + 1)) << 14) - ((int)1));
+
+    /* Note: this could be done faster via heuristics, the total number of
+     * configurations is low */
+    const uint16_t *p = cdf - 1;
+    const uint16_t *q;
+
+    switch (cdf_len) {
+    case 2:
+        if ((p[1] * rng) > c)
+            p += 1;
+        break;
+    case 4:
+        if ((p[2] * rng) > c)
+            p += 2;
+        if ((p[1] * rng) > c)
+            p += 1;
+        break;
+    case 17:
+        /* First check if the current probability is even met at all */
+        if ((p[1] * rng) <= c)
+            break;
+        p += 1;
+        for (int i = 8; i >= 1; i >>= 1)
+            if ((p[i] * rng) > c)
+                p += i;
+        break;
+    case 27:
+        const uint16_t *p_24 = p + 24;
+
+        if ((p[16] * rng) > c)
+            p += 16;
+        if ((p[8] * rng) > c)
+            p += 8;
+        if (p != p_24)
+            if ((p[4] * rng) > c)
+                p += 4;
+        if ((p[2] * rng) > c)
+            p += 2;
+
+        if (p != &p_24[2])
+            if ((p[1] * rng) > c)
+                p += 1;
+        break;
+    default:
+        /* This should never happen */
+        av_assert2(0);
+    }
+
+    int sym = (int)((ptrdiff_t)(p - cdf)) + 1;
+    if (sym)
+        high = low + ((rng * cdf[sym - 1]) >> 14) - 1;
+    low += (rng * cdf[sym]) >> 14;
+
+    /* This loop could be done faster */
+    while (1) {
+        if (high < 32768) {
+            ;
+        } else if (low >= 32768) {
+            val -= 32768;
+            low -= 32768;
+            high -= 32768;
+        } else if (low >= 16384 && high < 49152) {
+            val -= 16384;
+            low -= 16384;
+            high -= 16384;
+        } else {
+            break;
+        }
+        low += low;
+        high += high + 1;
+        val = (val << 1) | get_bits1(gb);
+    };
+
+    ac->low = low;
+    ac->high = high;
+    ac->val = val;
+
+    return sym;
+}
+
+void ff_aac_ac_finish(AACArithState *state, int offset, int N)
+{
+    int i;
+
+    for (i = offset; i < N/2; i++)
+        state->last[i] = 1;
+
+    for (; i < FF_ARRAY_ELEMS(state->last); i++)
+        state->last[i] = 0;
+}
diff --git a/libavcodec/aac/aacdec_ac.h b/libavcodec/aac/aacdec_ac.h
new file mode 100644
index 0000000000..ef96bed770
--- /dev/null
+++ b/libavcodec/aac/aacdec_ac.h
@@ -0,0 +1,54 @@
+/*
+ * AAC definitions and structures
+ * Copyright (c) 2024 Lynne
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AACDEC_AC_H
+#define AVCODEC_AACDEC_AC_H
+
+#include "libavcodec/get_bits.h"
+
+typedef struct AACArithState {
+    uint8_t last[512 /* 2048 / 4 */];
+    int last_len;
+    uint8_t cur[4];
+    uint16_t state_pre;
+} AACArithState;
+
+typedef struct AACArith {
+    uint16_t low;
+    uint16_t high;
+    uint16_t val;
+} AACArith;
+
+#define FF_AAC_AC_ESCAPE 16
+
+uint32_t ff_aac_ac_map_process(AACArithState *state, int reset, int len);
+uint32_t ff_aac_ac_get_context(AACArithState *state, uint32_t old_c, int idx, 
int len);
+uint32_t ff_aac_ac_get_pk(uint32_t c);
+
+void ff_aac_ac_update_context(AACArithState *state, int idx, uint16_t a, 
uint16_t b);
+void ff_aac_ac_init(AACArith *ac, GetBitContext *gb);
+
+uint16_t ff_aac_ac_decode(AACArith *ac, GetBitContext *gb,
+                          const uint16_t *cdf, uint16_t cdf_len);
+
+void ff_aac_ac_finish(AACArithState *state, int offset, int nb);
+
+#endif /* AVCODEC_AACDEC_AC_H */
diff --git a/libavcodec/aac/aacdec_dsp_template.c 
b/libavcodec/aac/aacdec_dsp_template.c
index 59a69d88f3..8d31af22f8 100644
--- a/libavcodec/aac/aacdec_dsp_template.c
+++ b/libavcodec/aac/aacdec_dsp_template.c
@@ -88,8 +88,8 @@ static void AAC_RENAME(apply_mid_side_stereo)(AACDecContext 
*ac, ChannelElement
      INTFLOAT *ch1 = cpe->ch[1].AAC_RENAME(coeffs);
      const uint16_t *offsets = ics->swb_offset;
      for (int g = 0; g < ics->num_window_groups; g++) {
-        for (int sfb = 0; sfb < ics->max_sfb; sfb++) {
-            const int idx = g*ics->max_sfb + sfb;
+        for (int sfb = 0; sfb < cpe->max_sfb_ste; sfb++) {
+            const int idx = g*cpe->max_sfb_ste + sfb;
              if (cpe->ms_mask[idx] &&
                  cpe->ch[0].band_type[idx] < NOISE_BT &&
                  cpe->ch[1].band_type[idx] < NOISE_BT) {
diff --git a/libavcodec/aac/aacdec_latm.h b/libavcodec/aac/aacdec_latm.h
index e40a2fe1a7..047c11e0fb 100644
--- a/libavcodec/aac/aacdec_latm.h
+++ b/libavcodec/aac/aacdec_latm.h
@@ -56,7 +56,8 @@ static int latm_decode_audio_specific_config(struct 
LATMContext *latmctx,
  {
      AACDecContext *ac     = &latmctx->aac_ctx;
      AVCodecContext *avctx = ac->avctx;
-    MPEG4AudioConfig m4ac = { 0 };
+    OutputConfiguration oc = { 0 };
+    MPEG4AudioConfig *m4ac = &oc.m4ac;
      GetBitContext gbc;
      int config_start_bit  = get_bits_count(gb);
      int sync_extension    = 0;
@@ -76,7 +77,7 @@ static int latm_decode_audio_specific_config(struct 
LATMContext *latmctx,
      if (get_bits_left(gb) <= 0)
          return AVERROR_INVALIDDATA;

- bits_consumed = decode_audio_specific_config_gb(NULL, avctx, &m4ac,

+    bits_consumed = decode_audio_specific_config_gb(NULL, avctx, &oc,
                                                      &gbc, config_start_bit,
                                                      sync_extension);

@@ -88,11 +89,12 @@ static int latm_decode_audio_specific_config(struct LATMContext *latmctx,

        asclen = bits_consumed;

if (!latmctx->initialized ||

-        ac->oc[1].m4ac.sample_rate != m4ac.sample_rate ||
-        ac->oc[1].m4ac.chan_config != m4ac.chan_config) {
+        ac->oc[1].m4ac.sample_rate != m4ac->sample_rate ||
+        ac->oc[1].m4ac.chan_config != m4ac->chan_config) {

if (latmctx->initialized) {

-            av_log(avctx, AV_LOG_INFO, "audio config changed (sample_rate=%d, 
chan_config=%d)\n", m4ac.sample_rate, m4ac.chan_config);
+            av_log(avctx, AV_LOG_INFO, "audio config changed (sample_rate=%d, 
chan_config=%d)\n",
+                   m4ac->sample_rate, m4ac->chan_config);
          } else {
              av_log(avctx, AV_LOG_DEBUG, "initializing latmctx\n");
          }
@@ -280,7 +282,7 @@ static int latm_decode_frame(AVCodecContext *avctx, AVFrame 
*out,
          } else {
              push_output_configuration(&latmctx->aac_ctx);
              if ((err = decode_audio_specific_config(
-                    &latmctx->aac_ctx, avctx, &latmctx->aac_ctx.oc[1].m4ac,
+                    &latmctx->aac_ctx, avctx, &latmctx->aac_ctx.oc[1],
                      avctx->extradata, avctx->extradata_size*8LL, 1)) < 0) {
                  pop_output_configuration(&latmctx->aac_ctx);
                  return err;
diff --git a/libavcodec/aac/aacdec_lpd.c b/libavcodec/aac/aacdec_lpd.c
new file mode 100644
index 0000000000..be39e2c175
--- /dev/null
+++ b/libavcodec/aac/aacdec_lpd.c
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2024 Lynne <d...@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "aacdec_lpd.h"
+#include "aacdec_usac.h"
+
+const uint8_t ff_aac_lpd_mode_tab[32][4] = {
+    { 0, 0, 0, 0 },
+    { 1, 0, 0, 0 },
+    { 0, 1, 0, 0 },
+    { 1, 1, 0, 0 },
+    { 0, 0, 1, 0 },
+    { 1, 0, 1, 0 },
+    { 0, 1, 1, 0 },
+    { 1, 1, 1, 0 },
+    { 0, 0, 0, 1 },
+    { 1, 0, 0, 1 },
+    { 0, 1, 0, 1 },
+    { 1, 1, 0, 1 },
+    { 0, 0, 1, 1 },
+    { 1, 0, 1, 1 },
+    { 0, 1, 1, 1 },
+    { 1, 1, 1, 1 },
+    { 2, 2, 0, 0 },
+    { 2, 2, 1, 0 },
+    { 2, 2, 0, 1 },
+    { 2, 2, 1, 1 },
+    { 0, 0, 2, 2 },
+    { 1, 0, 2, 2 },
+    { 0, 1, 2, 2 },
+    { 1, 1, 2, 2 },
+    { 2, 2, 2, 2 },
+    { 3, 3, 3, 3 },
+    /* Larger values are reserved, but permit them for resilience */
+    { 0, 0, 0, 0 },
+    { 0, 0, 0, 0 },
+    { 0, 0, 0, 0 },
+    { 0, 0, 0, 0 },
+    { 0, 0, 0, 0 },
+    { 0, 0, 0, 0 },
+};
+
+static void parse_qn(GetBitContext *gb, int *qn, int nk_mode, int no_qn)
+{
+    if (nk_mode == 1) {
+        for (int k = 0; k < no_qn; k++) {
+            qn[k] = ff_aac_get_vlclbf(gb);
+            if (qn[k])
+                qn[k]++;
+        }
+        return;
+    }
+
+    for (int k = 0; k < no_qn; k++)
+        qn[k] = get_bits(gb, 2) + 2;
+
+    if (nk_mode == 2) {
+        for (int k = 0; k < no_qn; k++) {
+            if (qn[k] > 4) {
+                qn[k] = ff_aac_get_vlclbf(gb);
+                if (qn[k])
+                    qn[k] += 4;
+            }
+        }
+        return;
+    }
+
+    for (int k = 0; k < no_qn; k++) {
+        if (qn[k] > 4) {
+            int qn_ext = ff_aac_get_vlclbf(gb);
+            switch (qn_ext) {
+            case 0: qn[k] = 5; break;
+            case 1: qn[k] = 6; break;
+            case 2: qn[k] = 0; break;
+            default: qn[k] = qn_ext + 4; break;
+            }
+        }
+    }
+}
+
+static int parse_codebook_idx(GetBitContext *gb, uint32_t *kv,
+                              int nk_mode, int no_qn)
+{
+    int n, nk;
+
+    int qn[2];
+    parse_qn(gb, qn, nk_mode, no_qn);
+
+    for (int k = 0; k < no_qn; k++) {
+        if (qn[k] > 4) {
+            nk = (qn[k] - 3) / 2;
+            n = qn[k] - nk*2;
+        } else {
+            nk = 0;
+            n = qn[k];
+        }
+    }
+
+    int idx = get_bits(gb, 4*n);
+
+    if (nk > 0)
+        for (int i = 0; i < 8; i++)
+            kv[i] = get_bits(gb, nk);
+
+    return 0;
+}
+
+int ff_aac_parse_fac_data(AACUsacElemData *ce, GetBitContext *gb,
+                          int use_gain, int len)
+{
+    int ret;
+    if (use_gain)
+        ce->fac.gain = get_bits(gb, 7);
+
+    for (int i = 0; i < len/8; i++) {
+        ret = parse_codebook_idx(gb, ce->fac.kv[i], 1, 1);
+        if (ret < 0)
+            return ret;
+    }
+
+    return 0;
+}
+
+int ff_aac_ldp_parse_channel_stream(AACDecContext *ac, AACUSACConfig *usac,
+                                    AACUsacElemData *ce, GetBitContext *gb)
+{
+    ce->ldp.acelp_core_mode = get_bits(gb, 3);
+    ce->ldp.lpd_mode = get_bits(gb, 5);
+
+    ce->ldp.bpf_control_info = get_bits1(gb);
+    ce->ldp.core_mode_last = get_bits1(gb);
+    ce->ldp.fac_data_present = get_bits1(gb);
+
+    const uint8_t *mod = ff_aac_lpd_mode_tab[ce->ldp.lpd_mode];
+
+    int first_ldp_flag = !ce->ldp.core_mode_last;
+    int first_tcx_flag = 1;
+    if (first_ldp_flag)
+        ce->ldp.last_lpd_mode = -1; /* last_ldp_mode is a **STATEFUL** value */
+
+    int k = 0;
+    while (k < 0) {
+        if (!k) {
+            if (ce->ldp.core_mode_last && ce->ldp.fac_data_present)
+                ff_aac_parse_fac_data(ce, gb, 0, usac->core_frame_len/8);
+        } else {
+            if (!ce->ldp.last_lpd_mode && mod[k] > 0 ||
+                ce->ldp.last_lpd_mode && !mod[k])
+                ff_aac_parse_fac_data(ce, gb, 0, usac->core_frame_len/8);
+        }
+        if (!mod[k]) {
+//            parse_acelp_coding();
+            ce->ldp.last_lpd_mode = 0;
+            k++;
+        } else {
+//            parse_tcx_coding();
+            ce->ldp.last_lpd_mode = mod[k];
+            k += (1 << (mod[k] - 1));
+            first_tcx_flag = 0;
+        }
+    }
+
+//    parse_lpc_data(first_lpd_flag);
+
+    if (!ce->ldp.core_mode_last && ce->ldp.fac_data_present) {
+        uint16_t len_8 = usac->core_frame_len / 8;
+        uint16_t len_16 = usac->core_frame_len / 16;
+        uint16_t fac_len = get_bits1(gb) /* short_fac_flag */ ? len_8 : len_16;
+        int ret = ff_aac_parse_fac_data(ce, gb, 1, fac_len);
+        if (ret < 0)
+            return ret;
+    }
+
+    return 0;
+}
diff --git a/libavcodec/aac/aacdec_lpd.h b/libavcodec/aac/aacdec_lpd.h
new file mode 100644
index 0000000000..924ff75e52
--- /dev/null
+++ b/libavcodec/aac/aacdec_lpd.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2024 Lynne <d...@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AAC_AACDEC_LPD_H
+#define AVCODEC_AAC_AACDEC_LPD_H
+
+#include "aacdec.h"
+#include "libavcodec/get_bits.h"
+
+int ff_aac_parse_fac_data(AACUsacElemData *ce, GetBitContext *gb,
+                          int use_gain, int len);
+
+int ff_aac_ldp_parse_channel_stream(AACDecContext *ac, AACUSACConfig *usac,
+                                    AACUsacElemData *ce, GetBitContext *gb);
+
+#endif /* AVCODEC_AAC_AACDEC_LPD_H */
diff --git a/libavcodec/aac/aacdec_usac.c b/libavcodec/aac/aacdec_usac.c
new file mode 100644
index 0000000000..4b48c4d6ca
--- /dev/null
+++ b/libavcodec/aac/aacdec_usac.c
@@ -0,0 +1,1230 @@
+/*
+ * Copyright (c) 2024 Lynne <d...@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "aacdec_usac.h"
+#include "aacdec_tab.h"
+#include "aacdec_lpd.h"
+#include "aacdec_ac.h"
+
+#include "libavcodec/opusdsp.h"
+#include "libavcodec/aactab.h"
+#include "libavutil/mem.h"
+#include "libavcodec/mpeg4audio.h"
+
+/* Number of scalefactor bands per complex prediction band, equal to 2. */
+#define SFB_PER_PRED_BAND 2
+
+static inline uint32_t get_escaped_value(GetBitContext *gb, int nb1, int nb2, 
int nb3)
+{
+    uint32_t val = get_bits(gb, nb1);
+    if (val < ((1 << nb1) - 1))
+        return val;
+
+    val += get_bits(gb, nb2);
+    if (val == ((1 << nb2) - 1))
+        val += get_bits(gb, nb3);
+
+    return val;
+}
+
+static int aac_usac_samplerate[] = {


Missing const

+    96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
+    16000, 12000, 11025, 8000, 7350, -1, -1, 57600, 51200,
+    40000, 38400, 34150, 28800, 25600, 20000, 19200, 17075, 14400, 12800, 
9600, -1, -1, -1, -1,
+};
+

+static int parse_ext_ele(AACDecContext *ac, AACUsacElemConfig *e,
+                         GetBitContext *gb)
+{
+    if (get_bits1(gb)) { /* usacExtElementPresent */
+        uint32_t len;
+        if (get_bits1(gb)) { /* usacExtElementUseDefaultLength */
+            len = e->ext.default_len;
+        } else {
+            len = get_bits(gb, 8); /* usacExtElementPayloadLength */
+            if (len == 255)
+                len += get_bits(gb, 16) - 2;
+        }
+
+        if (len) {
+            uint8_t *tmp;
+            uint8_t pl_frag_start = 1;
+            uint8_t pl_frag_end = 1;
+            if (e->ext.payload_frag) {
+                pl_frag_start = get_bits1(gb); /* usacExtElementStart */
+                pl_frag_end = get_bits1(gb); /* usacExtElementStop */
+            }
+
+            if (pl_frag_start)
+                e->ext.pl_data_offset = 0;
+
+            tmp = av_realloc(e->ext.pl_data, e->ext.pl_data_offset + len);
+            if (!tmp) {
+                free(e->ext.pl_data);


Wrong deallocator.

+                return AVERROR(ENOMEM);
+            }
+            e->ext.pl_data = tmp;
+
+            for (int i = 0; i < len; i++)
+                e->ext.pl_data[e->ext.pl_data_offset + i] = get_bits(gb, 8);
+
+            if (pl_frag_end) {
+                int ret;
+                e->ext.pl_data_offset = 0;
+                switch (e->ext.type) {
+                case ID_EXT_ELE_FILL:
+                    av_freep(&e->ext.pl_data);
+                    break;
+                case ID_EXT_ELE_AUDIOPREROLL:
+                    ret = parse_audio_preroll(ac, e->ext.pl_data,
+                                              e->ext.pl_data_offset);
+                    if (ret < 0) {
+                        av_freep(&e->ext.pl_data);
+                        return ret;
+                    }
+                    break;
+                default:
+                    av_freep(&e->ext.pl_data);


Pointless if you abort in the next line

+                    /* This should never happen */
+                    av_assert0(0);
+                }
+            }
+        }
+    }
+
+    return 0;
+}
+

+#include "libavcodec/opusdsp.h"
+
+#ifndef AVCODEC_AAC_AACDEC_USAC_H
+#define AVCODEC_AAC_AACDEC_USAC_H
+
+#include "aacdec.h"
+
+#include "libavcodec/get_bits.h"
+
+static inline uint8_t ff_aac_get_vlclbf(GetBitContext *gb)
+{
+    uint8_t ret = 0;
+    while (get_bits1(gb) && ret <= 36)
+        ret++;
+    return ret;
+}


Look at unary.h


That's convenient, thanks.

I've synced my changes on my repo in
https://github.com/cyanreg/FFmpeg/tree/xhe

Though I'll likely upload the patchset on the ML again tomorrow with some fixes for preroll parsing and complex synth.

OpenPGP_0xA2FEA5F03F034464.asc
Description: OpenPGP public key

OpenPGP_signature.asc
Description: OpenPGP digital signature

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Re: [FFmpeg-devel] [PATCH 8/8] aacdec: add a decoder for AAC USAC (xHE-AAC)

Reply via email to