From: Djordje Pesut <djordje.pe...@imgtec.com> Add fixed-point implementation of the AAC decoder
Signed-off-by: Nedeljko Babic <nedeljko.ba...@imgtec.com> --- libavcodec/aac.h | 98 +++++++-- libavcodec/aacdec.c | 5 + libavcodec/aacdec_fixed.c | 449 ++++++++++++++++++++++++++++++++++++++++++ libavcodec/aacdec_template.c | 459 +++++++++++++++++++++++++++++++------------ libavcodec/lpc.h | 15 +- libavcodec/mdct_template.c | 9 +- libavutil/fixed_dsp.c | 70 ++++++- libavutil/fixed_dsp.h | 53 +++++ 8 files changed, 999 insertions(+), 159 deletions(-) create mode 100644 libavcodec/aacdec_fixed.c diff --git a/libavcodec/aac.h b/libavcodec/aac.h index 387e103..8c6106f 100644 --- a/libavcodec/aac.h +++ b/libavcodec/aac.h @@ -30,7 +30,58 @@ #ifndef AVCODEC_AAC_H #define AVCODEC_AAC_H +#ifndef USE_FIXED +#define USE_FIXED 0 +#endif + +#if USE_FIXED + +#define FFT_FLOAT 0 +#define FFT_FIXED_32 1 + +#define AAC_RENAME(x) x ## _fixed +#define AAC_RENAME_32(x) x ## _fixed_32 +#define INTFLOAT int +#define SHORTFLOAT int16_t +#define AAC_FLOAT aac_float_t +#define AAC_SIGNE int +#define FIXR(a) ((int)((a) * 1 + 0.5)) +#define FIXR10(a) ((int)((a) * 1024.0 + 0.5)) +#define Q23(a) (int)((a) * 8388608.0 + 0.5) +#define Q30(x) (int)((x)*1073741824.0 + 0.5) +#define Q31(x) (int)((x)*2147483648.0 + 0.5) +#define RANGE15(x) x +#define GET_GAIN(x, y) (-(y) << (x)) + 1024 +#define AAC_MUL26(x, y) (int)(((int64_t)(x) * (y) + 0x2000000) >> 26) +#define AAC_MUL30(x, y) (int)(((int64_t)(x) * (y) + 0x20000000) >> 30) +#define AAC_MUL31(x, y) (int)(((int64_t)(x) * (y) + 0x40000000) >> 31) + +#else + +#define FFT_FLOAT 1 +#define FFT_FIXED_32 0 + +#define AAC_RENAME(x) x +#define AAC_RENAME_32(x) x +#define INTFLOAT float +#define SHORTFLOAT float +#define AAC_FLOAT float +#define AAC_SIGNE unsigned +#define FIXR(x) ((float)(x)) +#define FIXR10(x) ((float)(x)) +#define Q23(x) x +#define Q30(x) x +#define Q31(x) x +#define RANGE15(x) (32768.0 * (x)) +#define GET_GAIN(x, y) powf((x), -(y)) +#define AAC_MUL26(x, y) ((x) * (y)) +#define AAC_MUL30(x, y) ((x) * (y)) +#define AAC_MUL31(x, y) ((x) * 
(y)) + +#endif /* USE_FIXED */ + #include "libavutil/float_dsp.h" +#include "libavutil/fixed_dsp.h" #include "avcodec.h" #include "fft.h" #include "mpeg4audio.h" @@ -125,12 +176,12 @@ typedef struct OutputConfiguration { * Predictor State */ typedef struct PredictorState { - float cor0; - float cor1; - float var0; - float var1; - float r0; - float r1; + AAC_FLOAT cor0; + AAC_FLOAT cor1; + AAC_FLOAT var0; + AAC_FLOAT var1; + AAC_FLOAT r0; + AAC_FLOAT r1; } PredictorState; #define MAX_PREDICTORS 672 @@ -147,7 +198,7 @@ typedef struct PredictorState { typedef struct LongTermPrediction { int8_t present; int16_t lag; - float coef; + INTFLOAT coef; int8_t used[MAX_LTP_LONG_SFB]; } LongTermPrediction; @@ -181,7 +232,7 @@ typedef struct TemporalNoiseShaping { int length[8][4]; int direction[8][4]; int order[8][4]; - float coef[8][4][TNS_MAX_ORDER]; + INTFLOAT coef[8][4][TNS_MAX_ORDER]; } TemporalNoiseShaping; /** @@ -218,7 +269,7 @@ typedef struct ChannelCoupling { int ch_select[8]; /**< [0] shared list of gains; [1] list of gains for right channel; * [2] list of gains for left channel; [3] lists of gains for both channels */ - float gain[16][120]; + INTFLOAT gain[16][120]; } ChannelCoupling; /** @@ -230,15 +281,16 @@ typedef struct SingleChannelElement { Pulse pulse; enum BandType band_type[128]; ///< band types int band_type_run_end[120]; ///< band type run end points - float sf[120]; ///< scalefactors + INTFLOAT sf[120]; ///< scalefactors int sf_idx[128]; ///< scalefactor indices (used by encoder) uint8_t zeroes[128]; ///< band is not coded (used by encoder) - DECLARE_ALIGNED(32, float, coeffs)[1024]; ///< coefficients for IMDCT - DECLARE_ALIGNED(32, float, saved)[1536]; ///< overlap - DECLARE_ALIGNED(32, float, ret_buf)[2048]; ///< PCM output buffer - DECLARE_ALIGNED(16, float, ltp_state)[3072]; ///< time signal for LTP + DECLARE_ALIGNED(32, INTFLOAT, coeffs)[1024]; ///< coefficients for IMDCT + DECLARE_ALIGNED(32, INTFLOAT, saved)[1024]; ///< overlap + 
DECLARE_ALIGNED(32, INTFLOAT, ret_buf)[2048]; ///< PCM output buffer + DECLARE_ALIGNED(16, INTFLOAT, ltp_state)[3072]; ///< time signal for LTP + DECLARE_ALIGNED(32, int, temp_sbr)[2048]; ///< PCM intermediate buffer for SBR PredictorState predictor_state[MAX_PREDICTORS]; - float *ret; ///< PCM output + INTFLOAT *ret; ///< PCM output } SingleChannelElement; /** @@ -282,7 +334,7 @@ struct AACContext { * (We do not want to have these on the stack.) * @{ */ - DECLARE_ALIGNED(32, float, buf_mdct)[1024]; + DECLARE_ALIGNED(32, INTFLOAT, buf_mdct)[1024]; /** @} */ /** @@ -294,7 +346,11 @@ struct AACContext { FFTContext mdct_ld; FFTContext mdct_ltp; FmtConvertContext fmt_conv; +#if USE_FIXED + AVFixedDSPContext *fdsp; +#else AVFloatDSPContext fdsp; +#endif /* USE_FIXED */ int random_state; /** @} */ @@ -314,7 +370,7 @@ struct AACContext { int dmono_mode; ///< 0->not dmono, 1->use first channel, 2->use second channel /** @} */ - DECLARE_ALIGNED(32, float, temp)[128]; + DECLARE_ALIGNED(32, INTFLOAT, temp)[128]; OutputConfiguration oc[2]; int warned_num_aac_frames; @@ -322,11 +378,13 @@ struct AACContext { /* aacdec functions pointers */ void (*imdct_and_windowing)(AACContext *ac, SingleChannelElement *sce); void (*apply_ltp)(AACContext *ac, SingleChannelElement *sce); - void (*apply_tns)(float coef[1024], TemporalNoiseShaping *tns, + void (*apply_tns)(INTFLOAT coef[1024], TemporalNoiseShaping *tns, IndividualChannelStream *ics, int decode); - void (*windowing_and_mdct_ltp)(AACContext *ac, float *out, - float *in, IndividualChannelStream *ics); + void (*windowing_and_mdct_ltp)(AACContext *ac, INTFLOAT *out, + INTFLOAT *in, IndividualChannelStream *ics); void (*update_ltp)(AACContext *ac, SingleChannelElement *sce); + void (*vector_pow43)(int *coefs, int len); + void (*subband_scale)(int *dst, int *src, int scale, int offset, int len); }; diff --git a/libavcodec/aacdec.c b/libavcodec/aacdec.c index 5cfbee9..87b3f80 100644 --- a/libavcodec/aacdec.c +++ b/libavcodec/aacdec.c @@ 
-32,6 +32,11 @@ * @author Maxim Gavrilov ( maxim.gavrilov gmail com ) */ +#define FFT_FLOAT 1 +#define FFT_FIXED_32 0 +#define USE_FIXED 0 +#define CONFIG_FIXED 0 + #include "libavutil/float_dsp.h" #include "libavutil/opt.h" #include "avcodec.h" diff --git a/libavcodec/aacdec_fixed.c b/libavcodec/aacdec_fixed.c new file mode 100644 index 0000000..ea18c60 --- /dev/null +++ b/libavcodec/aacdec_fixed.c @@ -0,0 +1,449 @@ +/* + * Copyright (c) 2013 + * MIPS Technologies, Inc., California. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * AAC decoder fixed-point implementation + * + * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org ) + * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com ) + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * AAC decoder + * @author Oded Shimon ( ods15 ods15 dyndns org ) + * @author Maxim Gavrilov ( maxim.gavrilov gmail com ) + * + * Fixed point implementation + * @author Stanislav Ocovaj ( stanislav.ocovaj imgtec com ) + */ + +#define FFT_FLOAT 0 +#define FFT_FIXED_32 1 +#define USE_FIXED 1 +#define CONFIG_FIXED 1 + +#include "libavutil/fixed_dsp.h" +#include "libavutil/opt.h" +#include "avcodec.h" +#include "internal.h" +#include "get_bits.h" +#include "fft.h" +#include "fmtconvert.h" +#include "lpc.h" +#include "kbdwin.h" +#include "sinewin.h" + +#include "aac.h" +#include "aactab.h" +#include "aacdectab.h" +#include "cbrt_tablegen.h" +#include "sbr.h" +#include "aacsbr.h" +#include "mpeg4audio.h" +#include "aacadtsdec.h" +#include "libavutil/intfloat.h" + +#include <assert.h> +#include <errno.h> +#include <math.h> +#include <string.h> + +static av_always_inline void reset_predict_state(PredictorState *ps) +{ + ps->r0.mant = 0; + ps->r0.exp = 0; + ps->r1.mant = 0; + ps->r1.exp = 0; + 
ps->cor0.mant = 0; + ps->cor0.exp = 0; + ps->cor1.mant = 0; + ps->cor1.exp = 0; + ps->var0.mant = 0x20000000; + ps->var0.exp = 1; + ps->var1.mant = 0x20000000; + ps->var1.exp = 1; +} + +int exp2tab[4] = { Q31(1.0000000000/2), Q31(1.1892071150/2), Q31(1.4142135624/2), Q31(1.6817928305/2) }; // 2^0, 2^0.25, 2^0.5, 2^0.75 + +static inline int *DEC_SPAIR(int *dst, unsigned idx) +{ + dst[0] = (idx & 15) - 4; + dst[1] = (idx >> 4 & 15) - 4; + + return dst + 2; +} + +static inline int *DEC_SQUAD(int *dst, unsigned idx) +{ + dst[0] = (idx & 3) - 1; + dst[1] = (idx >> 2 & 3) - 1; + dst[2] = (idx >> 4 & 3) - 1; + dst[3] = (idx >> 6 & 3) - 1; + + return dst + 4; +} + +static inline int *DEC_UPAIR(int *dst, unsigned idx, unsigned sign) +{ + dst[0] = (idx & 15) * (1 - (sign & 0xFFFFFFFE)); + dst[1] = (idx >> 4 & 15) * (1 - ((sign & 1) << 1)); + + return dst + 2; +} + +static inline int *DEC_UQUAD(int *dst, unsigned idx, unsigned sign) +{ + unsigned nz = idx >> 12; + + dst[0] = (idx & 3) * (1 + (((int)sign >> 31) << 1)); + sign <<= nz & 1; + nz >>= 1; + dst[1] = (idx >> 2 & 3) * (1 + (((int)sign >> 31) << 1)); + sign <<= nz & 1; + nz >>= 1; + dst[2] = (idx >> 4 & 3) * (1 + (((int)sign >> 31) << 1)); + sign <<= nz & 1; + nz >>= 1; + dst[3] = (idx >> 6 & 3) * (1 + (((int)sign >> 31) << 1)); + + return dst + 4; +} + +static void vector_pow43(int *coefs, int len) +{ + int i, coef; + + for (i=0; i<len; i++) { + coef = coefs[i]; + if (coef < 0) + coef = -(int)cbrt_tab[-coef]; + else + coef = (int)cbrt_tab[coef]; + coefs[i] = coef; + } +} + +static void subband_scale(int *dst, int *src, int scale, int offset, int len) +{ + int ssign = scale < 0 ? 
-1 : 1; + int s = FFABS(scale); + unsigned int round; + int i, out, c = exp2tab[s & 3]; + + s = offset - (s >> 2); + + if (s > 0) { + round = 1 << (s-1); + for (i=0; i<len; i++) { + out = (int)(((int64_t)src[i] * c) >> 32); + dst[i] = ((int)(out+round) >> s) * ssign; + } + } + else { + s = s + 32; + round = 1 << (s-1); + for (i=0; i<len; i++) { + out = (int)((int64_t)((int64_t)src[i] * c + round) >> s); + dst[i] = out * ssign; + } + } +} + +static void noise_scale(int *coefs, int scale, int band_energy, int len) +{ + int ssign = scale < 0 ? -1 : 1; + int s = FFABS(scale); + unsigned int round; + int i, out, c = exp2tab[s & 3]; + int nlz = 0; + + while (band_energy > 0x7fff) { + band_energy >>= 1; + nlz++; + } + c /= band_energy; + s = 21 + nlz - (s >> 2); + + if (s > 0) { + round = 1 << (s-1); + for (i=0; i<len; i++) { + out = (int)(((int64_t)coefs[i] * c) >> 32); + coefs[i] = ((int)(out+round) >> s) * ssign; + } + } + else { + s = s + 32; + round = 1 << (s-1); + for (i=0; i<len; i++) { + out = (int)((int64_t)((int64_t)coefs[i] * c + round) >> s); + coefs[i] = out * ssign; + } + } +} + +static av_always_inline SoftFloat flt16_round(SoftFloat pf) +{ + SoftFloat tmp; + int s; + + tmp.exp = pf.exp; + s = pf.mant >> 31; + tmp.mant = (pf.mant ^ s) - s; + tmp.mant = (tmp.mant + 0x00200000U) & 0xFFC00000U; + tmp.mant = (tmp.mant ^ s) - s; + + return tmp; +} + +static av_always_inline SoftFloat flt16_even(SoftFloat pf) +{ + SoftFloat tmp; + int s; + + tmp.exp = pf.exp; + s = pf.mant >> 31; + tmp.mant = (pf.mant ^ s) - s; + tmp.mant = (tmp.mant + 0x001FFFFFU + (tmp.mant & 0x00400000U >> 16)) & 0xFFC00000U; + tmp.mant = (tmp.mant ^ s) - s; + + return tmp; +} + +static av_always_inline SoftFloat flt16_trunc(SoftFloat pf) +{ + SoftFloat pun; + int s; + + pun.exp = pf.exp; + s = pf.mant >> 31; + pun.mant = (pf.mant ^ s) - s; + pun.mant = pun.mant & 0xFFC00000U; + pun.mant = (pun.mant ^ s) - s; + + return pun; +} + +static av_always_inline void predict(PredictorState *ps, int 
*coef, + int output_enable) +{ + const SoftFloat a = { 1023410176, 0 }; // 61.0 / 64 + const SoftFloat alpha = { 973078528, 0 }; // 29.0 / 32 + SoftFloat e0, e1; + SoftFloat pv; + SoftFloat k1, k2; + SoftFloat r0 = ps->r0, r1 = ps->r1; + SoftFloat cor0 = ps->cor0, cor1 = ps->cor1; + SoftFloat var0 = ps->var0, var1 = ps->var1; + SoftFloat tmp; + + if (var0.exp > 1 || (var0.exp == 1 && var0.mant > 0x20000000)) { + tmp = av_recip_sf(var0); + k1 = av_mul_sf(cor0, flt16_even(av_mul_sf(a, tmp))); + } + else { + k1.mant = 0; + k1.exp = 0; + } + + if (var1.exp > 1 || (var1.exp == 1 && var1.mant > 0x20000000)) { + tmp = av_recip_sf(var1); + k2 = av_mul_sf(cor1, flt16_even(av_mul_sf(a, tmp))); + } + else { + k2.mant = 0; + k2.exp = 0; + } + + tmp = av_mul_sf(k1, r0); + pv = flt16_round(av_add_sf(tmp, av_mul_sf(k2, r1))); + if (output_enable) { + int shift = 28 - pv.exp; + + if (shift < 31) + *coef += (pv.mant + (1 << (shift - 1))) >> shift; + } + + e0 = av_int2sf(*coef, 28); + e1 = av_sub_sf(e0, tmp); + + ps->cor1 = flt16_trunc(av_add_sf(av_mul_sf(alpha, cor1), av_mul_sf(r1, e1))); + tmp = av_add_sf(av_mul_sf(r1, r1), av_mul_sf(e1, e1)); + tmp.exp--; + ps->var1 = flt16_trunc(av_add_sf(av_mul_sf(alpha, var1), tmp)); + ps->cor0 = flt16_trunc(av_add_sf(av_mul_sf(alpha, cor0), av_mul_sf(r0, e0))); + tmp = av_add_sf(av_mul_sf(r0, r0), av_mul_sf(e0, e0)); + tmp.exp--; + ps->var0 = flt16_trunc(av_add_sf(av_mul_sf(alpha, var0), tmp)); + + ps->r1 = flt16_trunc(av_mul_sf(a, av_sub_sf(r0, av_mul_sf(k1, e0)))); + ps->r0 = flt16_trunc(av_mul_sf(a, e0)); +} + + +static const int cce_scale_fixed[8] = { + Q30(1.0), //2^(0/8) + Q30(1.0905077327), //2^(1/8) + Q30(1.1892071150), //2^(2/8) + Q30(1.2968395547), //2^(3/8) + Q30(1.4142135624), //2^(4/8) + Q30(1.5422108254), //2^(5/8) + Q30(1.6817928305), //2^(6/8) + Q30(1.8340080864), //2^(7/8) +}; + +/** + * Apply dependent channel coupling (applied before IMDCT). 
+ * + * @param index index into coupling gain array + */ +static void apply_dependent_coupling_fixed(AACContext *ac, + SingleChannelElement *target, + ChannelElement *cce, int index) +{ + IndividualChannelStream *ics = &cce->ch[0].ics; + const uint16_t *offsets = ics->swb_offset; + int *dest = target->coeffs; + const int *src = cce->ch[0].coeffs; + int g, i, group, k, idx = 0; + if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) { + av_log(ac->avctx, AV_LOG_ERROR, + "Dependent coupling is not supported together with LTP\n"); + return; + } + for (g = 0; g < ics->num_window_groups; g++) { + for (i = 0; i < ics->max_sfb; i++, idx++) { + if (cce->ch[0].band_type[idx] != ZERO_BT) { + const int gain = cce->coup.gain[index][idx]; + int shift, round, c, tmp; + + if (gain < 0) { + c = -cce_scale_fixed[-gain & 7]; + shift = (-gain-1024) >> 3; + } + else { + c = cce_scale_fixed[gain & 7]; + shift = (gain-1024) >> 3; + } + + if (shift < 0) { + shift = -shift; + round = 1 << (shift - 1); + + for (group = 0; group < ics->group_len[g]; group++) { + for (k = offsets[i]; k < offsets[i + 1]; k++) { + tmp = (int)(((int64_t)src[group * 128 + k] * c + \ + (int64_t)0x1000000000) >> 37); + dest[group * 128 + k] += (tmp + round) >> shift; + } + } + } + else { + for (group = 0; group < ics->group_len[g]; group++) { + for (k = offsets[i]; k < offsets[i + 1]; k++) { + tmp = (int)(((int64_t)src[group * 128 + k] * c + \ + (int64_t)0x1000000000) >> 37); + dest[group * 128 + k] += tmp << shift; + } + } + } + } + } + dest += ics->group_len[g] * 128; + src += ics->group_len[g] * 128; + } +} + +/** + * Apply independent channel coupling (applied after IMDCT). 
+ * + * @param index index into coupling gain array + */ +static void apply_independent_coupling_fixed(AACContext *ac, + SingleChannelElement *target, + ChannelElement *cce, int index) +{ + int i, c, shift, round, tmp; + const int gain = cce->coup.gain[index][0]; + const int *src = cce->ch[0].ret; + int *dest = target->ret; + const int len = 1024 << (ac->oc[1].m4ac.sbr == 1); + + c = cce_scale_fixed[gain & 7]; + shift = (gain-1024) >> 3; + if (shift < 0) { + shift = -shift; + round = 1 << (shift - 1); + + for (i = 0; i < len; i++) { + tmp = (int)(((int64_t)src[i] * c + (int64_t)0x1000000000) >> 37); + dest[i] += (tmp + round) >> shift; + } + } + else { + for (i = 0; i < len; i++) { + tmp = (int)(((int64_t)src[i] * c + (int64_t)0x1000000000) >> 37); + dest[i] += tmp << shift; + } + } +} + +#include "aacdec_template.c" + +AVCodec ff_aac_fixed_decoder = { + .name = "aac_fixed", + .type = AVMEDIA_TYPE_AUDIO, + .id = AV_CODEC_ID_AAC, + .priv_data_size = sizeof(AACContext), + .init = aac_decode_init, + .close = aac_decode_close, + .decode = aac_decode_frame, + .long_name = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"), + .sample_fmts = (const enum AVSampleFormat[]) { + AV_SAMPLE_FMT_S32P, AV_SAMPLE_FMT_NONE + }, + .capabilities = CODEC_CAP_CHANNEL_CONF | CODEC_CAP_DR1, + .channel_layouts = aac_channel_layout, + .flush = flush, +}; diff --git a/libavcodec/aacdec_template.c b/libavcodec/aacdec_template.c index e14825a..6787ed0 100644 --- a/libavcodec/aacdec_template.c +++ b/libavcodec/aacdec_template.c @@ -132,7 +132,7 @@ static av_cold int che_configure(AACContext *ac, if (!ac->che[type][id]) { if (!(ac->che[type][id] = av_mallocz(sizeof(ChannelElement)))) return AVERROR(ENOMEM); - ff_aac_sbr_ctx_init(ac, &ac->che[type][id]->sbr); + AAC_RENAME(ff_aac_sbr_ctx_init)(ac, &ac->che[type][id]->sbr); } if (type != TYPE_CCE) { if (*channels >= MAX_CHANNELS - (type == TYPE_CPE || (type == TYPE_SCE && ac->oc[1].m4ac.ps == 1))) { @@ -147,7 +147,7 @@ static av_cold int 
che_configure(AACContext *ac, } } else { if (ac->che[type][id]) - ff_aac_sbr_ctx_close(&ac->che[type][id]->sbr); + AAC_RENAME(ff_aac_sbr_ctx_close)(&ac->che[type][id]->sbr); av_freep(&ac->che[type][id]); } return 0; @@ -181,7 +181,7 @@ static int frame_configure_elements(AVCodecContext *avctx) /* map output channel pointers to AVFrame data */ for (ch = 0; ch < avctx->channels; ch++) { if (ac->output_element[ch]) - ac->output_element[ch]->ret = (float *)ac->frame->extended_data[ch]; + ac->output_element[ch]->ret = (INTFLOAT *)ac->frame->extended_data[ch]; } return 0; @@ -1043,8 +1043,11 @@ static av_cold int aac_decode_init(AVCodecContext *avctx) ac->oc[1].m4ac.sample_rate = avctx->sample_rate; aacdec_init(ac); - +#if USE_FIXED + avctx->sample_fmt = AV_SAMPLE_FMT_S32P; +#else avctx->sample_fmt = AV_SAMPLE_FMT_FLTP; +#endif /* USE_FIXED */ if (avctx->extradata_size > 0) { if ((ret = decode_audio_specific_config(ac, ac->avctx, &ac->oc[1].m4ac, @@ -1099,10 +1102,15 @@ static av_cold int aac_decode_init(AVCodecContext *avctx) AAC_INIT_VLC_STATIC( 9, 366); AAC_INIT_VLC_STATIC(10, 462); - ff_aac_sbr_init(); + AAC_RENAME(ff_aac_sbr_init)(); ff_fmt_convert_init(&ac->fmt_conv, avctx); + +#if USE_FIXED + ac->fdsp = avpriv_alloc_fixed_dsp(avctx->flags & CODEC_FLAG_BITEXACT); +#else avpriv_float_dsp_init(&ac->fdsp, avctx->flags & CODEC_FLAG_BITEXACT); +#endif /* USE_FIXED */ ac->random_state = 0x1f2e3d4c; @@ -1118,18 +1126,18 @@ static av_cold int aac_decode_init(AVCodecContext *avctx) sizeof(ff_aac_scalefactor_code[0]), 352); - ff_mdct_init(&ac->mdct, 11, 1, 1.0 / (32768.0 * 1024.0)); - ff_mdct_init(&ac->mdct_ld, 10, 1, 1.0 / (32768.0 * 512.0)); - ff_mdct_init(&ac->mdct_small, 8, 1, 1.0 / (32768.0 * 128.0)); - ff_mdct_init(&ac->mdct_ltp, 11, 0, -2.0 * 32768.0); + AAC_RENAME_32(ff_mdct_init)(&ac->mdct, 11, 1, 1.0 / RANGE15(1024.0)); + AAC_RENAME_32(ff_mdct_init)(&ac->mdct_ld, 10, 1, 1.0 / RANGE15(512.0)); + AAC_RENAME_32(ff_mdct_init)(&ac->mdct_small, 8, 1, 1.0 / 
RANGE15(128.0)); + AAC_RENAME_32(ff_mdct_init)(&ac->mdct_ltp, 11, 0, RANGE15(-2.0)); // window initialization - ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024); - ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128); - ff_init_ff_sine_windows(10); - ff_init_ff_sine_windows( 9); - ff_init_ff_sine_windows( 7); + AAC_RENAME(ff_kbd_window_init)(AAC_RENAME(ff_aac_kbd_long_1024), 4.0, 1024); + AAC_RENAME(ff_kbd_window_init)(AAC_RENAME(ff_aac_kbd_short_128), 6.0, 128); + AAC_RENAME(ff_init_ff_sine_windows)(10); + AAC_RENAME(ff_init_ff_sine_windows)( 9); + AAC_RENAME(ff_init_ff_sine_windows)( 7); - cbrt_tableinit(); + AAC_RENAME(cbrt_tableinit)(); return 0; } @@ -1341,7 +1349,7 @@ static int decode_band_types(AACContext *ac, enum BandType band_type[120], * * @return Returns error status. 0 - OK, !0 - error */ -static int decode_scalefactors(AACContext *ac, float sf[120], GetBitContext *gb, +static int decode_scalefactors(AACContext *ac, INTFLOAT sf[120], GetBitContext *gb, unsigned int global_gain, IndividualChannelStream *ics, enum BandType band_type[120], @@ -1356,7 +1364,7 @@ static int decode_scalefactors(AACContext *ac, float sf[120], GetBitContext *gb, int run_end = band_type_run_end[idx]; if (band_type[idx] == ZERO_BT) { for (; i < run_end; i++, idx++) - sf[idx] = 0.0; + sf[idx] = FIXR(0.); } else if ((band_type[idx] == INTENSITY_BT) || (band_type[idx] == INTENSITY_BT2)) { for (; i < run_end; i++, idx++) { @@ -1368,7 +1376,11 @@ static int decode_scalefactors(AACContext *ac, float sf[120], GetBitContext *gb, "Clipped intensity stereo position (%d -> %d)", offset[2], clipped_offset); } +#if USE_FIXED + sf[idx] = 100 - clipped_offset; +#else sf[idx] = ff_aac_pow2sf_tab[-clipped_offset + POW_SF2_ZERO]; +#endif /* USE_FIXED */ } } else if (band_type[idx] == NOISE_BT) { for (; i < run_end; i++, idx++) { @@ -1383,7 +1395,11 @@ static int decode_scalefactors(AACContext *ac, float sf[120], GetBitContext *gb, "Clipped noise gain (%d -> %d)", offset[1], clipped_offset); } 
+#if USE_FIXED + sf[idx] = -(100 + clipped_offset); +#else sf[idx] = -ff_aac_pow2sf_tab[clipped_offset + POW_SF2_ZERO]; +#endif /* USE_FIXED */ } } else { for (; i < run_end; i++, idx++) { @@ -1393,7 +1409,11 @@ static int decode_scalefactors(AACContext *ac, float sf[120], GetBitContext *gb, "Scalefactor (%d) out of range.\n", offset[0]); return AVERROR_INVALIDDATA; } +#if USE_FIXED + sf[idx] = -offset[0]; +#else sf[idx] = -ff_aac_pow2sf_tab[offset[0] - 100 + POW_SF2_ZERO]; +#endif /* USE_FIXED */ } } } @@ -1500,8 +1520,8 @@ static void decode_mid_side_stereo(ChannelElement *cpe, GetBitContext *gb, * * @return Returns error status. 0 - OK, !0 - error */ -static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024], - GetBitContext *gb, const float sf[120], +static int decode_spectrum_and_dequant(AACContext *ac, INTFLOAT coef[1024], + GetBitContext *gb, const INTFLOAT sf[120], int pulse_present, const Pulse *pulse, const IndividualChannelStream *ics, enum BandType band_type[120]) @@ -1509,49 +1529,63 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024], int i, k, g, idx = 0; const int c = 1024 / ics->num_windows; const uint16_t *offsets = ics->swb_offset; - float *coef_base = coef; + INTFLOAT *coef_base = coef; for (g = 0; g < ics->num_windows; g++) memset(coef + g * 128 + offsets[ics->max_sfb], 0, - sizeof(float) * (c - offsets[ics->max_sfb])); + sizeof(INTFLOAT) * (c - offsets[ics->max_sfb])); for (g = 0; g < ics->num_window_groups; g++) { unsigned g_len = ics->group_len[g]; for (i = 0; i < ics->max_sfb; i++, idx++) { const unsigned cbt_m1 = band_type[idx] - 1; - float *cfo = coef + offsets[i]; + INTFLOAT *cfo = coef + offsets[i]; int off_len = offsets[i + 1] - offsets[i]; int group; if (cbt_m1 >= INTENSITY_BT2 - 1) { - for (group = 0; group < g_len; group++, cfo+=128) { - memset(cfo, 0, off_len * sizeof(float)); + for (group = 0; group < (AAC_SIGNE)g_len; group++, cfo+=128) { + memset(cfo, 0, off_len * sizeof(INTFLOAT)); } } else 
if (cbt_m1 == NOISE_BT - 1) { - for (group = 0; group < g_len; group++, cfo+=128) { + for (group = 0; group < (AAC_SIGNE)g_len; group++, cfo+=128) { +#if !USE_FIXED float scale; - float band_energy; +#endif /* !USE_FIXED */ + INTFLOAT band_energy; for (k = 0; k < off_len; k++) { ac->random_state = lcg_random(ac->random_state); +#if USE_FIXED + cfo[k] = ac->random_state >> 3; +#else cfo[k] = ac->random_state; +#endif /* USE_FIXED */ } +#if USE_FIXED + band_energy = ac->fdsp->scalarproduct_fixed(cfo, cfo, off_len); + band_energy = fixed_sqrt(band_energy, 31); + noise_scale(cfo, sf[idx], band_energy, off_len); +#else band_energy = ac->fdsp.scalarproduct_float(cfo, cfo, off_len); scale = sf[idx] / sqrtf(band_energy); ac->fdsp.vector_fmul_scalar(cfo, cfo, scale, off_len); +#endif /* USE_FIXED */ } } else { +#if !USE_FIXED const float *vq = ff_aac_codebook_vector_vals[cbt_m1]; +#endif /* !USE_FIXED */ const uint16_t *cb_vector_idx = ff_aac_codebook_vector_idx[cbt_m1]; VLC_TYPE (*vlc_tab)[2] = vlc_spectral[cbt_m1].table; OPEN_READER(re, gb); switch (cbt_m1 >> 1) { case 0: - for (group = 0; group < g_len; group++, cfo+=128) { - float *cf = cfo; + for (group = 0; group < (AAC_SIGNE)g_len; group++, cfo+=128) { + INTFLOAT *cf = cfo; int len = off_len; do { @@ -1561,14 +1595,18 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024], UPDATE_CACHE(re, gb); GET_VLC(code, re, gb, vlc_tab, 8, 2); cb_idx = cb_vector_idx[code]; +#if USE_FIXED + cf = DEC_SQUAD(cf, cb_idx); +#else cf = VMUL4(cf, vq, cb_idx, sf + idx); +#endif /* USE_FIXED */ } while (len -= 4); } break; case 1: - for (group = 0; group < g_len; group++, cfo+=128) { - float *cf = cfo; + for (group = 0; group < (AAC_SIGNE)g_len; group++, cfo+=128) { + INTFLOAT *cf = cfo; int len = off_len; do { @@ -1583,14 +1621,18 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024], nnz = cb_idx >> 8 & 15; bits = nnz ? 
GET_CACHE(re, gb) : 0; LAST_SKIP_BITS(re, gb, nnz); +#if USE_FIXED + cf = DEC_UQUAD(cf, cb_idx, bits); +#else cf = VMUL4S(cf, vq, cb_idx, bits, sf + idx); +#endif /* USE_FIXED */ } while (len -= 4); } break; case 2: - for (group = 0; group < g_len; group++, cfo+=128) { - float *cf = cfo; + for (group = 0; group < (AAC_SIGNE)g_len; group++, cfo+=128) { + INTFLOAT *cf = cfo; int len = off_len; do { @@ -1600,15 +1642,19 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024], UPDATE_CACHE(re, gb); GET_VLC(code, re, gb, vlc_tab, 8, 2); cb_idx = cb_vector_idx[code]; +#if USE_FIXED + cf = DEC_SPAIR(cf, cb_idx); +#else cf = VMUL2(cf, vq, cb_idx, sf + idx); +#endif /* USE_FIXED */ } while (len -= 2); } break; case 3: case 4: - for (group = 0; group < g_len; group++, cfo+=128) { - float *cf = cfo; + for (group = 0; group < (AAC_SIGNE)g_len; group++, cfo+=128) { + INTFLOAT *cf = cfo; int len = off_len; do { @@ -1623,15 +1669,24 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024], nnz = cb_idx >> 8 & 15; sign = nnz ? 
SHOW_UBITS(re, gb, nnz) << (cb_idx >> 12) : 0; LAST_SKIP_BITS(re, gb, nnz); +#if USE_FIXED + cf = DEC_UPAIR(cf, cb_idx, sign); +#else cf = VMUL2S(cf, vq, cb_idx, sign, sf + idx); +#endif /* USE_FIXED */ } while (len -= 2); } break; default: - for (group = 0; group < g_len; group++, cfo+=128) { + for (group = 0; group < (AAC_SIGNE)g_len; group++, cfo+=128) { +#if USE_FIXED + int *icf = cfo; + int v; +#else float *cf = cfo; uint32_t *icf = (uint32_t *) cf; +#endif /* USE_FIXED */ int len = off_len; do { @@ -1675,18 +1730,33 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024], b += 4; n = (1 << b) + SHOW_UBITS(re, gb, b); LAST_SKIP_BITS(re, gb, b); +#if USE_FIXED + v = n; + if (bits & 1U<<31) + v = -v; + *icf++ = v; +#else *icf++ = cbrt_tab[n] | (bits & 1U<<31); +#endif /* USE_FIXED */ bits <<= 1; } else { +#if USE_FIXED + v = cb_idx & 15; + if (bits & 1U<<31) + v = -v; + *icf++ = v; +#else unsigned v = ((const uint32_t*)vq)[cb_idx & 15]; *icf++ = (bits & 1U<<31) | v; +#endif /* USE_FIXED */ bits <<= !!v; } cb_idx >>= 4; } } while (len -= 2); - +#if !USE_FIXED ac->fdsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len); +#endif /* !USE_FIXED */ } } @@ -1699,19 +1769,48 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024], if (pulse_present) { idx = 0; for (i = 0; i < pulse->num_pulse; i++) { - float co = coef_base[ pulse->pos[i] ]; + INTFLOAT co = coef_base[ pulse->pos[i] ]; while (offsets[idx + 1] <= pulse->pos[i]) idx++; if (band_type[idx] != NOISE_BT && sf[idx]) { - float ico = -pulse->amp[i]; + INTFLOAT ico = -pulse->amp[i]; +#if USE_FIXED + if (co) { + ico = co + (co > 0 ? -ico : ico); + } + coef_base[ pulse->pos[i] ] = ico; +#else if (co) { co /= sf[idx]; ico = co / sqrtf(sqrtf(fabsf(co))) + (co > 0 ? 
-ico : ico); } coef_base[ pulse->pos[i] ] = cbrtf(fabsf(ico)) * ico * sf[idx]; +#endif /* USE_FIXED */ } } } +#if USE_FIXED + coef = coef_base; + idx = 0; + for (g = 0; g < ics->num_window_groups; g++) { + unsigned g_len = ics->group_len[g]; + + for (i = 0; i < ics->max_sfb; i++, idx++) { + const unsigned cbt_m1 = band_type[idx] - 1; + int *cfo = coef + offsets[i]; + int off_len = offsets[i + 1] - offsets[i]; + int group; + + if (cbt_m1 < NOISE_BT - 1) { + for (group = 0; group < (int)g_len; group++, cfo+=128) { + ac->vector_pow43(cfo, off_len); + ac->subband_scale(cfo, cfo, sf[idx], 34, off_len); + } + } + } + coef += g_len << 7; + } +#endif /* USE_FIXED */ return 0; } @@ -1760,7 +1859,7 @@ static int decode_ics(AACContext *ac, SingleChannelElement *sce, Pulse pulse; TemporalNoiseShaping *tns = &sce->tns; IndividualChannelStream *ics = &sce->ics; - float *out = sce->coeffs; + INTFLOAT *out = sce->coeffs; int global_gain, eld_syntax, er_syntax, pulse_present = 0; int ret; @@ -1834,8 +1933,8 @@ static int decode_ics(AACContext *ac, SingleChannelElement *sce, static void apply_mid_side_stereo(AACContext *ac, ChannelElement *cpe) { const IndividualChannelStream *ics = &cpe->ch[0].ics; - float *ch0 = cpe->ch[0].coeffs; - float *ch1 = cpe->ch[1].coeffs; + INTFLOAT *ch0 = cpe->ch[0].coeffs; + INTFLOAT *ch1 = cpe->ch[1].coeffs; int g, i, group, idx = 0; const uint16_t *offsets = ics->swb_offset; for (g = 0; g < ics->num_window_groups; g++) { @@ -1843,10 +1942,17 @@ static void apply_mid_side_stereo(AACContext *ac, ChannelElement *cpe) if (cpe->ms_mask[idx] && cpe->ch[0].band_type[idx] < NOISE_BT && cpe->ch[1].band_type[idx] < NOISE_BT) { +#if USE_FIXED + for (group = 0; group < ics->group_len[g]; group++) { + ac->fdsp->butterflies_fixed(ch0 + group * 128 + offsets[i], + ch1 + group * 128 + offsets[i], + offsets[i+1] - offsets[i]); +#else for (group = 0; group < ics->group_len[g]; group++) { ac->fdsp.butterflies_float(ch0 + group * 128 + offsets[i], ch1 + group * 128 + 
offsets[i], offsets[i+1] - offsets[i]); +#endif /* USE_FIXED */ } } } @@ -1867,11 +1973,11 @@ static void apply_intensity_stereo(AACContext *ac, { const IndividualChannelStream *ics = &cpe->ch[1].ics; SingleChannelElement *sce1 = &cpe->ch[1]; - float *coef0 = cpe->ch[0].coeffs, *coef1 = cpe->ch[1].coeffs; + INTFLOAT *coef0 = cpe->ch[0].coeffs, *coef1 = cpe->ch[1].coeffs; const uint16_t *offsets = ics->swb_offset; int g, group, i, idx = 0; int c; - float scale; + INTFLOAT scale; for (g = 0; g < ics->num_window_groups; g++) { for (i = 0; i < ics->max_sfb;) { if (sce1->band_type[idx] == INTENSITY_BT || @@ -1883,10 +1989,18 @@ static void apply_intensity_stereo(AACContext *ac, c *= 1 - 2 * cpe->ms_mask[idx]; scale = c * sce1->sf[idx]; for (group = 0; group < ics->group_len[g]; group++) +#if USE_FIXED + ac->subband_scale(coef1 + group * 128 + offsets[i], + coef0 + group * 128 + offsets[i], + scale, + 23, + offsets[i + 1] - offsets[i]); +#else ac->fdsp.vector_fmul_scalar(coef1 + group * 128 + offsets[i], coef0 + group * 128 + offsets[i], scale, offsets[i + 1] - offsets[i]); +#endif /* USE_FIXED */ } } else { int bt_run_end = sce1->band_type_run_end[idx]; @@ -1962,7 +2076,7 @@ static int decode_cce(AACContext *ac, GetBitContext *gb, ChannelElement *che) int num_gain = 0; int c, g, sfb, ret; int sign; - float scale; + INTFLOAT scale; SingleChannelElement *sce = &che->ch[0]; ChannelCoupling *coup = &che->coup; @@ -1982,7 +2096,7 @@ static int decode_cce(AACContext *ac, GetBitContext *gb, ChannelElement *che) coup->coupling_point += get_bits1(gb) || (coup->coupling_point >> 1); sign = get_bits(gb, 1); - scale = cce_scale[get_bits(gb, 2)]; + scale = AAC_RENAME(cce_scale)[get_bits(gb, 2)]; if ((ret = decode_ics(ac, sce, gb, 0, 0))) return ret; @@ -1991,11 +2105,11 @@ static int decode_cce(AACContext *ac, GetBitContext *gb, ChannelElement *che) int idx = 0; int cge = 1; int gain = 0; - float gain_cache = 1.0; + INTFLOAT gain_cache = FIXR10(1.); if (c) { cge = 
coup->coupling_point == AFTER_IMDCT ? 1 : get_bits1(gb); gain = cge ? get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60: 0; - gain_cache = powf(scale, -gain); + gain_cache = GET_GAIN(scale, gain); } if (coup->coupling_point == AFTER_IMDCT) { coup->gain[c][0] = gain_cache; @@ -2012,7 +2126,7 @@ static int decode_cce(AACContext *ac, GetBitContext *gb, ChannelElement *che) s -= 2 * (t & 0x1); t >>= 1; } - gain_cache = powf(scale, -t) * s; + gain_cache = GET_GAIN(scale, t) * s; } } coup->gain[c][idx] = gain_cache; @@ -2163,7 +2277,7 @@ static int decode_extension_payload(AACContext *ac, GetBitContext *gb, int cnt, ac->oc[1].m4ac.sbr = 1; ac->avctx->profile = FF_PROFILE_AAC_HE; } - res = ff_decode_sbr_extension(ac, &che->sbr, gb, crc_flag, cnt, elem_type); + res = AAC_RENAME(ff_decode_sbr_extension)(ac, &che->sbr, gb, crc_flag, cnt, elem_type); break; case EXT_DYNAMIC_RANGE: res = decode_dynamic_range(&ac->che_drc, gb); @@ -2186,14 +2300,14 @@ static int decode_extension_payload(AACContext *ac, GetBitContext *gb, int cnt, * @param decode 1 if tool is used normally, 0 if tool is used in LTP. 
* @param coef spectral coefficients */ -static void apply_tns(float coef[1024], TemporalNoiseShaping *tns, +static void apply_tns(INTFLOAT coef[1024], TemporalNoiseShaping *tns, IndividualChannelStream *ics, int decode) { const int mmm = FFMIN(ics->tns_max_bands, ics->max_sfb); int w, filt, m, i; int bottom, top, order, start, end, size, inc; - float lpc[TNS_MAX_ORDER]; - float tmp[TNS_MAX_ORDER+1]; + INTFLOAT lpc[TNS_MAX_ORDER]; + INTFLOAT tmp[TNS_MAX_ORDER+1]; for (w = 0; w < ics->num_windows; w++) { bottom = ics->num_swb; @@ -2205,7 +2319,7 @@ static void apply_tns(float coef[1024], TemporalNoiseShaping *tns, continue; // tns_decode_coef - compute_lpc_coefs(tns->coef[w][filt], order, lpc, 0, 0, 0); + AAC_RENAME(compute_lpc_coefs)(tns->coef[w][filt], order, lpc, 0, 0, 0); start = ics->swb_offset[FFMIN(bottom, mmm)]; end = ics->swb_offset[FFMIN( top, mmm)]; @@ -2223,13 +2337,13 @@ static void apply_tns(float coef[1024], TemporalNoiseShaping *tns, // ar filter for (m = 0; m < size; m++, start += inc) for (i = 1; i <= FFMIN(m, order); i++) - coef[start] -= coef[start - i * inc] * lpc[i - 1]; + coef[start] -= AAC_MUL26(coef[start - i * inc], lpc[i - 1]); } else { // ma filter for (m = 0; m < size; m++, start += inc) { tmp[0] = coef[start]; for (i = 1; i <= FFMIN(m, order); i++) - coef[start] += tmp[i] * lpc[i - 1]; + coef[start] += AAC_MUL26(tmp[i], lpc[i - 1]); for (i = order; i > 0; i--) tmp[i] = tmp[i - 1]; } @@ -2242,26 +2356,41 @@ static void apply_tns(float coef[1024], TemporalNoiseShaping *tns, * Apply windowing and MDCT to obtain the spectral * coefficient from the predicted sample by LTP. */ -static void windowing_and_mdct_ltp(AACContext *ac, float *out, - float *in, IndividualChannelStream *ics) +static void windowing_and_mdct_ltp(AACContext *ac, INTFLOAT *out, + INTFLOAT *in, IndividualChannelStream *ics) { - const float *lwindow = ics->use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024; - const float *swindow = ics->use_kb_window[0] ? 
ff_aac_kbd_short_128 : ff_sine_128; - const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024; - const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128; - + const INTFLOAT *lwindow = ics->use_kb_window[0] ? AAC_RENAME(ff_aac_kbd_long_1024) : AAC_RENAME(ff_sine_1024); + const INTFLOAT *swindow = ics->use_kb_window[0] ? AAC_RENAME(ff_aac_kbd_short_128) : AAC_RENAME(ff_sine_128); + const INTFLOAT *lwindow_prev = ics->use_kb_window[1] ? AAC_RENAME(ff_aac_kbd_long_1024) : AAC_RENAME(ff_sine_1024); + const INTFLOAT *swindow_prev = ics->use_kb_window[1] ? AAC_RENAME(ff_aac_kbd_short_128) : AAC_RENAME(ff_sine_128); + +#if USE_FIXED + if (ics->window_sequence[0] != LONG_STOP_SEQUENCE) { + ac->fdsp->vector_fmul(in, in, lwindow_prev, 1024); + } else { + memset(in, 0, 448 * sizeof(INTFLOAT)); + ac->fdsp->vector_fmul(in + 448, in + 448, swindow_prev, 128); + } + if (ics->window_sequence[0] != LONG_START_SEQUENCE) { + ac->fdsp->vector_fmul_reverse(in + 1024, in + 1024, lwindow, 1024); + } else { + ac->fdsp->vector_fmul_reverse(in + 1024 + 448, in + 1024 + 448, swindow, 128); + memset(in + 1024 + 576, 0, 448 * sizeof(INTFLOAT)); + } +#else if (ics->window_sequence[0] != LONG_STOP_SEQUENCE) { ac->fdsp.vector_fmul(in, in, lwindow_prev, 1024); } else { - memset(in, 0, 448 * sizeof(float)); + memset(in, 0, 448 * sizeof(INTFLOAT)); ac->fdsp.vector_fmul(in + 448, in + 448, swindow_prev, 128); } if (ics->window_sequence[0] != LONG_START_SEQUENCE) { ac->fdsp.vector_fmul_reverse(in + 1024, in + 1024, lwindow, 1024); } else { ac->fdsp.vector_fmul_reverse(in + 1024 + 448, in + 1024 + 448, swindow, 128); - memset(in + 1024 + 576, 0, 448 * sizeof(float)); + memset(in + 1024 + 576, 0, 448 * sizeof(INTFLOAT)); } +#endif /* USE_FIXED */ ac->mdct_ltp.mdct_calc(&ac->mdct_ltp, out, in); } @@ -2275,15 +2404,15 @@ static void apply_ltp(AACContext *ac, SingleChannelElement *sce) int i, sfb; if (sce->ics.window_sequence[0] != 
EIGHT_SHORT_SEQUENCE) { - float *predTime = sce->ret; - float *predFreq = ac->buf_mdct; + INTFLOAT *predTime = sce->ret; + INTFLOAT *predFreq = ac->buf_mdct; int16_t num_samples = 2048; if (ltp->lag < 1024) num_samples = ltp->lag + 1024; for (i = 0; i < num_samples; i++) - predTime[i] = sce->ltp_state[i + 2048 - ltp->lag] * ltp->coef; - memset(&predTime[i], 0, (2048 - i) * sizeof(float)); + predTime[i] = AAC_MUL30(sce->ltp_state[i + 2048 - ltp->lag], ltp->coef); + memset(&predTime[i], 0, (2048 - i) * sizeof(INTFLOAT)); ac->windowing_and_mdct_ltp(ac, predFreq, predTime, &sce->ics); @@ -2303,28 +2432,40 @@ static void apply_ltp(AACContext *ac, SingleChannelElement *sce) static void update_ltp(AACContext *ac, SingleChannelElement *sce) { IndividualChannelStream *ics = &sce->ics; - float *saved = sce->saved; - float *saved_ltp = sce->coeffs; - const float *lwindow = ics->use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024; - const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128; + INTFLOAT *saved = sce->saved; + INTFLOAT *saved_ltp = sce->coeffs; + const INTFLOAT *lwindow = ics->use_kb_window[0] ? AAC_RENAME(ff_aac_kbd_long_1024) : AAC_RENAME(ff_sine_1024); + const INTFLOAT *swindow = ics->use_kb_window[0] ? 
AAC_RENAME(ff_aac_kbd_short_128) : AAC_RENAME(ff_sine_128); int i; if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { - memcpy(saved_ltp, saved, 512 * sizeof(float)); - memset(saved_ltp + 576, 0, 448 * sizeof(float)); + memcpy(saved_ltp, saved, 512 * sizeof(INTFLOAT)); + memset(saved_ltp + 576, 0, 448 * sizeof(INTFLOAT)); +#if USE_FIXED + ac->fdsp->vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, &swindow[64], 64); +#else ac->fdsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, &swindow[64], 64); +#endif /* USE_FIXED */ for (i = 0; i < 64; i++) - saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * swindow[63 - i]; + saved_ltp[i + 512] = AAC_MUL31(ac->buf_mdct[1023 - i], swindow[63 - i]); } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) { - memcpy(saved_ltp, ac->buf_mdct + 512, 448 * sizeof(float)); - memset(saved_ltp + 576, 0, 448 * sizeof(float)); + memcpy(saved_ltp, ac->buf_mdct + 512, 448 * sizeof(INTFLOAT)); + memset(saved_ltp + 576, 0, 448 * sizeof(INTFLOAT)); +#if USE_FIXED + ac->fdsp->vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, &swindow[64], 64); +#else ac->fdsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, &swindow[64], 64); +#endif /* USE_FIXED */ for (i = 0; i < 64; i++) - saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * swindow[63 - i]; + saved_ltp[i + 512] = AAC_MUL31(ac->buf_mdct[1023 - i], swindow[63 - i]); } else { // LONG_STOP or ONLY_LONG +#if USE_FIXED + ac->fdsp->vector_fmul_reverse(saved_ltp, ac->buf_mdct + 512, &lwindow[512], 512); +#else ac->fdsp.vector_fmul_reverse(saved_ltp, ac->buf_mdct + 512, &lwindow[512], 512); +#endif /* USE_FIXED */ for (i = 0; i < 512; i++) - saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * lwindow[511 - i]; + saved_ltp[i + 512] = AAC_MUL31(ac->buf_mdct[1023 - i], lwindow[511 - i]); } memcpy(sce->ltp_state, sce->ltp_state+1024, 1024 * sizeof(*sce->ltp_state)); @@ -2338,22 +2479,27 @@ static void update_ltp(AACContext *ac, SingleChannelElement *sce) static void 
imdct_and_windowing(AACContext *ac, SingleChannelElement *sce) { IndividualChannelStream *ics = &sce->ics; - float *in = sce->coeffs; - float *out = sce->ret; - float *saved = sce->saved; - const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128; - const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024; - const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128; - float *buf = ac->buf_mdct; - float *temp = ac->temp; + INTFLOAT *in = sce->coeffs; + INTFLOAT *out = sce->ret; + INTFLOAT *saved = sce->saved; + const INTFLOAT *swindow = ics->use_kb_window[0] ? AAC_RENAME(ff_aac_kbd_short_128) : AAC_RENAME(ff_sine_128); + const INTFLOAT *lwindow_prev = ics->use_kb_window[1] ? AAC_RENAME(ff_aac_kbd_long_1024) : AAC_RENAME(ff_sine_1024); + const INTFLOAT *swindow_prev = ics->use_kb_window[1] ? AAC_RENAME(ff_aac_kbd_short_128) : AAC_RENAME(ff_sine_128); + INTFLOAT *buf = ac->buf_mdct; + INTFLOAT *temp = ac->temp; int i; // imdct if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { for (i = 0; i < 1024; i += 128) ac->mdct_small.imdct_half(&ac->mdct_small, buf + i, in + i); - } else + } else { ac->mdct.imdct_half(&ac->mdct, buf, in); +#if USE_FIXED + for (i=0; i<1024; i++) + buf[i] = (buf[i] + 4) >> 3; +#endif /* USE_FIXED */ + } /* window overlapping * NOTE: To simplify the overlapping code, all 'meaningless' short to long @@ -2363,70 +2509,106 @@ static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce) */ if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) && (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) { +#if USE_FIXED + ac->fdsp->vector_fmul_window( out, saved, buf, lwindow_prev, 512); +#else ac->fdsp.vector_fmul_window( out, saved, buf, lwindow_prev, 512); +#endif /* USE_FIXED */ } else { - memcpy( out, saved, 448 * sizeof(float)); + memcpy( out, saved, 448 * 
sizeof(INTFLOAT)); if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { +#if USE_FIXED + ac->fdsp->vector_fmul_window(out + 448 + 0*128, saved + 448, buf + 0*128, swindow_prev, 64); + ac->fdsp->vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow, 64); + ac->fdsp->vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow, 64); + ac->fdsp->vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow, 64); + ac->fdsp->vector_fmul_window(temp, buf + 3*128 + 64, buf + 4*128, swindow, 64); +#else ac->fdsp.vector_fmul_window(out + 448 + 0*128, saved + 448, buf + 0*128, swindow_prev, 64); ac->fdsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow, 64); ac->fdsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow, 64); ac->fdsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow, 64); ac->fdsp.vector_fmul_window(temp, buf + 3*128 + 64, buf + 4*128, swindow, 64); - memcpy( out + 448 + 4*128, temp, 64 * sizeof(float)); +#endif /* USE_FIXED */ + memcpy( out + 448 + 4*128, temp, 64 * sizeof(INTFLOAT)); } else { +#if USE_FIXED + ac->fdsp->vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, 64); +#else ac->fdsp.vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, 64); - memcpy( out + 576, buf + 64, 448 * sizeof(float)); +#endif /* USE_FIXED */ + memcpy( out + 576, buf + 64, 448 * sizeof(INTFLOAT)); } } // buffer update if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { - memcpy( saved, temp + 64, 64 * sizeof(float)); + memcpy( saved, temp + 64, 64 * sizeof(INTFLOAT)); +#if USE_FIXED + ac->fdsp->vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, swindow, 64); + ac->fdsp->vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 64); + ac->fdsp->vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 64); +#else ac->fdsp.vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, 
swindow, 64); ac->fdsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 64); ac->fdsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 64); - memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(float)); +#endif + memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(INTFLOAT)); } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) { - memcpy( saved, buf + 512, 448 * sizeof(float)); - memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(float)); + memcpy( saved, buf + 512, 448 * sizeof(INTFLOAT)); + memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(INTFLOAT)); } else { // LONG_STOP or ONLY_LONG - memcpy( saved, buf + 512, 512 * sizeof(float)); + memcpy( saved, buf + 512, 512 * sizeof(INTFLOAT)); } } static void imdct_and_windowing_ld(AACContext *ac, SingleChannelElement *sce) { IndividualChannelStream *ics = &sce->ics; - float *in = sce->coeffs; - float *out = sce->ret; - float *saved = sce->saved; - float *buf = ac->buf_mdct; + INTFLOAT *in = sce->coeffs; + INTFLOAT *out = sce->ret; + INTFLOAT *saved = sce->saved; + INTFLOAT *buf = ac->buf_mdct; // imdct ac->mdct.imdct_half(&ac->mdct_ld, buf, in); +#if USE_FIXED + int i; + for (i = 0; i < 1024; i++) + buf[i] = (buf[i] + 2) >> 2; +#endif /* USE_FIXED */ + // window overlapping if (ics->use_kb_window[1]) { // AAC LD uses a low overlap sine window instead of a KBD window - memcpy(out, saved, 192 * sizeof(float)); + memcpy(out, saved, 192 * sizeof(INTFLOAT)); +#if USE_FIXED + ac->fdsp->vector_fmul_window(out + 192, saved + 192, buf, ff_sine_128_fixed, 64); +#else ac->fdsp.vector_fmul_window(out + 192, saved + 192, buf, ff_sine_128, 64); - memcpy( out + 320, buf + 64, 192 * sizeof(float)); +#endif /* USE_FIXED */ + memcpy( out + 320, buf + 64, 192 * sizeof(INTFLOAT)); } else { +#if USE_FIXED + ac->fdsp->vector_fmul_window(out, saved, buf, ff_sine_512_fixed, 256); +#else ac->fdsp.vector_fmul_window(out, saved, buf, ff_sine_512, 256); +#endif /* USE_FIXED */ } // buffer update 
- memcpy(saved, buf + 256, 256 * sizeof(float)); + memcpy(saved, buf + 256, 256 * sizeof(INTFLOAT)); } static void imdct_and_windowing_eld(AACContext *ac, SingleChannelElement *sce) { - float *in = sce->coeffs; - float *out = sce->ret; - float *saved = sce->saved; - const float *const window = ff_aac_eld_window; - float *buf = ac->buf_mdct; + INTFLOAT *in = sce->coeffs; + INTFLOAT *out = sce->ret; + INTFLOAT *saved = sce->saved; + const INTFLOAT *const window = AAC_RENAME(ff_aac_eld_window); + INTFLOAT *buf = ac->buf_mdct; int i; const int n = 512; const int n2 = n >> 1; @@ -2438,11 +2620,17 @@ static void imdct_and_windowing_eld(AACContext *ac, SingleChannelElement *sce) // International Conference on Audio, Language and Image Processing, ICALIP 2008. // URL: http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=4590245&isnumber=4589950 for (i = 0; i < n2; i+=2) { - float temp; + INTFLOAT temp; temp = in[i ]; in[i ] = -in[n - 1 - i]; in[n - 1 - i] = temp; temp = -in[i + 1]; in[i + 1] = in[n - 2 - i]; in[n - 2 - i] = temp; } ac->mdct.imdct_half(&ac->mdct_ld, buf, in); + +#if USE_FIXED + for (i = 0; i < 1024; i++) + buf[i] = (buf[i] + 1) >> 1; +#endif /* USE_FIXED */ + for (i = 0; i < n; i+=2) { buf[i] = -buf[i]; } @@ -2454,26 +2642,26 @@ static void imdct_and_windowing_eld(AACContext *ac, SingleChannelElement *sce) // The spec says to use samples [0..511] but the reference decoder uses // samples [128..639]. 
for (i = n4; i < n2; i ++) { - out[i - n4] = buf[n2 - 1 - i] * window[i - n4] + - saved[ i + n2] * window[i + n - n4] + - -saved[ n + n2 - 1 - i] * window[i + 2*n - n4] + - -saved[2*n + n2 + i] * window[i + 3*n - n4]; + out[i - n4] = AAC_MUL31( buf[ n2 - 1 - i] , window[i - n4]) + + AAC_MUL31( saved[ i + n2] , window[i + n - n4]) + + AAC_MUL31(-saved[ n + n2 - 1 - i] , window[i + 2*n - n4]) + + AAC_MUL31(-saved[ 2*n + n2 + i] , window[i + 3*n - n4]); } for (i = 0; i < n2; i ++) { - out[n4 + i] = buf[i] * window[i + n2 - n4] + - -saved[ n - 1 - i] * window[i + n2 + n - n4] + - -saved[ n + i] * window[i + n2 + 2*n - n4] + - saved[2*n + n - 1 - i] * window[i + n2 + 3*n - n4]; + out[n4 + i] = AAC_MUL31( buf[ i] , window[i + n2 - n4]) + + AAC_MUL31(-saved[ n - 1 - i] , window[i + n2 + n - n4]) + + AAC_MUL31(-saved[ n + i] , window[i + n2 + 2*n - n4]) + + AAC_MUL31( saved[2*n + n - 1 - i] , window[i + n2 + 3*n - n4]); } for (i = 0; i < n4; i ++) { - out[n2 + n4 + i] = buf[ i + n2] * window[i + n - n4] + - -saved[ n2 - 1 - i] * window[i + 2*n - n4] + - -saved[ n + n2 + i] * window[i + 3*n - n4]; + out[n2 + n4 + i] = AAC_MUL31( buf[ i + n2] , window[i + n - n4]) + + AAC_MUL31(-saved[n2 - 1 - i] , window[i + 2*n - n4]) + + AAC_MUL31(-saved[n + n2 + i] , window[i + 3*n - n4]); } // buffer update - memmove(saved + n, saved, 2 * n * sizeof(float)); - memcpy( saved, buf, n * sizeof(float)); + memmove(saved + n, saved, 2 * n * sizeof(INTFLOAT)); + memcpy( saved, buf, n * sizeof(INTFLOAT)); } /** @@ -2533,7 +2721,7 @@ static void spectral_to_sample(AACContext *ac) ChannelElement *che = ac->che[type][i]; if (che) { if (type <= TYPE_CPE) - apply_channel_coupling(ac, che, type, i, BEFORE_TNS, apply_dependent_coupling); + apply_channel_coupling(ac, che, type, i, BEFORE_TNS, AAC_RENAME(apply_dependent_coupling)); if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) { if (che->ch[0].ics.predictor_present) { if (che->ch[0].ics.ltp.present) @@ -2547,7 +2735,7 @@ static void 
spectral_to_sample(AACContext *ac) if (che->ch[1].tns.present) ac->apply_tns(che->ch[1].coeffs, &che->ch[1].tns, &che->ch[1].ics, 1); if (type <= TYPE_CPE) - apply_channel_coupling(ac, che, type, i, BETWEEN_TNS_AND_IMDCT, apply_dependent_coupling); + apply_channel_coupling(ac, che, type, i, BETWEEN_TNS_AND_IMDCT, AAC_RENAME(apply_dependent_coupling)); if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT) { imdct_and_window(ac, &che->ch[0]); if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) @@ -2558,11 +2746,22 @@ static void spectral_to_sample(AACContext *ac) ac->update_ltp(ac, &che->ch[1]); } if (ac->oc[1].m4ac.sbr > 0) { - ff_sbr_apply(ac, &che->sbr, type, che->ch[0].ret, che->ch[1].ret); + AAC_RENAME(ff_sbr_apply)(ac, &che->sbr, type, che->ch[0].ret, che->ch[1].ret); } } if (type <= TYPE_CCE) - apply_channel_coupling(ac, che, type, i, AFTER_IMDCT, apply_independent_coupling); + apply_channel_coupling(ac, che, type, i, AFTER_IMDCT, AAC_RENAME(apply_independent_coupling)); + +#if USE_FIXED + { + int j; + /* preparation for resampler */ + for(j = 0; j<2048; j++){ + che->ch[0].ret[j] = (int32_t)av_clipl_int32((int64_t)che->ch[0].ret[j]<<7)+0x8000; + che->ch[1].ret[j] = (int32_t)av_clipl_int32((int64_t)che->ch[1].ret[j]<<7)+0x8000; + } + } +#endif /* USE_FIXED */ } } } @@ -2940,7 +3139,7 @@ static av_cold int aac_decode_close(AVCodecContext *avctx) for (i = 0; i < MAX_ELEM_ID; i++) { for (type = 0; type < 4; type++) { if (ac->che[type][i]) - ff_aac_sbr_ctx_close(&ac->che[type][i]->sbr); + AAC_RENAME(ff_aac_sbr_ctx_close)(&ac->che[type][i]->sbr); av_freep(&ac->che[type][i]); } } @@ -2959,9 +3158,15 @@ static void aacdec_init(AACContext *c) c->apply_tns = apply_tns; c->windowing_and_mdct_ltp = windowing_and_mdct_ltp; c->update_ltp = update_ltp; +#if USE_FIXED + c->vector_pow43 = vector_pow43; + c->subband_scale = subband_scale; +#endif +#if !USE_FIXED if(ARCH_MIPS) ff_aacdec_init_mips(c); +#endif /* !USE_FIXED */ } /** * AVOptions for Japanese DTV specific 
extensions (ADTS only) diff --git a/libavcodec/lpc.h b/libavcodec/lpc.h index c323230..d01439d 100644 --- a/libavcodec/lpc.h +++ b/libavcodec/lpc.h @@ -24,6 +24,7 @@ #include <stdint.h> #include "libavutil/avassert.h" +#include "aac_defines.h" #define ORDER_METHOD_EST 0 #define ORDER_METHOD_2LEVEL 1 @@ -107,11 +108,15 @@ void ff_lpc_init_x86(LPCContext *s); */ void ff_lpc_end(LPCContext *s); +#if USE_FIXED +#define LPC_TYPE int +#else #ifdef LPC_USE_DOUBLE #define LPC_TYPE double #else #define LPC_TYPE float #endif +#endif // USE_FIXED /** * Schur recursion. @@ -148,7 +153,7 @@ static inline void compute_ref_coefs(const LPC_TYPE *autoc, int max_order, * Levinson-Durbin recursion. * Produce LPC coefficients from autocorrelation data. */ -static inline int compute_lpc_coefs(const LPC_TYPE *autoc, int max_order, +static inline int AAC_RENAME(compute_lpc_coefs)(const LPC_TYPE *autoc, int max_order, LPC_TYPE *lpc, int lpc_stride, int fail, int normalize) { @@ -165,14 +170,14 @@ static inline int compute_lpc_coefs(const LPC_TYPE *autoc, int max_order, return -1; for(i=0; i<max_order; i++) { - LPC_TYPE r = -autoc[i]; + LPC_TYPE r = AAC_SRA_R(-autoc[i], 5); if (normalize) { for(j=0; j<i; j++) r -= lpc_last[j] * autoc[i-j-1]; r /= err; - err *= 1.0 - (r * r); + err *= FIXR(1.0) - (r * r); } lpc[i] = r; @@ -180,8 +185,8 @@ static inline int compute_lpc_coefs(const LPC_TYPE *autoc, int max_order, for(j=0; j < (i+1)>>1; j++) { LPC_TYPE f = lpc_last[ j]; LPC_TYPE b = lpc_last[i-1-j]; - lpc[ j] = f + r * b; - lpc[i-1-j] = b + r * f; + lpc[ j] = f + AAC_MUL26(r, b); + lpc[i-1-j] = b + AAC_MUL26(r, f); } if (fail && err < 0) diff --git a/libavcodec/mdct_template.c b/libavcodec/mdct_template.c index 7fa8bcc..a4fc86e 100644 --- a/libavcodec/mdct_template.c +++ b/libavcodec/mdct_template.c @@ -81,8 +81,13 @@ av_cold int ff_mdct_init(FFTContext *s, int nbits, int inverse, double scale) scale = sqrt(fabs(scale)); for(i=0;i<n4;i++) { alpha = 2 * M_PI * (i + theta) / n; - 
s->tcos[i*tstep] = FIX15(-cos(alpha) * scale); - s->tsin[i*tstep] = FIX15(-sin(alpha) * scale); +#if FFT_FIXED_32 + s->tcos[i*tstep] = (FFTSample)floor(-cos(alpha) * 2147483648.0 + 0.5); + s->tsin[i*tstep] = (FFTSample)floor(-sin(alpha) * 2147483648.0 + 0.5); +#else + s->tcos[i*tstep] = FIX15(-cos(alpha) * scale); + s->tsin[i*tstep] = FIX15(-sin(alpha) * scale); +#endif } return 0; fail: diff --git a/libavutil/fixed_dsp.c b/libavutil/fixed_dsp.c index b99fea5..711252f 100644 --- a/libavutil/fixed_dsp.c +++ b/libavutil/fixed_dsp.c @@ -26,7 +26,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * Author: Nedeljko Babic (nba...@mips.com) + * Author: Nedeljko Babic (nedeljko.babic imgtec com) * * This file is part of FFmpeg. * @@ -47,7 +47,29 @@ #include "fixed_dsp.h" -static void vector_fmul_window_fixed_scaled_c(int16_t *dst, const int32_t *src0, +static void vector_fmul_add_c(int *dst, const int *src0, const int *src1, const int *src2, int len){ + int i; + int64_t accu; + + for (i=0; i<len; i++) { + accu = (int64_t)src0[i] * src1[i]; + dst[i] = src2[i] + (int)((accu + 0x40000000) >> 31); + } +} + +static void vector_fmul_reverse_c(int *dst, const int *src0, const int *src1, int len) +{ + int i; + int64_t accu; + + src1 += len-1; + for (i=0; i<len; i++) { + accu = (int64_t)src0[i] * src1[-i]; + dst[i] = (int)((accu+0x40000000) >> 31); + } +} + +static void vector_fmul_window_scaled_c(int16_t *dst, const int32_t *src0, const int32_t *src1, const int32_t *win, int len, uint8_t bits) { @@ -68,7 +90,7 @@ static void vector_fmul_window_fixed_scaled_c(int16_t *dst, const int32_t *src0, } } -static void vector_fmul_window_fixed_c(int32_t *dst, const int32_t *src0, +static void vector_fmul_window_c(int32_t *dst, const int32_t *src0, const int32_t *src1, const int32_t *win, int len) { @@ -88,11 +110,49 @@ static void vector_fmul_window_fixed_c(int32_t *dst, const int32_t *src0, } } +static void vector_fmul_c(int *dst, const int 
*src0, const int *src1, int len) +{ + int i; + int64_t accu; + + for (i = 0; i < len; i++){ + accu = (int64_t)src0[i] * src1[i]; + dst[i] = (int)((accu+0x40000000) >> 31); + } +} + +static int ff_scalarproduct_fixed_c(const int *v1, const int *v2, int len) +{ + int64_t p = 0; + int i; + + for (i = 0; i < len; i++) + p += (int64_t)v1[i] * v2[i]; + + return (int)((p + 0x40000000) >> 31); +} + +static void butterflies_fixed_c(int *v1, int *v2, int len) +{ + int i; + + for (i = 0; i < len; i++){ + int t = v1[i] - v2[i]; + v1[i] += v2[i]; + v2[i] = t; + } +} + AVFixedDSPContext * avpriv_alloc_fixed_dsp(int bit_exact) { AVFixedDSPContext * fdsp = av_malloc(sizeof(AVFixedDSPContext)); - fdsp->vector_fmul_window_scaled = vector_fmul_window_fixed_scaled_c; - fdsp->vector_fmul_window = vector_fmul_window_fixed_c; + fdsp->vector_fmul = vector_fmul_c; + fdsp->vector_fmul_add = vector_fmul_add_c; + fdsp->vector_fmul_reverse = vector_fmul_reverse_c; + fdsp->vector_fmul_window_scaled = vector_fmul_window_scaled_c; + fdsp->vector_fmul_window = vector_fmul_window_c; + fdsp->butterflies_fixed = butterflies_fixed_c; + fdsp->scalarproduct_fixed = ff_scalarproduct_fixed_c; return fdsp; } diff --git a/libavutil/fixed_dsp.h b/libavutil/fixed_dsp.h index ff6f365..3035d19 100644 --- a/libavutil/fixed_dsp.h +++ b/libavutil/fixed_dsp.h @@ -54,6 +54,25 @@ #include "libavcodec/mathops.h" typedef struct AVFixedDSPContext { + /* assume len is a multiple of 16, and arrays are 32-byte aligned */ + + /** + * Calculate the product of two vectors of integers and store the result in + * a vector of integers. 
+ * + * @param dst output vector + * constraints: 32-byte aligned + * @param src0 first input vector + * constraints: 32-byte aligned + * @param src1 second input vector + * constraints: 32-byte aligned + * @param len number of elements in the input + * constraints: multiple of 16 + */ + void (*vector_fmul)(int *dst, const int *src0, const int *src1, + int len); + + void (*vector_fmul_reverse)(int *dst, const int *src0, const int *src1, int len); /** * Overlap/add with window function. * Used primarily by MDCT-based audio codecs. @@ -92,6 +111,40 @@ typedef struct AVFixedDSPContext { */ void (*vector_fmul_window)(int32_t *dst, const int32_t *src0, const int32_t *src1, const int32_t *win, int len); + /** + * Calculate the product of two vectors of integers, add a third vector of + * integers and store the result in a vector of integers. + * + * @param dst output vector + * constraints: 32-byte aligned + * @param src0 first input vector + * constraints: 32-byte aligned + * @param src1 second input vector + * constraints: 32-byte aligned + * @param src2 third input vector + * constraints: 32-byte aligned + * @param len number of elements in the input + * constraints: multiple of 16 + */ + void (*vector_fmul_add)(int *dst, const int *src0, const int *src1, + const int *src2, int len); + + /** + * Calculate the scalar product of two vectors of integers. + * @param v1 first vector, 16-byte aligned + * @param v2 second vector, 16-byte aligned + * @param len length of vectors, multiple of 4 + */ + int (*scalarproduct_fixed)(const int *v1, const int *v2, int len); + + /** + * Calculate the sum and difference of two vectors of integers. 
+ * + * @param v1 first input vector, sum output, 16-byte aligned + * @param v2 second input vector, difference output, 16-byte aligned + * @param len length of vectors, multiple of 4 + */ + void (*butterflies_fixed)(int *av_restrict v1, int *av_restrict v2, int len); } AVFixedDSPContext; /** -- 1.8.2.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel