On Thu, May 14, 2015 at 03:33:55PM +0200, Nedeljko Babic wrote: > From: Djordje Pesut <djordje.pe...@imgtec.com> > > Add fixed point implementation > > Signed-off-by: Nedeljko Babic <nedeljko.ba...@imgtec.com> > --- > libavcodec/aac.h | 101 ++++++++-- > libavcodec/aacdec.c | 5 + > libavcodec/aacdec_fixed.c | 446 > +++++++++++++++++++++++++++++++++++++++++++ > libavcodec/aacdec_template.c | 433 ++++++++++++++++++++++++++++------------- > libavcodec/lpc.h | 15 +- > libavcodec/mdct_template.c | 5 +
> libavutil/fixed_dsp.c | 70 ++++++- > libavutil/fixed_dsp.h | 53 +++++ the changes to libavutil should be split into a seperate patch > 8 files changed, 965 insertions(+), 163 deletions(-) > create mode 100644 libavcodec/aacdec_fixed.c > > diff --git a/libavcodec/aac.h b/libavcodec/aac.h > index 23ec085..571f8b9 100644 > --- a/libavcodec/aac.h > +++ b/libavcodec/aac.h > @@ -30,9 +30,62 @@ > #ifndef AVCODEC_AAC_H > #define AVCODEC_AAC_H > > +#ifndef USE_FIXED > +#define USE_FIXED 0 > +#endif > + > +#if USE_FIXED > + > +#define FFT_FLOAT 0 > +#define FFT_FIXED_32 1 > + > +#define AAC_RENAME(x) x ## _fixed > +#define AAC_RENAME_32(x) x ## _fixed_32 > +#define INTFLOAT int > +#define SHORTFLOAT int16_t > +#define AAC_FLOAT aac_float_t i think *_t is reserved in POSIX to some extend [...] > +#include <assert.h> we use av_assert*() > +#include <errno.h> is that really used ? [...] > +/** > + * Apply dependent channel coupling (applied before IMDCT). > + * > + * @param index index into coupling gain array > + */ > +static void apply_dependent_coupling_fixed(AACContext *ac, > + SingleChannelElement *target, > + ChannelElement *cce, int index) > +{ > + IndividualChannelStream *ics = &cce->ch[0].ics; > + const uint16_t *offsets = ics->swb_offset; > + int *dest = target->coeffs; > + const int *src = cce->ch[0].coeffs; > + int g, i, group, k, idx = 0; > + if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) { > + av_log(ac->avctx, AV_LOG_ERROR, > + "Dependent coupling is not supported together with LTP\n"); > + return; > + } > + for (g = 0; g < ics->num_window_groups; g++) { > + for (i = 0; i < ics->max_sfb; i++, idx++) { > + if (cce->ch[0].band_type[idx] != ZERO_BT) { > + const int gain = cce->coup.gain[index][idx]; > + int shift, round, c, tmp; > + > + if (gain < 0) { > + c = -cce_scale_fixed[-gain & 7]; > + shift = (-gain-1024) >> 3; > + } > + else { > + c = cce_scale_fixed[gain & 7]; > + shift = (gain-1024) >> 3; > + } > + > + if (shift < 0) { > + shift = -shift; > + round = 1 << (shift - 1); > + > + for (group = 0; group < ics->group_len[g]; group++) { > + for (k = offsets[i]; k < offsets[i + 1]; k++) { > + tmp = (int)(((int64_t)src[group * 128 + k] * c + \ > + (int64_t)0x1000000000) >> 37); > + dest[group * 128 + k] += (tmp + round) >> shift; > + } > + } > + } there is something wrong with the indention level [...] > memcpy(sce->ltp_state, sce->ltp_state+1024, 1024 * > sizeof(*sce->ltp_state)); > @@ -2341,22 +2472,27 @@ static void update_ltp(AACContext *ac, > SingleChannelElement *sce) > static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce) > { > IndividualChannelStream *ics = &sce->ics; > - float *in = sce->coeffs; > - float *out = sce->ret; > - float *saved = sce->saved; > - const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 > : ff_sine_128; > - const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 > : ff_sine_1024; > - const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 > : ff_sine_128; > - float *buf = ac->buf_mdct; > - float *temp = ac->temp; > + INTFLOAT *in = sce->coeffs; > + INTFLOAT *out = sce->ret; > + INTFLOAT *saved = sce->saved; > + const INTFLOAT *swindow = ics->use_kb_window[0] ? > AAC_RENAME(ff_aac_kbd_short_128) : AAC_RENAME(ff_sine_128); > + const INTFLOAT *lwindow_prev = ics->use_kb_window[1] ? > AAC_RENAME(ff_aac_kbd_long_1024) : AAC_RENAME(ff_sine_1024); > + const INTFLOAT *swindow_prev = ics->use_kb_window[1] ? > AAC_RENAME(ff_aac_kbd_short_128) : AAC_RENAME(ff_sine_128); > + INTFLOAT *buf = ac->buf_mdct; > + INTFLOAT *temp = ac->temp; > int i; > > // imdct > if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { > for (i = 0; i < 1024; i += 128) > ac->mdct_small.imdct_half(&ac->mdct_small, buf + i, in + i); > - } else > + } else { > ac->mdct.imdct_half(&ac->mdct, buf, in); > +#if USE_FIXED > + for (i=0; i<1024; i++) > + buf[i] = (buf[i] + 4) >> 3; > +#endif /* USE_FIXED */ > + } > > /* window overlapping > * NOTE: To simplify the overlapping code, all 'meaningless' short to > long > @@ -2368,7 +2504,7 @@ static void imdct_and_windowing(AACContext *ac, > SingleChannelElement *sce) > (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || > ics->window_sequence[0] == LONG_START_SEQUENCE)) { > ac->fdsp->vector_fmul_window( out, saved, > buf, lwindow_prev, 512); > } else { > - memcpy( out, saved, > 448 * sizeof(float)); > + memcpy( out, saved, > 448 * sizeof(INTFLOAT)); > > if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { > ac->fdsp->vector_fmul_window(out + 448 + 0*128, saved + 448, > buf + 0*128, swindow_prev, 64); > @@ -2376,65 +2512,71 @@ static void imdct_and_windowing(AACContext *ac, > SingleChannelElement *sce) > ac->fdsp->vector_fmul_window(out + 448 + 2*128, buf + 1*128 + > 64, buf + 2*128, swindow, 64); > ac->fdsp->vector_fmul_window(out + 448 + 3*128, buf + 2*128 + > 64, buf + 3*128, swindow, 64); > ac->fdsp->vector_fmul_window(temp, buf + 3*128 + > 64, buf + 4*128, swindow, 64); > - memcpy( out + 448 + 4*128, temp, 64 * > sizeof(float)); > + memcpy( out + 448 + 4*128, temp, 64 * > sizeof(INTFLOAT)); > } else { > ac->fdsp->vector_fmul_window(out + 448, saved + 448, > buf, swindow_prev, 64); > - memcpy( out + 576, buf + 64, > 448 * sizeof(float)); > + memcpy( out + 576, buf + 64, > 448 * sizeof(INTFLOAT)); > } > } > > // buffer update > if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { > - memcpy( saved, temp + 64, 64 * > sizeof(float)); > + memcpy( saved, temp + 64, 64 * > sizeof(INTFLOAT)); > ac->fdsp->vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + > 5*128, swindow, 64); > ac->fdsp->vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + > 6*128, swindow, 64); > ac->fdsp->vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + > 7*128, swindow, 64); > - memcpy( saved + 448, buf + 7*128 + 64, 64 * > sizeof(float)); > + memcpy( saved + 448, buf + 7*128 + 64, 64 * > sizeof(INTFLOAT)); > } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) { > - memcpy( saved, buf + 512, 448 * > sizeof(float)); > - memcpy( saved + 448, buf + 7*128 + 64, 64 * > sizeof(float)); > + memcpy( saved, buf + 512, 448 * > sizeof(INTFLOAT)); > + memcpy( saved + 448, buf + 7*128 + 64, 64 * > sizeof(INTFLOAT)); > } else { // LONG_STOP or ONLY_LONG > - memcpy( saved, buf + 512, 512 * > sizeof(float)); > + memcpy( saved, buf + 512, 512 * > sizeof(INTFLOAT)); these should use sizeof(*saved) or something not sizeof(INTFLOAT) so as to make it more robust [...] > diff --git a/libavutil/fixed_dsp.h b/libavutil/fixed_dsp.h > index ff6f365..73859c0 100644 > --- a/libavutil/fixed_dsp.h > +++ b/libavutil/fixed_dsp.h > @@ -54,6 +54,8 @@ > #include "libavcodec/mathops.h" > > typedef struct AVFixedDSPContext { > + /* assume len is a multiple of 16, and arrays are 32-byte aligned */ > + > /** > * Overlap/add with window function. > * Used primarily by MDCT-based audio codecs. > @@ -92,6 +94,57 @@ typedef struct AVFixedDSPContext { > */ > void (*vector_fmul_window)(int32_t *dst, const int32_t *src0, const > int32_t *src1, const int32_t *win, int len); > > + /** > + * Fixed-point multiplication that calculates the product of two vectors > of > + * integers and stores the result in a vector of integers. > + * > + * @param dst output vector > + * constraints: 32-byte aligned > + * @param src0 first input vector > + * constraints: 32-byte aligned > + * @param src1 second input vector > + * constraints: 32-byte aligned > + * @param len number of elements in the input > + * constraints: multiple of 16 > + */ > + void (*vector_fmul)(int *dst, const int *src0, const int *src1, > + int len); > + > + void (*vector_fmul_reverse)(int *dst, const int *src0, const int *src1, > int len); > + /** > + * Calculate the product of two vectors of integers, add a third vector > of > + * integers and store the result in a vector of integers. > + * > + * @param dst output vector > + * constraints: 32-byte aligned > + * @param src0 first input vector > + * constraints: 32-byte aligned > + * @param src1 second input vector > + * constraints: 32-byte aligned > + * @param src1 third input vector > + * constraints: 32-byte aligned > + * @param len number of elements in the input > + * constraints: multiple of 16 > + */ > + void (*vector_fmul_add)(int *dst, const int *src0, const int *src1, > + const int *src2, int len); > + > + /** > + * Calculate the scalar product of two vectors of integers. > + * @param v1 first vector, 16-byte aligned > + * @param v2 second vector, 16-byte aligned > + * @param len length of vectors, multiple of 4 > + */ > + int (*scalarproduct_fixed)(const int *v1, const int *v2, int len); > + > + /** > + * Calculate the sum and difference of two vectors of integers. > + * > + * @param v1 first input vector, sum output, 16-byte aligned > + * @param v2 second input vector, difference output, 16-byte aligned > + * @param len length of vectors, multiple of 4 > + */ > + void (*butterflies_fixed)(int *av_restrict v1, int *av_restrict v2, int > len); these ignore the 1.0 point / amount of shifting done the documentation should be sufficient so that someone could write a working implementation based on just the documentation [...] -- Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB Concerning the gods, I have no means of knowing whether they exist or not or of what sort they may be, because of the obscurity of the subject, and the brevity of human life -- Protagoras
signature.asc
Description: Digital signature
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel