--- libavcodec/dcadsp.c | 413 ++++++++++++++++++++++++++++++++++++++++++++++++++++ libavcodec/dcadsp.h | 91 ++++++++++++ 2 files changed, 504 insertions(+) create mode 100644 libavcodec/dcadsp.c create mode 100644 libavcodec/dcadsp.h
diff --git a/libavcodec/dcadsp.c b/libavcodec/dcadsp.c new file mode 100644 index 0000000..cee3d60 --- /dev/null +++ b/libavcodec/dcadsp.c @@ -0,0 +1,413 @@ +/* + * Copyright (C) 2016 foo86 + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/mem.h" + +#include "dcadsp.h" +#include "dcamath.h" + +static void decode_hf_c(int32_t **dst, + const int32_t *vq_index, + const int8_t hf_vq[1024][32], + int32_t scale_factors[32][2], + intptr_t sb_start, intptr_t sb_end, + intptr_t ofs, intptr_t len) +{ + int i, j; + + for (i = sb_start; i < sb_end; i++) { + const int8_t *coeff = hf_vq[vq_index[i]]; + int32_t scale = scale_factors[i][0]; + for (j = 0; j < len; j++) + dst[i][j + ofs] = clip23(coeff[j] * scale + (1 << 3) >> 4); + } +} + +static void decode_joint_c(int32_t **dst, int32_t **src, + const int32_t *scale_factors, + intptr_t sb_start, intptr_t sb_end, + intptr_t ofs, intptr_t len) +{ + int i, j; + + for (i = sb_start; i < sb_end; i++) { + int32_t scale = scale_factors[i]; + for (j = 0; j < len; j++) + dst[i][j + ofs] = clip23(mul17(src[i][j + ofs], scale)); + } +} + +static void lfe_fir_float_c(float *pcm_samples, int32_t *lfe_samples, + const float *filter_coeff, intptr_t npcmblocks, + int dec_select) +{ + // Select decimation factor + int factor = 64 << dec_select; + int ncoeffs = 8 >> dec_select; + int nlfesamples = npcmblocks >> (dec_select + 1); + int i, j, k; + + for (i = 0; i < nlfesamples; i++) { + // One decimated sample generates 64 or 128 interpolated ones + for (j = 0; j < factor / 2; j++) { + float a = 0; + float b = 0; + + for (k = 0; k < ncoeffs; k++) { + a += filter_coeff[ j * ncoeffs + k] * lfe_samples[-k]; + b += filter_coeff[255 - j * ncoeffs - k] * lfe_samples[-k]; + } + + pcm_samples[ j] = a; + pcm_samples[factor / 2 + j] = b; + } + + lfe_samples++; + pcm_samples += factor; + } +} + +static void lfe_fir1_float_c(float *pcm_samples, int32_t *lfe_samples, + const float *filter_coeff, intptr_t npcmblocks) +{ + lfe_fir_float_c(pcm_samples, lfe_samples, filter_coeff, npcmblocks, 0); +} + +static void lfe_fir2_float_c(float *pcm_samples, int32_t *lfe_samples, + const float *filter_coeff, intptr_t npcmblocks) +{ + lfe_fir_float_c(pcm_samples, lfe_samples, filter_coeff, npcmblocks, 1); +} + +static void lfe_x96_float_c(float *dst, const float *src, + float *hist, intptr_t len) +{ + float prev = *hist; + int i; + + for (i = 0; i < len; i++) { + float a = 0.25f * src[i] + 0.75f * prev; + float b = 0.75f * src[i] + 0.25f * prev; + prev = src[i]; + *dst++ = a; + *dst++ = b; + } + + *hist = prev; +} + +static void sub_qmf32_float_c(SynthFilterContext *synth, + FFTContext *imdct, + float *pcm_samples, + int32_t **subband_samples_lo, + int32_t **subband_samples_hi, + float *hist1, int *offset, float *hist2, + const float *filter_coeff, intptr_t npcmblocks, + float scale) +{ + LOCAL_ALIGNED(32, float, input, [32]); + int i, j; + + for (j = 0; j < npcmblocks; j++) { + // Load in one sample from each subband + for (i = 0; i < 32; i++) { + if ((i - 1) & 2) + input[i] = -subband_samples_lo[i][j]; + else + input[i] = subband_samples_lo[i][j]; + } + + // One subband sample generates 32 interpolated ones + synth->synth_filter_float(imdct, hist1, offset, + hist2, filter_coeff, + pcm_samples, input, scale); + pcm_samples += 32; + } +} + +static void sub_qmf64_float_c(SynthFilterContext *synth, + FFTContext *imdct, + float *pcm_samples, + int32_t **subband_samples_lo, + int32_t **subband_samples_hi, + float *hist1, int *offset, float *hist2, + const float *filter_coeff, intptr_t npcmblocks, + float scale) +{ + LOCAL_ALIGNED(32, float, input, [64]); + int i, j; + + if (!subband_samples_hi) + memset(&input[32], 0, sizeof(input[0]) * 32); + + for (j = 0; j < npcmblocks; j++) { + // Load in one sample from each subband + if (subband_samples_hi) { + // Full 64 subbands, first 32 are residual coded + for (i = 0; i < 32; i++) { + if ((i - 1) & 2) + input[i] = -subband_samples_lo[i][j] - subband_samples_hi[i][j]; + else + input[i] = subband_samples_lo[i][j] + subband_samples_hi[i][j]; + } + for (i = 32; i < 64; i++) { + if ((i - 1) & 2) + input[i] = -subband_samples_hi[i][j]; + else + input[i] = subband_samples_hi[i][j]; + } + } else { + // Only first 32 subbands + for (i = 0; i < 32; i++) { + if ((i - 1) & 2) + input[i] = -subband_samples_lo[i][j]; + else + input[i] = subband_samples_lo[i][j]; + } + } + + // One subband sample generates 64 interpolated ones + synth->synth_filter_float_64(imdct, hist1, offset, + hist2, filter_coeff, + pcm_samples, input, scale); + pcm_samples += 64; + } +} + +static void lfe_fir_fixed_c(int32_t *pcm_samples, int32_t *lfe_samples, + const int32_t *filter_coeff, intptr_t npcmblocks) +{ + // Select decimation factor + int nlfesamples = npcmblocks >> 1; + int i, j, k; + + for (i = 0; i < nlfesamples; i++) { + // One decimated sample generates 64 interpolated ones + for (j = 0; j < 32; j++) { + int64_t a = 0; + int64_t b = 0; + + for (k = 0; k < 8; k++) { + a += (int64_t)filter_coeff[ j * 8 + k] * lfe_samples[-k]; + b += (int64_t)filter_coeff[255 - j * 8 - k] * lfe_samples[-k]; + } + + pcm_samples[ j] = clip23(norm23(a)); + pcm_samples[32 + j] = clip23(norm23(b)); + } + + lfe_samples++; + pcm_samples += 64; + } +} + +static void lfe_x96_fixed_c(int32_t *dst, const int32_t *src, + int32_t *hist, intptr_t len) +{ + int32_t prev = *hist; + int i; + + for (i = 0; i < len; i++) { + int64_t a = INT64_C(2097471) * src[i] + INT64_C(6291137) * prev; + int64_t b = INT64_C(6291137) * src[i] + INT64_C(2097471) * prev; + prev = src[i]; + *dst++ = clip23(norm23(a)); + *dst++ = clip23(norm23(b)); + } + + *hist = prev; +} + +static void sub_qmf32_fixed_c(SynthFilterContext *synth, + DCADCTContext *imdct, + int32_t *pcm_samples, + int32_t **subband_samples_lo, + int32_t **subband_samples_hi, + int32_t *hist1, int *offset, int32_t *hist2, + const int32_t *filter_coeff, intptr_t npcmblocks) +{ + LOCAL_ALIGNED(32, int32_t, input, [32]); + int i, j; + + for (j = 0; j < npcmblocks; j++) { + // Load in one sample from each subband + for (i = 0; i < 32; i++) + input[i] = subband_samples_lo[i][j]; + + // One subband sample generates 32 interpolated ones + synth->synth_filter_fixed(imdct, hist1, offset, + hist2, filter_coeff, + pcm_samples, input); + pcm_samples += 32; + } +} + +static void sub_qmf64_fixed_c(SynthFilterContext *synth, + DCADCTContext *imdct, + int32_t *pcm_samples, + int32_t **subband_samples_lo, + int32_t **subband_samples_hi, + int32_t *hist1, int *offset, int32_t *hist2, + const int32_t *filter_coeff, intptr_t npcmblocks) +{ + LOCAL_ALIGNED(32, int32_t, input, [64]); + int i, j; + + if (!subband_samples_hi) + memset(&input[32], 0, sizeof(input[0]) * 32); + + for (j = 0; j < npcmblocks; j++) { + // Load in one sample from each subband + if (subband_samples_hi) { + // Full 64 subbands, first 32 are residual coded + for (i = 0; i < 32; i++) + input[i] = subband_samples_lo[i][j] + subband_samples_hi[i][j]; + for (i = 32; i < 64; i++) + input[i] = subband_samples_hi[i][j]; + } else { + // Only first 32 subbands + for (i = 0; i < 32; i++) + input[i] = subband_samples_lo[i][j]; + } + + // One subband sample generates 64 interpolated ones + synth->synth_filter_fixed_64(imdct, hist1, offset, + hist2, filter_coeff, + pcm_samples, input); + pcm_samples += 64; + } +} + +static void decor_c(int32_t *dst, const int32_t *src, intptr_t coeff, intptr_t len) +{ + int i; + + for (i = 0; i < len; i++) + dst[i] += src[i] * coeff + (1 << 2) >> 3; +} + +static void dmix_sub_xch_c(int32_t *dst1, int32_t *dst2, + const int32_t *src, intptr_t len) +{ + int i; + + for (i = 0; i < len; i++) { + int32_t cs = mul23(src[i], 5931520 /* M_SQRT1_2 * (1 << 23) */); + dst1[i] -= cs; + dst2[i] -= cs; + } +} + +static void dmix_sub_c(int32_t *dst, const int32_t *src, intptr_t coeff, intptr_t len) +{ + int i; + + for (i = 0; i < len; i++) + dst[i] -= mul15(src[i], coeff); +} + +static void dmix_add_c(int32_t *dst, const int32_t *src, intptr_t coeff, intptr_t len) +{ + int i; + + for (i = 0; i < len; i++) + dst[i] += mul15(src[i], coeff); +} + +static void dmix_scale_c(int32_t *dst, intptr_t scale, intptr_t len) +{ + int i; + + for (i = 0; i < len; i++) + dst[i] = mul15(dst[i], scale); +} + +static void dmix_scale_inv_c(int32_t *dst, intptr_t scale_inv, intptr_t len) +{ + int i; + + for (i = 0; i < len; i++) + dst[i] = mul16(dst[i], scale_inv); +} + +static void filter0(int32_t *dst, const int32_t *src, int32_t coeff, intptr_t len) +{ + int i; + + for (i = 0; i < len; i++) + dst[i] -= mul22(src[i], coeff); +} + +static void filter1(int32_t *dst, const int32_t *src, int32_t coeff, intptr_t len) +{ + int i; + + for (i = 0; i < len; i++) + dst[i] -= mul23(src[i], coeff); +} + +static void assemble_freq_bands_c(int32_t *dst, int32_t *src0, int32_t *src1, + const int32_t *coeff, intptr_t len) +{ + int i; + + filter0(src0, src1, coeff[0], len); + filter0(src1, src0, coeff[1], len); + filter0(src0, src1, coeff[2], len); + filter0(src1, src0, coeff[3], len); + + for (i = 0; i < 8; i++, src0--) { + filter1(src0, src1, coeff[i + 4], len); + filter1(src1, src0, coeff[i + 12], len); + filter1(src0, src1, coeff[i + 4], len); + } + + for (i = 0; i < len; i++) { + *dst++ = *src1++; + *dst++ = *++src0; + } +} + +av_cold void ff_dcadsp_init(DCADSPContext *s) +{ + s->decode_hf = decode_hf_c; + s->decode_joint = decode_joint_c; + + s->lfe_fir_float[0] = lfe_fir1_float_c; + s->lfe_fir_float[1] = lfe_fir2_float_c; + s->lfe_x96_float = lfe_x96_float_c; + s->sub_qmf_float[0] = sub_qmf32_float_c; + s->sub_qmf_float[1] = sub_qmf64_float_c; + + s->lfe_fir_fixed = lfe_fir_fixed_c; + s->lfe_x96_fixed = lfe_x96_fixed_c; + s->sub_qmf_fixed[0] = sub_qmf32_fixed_c; + s->sub_qmf_fixed[1] = sub_qmf64_fixed_c; + + s->decor = decor_c; + + s->dmix_sub_xch = dmix_sub_xch_c; + s->dmix_sub = dmix_sub_c; + s->dmix_add = dmix_add_c; + s->dmix_scale = dmix_scale_c; + s->dmix_scale_inv = dmix_scale_inv_c; + + s->assemble_freq_bands = assemble_freq_bands_c; +} diff --git a/libavcodec/dcadsp.h b/libavcodec/dcadsp.h new file mode 100644 index 0000000..d8acf37 --- /dev/null +++ b/libavcodec/dcadsp.h @@ -0,0 +1,91 @@ +/* + * Copyright (C) 2016 foo86 + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_DCADSP_H +#define AVCODEC_DCADSP_H + +#include "libavutil/common.h" + +#include "fft.h" +#include "dcadct.h" +#include "synth_filter.h" + +typedef struct DCADSPContext { + void (*decode_hf)(int32_t **dst, + const int32_t *vq_index, + const int8_t hf_vq[1024][32], + int32_t scale_factors[32][2], + intptr_t sb_start, intptr_t sb_end, + intptr_t ofs, intptr_t len); + + void (*decode_joint)(int32_t **dst, int32_t **src, + const int32_t *scale_factors, + intptr_t sb_start, intptr_t sb_end, + intptr_t ofs, intptr_t len); + + void (*lfe_fir_float[2])(float *pcm_samples, int32_t *lfe_samples, + const float *filter_coeff, intptr_t npcmblocks); + + void (*lfe_x96_float)(float *dst, const float *src, + float *hist, intptr_t len); + + void (*sub_qmf_float[2])(SynthFilterContext *synth, + FFTContext *imdct, + float *pcm_samples, + int32_t **subband_samples_lo, + int32_t **subband_samples_hi, + float *hist1, int *offset, float *hist2, + const float *filter_coeff, intptr_t npcmblocks, + float scale); + + void (*lfe_fir_fixed)(int32_t *pcm_samples, int32_t *lfe_samples, + const int32_t *filter_coeff, intptr_t npcmblocks); + + void (*lfe_x96_fixed)(int32_t *dst, const int32_t *src, + int32_t *hist, intptr_t len); + + void (*sub_qmf_fixed[2])(SynthFilterContext *synth, + DCADCTContext *imdct, + int32_t *pcm_samples, + int32_t **subband_samples_lo, + int32_t **subband_samples_hi, + int32_t *hist1, int *offset, int32_t *hist2, + const int32_t *filter_coeff, intptr_t npcmblocks); + + void (*decor)(int32_t *dst, const int32_t *src, intptr_t coeff, intptr_t len); + + void (*dmix_sub_xch)(int32_t *dst1, int32_t *dst2, + const int32_t *src, intptr_t len); + + void (*dmix_sub)(int32_t *dst, const int32_t *src, intptr_t coeff, intptr_t len); + + void (*dmix_add)(int32_t *dst, const int32_t *src, intptr_t coeff, intptr_t len); + + void (*dmix_scale)(int32_t *dst, intptr_t scale, intptr_t len); + + void (*dmix_scale_inv)(int32_t *dst, intptr_t scale_inv, intptr_t len); + + void (*assemble_freq_bands)(int32_t *dst, int32_t *src0, int32_t *src1, + const int32_t *coeff, intptr_t len); +} DCADSPContext; + +av_cold void ff_dcadsp_init(DCADSPContext *s); + +#endif -- 2.1.4 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel