Signed-off-by: Paul B Mahol <one...@gmail.com> --- libavcodec/Makefile | 2 +- libavcodec/takdec.c | 44 +++++++++------------ libavcodec/takdsp.c | 82 ++++++++++++++++++++++++++++++++++++++ libavcodec/takdsp.h | 34 ++++++++++++++++ libavcodec/x86/Makefile | 2 + libavcodec/x86/takdsp.asm | 94 ++++++++++++++++++++++++++++++++++++++++++++ libavcodec/x86/takdsp_init.c | 45 +++++++++++++++++++++ 7 files changed, 277 insertions(+), 26 deletions(-) create mode 100644 libavcodec/takdsp.c create mode 100644 libavcodec/takdsp.h create mode 100644 libavcodec/x86/takdsp.asm create mode 100644 libavcodec/x86/takdsp_init.c
diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 153c3f8..dcd3828 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -490,7 +490,7 @@ OBJS-$(CONFIG_SVQ1_ENCODER) += svq1enc.o svq1.o \ h263.o ituh263enc.o OBJS-$(CONFIG_SVQ3_DECODER) += svq3.o svq13.o mpegutils.o OBJS-$(CONFIG_TEXT_DECODER) += textdec.o ass.o -OBJS-$(CONFIG_TAK_DECODER) += takdec.o tak.o +OBJS-$(CONFIG_TAK_DECODER) += takdec.o tak.o takdsp.o OBJS-$(CONFIG_TARGA_DECODER) += targa.o OBJS-$(CONFIG_TARGA_ENCODER) += targaenc.o rle.o OBJS-$(CONFIG_TARGA_Y216_DECODER) += targa_y216dec.o diff --git a/libavcodec/takdec.c b/libavcodec/takdec.c index 5395596..e5c0723 100644 --- a/libavcodec/takdec.c +++ b/libavcodec/takdec.c @@ -28,6 +28,7 @@ #include "libavutil/internal.h" #include "libavutil/samplefmt.h" #include "tak.h" +#include "takdsp.h" #include "audiodsp.h" #include "thread.h" #include "avcodec.h" @@ -47,6 +48,7 @@ typedef struct MCDParam { typedef struct TAKDecContext { AVCodecContext *avctx; ///< parent AVCodecContext AudioDSPContext adsp; + TAKDSPContext tdsp; TAKStreamInfo ti; GetBitContext gb; ///< bitstream reader initialized to start at the current frame @@ -172,6 +174,7 @@ static av_cold int tak_decode_init(AVCodecContext *avctx) TAKDecContext *s = avctx->priv_data; ff_audiodsp_init(&s->adsp); + ff_takdsp_init(&s->tdsp); s->avctx = avctx; avctx->bits_per_raw_sample = avctx->bits_per_coded_sample; @@ -541,46 +544,32 @@ static int decode_channel(TAKDecContext *s, int chan) static int decorrelate(TAKDecContext *s, int c1, int c2, int length) { GetBitContext *gb = &s->gb; - int32_t *p1 = s->decoded[c1] + 1; - int32_t *p2 = s->decoded[c2] + 1; + int32_t *p1 = s->decoded[c1] + (s->dmode > 5); + int32_t *p2 = s->decoded[c2] + (s->dmode > 5); + int32_t bp1 = p1[0]; + int32_t bp2 = p2[0]; int i; int dshift, dfactor; + length += s->dmode < 6; + switch (s->dmode) { case 1: /* left/side */ - for (i = 0; i < length; i++) { - int32_t a = p1[i]; - int32_t b = p2[i]; - p2[i] = a + b; - } 
+ s->tdsp.decorrelate_ls(p1, p2, length); break; case 2: /* side/right */ - for (i = 0; i < length; i++) { - int32_t a = p1[i]; - int32_t b = p2[i]; - p1[i] = b - a; - } + s->tdsp.decorrelate_sr(p1, p2, length); break; case 3: /* side/mid */ - for (i = 0; i < length; i++) { - int32_t a = p1[i]; - int32_t b = p2[i]; - a -= b >> 1; - p1[i] = a; - p2[i] = a + b; - } + s->tdsp.decorrelate_sm(p1, p2, length); break; case 4: /* side/left with scale factor */ FFSWAP(int32_t*, p1, p2); + FFSWAP(int32_t, bp1, bp2); case 5: /* side/right with scale factor */ dshift = get_bits_esc4(gb); dfactor = get_sbits(gb, 10); - for (i = 0; i < length; i++) { - int32_t a = p1[i]; - int32_t b = p2[i]; - b = dfactor * (b >> dshift) + 128 >> 8 << dshift; - p1[i] = b - a; - } + s->tdsp.decorrelate_sf(p1, p2, length, dshift, dfactor); break; case 6: FFSWAP(int32_t*, p1, p2); @@ -664,6 +653,11 @@ static int decorrelate(TAKDecContext *s, int c1, int c2, int length) } } + if (s->dmode > 0 && s->dmode < 6) { + p1[0] = bp1; + p2[0] = bp2; + } + return 0; } diff --git a/libavcodec/takdsp.c b/libavcodec/takdsp.c new file mode 100644 index 0000000..2441c2b --- /dev/null +++ b/libavcodec/takdsp.c @@ -0,0 +1,82 @@ +/* + * TAK decoder + * Copyright (c) 2015 Paul B Mahol + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "takdsp.h"
+#include "config.h"
+
+static void decorrelate_ls(int32_t *p1, int32_t *p2, int length) /* p2[i] = p1[i] + p2[i]; p1 is only read */
+{
+    int i;
+
+    for (i = 0; i < length; i++) {
+        uint32_t a = p1[i]; /* unsigned so that out-of-range sums wrap instead of being signed-overflow UB */
+        uint32_t b = p2[i];
+        p2[i] = a + b;
+    }
+}
+
+static void decorrelate_sr(int32_t *p1, int32_t *p2, int length) /* p1[i] = p2[i] - p1[i]; p2 is only read */
+{
+    int i;
+
+    for (i = 0; i < length; i++) {
+        uint32_t a = p1[i]; /* unsigned: the difference wraps modulo 2^32 */
+        uint32_t b = p2[i];
+        p1[i] = b - a;
+    }
+}
+
+static void decorrelate_sm(int32_t *p1, int32_t *p2, int length) /* p1[i] -= p2[i] >> 1; p2[i] += updated p1[i] */
+{
+    int i;
+
+    for (i = 0; i < length; i++) {
+        uint32_t a = p1[i]; /* unsigned accumulator: wraps instead of overflowing */
+        int32_t b = p2[i]; /* stays signed so that b >> 1 keeps the sign */
+        a -= b >> 1;
+        p1[i] = a;
+        p2[i] = a + b;
+    }
+}
+
+static void decorrelate_sf(int32_t *p1, int32_t *p2, int length, int dshift, int dfactor) /* p1[i] = scaled(p2[i]) - p1[i]; p2 is only read */
+{
+    int i;
+
+    for (i = 0; i < length; i++) {
+        uint32_t a = p1[i]; /* unsigned: final subtraction wraps */
+        int32_t b = p2[i]; /* signed: both right shifts below must keep the sign */
+        b = (unsigned)((int)(dfactor * (unsigned)(b >> dshift) + 128) >> 8) << dshift; /* same value as dfactor * (b >> dshift) + 128 >> 8 << dshift, but multiply, add and left shift are done unsigned */
+        p1[i] = b - a;
+    }
+}
+
+av_cold void ff_takdsp_init(TAKDSPContext *c) /* fills every entry of c with the C versions above */
+{
+    c->decorrelate_ls = decorrelate_ls;
+    c->decorrelate_sr = decorrelate_sr;
+    c->decorrelate_sm = decorrelate_sm;
+    c->decorrelate_sf = decorrelate_sf;
+
+    if (ARCH_X86) /* x86 init runs last so it can replace the C entries */
+        ff_takdsp_init_x86(c);
+}
diff --git a/libavcodec/takdsp.h b/libavcodec/takdsp.h
new file mode 100644
index 0000000..c05b574
--- /dev/null
+++ b/libavcodec/takdsp.h
@@ -0,0 +1,34 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_TAKDSP_H
+#define AVCODEC_TAKDSP_H
+
+#include <stdint.h>
+
+typedef struct TAKDSPContext { /* table of channel-decorrelation routines, filled in by ff_takdsp_init() */
+    void (*decorrelate_ls)(int32_t *p1, int32_t *p2, int length); /* C version: p2[i] = p1[i] + p2[i] */
+    void (*decorrelate_sr)(int32_t *p1, int32_t *p2, int length); /* C version: p1[i] = p2[i] - p1[i] */
+    void (*decorrelate_sm)(int32_t *p1, int32_t *p2, int length); /* C version: p1[i] -= p2[i] >> 1, then p2[i] += updated p1[i] */
+    void (*decorrelate_sf)(int32_t *p1, int32_t *p2, int length, int dshift, int dfactor); /* C version: p1[i] = (dfactor * (p2[i] >> dshift) + 128 >> 8 << dshift) - p1[i] */
+} TAKDSPContext;
+
+void ff_takdsp_init(TAKDSPContext *c); /* installs the C routines, then calls ff_takdsp_init_x86() when ARCH_X86 is set */
+void ff_takdsp_init_x86(TAKDSPContext *c); /* replaces entries with SSE2/SSE4 routines depending on the CPU flags */
+
+#endif /* AVCODEC_TAKDSP_H */
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 5ff3a77..7d6ce8a 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -55,6 +55,7 @@ OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp_init.o
 OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp_init.o
 OBJS-$(CONFIG_RV40_DECODER) += x86/rv40dsp_init.o
 OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc_init.o
+OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp_init.o
 OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp_init.o
 OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp_init.o
 OBJS-$(CONFIG_V210_DECODER) += x86/v210-init.o
@@ -150,6 +151,7 @@ YASM-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp.o
 YASM-OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp.o
 YASM-OBJS-$(CONFIG_RV40_DECODER) += x86/rv40dsp.o
 YASM-OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc.o
+YASM-OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp.o
 YASM-OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o
 YASM-OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp.o
YASM-OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc.o
diff --git a/libavcodec/x86/takdsp.asm b/libavcodec/x86/takdsp.asm
new file mode 100644
index 0000000..0158d4d
--- /dev/null
+++ b/libavcodec/x86/takdsp.asm
@@ -0,0 +1,94 @@
+;******************************************************************************
+;* TAK DSP SIMD optimizations
+;*
+;* Copyright (C) 2015 Paul B Mahol
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+pd_128: dd 128 ; rounding term added before the >> 8 in tak_decorrelate_sf
+
+SECTION .text
+
+INIT_XMM sse2
+cglobal tak_decorrelate_ls, 3, 3, 2, p1, p2, length ; p2[i] = p1[i] + p2[i]; p1 is only read
+    .loop: ; no length check before the loop: the body runs at least once
+    mova m0, [p1q] ; NOTE(review): aligned access; presumably p1/p2 are mmsize-aligned and padded to whole vectors - confirm in caller
+    mova m1, [p2q]
+    paddd m0, m1
+    mova [p2q], m0
+    add p1q, mmsize
+    add p2q, mmsize
+    sub lengthd, mmsize/4 ; mmsize/4 int32 samples consumed per pass
+    jg .loop
+    REP_RET
+
+cglobal tak_decorrelate_sr, 3, 3, 2, p1, p2, length ; p1[i] = p2[i] - p1[i]; p2 is only read
+    .loop: ; runs at least once, mmsize/4 samples per pass
+    mova m0, [p1q] ; m0 = a
+    mova m1, [p2q] ; m1 = b
+    psubd m1, m0 ; m1 = b - a
+    mova [p1q], m1 ; store the difference (m1); storing m0 would write p1 back unchanged
+    add p1q, mmsize
+    add p2q, mmsize
+    sub lengthd, mmsize/4
+    jg .loop
+    REP_RET
+
+cglobal tak_decorrelate_sm, 3, 3, 3, p1, p2, length ; p1[i] -= p2[i] >> 1; p2[i] += updated p1[i]
+    .loop: ; runs at least once, mmsize/4 samples per pass
+    mova m0, [p1q] ; m0 = a
+    mova m1, [p2q] ; m1 = b
+    mova m2, m1
+    psrad m2, 1 ; arithmetic shift: the C code shifts a signed int32_t, so the sign must be kept
+    psubd m0, m2 ; a -= b >> 1
+    paddd m1, m0 ; b += a
+    mova [p1q], m0
+    mova [p2q], m1
+    add p1q, mmsize
+    add p2q, mmsize
+    sub lengthd, mmsize/4
+    jg .loop
+    REP_RET
+
+INIT_XMM sse4
+cglobal tak_decorrelate_sf, 5, 5, 5, p1, p2, length, dshift, dfactor ; p1[i] = (dfactor * (p2[i] >> dshift) + 128 >> 8 << dshift) - p1[i]
+    movd m2, dshiftm ; m2 = shift count used by psrad/pslld below
+    movd m3, dfactorm
+    pshufd m3, m3, 0 ; broadcast dfactor to all four lanes
+    movd m4, [pd_128]
+    pshufd m4, m4, 0 ; broadcast the rounding constant 128
+
+    .loop: ; runs at least once, mmsize/4 samples per pass
+    mova m0, [p1q] ; m0 = a
+    mova m1, [p2q] ; m1 = b
+    psrad m1, m2 ; b >> dshift, arithmetic as in the C code
+    pmulld m1, m3 ; * dfactor (low 32 bits of each product)
+    paddd m1, m4 ; + 128
+    psrad m1, 8 ; >> 8, arithmetic: the intermediate may be negative
+    pslld m1, m2 ; << dshift
+    psubd m1, m0 ; scaled b - a
+    mova [p1q], m1
+    add p1q, mmsize
+    add p2q, mmsize
+    sub lengthd, mmsize/4
+    jg .loop
+    REP_RET
diff --git a/libavcodec/x86/takdsp_init.c b/libavcodec/x86/takdsp_init.c
new file mode 100644
index 0000000..555d064
--- /dev/null
+++ b/libavcodec/x86/takdsp_init.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/takdsp.h"
+#include "libavutil/x86/cpu.h"
+#include "config.h"
+
+void ff_tak_decorrelate_ls_sse2(int32_t *p1, int32_t *p2, int length); /* presumably the cglobal tak_decorrelate_* symbols from x86/takdsp.asm */
+void ff_tak_decorrelate_sr_sse2(int32_t *p1, int32_t *p2, int length);
+void ff_tak_decorrelate_sm_sse2(int32_t *p1, int32_t *p2, int length);
+void ff_tak_decorrelate_sf_sse4(int32_t *p1, int32_t *p2, int length, int dshift, int dfactor);
+
+av_cold void ff_takdsp_init_x86(TAKDSPContext *c) /* NOTE(review): av_cold is used without a direct libavutil/attributes.h include; presumably it arrives via the headers above - confirm */
+{
+#if HAVE_YASM /* without YASM this function leaves c untouched */
+    int cpu_flags = av_get_cpu_flags();
+
+    if (EXTERNAL_SSE2(cpu_flags)) { /* the three fixed-form routines have SSE2 versions */
+        c->decorrelate_ls = ff_tak_decorrelate_ls_sse2;
+        c->decorrelate_sr = ff_tak_decorrelate_sr_sse2;
+        c->decorrelate_sm = ff_tak_decorrelate_sm_sse2;
+    }
+
+    if (EXTERNAL_SSE4(cpu_flags)) { /* the scale-factor routine is only provided as an SSE4 version */
+        c->decorrelate_sf = ff_tak_decorrelate_sf_sse4;
+    }
+#endif
+}
-- 
1.9.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel