Signed-off-by: James Almer <jamr...@gmail.com> --- libavcodec/Makefile | 2 +- libavcodec/ttadsp.c | 41 ++++++++++++++++++++++++++++++++++++----- libavcodec/ttadsp.h | 3 +++ libavcodec/ttaenc.c | 38 ++++++-------------------------------- libavcodec/x86/Makefile | 2 ++ libavcodec/x86/ttadsp.asm | 24 ++++++++++++++++-------- libavcodec/x86/ttadsp_init.c | 25 +++++++++++++++++++------ 7 files changed, 83 insertions(+), 52 deletions(-)
diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 33ac2b3..4355c13 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -552,7 +552,7 @@ OBJS-$(CONFIG_TRUESPEECH_DECODER) += truespeech.o OBJS-$(CONFIG_TSCC_DECODER) += tscc.o msrledec.o OBJS-$(CONFIG_TSCC2_DECODER) += tscc2.o OBJS-$(CONFIG_TTA_DECODER) += tta.o ttadata.o ttadsp.o -OBJS-$(CONFIG_TTA_ENCODER) += ttaenc.o ttadata.o +OBJS-$(CONFIG_TTA_ENCODER) += ttaenc.o ttadata.o ttadsp.o OBJS-$(CONFIG_TWINVQ_DECODER) += twinvqdec.o twinvq.o OBJS-$(CONFIG_TXD_DECODER) += txd.o OBJS-$(CONFIG_ULTI_DECODER) += ulti.o diff --git a/libavcodec/ttadsp.c b/libavcodec/ttadsp.c index 30b7ab9..32a87b2 100644 --- a/libavcodec/ttadsp.c +++ b/libavcodec/ttadsp.c @@ -18,9 +18,10 @@ #include "ttadsp.h" -static void ttafilter_process_dec_c(int32_t *qm, int32_t *dx, int32_t *dl, - int32_t *error, int32_t *in, int32_t shift, - int32_t round) { +static inline void ttafilter_process(int32_t *qm, int32_t *dx, int32_t *dl, + int32_t *error, int32_t *in, int32_t shift, + int32_t round, int enc) +{ if (*error < 0) { qm[0] -= dx[0]; qm[1] -= dx[1]; qm[2] -= dx[2]; qm[3] -= dx[3]; qm[4] -= dx[4]; qm[5] -= dx[5]; qm[6] -= dx[6]; qm[7] -= dx[7]; @@ -40,17 +41,47 @@ static void ttafilter_process_dec_c(int32_t *qm, int32_t *dx, int32_t *dl, dx[6] = ((dl[6] >> 30) | 2) & ~1; dx[7] = ((dl[7] >> 30) | 4) & ~3; - *error = *in; - *in += (round >> shift); + if (!enc) { + *error = *in; + *in += (round >> shift); + } dl[4] = -dl[5]; dl[5] = -dl[6]; dl[6] = *in - dl[7]; dl[7] = *in; dl[5] += dl[6]; dl[4] += dl[5]; + + if (enc) { + *in -= (round >> shift); + *error = *in; + } +} + +#if CONFIG_TTA_DECODER +static void ttafilter_process_dec_c(int32_t *qm, int32_t *dx, int32_t *dl, + int32_t *error, int32_t *in, int32_t shift, + int32_t round) +{ + ttafilter_process(qm, dx, dl, error, in, shift, round, 0); +} +#endif + +#if CONFIG_TTA_ENCODER +static void ttafilter_process_enc_c(int32_t *qm, int32_t *dx, int32_t *dl, + int32_t *error, int32_t *in, int32_t shift, + int32_t round) +{ + ttafilter_process(qm, dx, dl, error, in, shift, round, 1); } +#endif av_cold void ff_ttadsp_init(TTADSPContext *c) { +#if CONFIG_TTA_DECODER c->ttafilter_process_dec = ttafilter_process_dec_c; +#endif +#if CONFIG_TTA_ENCODER + c->ttafilter_process_enc = ttafilter_process_enc_c; +#endif if (ARCH_X86) ff_ttadsp_init_x86(c); diff --git a/libavcodec/ttadsp.h b/libavcodec/ttadsp.h index 56930f1..df73998 100644 --- a/libavcodec/ttadsp.h +++ b/libavcodec/ttadsp.h @@ -26,6 +26,9 @@ typedef struct TTADSPContext { void (*ttafilter_process_dec)(int32_t *qm, int32_t *dx, int32_t *dl, int32_t *error, int32_t *in, int32_t shift, int32_t round); + void (*ttafilter_process_enc)(int32_t *qm, int32_t *dx, int32_t *dl, + int32_t *error, int32_t *in, int32_t shift, + int32_t round); } TTADSPContext; void ff_ttadsp_init(TTADSPContext *c); diff --git a/libavcodec/ttaenc.c b/libavcodec/ttaenc.c index 2f1c8db..5ccf98b 100644 --- a/libavcodec/ttaenc.c +++ b/libavcodec/ttaenc.c @@ -20,6 +20,7 @@ #define BITSTREAM_WRITER_LE #include "ttadata.h" +#include "ttadsp.h" #include "avcodec.h" #include "put_bits.h" #include "internal.h" @@ -29,6 +30,7 @@ typedef struct TTAEncContext { const AVCRC *crc_table; int bps; TTAChannel *ch_ctx; + TTADSPContext dsp; } TTAEncContext; static av_cold int tta_encode_init(AVCodecContext *avctx) @@ -57,38 +59,9 @@ static av_cold int tta_encode_init(AVCodecContext *avctx) if (!s->ch_ctx) return AVERROR(ENOMEM); - return 0; -} - -static inline void ttafilter_process(TTAFilter *c, int32_t *in) -{ - register int32_t *dl = c->dl, *qm = c->qm, *dx = c->dx, sum = c->round; - - if (c->error < 0) { - qm[0] -= dx[0]; qm[1] -= dx[1]; qm[2] -= dx[2]; qm[3] -= dx[3]; - qm[4] -= dx[4]; qm[5] -= dx[5]; qm[6] -= dx[6]; qm[7] -= dx[7]; - } else if (c->error > 0) { - qm[0] += dx[0]; qm[1] += dx[1]; qm[2] += dx[2]; qm[3] += dx[3]; - qm[4] += dx[4]; qm[5] += dx[5]; qm[6] += dx[6]; qm[7] += dx[7]; - } + ff_ttadsp_init(&s->dsp); - sum += dl[0] * qm[0] + dl[1] * qm[1] + dl[2] * qm[2] + dl[3] * qm[3] + - dl[4] * qm[4] + dl[5] * qm[5] + dl[6] * qm[6] + dl[7] * qm[7]; - - dx[0] = dx[1]; dx[1] = dx[2]; dx[2] = dx[3]; dx[3] = dx[4]; - dl[0] = dl[1]; dl[1] = dl[2]; dl[2] = dl[3]; dl[3] = dl[4]; - - dx[4] = ((dl[4] >> 30) | 1); - dx[5] = ((dl[5] >> 30) | 2) & ~1; - dx[6] = ((dl[6] >> 30) | 2) & ~1; - dx[7] = ((dl[7] >> 30) | 4) & ~3; - - dl[4] = -dl[5]; dl[5] = -dl[6]; - dl[6] = *in - dl[7]; dl[7] = *in; - dl[5] += dl[6]; dl[4] += dl[5]; - - *in -= (sum >> c->shift); - c->error = *in; + return 0; } static int32_t get_sample(const AVFrame *frame, int sample, @@ -155,7 +128,8 @@ pkt_alloc: } c->predictor = temp; - ttafilter_process(filter, &value); + s->dsp.ttafilter_process_enc(filter->qm, filter->dx, filter->dl, &filter->error, &value, + filter->shift, filter->round); outval = (value > 0) ? (value << 1) - 1: -value << 1; k = rice->k0; diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index 839b5bc..cc2b3c4 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -61,6 +61,7 @@ OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc_init.o OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp_init.o OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp_init.o OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp_init.o +OBJS-$(CONFIG_TTA_ENCODER) += x86/ttadsp_init.o OBJS-$(CONFIG_V210_DECODER) += x86/v210-init.o OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc_init.o OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp_init.o @@ -160,6 +161,7 @@ YASM-OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc.o YASM-OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp.o YASM-OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o YASM-OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp.o +YASM-OBJS-$(CONFIG_TTA_ENCODER) += x86/ttadsp.o YASM-OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc.o YASM-OBJS-$(CONFIG_V210_DECODER) += x86/v210.o YASM-OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp.o diff --git a/libavcodec/x86/ttadsp.asm b/libavcodec/x86/ttadsp.asm index 8f48949..1c664f2 100644 --- a/libavcodec/x86/ttadsp.asm +++ b/libavcodec/x86/ttadsp.asm @@ -29,9 +29,9 @@ pd_1224: dd 1, 2, 2, 4 SECTION .text -%macro TTA_FILTER 2 +%macro TTA_FILTER 3 INIT_XMM %1 -cglobal ttafilter_process_dec, 5,5,%2, qm, dx, dl, error, in, shift, round +cglobal ttafilter_process_%2, 5,5,%3, qm, dx, dl, error, in, shift, round mova m2, [qmq ] mova m3, [qmq + 0x10] mova m4, [dxq ] @@ -94,13 +94,19 @@ cglobal ttafilter_process_dec, 5,5,%2, qm, dx, dl, error, in, shift, round mova [dlq ], m2 mova [dxq ], m5 mova [dxq + 0x10], m4 - movd m0, [inq] ; filter->error = *in; - movd [errorq], m0 ; - movd m2, shiftm ; *in += (sum >> filter->shift); + movd m2, shiftm ; + movd m0, [inq] psrad m6, m2 ; - paddd m0, m6 ; +%ifidn %2, dec + movd [errorq], m0 ; filter->error = *in; + paddd m0, m6 ; *in += (sum >> filter->shift); movd [inq], m0 ; +%else + psubd m3, m0, m6 ; + movd [inq], m3 ; *in -= (sum >> filter->shift); + movd [errorq], m3 ; filter->error = *in; +%endif psrldq m1, 4 ; pslldq m0, 12 ; filter->dl[4] = -filter->dl[5]; @@ -115,5 +121,7 @@ cglobal ttafilter_process_dec, 5,5,%2, qm, dx, dl, error, in, shift, round RET %endmacro -TTA_FILTER ssse3, 8 -TTA_FILTER sse4, 7 +TTA_FILTER ssse3, dec, 8 +TTA_FILTER sse4, dec, 7 +TTA_FILTER ssse3, enc, 8 +TTA_FILTER sse4, enc, 7 diff --git a/libavcodec/x86/ttadsp_init.c b/libavcodec/x86/ttadsp_init.c index 47dc87f..75c444c 100644 --- a/libavcodec/x86/ttadsp_init.c +++ b/libavcodec/x86/ttadsp_init.c @@ -22,21 +22,34 @@ #include "libavutil/x86/cpu.h" #include "config.h" -void ff_ttafilter_process_dec_ssse3(int32_t *qm, int32_t *dx, int32_t *dl, - int32_t *error, int32_t *in, int32_t shift, - int32_t round); -void ff_ttafilter_process_dec_sse4(int32_t *qm, int32_t *dx, int32_t *dl, - int32_t *error, int32_t *in, int32_t shift, - int32_t round); +#define TTAFILTER_PROCESS(opt) \ +void ff_ttafilter_process_dec_##opt(int32_t *qm, int32_t *dx, int32_t *dl, \ + int32_t *error, int32_t *in, int32_t shift, \ + int32_t round); \ +void ff_ttafilter_process_enc_##opt(int32_t *qm, int32_t *dx, int32_t *dl, \ + int32_t *error, int32_t *in, int32_t shift, \ + int32_t round) + +TTAFILTER_PROCESS(ssse3); +TTAFILTER_PROCESS(sse4); av_cold void ff_ttadsp_init_x86(TTADSPContext *c) { #if HAVE_YASM int cpu_flags = av_get_cpu_flags(); +#if CONFIG_TTA_DECODER if (EXTERNAL_SSSE3(cpu_flags)) c->ttafilter_process_dec = ff_ttafilter_process_dec_ssse3; if (EXTERNAL_SSE4(cpu_flags)) c->ttafilter_process_dec = ff_ttafilter_process_dec_sse4; #endif + +#if CONFIG_TTA_ENCODER + if (EXTERNAL_SSSE3(cpu_flags)) + c->ttafilter_process_enc = ff_ttafilter_process_enc_ssse3; + if (EXTERNAL_SSE4(cpu_flags)) + c->ttafilter_process_enc = ff_ttafilter_process_enc_sse4; +#endif +#endif } -- 2.9.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel