This allows removing a dependency of the dirac decoder (!) on mpegvideoenc.
Signed-off-by: Andreas Rheinhardt <andreas.rheinha...@outlook.com> --- configure | 7 +- libavcodec/Makefile | 1 + libavcodec/diracdec.c | 14 +-- libavcodec/drawedgesdsp.c | 61 ++++++++++ libavcodec/drawedgesdsp.h | 35 ++++++ libavcodec/mpegvideo.h | 2 + libavcodec/mpegvideo_enc.c | 9 +- libavcodec/mpegvideoencdsp.c | 32 ------ libavcodec/mpegvideoencdsp.h | 6 - libavcodec/snowenc.c | 13 ++- libavcodec/x86/Makefile | 1 + libavcodec/x86/drawedgesdsp.c | 157 ++++++++++++++++++++++++++ libavcodec/x86/mpegvideo.c | 2 +- libavcodec/x86/mpegvideoencdsp_init.c | 118 ------------------- 14 files changed, 281 insertions(+), 177 deletions(-) create mode 100644 libavcodec/drawedgesdsp.c create mode 100644 libavcodec/drawedgesdsp.h create mode 100644 libavcodec/x86/drawedgesdsp.c diff --git a/configure b/configure index bea4547e20..edb1ddca33 100755 --- a/configure +++ b/configure @@ -2552,6 +2552,7 @@ CONFIG_EXTRA=" dnn dovi_rpudec dovi_rpuenc + drawedgesdsp dvprofile evcparse exif @@ -2868,7 +2869,7 @@ mpeg_er_select="error_resilience" mpegaudio_select="mpegaudiodsp mpegaudioheader" mpegvideo_select="blockdsp hpeldsp idctdsp videodsp" mpegvideodec_select="h264chroma mpegvideo mpeg_er" -mpegvideoenc_select="aandcttables fdctdsp me_cmp mpegvideo pixblockdsp" +mpegvideoenc_select="aandcttables drawedgesdsp fdctdsp me_cmp mpegvideo pixblockdsp" msmpeg4dec_select="h263_decoder" msmpeg4enc_select="h263_encoder" vc1dsp_select="h264chroma qpeldsp startcode" @@ -2918,7 +2919,7 @@ cook_decoder_select="audiodsp sinewin" cri_decoder_select="mjpeg_decoder" cscd_decoder_suggest="zlib" dds_decoder_select="texturedsp" -dirac_decoder_select="dirac_parse dwt golomb mpegvideoenc qpeldsp videodsp" +dirac_decoder_select="dirac_parse drawedgesdsp dwt golomb qpeldsp videodsp" dnxhd_decoder_select="blockdsp idctdsp" dnxhd_encoder_select="blockdsp fdctdsp idctdsp mpegvideoenc pixblockdsp videodsp" dvvideo_decoder_select="dvprofile idctdsp" @@ -3060,7 +3061,7 @@ shorten_decoder_select="bswapdsp" 
sipr_decoder_select="lsp" smvjpeg_decoder_select="mjpeg_decoder" snow_decoder_select="dwt h264qpel rangecoder videodsp" -snow_encoder_select="dwt h264qpel hpeldsp me_cmp mpegvideoenc rangecoder videodsp" +snow_encoder_select="drawedgesdsp dwt h264qpel hpeldsp me_cmp mpegvideoenc rangecoder videodsp" sonic_decoder_select="golomb rangecoder" sonic_encoder_select="golomb rangecoder" sonic_ls_encoder_select="golomb rangecoder" diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 3e8a44e89c..dff6193bc2 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -89,6 +89,7 @@ OBJS-$(CONFIG_CBS_VP9) += cbs_vp9.o OBJS-$(CONFIG_DEFLATE_WRAPPER) += zlib_wrapper.o OBJS-$(CONFIG_DOVI_RPUDEC) += dovi_rpu.o dovi_rpudec.o OBJS-$(CONFIG_DOVI_RPUENC) += dovi_rpu.o dovi_rpuenc.o +OBJS-$(CONFIG_DRAWEDGESDSP) += drawedgesdsp.o OBJS-$(CONFIG_ERROR_RESILIENCE) += error_resilience.o OBJS-$(CONFIG_EVCPARSE) += evc_parse.o evc_ps.o OBJS-$(CONFIG_EXIF) += exif.o tiff_common.o diff --git a/libavcodec/diracdec.c b/libavcodec/diracdec.c index f1fde0b339..f0df74e131 100644 --- a/libavcodec/diracdec.c +++ b/libavcodec/diracdec.c @@ -34,11 +34,11 @@ #include "get_bits.h" #include "codec_internal.h" #include "decode.h" +#include "drawedgesdsp.h" #include "golomb.h" #include "dirac_arith.h" #include "dirac_vlc.h" #include "mpegpicture.h" -#include "mpegvideoencdsp.h" #include "dirac_dwt.h" #include "dirac.h" #include "diractab.h" @@ -135,7 +135,7 @@ typedef struct DiracSlice { typedef struct DiracContext { AVCodecContext *avctx; - MpegvideoEncDSPContext mpvencdsp; + DrawEdgesDSPContext drawedges; VideoDSPContext vdsp; DiracDSPContext diracdsp; DiracVersionInfo version; @@ -397,7 +397,7 @@ static av_cold int dirac_decode_init(AVCodecContext *avctx) s->thread_buf_size = -1; ff_diracdsp_init(&s->diracdsp); - ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx); + ff_drawedgesdsp_init(&s->drawedges); ff_videodsp_init(&s->vdsp, 8); for (i = 0; i < MAX_FRAMES; i++) { @@ -1836,7 +1836,7 @@ static int 
interpolate_refplane(DiracContext *s, DiracFrame *ref, int plane, int int i, edge = EDGE_WIDTH/2; ref->hpel[plane][0] = ref->avframe->data[plane]; - s->mpvencdsp.draw_edges(ref->hpel[plane][0], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); /* EDGE_TOP | EDGE_BOTTOM values just copied to make it build, this needs to be ensured */ + s->drawedges.draw_edges(ref->hpel[plane][0], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); /* EDGE_TOP | EDGE_BOTTOM values just copied to make it build, this needs to be ensured */ /* no need for hpel if we only have fpel vectors */ if (!s->mv_precision) @@ -1856,9 +1856,9 @@ static int interpolate_refplane(DiracContext *s, DiracFrame *ref, int plane, int s->diracdsp.dirac_hpel_filter(ref->hpel[plane][1], ref->hpel[plane][2], ref->hpel[plane][3], ref->hpel[plane][0], ref->avframe->linesize[plane], width, height); - s->mpvencdsp.draw_edges(ref->hpel[plane][1], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); - s->mpvencdsp.draw_edges(ref->hpel[plane][2], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); - s->mpvencdsp.draw_edges(ref->hpel[plane][3], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); + s->drawedges.draw_edges(ref->hpel[plane][1], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); + s->drawedges.draw_edges(ref->hpel[plane][2], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); + s->drawedges.draw_edges(ref->hpel[plane][3], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); } ref->interpolated[plane] = 1; diff --git a/libavcodec/drawedgesdsp.c b/libavcodec/drawedgesdsp.c new file mode 100644 index 0000000000..3306bb9f6d --- /dev/null +++ b/libavcodec/drawedgesdsp.c @@ -0,0 +1,61 @@ +/* + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <stdint.h> +#include <string.h> + +#include "config.h" + +#include "drawedgesdsp.h" +#include "libavutil/attributes.h" + +/* draw the edges of width 'w' of an image of size width, height */ +// FIXME: Check that this is OK for MPEG-4 interlaced. +static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, + int w, int h, int sides) +{ + uint8_t *ptr = buf, *last_line; + + /* left and right */ + for (int i = 0; i < height; i++) { + memset(ptr - w, ptr[0], w); + memset(ptr + width, ptr[width - 1], w); + ptr += wrap; + } + + /* top and bottom + corners */ + buf -= w; + last_line = buf + (height - 1) * wrap; + if (sides & EDGE_TOP) + for (int i = 0; i < h; i++) + // top + memcpy(buf - (i + 1) * wrap, buf, width + w + w); + if (sides & EDGE_BOTTOM) + for (int i = 0; i < h; i++) + // bottom + memcpy(last_line + (i + 1) * wrap, last_line, width + w + w); +} + +av_cold void ff_drawedgesdsp_init(DrawEdgesDSPContext *c) +{ + c->draw_edges = draw_edges_c; + +#if ARCH_X86 + ff_drawedgesdsp_init_x86(c); +#endif +} diff --git a/libavcodec/drawedgesdsp.h b/libavcodec/drawedgesdsp.h new file mode 100644 index 0000000000..a57275809c --- /dev/null +++ b/libavcodec/drawedgesdsp.h @@ -0,0 +1,35 @@ +/* + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_DRAWEDGESDSP_H +#define AVCODEC_DRAWEDGESDSP_H + +#include <stdint.h> + +#define EDGE_TOP 1 +#define EDGE_BOTTOM 2 + +typedef struct DrawEdgesDSPContext { + void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, + int w, int h, int sides); +} DrawEdgesDSPContext; + +void ff_drawedgesdsp_init(DrawEdgesDSPContext *c); +void ff_drawedgesdsp_init_x86(DrawEdgesDSPContext *c); + +#endif /* AVCODEC_DRAWEDGESDSP_H */ diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h index 215df0fd5b..4635172732 100644 --- a/libavcodec/mpegvideo.h +++ b/libavcodec/mpegvideo.h @@ -29,6 +29,7 @@ #define AVCODEC_MPEGVIDEO_H #include "blockdsp.h" +#include "drawedgesdsp.h" #include "error_resilience.h" #include "fdctdsp.h" #include "get_bits.h" @@ -222,6 +223,7 @@ typedef struct MpegEncContext { HpelDSPContext hdsp; IDCTDSPContext idsp; MECmpContext mecc; + DrawEdgesDSPContext drawedges; MpegvideoEncDSPContext mpvencdsp; PixblockDSPContext pdsp; QpelDSPContext qdsp; diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c index 2a75973ac4..ab14538b33 100644 --- a/libavcodec/mpegvideo_enc.c +++ b/libavcodec/mpegvideo_enc.c @@ -810,6 +810,7 @@ av_cold int ff_mpv_encode_init(AVCodecContext *avctx) 
ff_fdctdsp_init(&s->fdsp, avctx); ff_me_cmp_init(&s->mecc, avctx); ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx); + ff_drawedgesdsp_init(&s->drawedges); ff_pixblockdsp_init(&s->pdsp, avctx); if (!(avctx->stats_out = av_mallocz(256)) || @@ -1224,7 +1225,7 @@ static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg) } } if ((s->width & 15) || (s->height & (vpad-1))) { - s->mpvencdsp.draw_edges(dst, dst_stride, + s->drawedges.draw_edges(dst, dst_stride, w, h, 16 >> h_shift, vpad >> v_shift, @@ -1655,19 +1656,19 @@ static void frame_end(MpegEncContext *s) !s->intra_only) { int hshift = s->chroma_x_shift; int vshift = s->chroma_y_shift; - s->mpvencdsp.draw_edges(s->current_picture.f->data[0], + s->drawedges.draw_edges(s->current_picture.f->data[0], s->current_picture.f->linesize[0], s->h_edge_pos, s->v_edge_pos, EDGE_WIDTH, EDGE_WIDTH, EDGE_TOP | EDGE_BOTTOM); - s->mpvencdsp.draw_edges(s->current_picture.f->data[1], + s->drawedges.draw_edges(s->current_picture.f->data[1], s->current_picture.f->linesize[1], s->h_edge_pos >> hshift, s->v_edge_pos >> vshift, EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift, EDGE_TOP | EDGE_BOTTOM); - s->mpvencdsp.draw_edges(s->current_picture.f->data[2], + s->drawedges.draw_edges(s->current_picture.f->data[2], s->current_picture.f->linesize[2], s->h_edge_pos >> hshift, s->v_edge_pos >> vshift, diff --git a/libavcodec/mpegvideoencdsp.c b/libavcodec/mpegvideoencdsp.c index 997d048663..a6de93456d 100644 --- a/libavcodec/mpegvideoencdsp.c +++ b/libavcodec/mpegvideoencdsp.c @@ -16,9 +16,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include <assert.h> #include <stdint.h> -#include <string.h> #include "config.h" #include "libavutil/avassert.h" @@ -114,34 +112,6 @@ static int pix_norm1_c(const uint8_t *pix, int line_size) return s; } -/* draw the edges of width 'w' of an image of size width, height */ -// FIXME: Check that this is OK for MPEG-4 interlaced. 
-static void draw_edges_8_c(uint8_t *buf, int wrap, int width, int height, - int w, int h, int sides) -{ - uint8_t *ptr = buf, *last_line; - int i; - - /* left and right */ - for (i = 0; i < height; i++) { - memset(ptr - w, ptr[0], w); - memset(ptr + width, ptr[width - 1], w); - ptr += wrap; - } - - /* top and bottom + corners */ - buf -= w; - last_line = buf + (height - 1) * wrap; - if (sides & EDGE_TOP) - for (i = 0; i < h; i++) - // top - memcpy(buf - (i + 1) * wrap, buf, width + w + w); - if (sides & EDGE_BOTTOM) - for (i = 0; i < h; i++) - // bottom - memcpy(last_line + (i + 1) * wrap, last_line, width + w + w); -} - /* 2x2 -> 1x1 */ static void shrink22(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, @@ -243,8 +213,6 @@ av_cold void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c, c->pix_sum = pix_sum_c; c->pix_norm1 = pix_norm1_c; - c->draw_edges = draw_edges_8_c; - #if ARCH_ARM ff_mpegvideoencdsp_init_arm(c, avctx); #elif ARCH_PPC diff --git a/libavcodec/mpegvideoencdsp.h b/libavcodec/mpegvideoencdsp.h index 95084679d9..46b40cf30e 100644 --- a/libavcodec/mpegvideoencdsp.h +++ b/libavcodec/mpegvideoencdsp.h @@ -26,9 +26,6 @@ #define BASIS_SHIFT 16 #define RECON_SHIFT 6 -#define EDGE_TOP 1 -#define EDGE_BOTTOM 2 - typedef struct MpegvideoEncDSPContext { int (*try_8x8basis)(const int16_t rem[64], const int16_t weight[64], const int16_t basis[64], int scale); @@ -39,9 +36,6 @@ typedef struct MpegvideoEncDSPContext { void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); - - void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, - int w, int h, int sides); } MpegvideoEncDSPContext; void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c, diff --git a/libavcodec/snowenc.c b/libavcodec/snowenc.c index 43ca602762..ec708cebcc 100644 --- a/libavcodec/snowenc.c +++ b/libavcodec/snowenc.c @@ -27,6 +27,7 @@ #include "libavutil/pixdesc.h" #include "avcodec.h" #include "codec_internal.h" +#include 
"drawedgesdsp.h" #include "encode.h" #include "internal.h" //For AVCodecInternal.recon_frame #include "me_cmp.h" @@ -46,7 +47,7 @@ typedef struct SnowEncContext { SnowContext com; QpelDSPContext qdsp; - MpegvideoEncDSPContext mpvencdsp; + DrawEdgesDSPContext drawedges; int lambda; int lambda2; @@ -216,7 +217,7 @@ static av_cold int encode_init(AVCodecContext *avctx) mcf(12,12) ff_me_cmp_init(&enc->mecc, avctx); - ff_mpegvideoencdsp_init(&enc->mpvencdsp, avctx); + ff_drawedgesdsp_init(&enc->drawedges); ff_snow_alloc_blocks(s); @@ -1775,7 +1776,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, memcpy(&s->input_picture->data[i][y * s->input_picture->linesize[i]], &pict->data[i][y * pict->linesize[i]], AV_CEIL_RSHIFT(width, hshift)); - enc->mpvencdsp.draw_edges(s->input_picture->data[i], s->input_picture->linesize[i], + enc->drawedges.draw_edges(s->input_picture->data[i], s->input_picture->linesize[i], AV_CEIL_RSHIFT(width, hshift), AV_CEIL_RSHIFT(height, vshift), EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift, EDGE_TOP | EDGE_BOTTOM); @@ -1815,14 +1816,14 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, int w = s->avctx->width; int h = s->avctx->height; - enc->mpvencdsp.draw_edges(s->current_picture->data[0], + enc->drawedges.draw_edges(s->current_picture->data[0], s->current_picture->linesize[0], w , h , EDGE_WIDTH , EDGE_WIDTH , EDGE_TOP | EDGE_BOTTOM); if (s->current_picture->data[2]) { - enc->mpvencdsp.draw_edges(s->current_picture->data[1], + enc->drawedges.draw_edges(s->current_picture->data[1], s->current_picture->linesize[1], w>>s->chroma_h_shift, h>>s->chroma_v_shift, EDGE_WIDTH>>s->chroma_h_shift, EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM); - enc->mpvencdsp.draw_edges(s->current_picture->data[2], + enc->drawedges.draw_edges(s->current_picture->data[2], s->current_picture->linesize[2], w>>s->chroma_h_shift, h>>s->chroma_v_shift, EDGE_WIDTH>>s->chroma_h_shift, EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM); } 
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index 331183f450..2b6c6659fd 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -7,6 +7,7 @@ OBJS-$(CONFIG_BLOCKDSP) += x86/blockdsp_init.o OBJS-$(CONFIG_BSWAPDSP) += x86/bswapdsp_init.o OBJS-$(CONFIG_DIRAC_DECODER) += x86/diracdsp_init.o \ x86/dirac_dwt_init.o +OBJS-$(CONFIG_DRAWEDGESDSP) += x86/drawedgesdsp.o OBJS-$(CONFIG_FDCTDSP) += x86/fdctdsp_init.o OBJS-$(CONFIG_FMTCONVERT) += x86/fmtconvert_init.o OBJS-$(CONFIG_H263DSP) += x86/h263dsp_init.o diff --git a/libavcodec/x86/drawedgesdsp.c b/libavcodec/x86/drawedgesdsp.c new file mode 100644 index 0000000000..1d059b3806 --- /dev/null +++ b/libavcodec/x86/drawedgesdsp.c @@ -0,0 +1,157 @@ +/* + * draw_edges by Michael Niedermayer <michae...@gmx.at> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <stdint.h> + +#include "config.h" + +#include "libavcodec/drawedgesdsp.h" +#include "libavutil/attributes.h" +#include "libavutil/avassert.h" +#include "libavutil/cpu.h" +#include "libavutil/x86/asm.h" +#include "libavutil/x86/cpu.h" + +#if HAVE_INLINE_ASM + +/* Draw the edges of width 'w' of an image of size width, height + * this MMX version can only handle w == 8 || w == 16. 
*/ +static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, + int w, int h, int sides) +{ + uint8_t *ptr, *last_line; + + last_line = buf + (height - 1) * wrap; + /* left and right */ + ptr = buf; + if (w == 8) { + __asm__ volatile ( + "1: \n\t" + "movd (%0), %%mm0 \n\t" + "punpcklbw %%mm0, %%mm0 \n\t" + "punpcklwd %%mm0, %%mm0 \n\t" + "punpckldq %%mm0, %%mm0 \n\t" + "movq %%mm0, -8(%0) \n\t" + "movq -8(%0, %2), %%mm1 \n\t" + "punpckhbw %%mm1, %%mm1 \n\t" + "punpckhwd %%mm1, %%mm1 \n\t" + "punpckhdq %%mm1, %%mm1 \n\t" + "movq %%mm1, (%0, %2) \n\t" + "add %1, %0 \n\t" + "cmp %3, %0 \n\t" + "jb 1b \n\t" + : "+r" (ptr) + : "r" ((x86_reg) wrap), "r" ((x86_reg) width), + "r" (ptr + wrap * height)); + } else if (w == 16) { + __asm__ volatile ( + "1: \n\t" + "movd (%0), %%mm0 \n\t" + "punpcklbw %%mm0, %%mm0 \n\t" + "punpcklwd %%mm0, %%mm0 \n\t" + "punpckldq %%mm0, %%mm0 \n\t" + "movq %%mm0, -8(%0) \n\t" + "movq %%mm0, -16(%0) \n\t" + "movq -8(%0, %2), %%mm1 \n\t" + "punpckhbw %%mm1, %%mm1 \n\t" + "punpckhwd %%mm1, %%mm1 \n\t" + "punpckhdq %%mm1, %%mm1 \n\t" + "movq %%mm1, (%0, %2) \n\t" + "movq %%mm1, 8(%0, %2) \n\t" + "add %1, %0 \n\t" + "cmp %3, %0 \n\t" + "jb 1b \n\t" + : "+r"(ptr) + : "r"((x86_reg)wrap), "r"((x86_reg)width), "r"(ptr + wrap * height) + ); + } else { + av_assert1(w == 4); + __asm__ volatile ( + "1: \n\t" + "movd (%0), %%mm0 \n\t" + "punpcklbw %%mm0, %%mm0 \n\t" + "punpcklwd %%mm0, %%mm0 \n\t" + "movd %%mm0, -4(%0) \n\t" + "movd -4(%0, %2), %%mm1 \n\t" + "punpcklbw %%mm1, %%mm1 \n\t" + "punpckhwd %%mm1, %%mm1 \n\t" + "punpckhdq %%mm1, %%mm1 \n\t" + "movd %%mm1, (%0, %2) \n\t" + "add %1, %0 \n\t" + "cmp %3, %0 \n\t" + "jb 1b \n\t" + : "+r" (ptr) + : "r" ((x86_reg) wrap), "r" ((x86_reg) width), + "r" (ptr + wrap * height)); + } + + /* top and bottom (and hopefully also the corners) */ + if (sides & EDGE_TOP) { + for (int i = 0; i < h; i += 4) { + ptr = buf - (i + 1) * wrap - w; + __asm__ volatile ( + "1: \n\t" + "movq (%1, %0), %%mm0 
\n\t" + "movq %%mm0, (%0) \n\t" + "movq %%mm0, (%0, %2) \n\t" + "movq %%mm0, (%0, %2, 2) \n\t" + "movq %%mm0, (%0, %3) \n\t" + "add $8, %0 \n\t" + "cmp %4, %0 \n\t" + "jb 1b \n\t" + : "+r" (ptr) + : "r" ((x86_reg) buf - (x86_reg) ptr - w), + "r" ((x86_reg) - wrap), "r" ((x86_reg) - wrap * 3), + "r" (ptr + width + 2 * w)); + } + } + + if (sides & EDGE_BOTTOM) { + for (int i = 0; i < h; i += 4) { + ptr = last_line + (i + 1) * wrap - w; + __asm__ volatile ( + "1: \n\t" + "movq (%1, %0), %%mm0 \n\t" + "movq %%mm0, (%0) \n\t" + "movq %%mm0, (%0, %2) \n\t" + "movq %%mm0, (%0, %2, 2) \n\t" + "movq %%mm0, (%0, %3) \n\t" + "add $8, %0 \n\t" + "cmp %4, %0 \n\t" + "jb 1b \n\t" + : "+r" (ptr) + : "r" ((x86_reg) last_line - (x86_reg) ptr - w), + "r" ((x86_reg) wrap), "r" ((x86_reg) wrap * 3), + "r" (ptr + width + 2 * w)); + } + } +} + +#endif + +av_cold void ff_drawedgesdsp_init_x86(DrawEdgesDSPContext *c) +{ +#if HAVE_INLINE_ASM + int cpu_flags = av_get_cpu_flags(); + + if (INLINE_MMX(cpu_flags)) + c->draw_edges = draw_edges_mmx; +#endif +} diff --git a/libavcodec/x86/mpegvideo.c b/libavcodec/x86/mpegvideo.c index 73967cafda..a4ab2bb308 100644 --- a/libavcodec/x86/mpegvideo.c +++ b/libavcodec/x86/mpegvideo.c @@ -1,6 +1,6 @@ /* * Optimized for ia32 CPUs by Nick Kurshev <nickol...@mail.ru> - * H.263, MPEG-1, MPEG-2 dequantizer & draw_edges by Michael Niedermayer <michae...@gmx.at> + * H.263, MPEG-1, MPEG-2 dequantizer by Michael Niedermayer <michae...@gmx.at> * * This file is part of FFmpeg. 
* diff --git a/libavcodec/x86/mpegvideoencdsp_init.c b/libavcodec/x86/mpegvideoencdsp_init.c index 9fa7ee4824..c816ca2ee7 100644 --- a/libavcodec/x86/mpegvideoencdsp_init.c +++ b/libavcodec/x86/mpegvideoencdsp_init.c @@ -17,7 +17,6 @@ */ #include "libavutil/attributes.h" -#include "libavutil/avassert.h" #include "libavutil/cpu.h" #include "libavutil/x86/cpu.h" #include "libavcodec/avcodec.h" @@ -94,121 +93,6 @@ int ff_pix_norm1_sse2(const uint8_t *pix, int line_size); #undef PMULHRW #undef PHADDD #endif /* HAVE_SSSE3_INLINE */ - -/* Draw the edges of width 'w' of an image of size width, height - * this MMX version can only handle w == 8 || w == 16. */ -static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, - int w, int h, int sides) -{ - uint8_t *ptr, *last_line; - int i; - - last_line = buf + (height - 1) * wrap; - /* left and right */ - ptr = buf; - if (w == 8) { - __asm__ volatile ( - "1: \n\t" - "movd (%0), %%mm0 \n\t" - "punpcklbw %%mm0, %%mm0 \n\t" - "punpcklwd %%mm0, %%mm0 \n\t" - "punpckldq %%mm0, %%mm0 \n\t" - "movq %%mm0, -8(%0) \n\t" - "movq -8(%0, %2), %%mm1 \n\t" - "punpckhbw %%mm1, %%mm1 \n\t" - "punpckhwd %%mm1, %%mm1 \n\t" - "punpckhdq %%mm1, %%mm1 \n\t" - "movq %%mm1, (%0, %2) \n\t" - "add %1, %0 \n\t" - "cmp %3, %0 \n\t" - "jb 1b \n\t" - : "+r" (ptr) - : "r" ((x86_reg) wrap), "r" ((x86_reg) width), - "r" (ptr + wrap * height)); - } else if (w == 16) { - __asm__ volatile ( - "1: \n\t" - "movd (%0), %%mm0 \n\t" - "punpcklbw %%mm0, %%mm0 \n\t" - "punpcklwd %%mm0, %%mm0 \n\t" - "punpckldq %%mm0, %%mm0 \n\t" - "movq %%mm0, -8(%0) \n\t" - "movq %%mm0, -16(%0) \n\t" - "movq -8(%0, %2), %%mm1 \n\t" - "punpckhbw %%mm1, %%mm1 \n\t" - "punpckhwd %%mm1, %%mm1 \n\t" - "punpckhdq %%mm1, %%mm1 \n\t" - "movq %%mm1, (%0, %2) \n\t" - "movq %%mm1, 8(%0, %2) \n\t" - "add %1, %0 \n\t" - "cmp %3, %0 \n\t" - "jb 1b \n\t" - : "+r"(ptr) - : "r"((x86_reg)wrap), "r"((x86_reg)width), "r"(ptr + wrap * height) - ); - } else { - av_assert1(w == 4); - __asm__ 
volatile ( - "1: \n\t" - "movd (%0), %%mm0 \n\t" - "punpcklbw %%mm0, %%mm0 \n\t" - "punpcklwd %%mm0, %%mm0 \n\t" - "movd %%mm0, -4(%0) \n\t" - "movd -4(%0, %2), %%mm1 \n\t" - "punpcklbw %%mm1, %%mm1 \n\t" - "punpckhwd %%mm1, %%mm1 \n\t" - "punpckhdq %%mm1, %%mm1 \n\t" - "movd %%mm1, (%0, %2) \n\t" - "add %1, %0 \n\t" - "cmp %3, %0 \n\t" - "jb 1b \n\t" - : "+r" (ptr) - : "r" ((x86_reg) wrap), "r" ((x86_reg) width), - "r" (ptr + wrap * height)); - } - - /* top and bottom (and hopefully also the corners) */ - if (sides & EDGE_TOP) { - for (i = 0; i < h; i += 4) { - ptr = buf - (i + 1) * wrap - w; - __asm__ volatile ( - "1: \n\t" - "movq (%1, %0), %%mm0 \n\t" - "movq %%mm0, (%0) \n\t" - "movq %%mm0, (%0, %2) \n\t" - "movq %%mm0, (%0, %2, 2) \n\t" - "movq %%mm0, (%0, %3) \n\t" - "add $8, %0 \n\t" - "cmp %4, %0 \n\t" - "jb 1b \n\t" - : "+r" (ptr) - : "r" ((x86_reg) buf - (x86_reg) ptr - w), - "r" ((x86_reg) - wrap), "r" ((x86_reg) - wrap * 3), - "r" (ptr + width + 2 * w)); - } - } - - if (sides & EDGE_BOTTOM) { - for (i = 0; i < h; i += 4) { - ptr = last_line + (i + 1) * wrap - w; - __asm__ volatile ( - "1: \n\t" - "movq (%1, %0), %%mm0 \n\t" - "movq %%mm0, (%0) \n\t" - "movq %%mm0, (%0, %2) \n\t" - "movq %%mm0, (%0, %2, 2) \n\t" - "movq %%mm0, (%0, %3) \n\t" - "add $8, %0 \n\t" - "cmp %4, %0 \n\t" - "jb 1b \n\t" - : "+r" (ptr) - : "r" ((x86_reg) last_line - (x86_reg) ptr - w), - "r" ((x86_reg) wrap), "r" ((x86_reg) wrap * 3), - "r" (ptr + width + 2 * w)); - } - } -} - #endif /* HAVE_INLINE_ASM */ av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c, @@ -232,8 +116,6 @@ av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c, c->try_8x8basis = try_8x8basis_mmx; } c->add_8x8basis = add_8x8basis_mmx; - - c->draw_edges = draw_edges_mmx; } if (INLINE_AMD3DNOW(cpu_flags)) { -- 2.40.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit 
link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".