draw_edges is also used by some decoders (Dirac, Snow), so move it from mpegvideoencdsp to videodsp. Signed-off-by: James Almer <jamr...@gmail.com> --- configure | 4 +- libavcodec/diracdec.c | 12 ++-- libavcodec/mpegvideo_enc.c | 8 +-- libavcodec/mpegvideoencdsp.c | 30 -------- libavcodec/mpegvideoencdsp.h | 6 -- libavcodec/snow.c | 7 +- libavcodec/snow.h | 1 - libavcodec/snowenc.c | 2 +- libavcodec/videodsp.c | 30 ++++++++ libavcodec/videodsp.h | 6 ++ libavcodec/x86/mpegvideoencdsp_init.c | 118 -------------------------------- libavcodec/x86/videodsp_init.c | 124 +++++++++++++++++++++++++++++++++- 12 files changed, 175 insertions(+), 173 deletions(-)
diff --git a/configure b/configure index 4ed43a0..47acef5 100755 --- a/configure +++ b/configure @@ -2048,7 +2048,7 @@ cook_decoder_select="audiodsp mdct sinewin" cscd_decoder_select="lzo" cscd_decoder_suggest="zlib" dca_decoder_select="mdct" -dirac_decoder_select="dsputil dwt golomb videodsp mpegvideoenc" +dirac_decoder_select="dwt golomb videodsp qpeldsp" dnxhd_decoder_select="blockdsp idctdsp" dnxhd_encoder_select="aandcttables blockdsp fdctdsp idctdsp mpegvideoenc pixblockdsp" dvvideo_decoder_select="dvprofile idctdsp" @@ -2157,7 +2157,7 @@ rv30_decoder_select="error_resilience golomb h264chroma h264pred h264qpel mpeg_e rv40_decoder_select="error_resilience golomb h264chroma h264pred h264qpel mpeg_er mpegvideo videodsp" shorten_decoder_select="golomb" sipr_decoder_select="lsp" -snow_decoder_select="dsputil dwt h264qpel hpeldsp rangecoder mpegvideoenc" +snow_decoder_select="dsputil dwt h264qpel hpeldsp rangecoder videodsp" snow_encoder_select="aandcttables dsputil dwt h264qpel hpeldsp mpegvideoenc rangecoder" sonic_decoder_select="golomb rangecoder" sonic_encoder_select="golomb rangecoder" diff --git a/libavcodec/diracdec.c b/libavcodec/diracdec.c index a18c867..2d79aeb 100644 --- a/libavcodec/diracdec.c +++ b/libavcodec/diracdec.c @@ -137,7 +137,7 @@ typedef struct Plane { typedef struct DiracContext { AVCodecContext *avctx; - MpegvideoEncDSPContext mpvencdsp; + VideoDSPContext vdsp; DiracDSPContext diracdsp; GetBitContext gb; dirac_source_params source; @@ -424,7 +424,7 @@ static av_cold int dirac_decode_init(AVCodecContext *avctx) s->frame_number = -1; ff_diracdsp_init(&s->diracdsp); - ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx); + ff_videodsp_init(&s->vdsp, 8); for (i = 0; i < MAX_FRAMES; i++) { s->all_frames[i].avframe = av_frame_alloc(); @@ -1557,7 +1557,7 @@ static void interpolate_refplane(DiracContext *s, DiracFrame *ref, int plane, in int i, edge = EDGE_WIDTH/2; ref->hpel[plane][0] = ref->avframe->data[plane]; - 
s->mpvencdsp.draw_edges(ref->hpel[plane][0], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); /* EDGE_TOP | EDGE_BOTTOM values just copied to make it build, this needs to be ensured */ + s->vdsp.draw_edges(ref->hpel[plane][0], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); /* EDGE_TOP | EDGE_BOTTOM values just copied to make it build, this needs to be ensured */ /* no need for hpel if we only have fpel vectors */ if (!s->mv_precision) @@ -1574,9 +1574,9 @@ static void interpolate_refplane(DiracContext *s, DiracFrame *ref, int plane, in s->diracdsp.dirac_hpel_filter(ref->hpel[plane][1], ref->hpel[plane][2], ref->hpel[plane][3], ref->hpel[plane][0], ref->avframe->linesize[plane], width, height); - s->mpvencdsp.draw_edges(ref->hpel[plane][1], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); - s->mpvencdsp.draw_edges(ref->hpel[plane][2], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); - s->mpvencdsp.draw_edges(ref->hpel[plane][3], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); + s->vdsp.draw_edges(ref->hpel[plane][1], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); + s->vdsp.draw_edges(ref->hpel[plane][2], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); + s->vdsp.draw_edges(ref->hpel[plane][3], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); } ref->interpolated[plane] = 1; } diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c index a4786b4..289fcd4 100644 --- a/libavcodec/mpegvideo_enc.c +++ b/libavcodec/mpegvideo_enc.c @@ -1165,7 +1165,7 @@ static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg) } } if ((s->width & 15) || (s->height & (vpad-1))) { - s->mpvencdsp.draw_edges(dst, dst_stride, + s->vdsp.draw_edges(dst, dst_stride, w, h, 16>>h_shift, vpad>>v_shift, 
@@ -1549,19 +1549,19 @@ static void frame_end(MpegEncContext *s) const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt); int hshift = desc->log2_chroma_w; int vshift = desc->log2_chroma_h; - s->mpvencdsp.draw_edges(s->current_picture.f->data[0], + s->vdsp.draw_edges(s->current_picture.f->data[0], s->current_picture.f->linesize[0], s->h_edge_pos, s->v_edge_pos, EDGE_WIDTH, EDGE_WIDTH, EDGE_TOP | EDGE_BOTTOM); - s->mpvencdsp.draw_edges(s->current_picture.f->data[1], + s->vdsp.draw_edges(s->current_picture.f->data[1], s->current_picture.f->linesize[1], s->h_edge_pos >> hshift, s->v_edge_pos >> vshift, EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift, EDGE_TOP | EDGE_BOTTOM); - s->mpvencdsp.draw_edges(s->current_picture.f->data[2], + s->vdsp.draw_edges(s->current_picture.f->data[2], s->current_picture.f->linesize[2], s->h_edge_pos >> hshift, s->v_edge_pos >> vshift, diff --git a/libavcodec/mpegvideoencdsp.c b/libavcodec/mpegvideoencdsp.c index 10ad369..373379c 100644 --- a/libavcodec/mpegvideoencdsp.c +++ b/libavcodec/mpegvideoencdsp.c @@ -126,34 +126,6 @@ static int pix_norm1_c(uint8_t *pix, int line_size) return s; } -/* draw the edges of width 'w' of an image of size width, height */ -// FIXME: Check that this is OK for MPEG-4 interlaced. 
-static void draw_edges_8_c(uint8_t *buf, int wrap, int width, int height, - int w, int h, int sides) -{ - uint8_t *ptr = buf, *last_line; - int i; - - /* left and right */ - for (i = 0; i < height; i++) { - memset(ptr - w, ptr[0], w); - memset(ptr + width, ptr[width - 1], w); - ptr += wrap; - } - - /* top and bottom + corners */ - buf -= w; - last_line = buf + (height - 1) * wrap; - if (sides & EDGE_TOP) - for (i = 0; i < h; i++) - // top - memcpy(buf - (i + 1) * wrap, buf, width + w + w); - if (sides & EDGE_BOTTOM) - for (i = 0; i < h; i++) - // bottom - memcpy(last_line + (i + 1) * wrap, last_line, width + w + w); -} - av_cold void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c, AVCodecContext *avctx) { @@ -168,8 +140,6 @@ av_cold void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c, c->pix_sum = pix_sum_c; c->pix_norm1 = pix_norm1_c; - c->draw_edges = draw_edges_8_c; - if (ARCH_ARM) ff_mpegvideoencdsp_init_arm(c, avctx); if (ARCH_PPC) diff --git a/libavcodec/mpegvideoencdsp.h b/libavcodec/mpegvideoencdsp.h index e12f4c6..81e3fe6 100644 --- a/libavcodec/mpegvideoencdsp.h +++ b/libavcodec/mpegvideoencdsp.h @@ -26,9 +26,6 @@ #define BASIS_SHIFT 16 #define RECON_SHIFT 6 -#define EDGE_TOP 1 -#define EDGE_BOTTOM 2 - typedef struct MpegvideoEncDSPContext { int (*try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale); @@ -39,9 +36,6 @@ typedef struct MpegvideoEncDSPContext { void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); - - void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, - int w, int h, int sides); } MpegvideoEncDSPContext; void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c, diff --git a/libavcodec/snow.c b/libavcodec/snow.c index 711d1a4..57446dd 100644 --- a/libavcodec/snow.c +++ b/libavcodec/snow.c @@ -433,7 +433,6 @@ av_cold int ff_snow_common_init(AVCodecContext *avctx){ ff_videodsp_init(&s->vdsp, 8); ff_dwt_init(&s->dwt); ff_h264qpel_init(&s->h264qpel, 
8); - ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx); #define mcf(dx,dy)\ s->qdsp.put_qpel_pixels_tab [0][dy+dx/4]=\ @@ -643,14 +642,14 @@ int ff_snow_frame_start(SnowContext *s){ int h= s->avctx->height; if (s->current_picture->data[0] && !(s->avctx->flags&CODEC_FLAG_EMU_EDGE)) { - s->mpvencdsp.draw_edges(s->current_picture->data[0], + s->vdsp.draw_edges(s->current_picture->data[0], s->current_picture->linesize[0], w , h , EDGE_WIDTH , EDGE_WIDTH , EDGE_TOP | EDGE_BOTTOM); if (s->current_picture->data[2]) { - s->mpvencdsp.draw_edges(s->current_picture->data[1], + s->vdsp.draw_edges(s->current_picture->data[1], s->current_picture->linesize[1], w>>s->chroma_h_shift, h>>s->chroma_v_shift, EDGE_WIDTH>>s->chroma_h_shift, EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM); - s->mpvencdsp.draw_edges(s->current_picture->data[2], + s->vdsp.draw_edges(s->current_picture->data[2], s->current_picture->linesize[2], w>>s->chroma_h_shift, h>>s->chroma_v_shift, EDGE_WIDTH>>s->chroma_h_shift, EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM); } diff --git a/libavcodec/snow.h b/libavcodec/snow.h index 2cda5b3..a9c8518 100644 --- a/libavcodec/snow.h +++ b/libavcodec/snow.h @@ -115,7 +115,6 @@ typedef struct SnowContext{ QpelDSPContext qdsp; VideoDSPContext vdsp; H264QpelContext h264qpel; - MpegvideoEncDSPContext mpvencdsp; SnowDWTContext dwt; AVFrame *new_picture; AVFrame *input_picture; ///< new_picture with the internal linesizes diff --git a/libavcodec/snowenc.c b/libavcodec/snowenc.c index cb83821..eefbf08 100644 --- a/libavcodec/snowenc.c +++ b/libavcodec/snowenc.c @@ -1568,7 +1568,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, memcpy(&s->input_picture->data[i][y * s->input_picture->linesize[i]], &pict->data[i][y * pict->linesize[i]], width>>hshift); - s->mpvencdsp.draw_edges(s->input_picture->data[i], s->input_picture->linesize[i], + s->vdsp.draw_edges(s->input_picture->data[i], s->input_picture->linesize[i], width >> hshift, height >> vshift, 
EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift, EDGE_TOP | EDGE_BOTTOM); diff --git a/libavcodec/videodsp.c b/libavcodec/videodsp.c index ba618a7..2b91a5b 100644 --- a/libavcodec/videodsp.c +++ b/libavcodec/videodsp.c @@ -31,6 +31,35 @@ #include "videodsp_template.c" #undef BIT_DEPTH + +/* draw the edges of width 'w' of an image of size width, height */ +// FIXME: Check that this is OK for MPEG-4 interlaced. +static void draw_edges_8_c(uint8_t *buf, int wrap, int width, int height, + int w, int h, int sides) +{ + uint8_t *ptr = buf, *last_line; + int i; + + /* left and right */ + for (i = 0; i < height; i++) { + memset(ptr - w, ptr[0], w); + memset(ptr + width, ptr[width - 1], w); + ptr += wrap; + } + + /* top and bottom + corners */ + buf -= w; + last_line = buf + (height - 1) * wrap; + if (sides & EDGE_TOP) + for (i = 0; i < h; i++) + // top + memcpy(buf - (i + 1) * wrap, buf, width + w + w); + if (sides & EDGE_BOTTOM) + for (i = 0; i < h; i++) + // bottom + memcpy(last_line + (i + 1) * wrap, last_line, width + w + w); +} + static void just_return(uint8_t *buf, ptrdiff_t stride, int h) { } @@ -43,6 +72,7 @@ av_cold void ff_videodsp_init(VideoDSPContext *ctx, int bpc) } else { ctx->emulated_edge_mc = ff_emulated_edge_mc_16; } + ctx->draw_edges = draw_edges_8_c; if (ARCH_AARCH64) ff_videodsp_init_aarch64(ctx, bpc); diff --git a/libavcodec/videodsp.h b/libavcodec/videodsp.h index fc01a31..a6567fc 100644 --- a/libavcodec/videodsp.h +++ b/libavcodec/videodsp.h @@ -29,6 +29,9 @@ #include <stddef.h> #include <stdint.h> +#define EDGE_TOP 1 +#define EDGE_BOTTOM 2 + #define EMULATED_EDGE(depth) \ void ff_emulated_edge_mc_ ## depth(uint8_t *dst, const uint8_t *src, \ ptrdiff_t dst_stride, ptrdiff_t src_stride, \ @@ -74,6 +77,9 @@ typedef struct VideoDSPContext { * @param h number of lines to prefetch */ void (*prefetch)(uint8_t *buf, ptrdiff_t stride, int h); + + void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, + int w, int h, int sides); } VideoDSPContext; 
void ff_videodsp_init(VideoDSPContext *ctx, int bpc); diff --git a/libavcodec/x86/mpegvideoencdsp_init.c b/libavcodec/x86/mpegvideoencdsp_init.c index d91b902..f9090f6 100644 --- a/libavcodec/x86/mpegvideoencdsp_init.c +++ b/libavcodec/x86/mpegvideoencdsp_init.c @@ -97,120 +97,6 @@ int ff_pix_norm1_sse2(uint8_t *pix, int line_size); #undef PHADDD #endif /* HAVE_SSSE3_INLINE */ -/* Draw the edges of width 'w' of an image of size width, height - * this MMX version can only handle w == 8 || w == 16. */ -static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, - int w, int h, int sides) -{ - uint8_t *ptr, *last_line; - int i; - - last_line = buf + (height - 1) * wrap; - /* left and right */ - ptr = buf; - if (w == 8) { - __asm__ volatile ( - "1: \n\t" - "movd (%0), %%mm0 \n\t" - "punpcklbw %%mm0, %%mm0 \n\t" - "punpcklwd %%mm0, %%mm0 \n\t" - "punpckldq %%mm0, %%mm0 \n\t" - "movq %%mm0, -8(%0) \n\t" - "movq -8(%0, %2), %%mm1 \n\t" - "punpckhbw %%mm1, %%mm1 \n\t" - "punpckhwd %%mm1, %%mm1 \n\t" - "punpckhdq %%mm1, %%mm1 \n\t" - "movq %%mm1, (%0, %2) \n\t" - "add %1, %0 \n\t" - "cmp %3, %0 \n\t" - "jb 1b \n\t" - : "+r" (ptr) - : "r" ((x86_reg) wrap), "r" ((x86_reg) width), - "r" (ptr + wrap * height)); - } else if (w == 16) { - __asm__ volatile ( - "1: \n\t" - "movd (%0), %%mm0 \n\t" - "punpcklbw %%mm0, %%mm0 \n\t" - "punpcklwd %%mm0, %%mm0 \n\t" - "punpckldq %%mm0, %%mm0 \n\t" - "movq %%mm0, -8(%0) \n\t" - "movq %%mm0, -16(%0) \n\t" - "movq -8(%0, %2), %%mm1 \n\t" - "punpckhbw %%mm1, %%mm1 \n\t" - "punpckhwd %%mm1, %%mm1 \n\t" - "punpckhdq %%mm1, %%mm1 \n\t" - "movq %%mm1, (%0, %2) \n\t" - "movq %%mm1, 8(%0, %2) \n\t" - "add %1, %0 \n\t" - "cmp %3, %0 \n\t" - "jb 1b \n\t" - : "+r"(ptr) - : "r"((x86_reg)wrap), "r"((x86_reg)width), "r"(ptr + wrap * height) - ); - } else { - av_assert1(w == 4); - __asm__ volatile ( - "1: \n\t" - "movd (%0), %%mm0 \n\t" - "punpcklbw %%mm0, %%mm0 \n\t" - "punpcklwd %%mm0, %%mm0 \n\t" - "movd %%mm0, -4(%0) \n\t" - "movd -4(%0, 
%2), %%mm1 \n\t" - "punpcklbw %%mm1, %%mm1 \n\t" - "punpckhwd %%mm1, %%mm1 \n\t" - "punpckhdq %%mm1, %%mm1 \n\t" - "movd %%mm1, (%0, %2) \n\t" - "add %1, %0 \n\t" - "cmp %3, %0 \n\t" - "jb 1b \n\t" - : "+r" (ptr) - : "r" ((x86_reg) wrap), "r" ((x86_reg) width), - "r" (ptr + wrap * height)); - } - - /* top and bottom (and hopefully also the corners) */ - if (sides & EDGE_TOP) { - for (i = 0; i < h; i += 4) { - ptr = buf - (i + 1) * wrap - w; - __asm__ volatile ( - "1: \n\t" - "movq (%1, %0), %%mm0 \n\t" - "movq %%mm0, (%0) \n\t" - "movq %%mm0, (%0, %2) \n\t" - "movq %%mm0, (%0, %2, 2) \n\t" - "movq %%mm0, (%0, %3) \n\t" - "add $8, %0 \n\t" - "cmp %4, %0 \n\t" - "jb 1b \n\t" - : "+r" (ptr) - : "r" ((x86_reg) buf - (x86_reg) ptr - w), - "r" ((x86_reg) - wrap), "r" ((x86_reg) - wrap * 3), - "r" (ptr + width + 2 * w)); - } - } - - if (sides & EDGE_BOTTOM) { - for (i = 0; i < h; i += 4) { - ptr = last_line + (i + 1) * wrap - w; - __asm__ volatile ( - "1: \n\t" - "movq (%1, %0), %%mm0 \n\t" - "movq %%mm0, (%0) \n\t" - "movq %%mm0, (%0, %2) \n\t" - "movq %%mm0, (%0, %2, 2) \n\t" - "movq %%mm0, (%0, %3) \n\t" - "add $8, %0 \n\t" - "cmp %4, %0 \n\t" - "jb 1b \n\t" - : "+r" (ptr) - : "r" ((x86_reg) last_line - (x86_reg) ptr - w), - "r" ((x86_reg) wrap), "r" ((x86_reg) wrap * 3), - "r" (ptr + width + 2 * w)); - } - } -} - #endif /* HAVE_INLINE_ASM */ av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c, @@ -239,10 +125,6 @@ av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c, c->try_8x8basis = try_8x8basis_mmx; } c->add_8x8basis = add_8x8basis_mmx; - - if (avctx->bits_per_raw_sample <= 8) { - c->draw_edges = draw_edges_mmx; - } } if (INLINE_AMD3DNOW(cpu_flags)) { diff --git a/libavcodec/x86/videodsp_init.c b/libavcodec/x86/videodsp_init.c index 3218abd..dab6ec2 100644 --- a/libavcodec/x86/videodsp_init.c +++ b/libavcodec/x86/videodsp_init.c @@ -240,14 +240,136 @@ static av_noinline void emulated_edge_mc_sse2(uint8_t *buf, const uint8_t *src, } 
#endif /* HAVE_YASM */ +#if HAVE_INLINE_ASM +/* Draw the edges of width 'w' of an image of size width, height + * this MMX version can only handle w == 8 || w == 16. */ +static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, + int w, int h, int sides) +{ + uint8_t *ptr, *last_line; + int i; + + last_line = buf + (height - 1) * wrap; + /* left and right */ + ptr = buf; + if (w == 8) { + __asm__ volatile ( + "1: \n\t" + "movd (%0), %%mm0 \n\t" + "punpcklbw %%mm0, %%mm0 \n\t" + "punpcklwd %%mm0, %%mm0 \n\t" + "punpckldq %%mm0, %%mm0 \n\t" + "movq %%mm0, -8(%0) \n\t" + "movq -8(%0, %2), %%mm1 \n\t" + "punpckhbw %%mm1, %%mm1 \n\t" + "punpckhwd %%mm1, %%mm1 \n\t" + "punpckhdq %%mm1, %%mm1 \n\t" + "movq %%mm1, (%0, %2) \n\t" + "add %1, %0 \n\t" + "cmp %3, %0 \n\t" + "jb 1b \n\t" + : "+r" (ptr) + : "r" ((x86_reg) wrap), "r" ((x86_reg) width), + "r" (ptr + wrap * height)); + } else if (w == 16) { + __asm__ volatile ( + "1: \n\t" + "movd (%0), %%mm0 \n\t" + "punpcklbw %%mm0, %%mm0 \n\t" + "punpcklwd %%mm0, %%mm0 \n\t" + "punpckldq %%mm0, %%mm0 \n\t" + "movq %%mm0, -8(%0) \n\t" + "movq %%mm0, -16(%0) \n\t" + "movq -8(%0, %2), %%mm1 \n\t" + "punpckhbw %%mm1, %%mm1 \n\t" + "punpckhwd %%mm1, %%mm1 \n\t" + "punpckhdq %%mm1, %%mm1 \n\t" + "movq %%mm1, (%0, %2) \n\t" + "movq %%mm1, 8(%0, %2) \n\t" + "add %1, %0 \n\t" + "cmp %3, %0 \n\t" + "jb 1b \n\t" + : "+r"(ptr) + : "r"((x86_reg)wrap), "r"((x86_reg)width), "r"(ptr + wrap * height) + ); + } else { + av_assert1(w == 4); + __asm__ volatile ( + "1: \n\t" + "movd (%0), %%mm0 \n\t" + "punpcklbw %%mm0, %%mm0 \n\t" + "punpcklwd %%mm0, %%mm0 \n\t" + "movd %%mm0, -4(%0) \n\t" + "movd -4(%0, %2), %%mm1 \n\t" + "punpcklbw %%mm1, %%mm1 \n\t" + "punpckhwd %%mm1, %%mm1 \n\t" + "punpckhdq %%mm1, %%mm1 \n\t" + "movd %%mm1, (%0, %2) \n\t" + "add %1, %0 \n\t" + "cmp %3, %0 \n\t" + "jb 1b \n\t" + : "+r" (ptr) + : "r" ((x86_reg) wrap), "r" ((x86_reg) width), + "r" (ptr + wrap * height)); + } + + /* top and bottom (and hopefully 
also the corners) */ + if (sides & EDGE_TOP) { + for (i = 0; i < h; i += 4) { + ptr = buf - (i + 1) * wrap - w; + __asm__ volatile ( + "1: \n\t" + "movq (%1, %0), %%mm0 \n\t" + "movq %%mm0, (%0) \n\t" + "movq %%mm0, (%0, %2) \n\t" + "movq %%mm0, (%0, %2, 2) \n\t" + "movq %%mm0, (%0, %3) \n\t" + "add $8, %0 \n\t" + "cmp %4, %0 \n\t" + "jb 1b \n\t" + : "+r" (ptr) + : "r" ((x86_reg) buf - (x86_reg) ptr - w), + "r" ((x86_reg) - wrap), "r" ((x86_reg) - wrap * 3), + "r" (ptr + width + 2 * w)); + } + } + + if (sides & EDGE_BOTTOM) { + for (i = 0; i < h; i += 4) { + ptr = last_line + (i + 1) * wrap - w; + __asm__ volatile ( + "1: \n\t" + "movq (%1, %0), %%mm0 \n\t" + "movq %%mm0, (%0) \n\t" + "movq %%mm0, (%0, %2) \n\t" + "movq %%mm0, (%0, %2, 2) \n\t" + "movq %%mm0, (%0, %3) \n\t" + "add $8, %0 \n\t" + "cmp %4, %0 \n\t" + "jb 1b \n\t" + : "+r" (ptr) + : "r" ((x86_reg) last_line - (x86_reg) ptr - w), + "r" ((x86_reg) wrap), "r" ((x86_reg) wrap * 3), + "r" (ptr + width + 2 * w)); + } + } +} +#endif /* HAVE_INLINE_ASM */ + void ff_prefetch_mmxext(uint8_t *buf, ptrdiff_t stride, int h); void ff_prefetch_3dnow(uint8_t *buf, ptrdiff_t stride, int h); av_cold void ff_videodsp_init_x86(VideoDSPContext *ctx, int bpc) { -#if HAVE_YASM int cpu_flags = av_get_cpu_flags(); +#if HAVE_INLINE_ASM + if (INLINE_MMX(cpu_flags) && bpc <= 8) { + ctx->draw_edges = draw_edges_mmx; + } +#endif /* HAVE_INLINE_ASM */ + +#if HAVE_YASM #if ARCH_X86_32 if (EXTERNAL_MMX(cpu_flags) && bpc <= 8) { ctx->emulated_edge_mc = emulated_edge_mc_mmx; -- 1.8.5.5 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel