On 7/3/2017 4:45 PM, Paul B Mahol wrote: > Signed-off-by: Paul B Mahol <one...@gmail.com> > --- > doc/filters.texi | 17 +++ > libavfilter/Makefile | 1 + > libavfilter/allfilters.c | 1 + > libavfilter/limiter.h | 33 ++++++ > libavfilter/vf_limiter.c | 228 > ++++++++++++++++++++++++++++++++++++++ > libavfilter/x86/Makefile | 2 + > libavfilter/x86/vf_limiter.asm | 80 +++++++++++++ > libavfilter/x86/vf_limiter_init.c | 41 +++++++ > 8 files changed, 403 insertions(+) > create mode 100644 libavfilter/limiter.h > create mode 100644 libavfilter/vf_limiter.c > create mode 100644 libavfilter/x86/vf_limiter.asm > create mode 100644 libavfilter/x86/vf_limiter_init.c > > diff --git a/doc/filters.texi b/doc/filters.texi > index 930ca4c..dbf0fc1 100644 > --- a/doc/filters.texi > +++ b/doc/filters.texi > @@ -9639,6 +9639,23 @@ The formula that generates the correction is: > where @var{r_0} is halve of the image diagonal and @var{r_src} and > @var{r_tgt} are the > distances from the focal point in the source and target images, respectively. > > +@section limiter > + > +Limits the pixel components values to the specified range [min, max]. > + > +The filter accepts the following options: > + > +@table @option > +@item min > +Lower bound. Defaults to the lowest allowed value for the input. > + > +@item max > +Upper bound. Defaults to the highest allowed value for the input. > + > +@item planes > +Specify which planes will be processed. > +@end table > + > @section loop > > Loop video frames. 
> diff --git a/libavfilter/Makefile b/libavfilter/Makefile > index f023a0d..4d85f65 100644 > --- a/libavfilter/Makefile > +++ b/libavfilter/Makefile > @@ -216,6 +216,7 @@ OBJS-$(CONFIG_INTERLACE_FILTER) += > vf_interlace.o > OBJS-$(CONFIG_INTERLEAVE_FILTER) += f_interleave.o > OBJS-$(CONFIG_KERNDEINT_FILTER) += vf_kerndeint.o > OBJS-$(CONFIG_LENSCORRECTION_FILTER) += vf_lenscorrection.o > +OBJS-$(CONFIG_LIMITER_FILTER) += vf_limiter.o > OBJS-$(CONFIG_LOOP_FILTER) += f_loop.o > OBJS-$(CONFIG_LUMAKEY_FILTER) += vf_lumakey.o > OBJS-$(CONFIG_LUT_FILTER) += vf_lut.o > diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c > index c1c5233..0a990ca 100644 > --- a/libavfilter/allfilters.c > +++ b/libavfilter/allfilters.c > @@ -228,6 +228,7 @@ static void register_all(void) > REGISTER_FILTER(INTERLEAVE, interleave, vf); > REGISTER_FILTER(KERNDEINT, kerndeint, vf); > REGISTER_FILTER(LENSCORRECTION, lenscorrection, vf); > + REGISTER_FILTER(LIMITER, limiter, vf); > REGISTER_FILTER(LOOP, loop, vf); > REGISTER_FILTER(LUMAKEY, lumakey, vf); > REGISTER_FILTER(LUT, lut, vf); > diff --git a/libavfilter/limiter.h b/libavfilter/limiter.h > new file mode 100644 > index 0000000..54d423d > --- /dev/null > +++ b/libavfilter/limiter.h > @@ -0,0 +1,33 @@ > +/* > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. 
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > + */ > + > +#ifndef AVFILTER_LIMITER_H > +#define AVFILTER_LIMITER_H > + > +#include <stddef.h> > +#include <stdint.h> > + > +typedef struct LimiterDSPContext { > + void (*limiter)(const uint8_t *src, uint8_t *dst, > + ptrdiff_t slinesize, ptrdiff_t dlinesize, > + int w, int h, int min, int max); > +} LimiterDSPContext; > + > +void ff_limiter_init_x86(LimiterDSPContext *dsp, int bpp); > + > +#endif /* AVFILTER_LIMITER_H */ > diff --git a/libavfilter/vf_limiter.c b/libavfilter/vf_limiter.c > new file mode 100644 > index 0000000..d6f5745 > --- /dev/null > +++ b/libavfilter/vf_limiter.c > @@ -0,0 +1,228 @@ > +/* > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. 
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > + */ > + > +#include "libavutil/attributes.h" > +#include "libavutil/common.h" > +#include "libavutil/eval.h" > +#include "libavutil/imgutils.h" > +#include "libavutil/opt.h" > +#include "libavutil/pixdesc.h" > +#include "avfilter.h" > +#include "formats.h" > +#include "internal.h" > +#include "limiter.h" > +#include "video.h" > + > +typedef struct LimiterContext { > + const AVClass *class; > + int min; > + int max; > + int planes; > + int nb_planes; > + int linesize[4]; > + int width[4]; > + int height[4]; > + > + LimiterDSPContext dsp; > +} LimiterContext; > + > +#define OFFSET(x) offsetof(LimiterContext, x) > +#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM > + > +static const AVOption limiter_options[] = { > + { "min", "set min value", OFFSET(min), AV_OPT_TYPE_INT, {.i64=0}, > 0, 65535, .flags = FLAGS }, > + { "max", "set max value", OFFSET(max), AV_OPT_TYPE_INT, > {.i64=65535}, 0, 65535, .flags = FLAGS }, > + { "planes", "set planes", OFFSET(planes), AV_OPT_TYPE_INT, {.i64=15}, > 0, 65535, .flags = FLAGS }, > + { NULL } > +}; > + > +AVFILTER_DEFINE_CLASS(limiter); > + > +static av_cold int init(AVFilterContext *ctx) > +{ > + LimiterContext *s = ctx->priv; > + > + if (s->min > s->max) > + return AVERROR(EINVAL); > + return 0; > +} > + > +static int query_formats(AVFilterContext *ctx) > +{ > + static const enum AVPixelFormat pix_fmts[] = { > + AV_PIX_FMT_YUVA444P, AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV440P, > + AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ440P, > + AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUVA420P, > AV_PIX_FMT_YUV420P, > + AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ420P, > + AV_PIX_FMT_YUVJ411P, AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV410P, > + AV_PIX_FMT_YUV420P9, AV_PIX_FMT_YUV422P9, AV_PIX_FMT_YUV444P9, > + 
AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10, > + AV_PIX_FMT_YUV420P12, AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV444P12, > AV_PIX_FMT_YUV440P12, > + AV_PIX_FMT_YUV420P14, AV_PIX_FMT_YUV422P14, AV_PIX_FMT_YUV444P14, > + AV_PIX_FMT_YUV420P16, AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUV444P16, > + AV_PIX_FMT_YUVA420P9, AV_PIX_FMT_YUVA422P9, AV_PIX_FMT_YUVA444P9, > + AV_PIX_FMT_YUVA420P10, AV_PIX_FMT_YUVA422P10, AV_PIX_FMT_YUVA444P10, > + AV_PIX_FMT_YUVA420P16, AV_PIX_FMT_YUVA422P16, AV_PIX_FMT_YUVA444P16, > + AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10, > + AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16, > + AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRAP12, AV_PIX_FMT_GBRAP16, > + AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, > AV_PIX_FMT_GRAY16, > + AV_PIX_FMT_NONE > + }; > + > + AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts); > + if (!fmts_list) > + return AVERROR(ENOMEM); > + return ff_set_common_formats(ctx, fmts_list); > +} > + > +static void limiter8(const uint8_t *src, uint8_t *dst, > + ptrdiff_t slinesize, ptrdiff_t dlinesize, > + int w, int h, int min, int max) > +{ > + int x, y; > + > + for (y = 0; y < h; y++) { > + for (x = 0; x < w; x++) { > + dst[x] = av_clip(src[x], min, max); > + } > + > + dst += dlinesize; > + src += slinesize; > + } > +} > + > +static void limiter16(const uint8_t *ssrc, uint8_t *ddst, > + ptrdiff_t slinesize, ptrdiff_t dlinesize, > + int w, int h, int min, int max) > +{ > + const uint16_t *src = (const uint16_t *)ssrc; > + uint16_t *dst = (uint16_t *)ddst; > + int x, y; > + > + dlinesize /= 2; > + slinesize /= 2; > + > + for (y = 0; y < h; y++) { > + for (x = 0; x < w; x++) { > + dst[x] = av_clip(src[x], min, max); > + } > + > + dst += dlinesize; > + src += slinesize; > + } > +} > + > +static int config_props(AVFilterLink *inlink) > +{ > + AVFilterContext *ctx = inlink->dst; > + LimiterContext *s = ctx->priv; > + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format); > + int vsub, 
hsub, ret; > + > + s->nb_planes = av_pix_fmt_count_planes(inlink->format); > + > + if ((ret = av_image_fill_linesizes(s->linesize, inlink->format, > inlink->w)) < 0) > + return ret; > + > + hsub = desc->log2_chroma_w; > + vsub = desc->log2_chroma_h; > + s->height[1] = s->height[2] = AV_CEIL_RSHIFT(inlink->h, vsub); > + s->height[0] = s->height[3] = inlink->h; > + s->width[1] = s->width[2] = AV_CEIL_RSHIFT(inlink->w, hsub); > + s->width[0] = s->width[3] = inlink->w; > + > + if (desc->comp[0].depth == 8) > + s->dsp.limiter = limiter8; > + else > + s->dsp.limiter = limiter16; > + > + if (ARCH_X86) > + ff_limiter_init_x86(&s->dsp, desc->comp[0].depth); > + > + return 0; > +} > + > +static int filter_frame(AVFilterLink *inlink, AVFrame *in) > +{ > + AVFilterContext *ctx = inlink->dst; > + LimiterContext *s = ctx->priv; > + AVFilterLink *outlink = ctx->outputs[0]; > + AVFrame *out; > + int p; > + > + if (av_frame_is_writable(in)) { > + out = in; > + } else { > + out = ff_get_video_buffer(outlink, outlink->w, outlink->h); > + if (!out) { > + av_frame_free(&in); > + return AVERROR(ENOMEM); > + } > + av_frame_copy_props(out, in); > + } > + > + for (p = 0; p < s->nb_planes; p++) { > + if (!((1 << p) & s->planes)) { > + if (out != in) > + av_image_copy_plane(out->data[p], out->linesize[p], > in->data[p], in->linesize[p], > + s->linesize[p], s->height[p]); > + continue; > + } > + > + s->dsp.limiter(in->data[p], out->data[p], > + in->linesize[p], out->linesize[p], > + s->width[p], s->height[p], > + s->min, s->max); > + } > + > + if (out != in) > + av_frame_free(&in); > + > + return ff_filter_frame(outlink, out); > +} > + > +static const AVFilterPad inputs[] = { > + { > + .name = "default", > + .type = AVMEDIA_TYPE_VIDEO, > + .filter_frame = filter_frame, > + .config_props = config_props, > + }, > + { NULL } > +}; > + > +static const AVFilterPad outputs[] = { > + { > + .name = "default", > + .type = AVMEDIA_TYPE_VIDEO, > + }, > + { NULL } > +}; > + > +AVFilter ff_vf_limiter = { 
> + .name = "limiter", > + .description = NULL_IF_CONFIG_SMALL("Limit pixels components to the > specified range."), > + .priv_size = sizeof(LimiterContext), > + .priv_class = &limiter_class, > + .init = init, > + .query_formats = query_formats, > + .inputs = inputs, > + .outputs = outputs, > + .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC, > +}; > diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile > index f598ceb..3431625 100644 > --- a/libavfilter/x86/Makefile > +++ b/libavfilter/x86/Makefile > @@ -8,6 +8,7 @@ OBJS-$(CONFIG_GRADFUN_FILTER) += > x86/vf_gradfun_init.o > OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d_init.o > OBJS-$(CONFIG_IDET_FILTER) += x86/vf_idet_init.o > OBJS-$(CONFIG_INTERLACE_FILTER) += x86/vf_interlace_init.o > +OBJS-$(CONFIG_LIMITER_FILTER) += x86/vf_limiter_init.o > OBJS-$(CONFIG_MASKEDMERGE_FILTER) += x86/vf_maskedmerge_init.o > OBJS-$(CONFIG_NOISE_FILTER) += x86/vf_noise.o > OBJS-$(CONFIG_PP7_FILTER) += x86/vf_pp7_init.o > @@ -33,6 +34,7 @@ X86ASM-OBJS-$(CONFIG_GRADFUN_FILTER) += > x86/vf_gradfun.o > X86ASM-OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d.o > X86ASM-OBJS-$(CONFIG_IDET_FILTER) += x86/vf_idet.o > X86ASM-OBJS-$(CONFIG_INTERLACE_FILTER) += x86/vf_interlace.o > +X86ASM-OBJS-$(CONFIG_LIMITER_FILTER) += x86/vf_limiter.o > X86ASM-OBJS-$(CONFIG_MASKEDMERGE_FILTER) += x86/vf_maskedmerge.o > X86ASM-OBJS-$(CONFIG_PP7_FILTER) += x86/vf_pp7.o > X86ASM-OBJS-$(CONFIG_PSNR_FILTER) += x86/vf_psnr.o > diff --git a/libavfilter/x86/vf_limiter.asm b/libavfilter/x86/vf_limiter.asm > new file mode 100644 > index 0000000..7d61c26 > --- /dev/null > +++ b/libavfilter/x86/vf_limiter.asm > @@ -0,0 +1,80 @@ > +;***************************************************************************** > +;* x86-optimized functions for limiter filter > +;* > +;* This file is part of FFmpeg. 
> +;* > +;* FFmpeg is free software; you can redistribute it and/or > +;* modify it under the terms of the GNU Lesser General Public > +;* License as published by the Free Software Foundation; either > +;* version 2.1 of the License, or (at your option) any later version. > +;* > +;* FFmpeg is distributed in the hope that it will be useful, > +;* but WITHOUT ANY WARRANTY; without even the implied warranty of > +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > +;* Lesser General Public License for more details. > +;* > +;* You should have received a copy of the GNU Lesser General Public > +;* License along with FFmpeg; if not, write to the Free Software > +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > +;****************************************************************************** > + > +%include "libavutil/x86/x86util.asm" > + > +SECTION_RODATA > + > +pb_0: times 16 db 0 > + > +SECTION .text > + > +INIT_XMM sse2 > + > +cglobal limiter_8bit, 8, 9, 3, src, dst, slinesize, dlinesize, w, h, min, > max, x
More than 7 GPRs are available on x86_64 only. > + movsxdifnidn wq, wd > + add srcq, wq > + add dstq, wq > + neg wq > + SPLATB_REG m1, min, [pb_0] > + SPLATB_REG m2, max, [pb_0] > +.nextrow: > + mov xq, wq > + > + .loop: > + movu m0, [srcq + xq] > + CLIPUB m0, m1, m2 > + mova [dstq+xq], m0 > + add xq, mmsize > + jl .loop > + > + add srcq, slinesizeq > + add dstq, dlinesizeq > + sub hd, 1 > + jg .nextrow > + ret > + > +cglobal limiter_16bit, 8, 9, 3, src, dst, slinesize, dlinesize, w, h, min, > max, x > + shl wd, 1 > + add srcq, wq > + add dstq, wq > + neg wq > + pinsrw m1, minw, 0 movd m1, mind > + SPLATW m1, m1 > + pinsrw m2, maxw, 0 movd m2, maxd You could also, as part of making this work on x86_32, load these two straight from the stack using r6m/r7m. That would leave you with exactly seven GPRs to store both pointers, both strides, both dimensions and the temp value x. > + SPLATW m2, m2 > +.nextrow: > + mov xq, wq > + > + .loop: > + movu m0, [srcq + xq] > + pmaxuw m0, m1 > + pminuw m0, m2 These two are SSE4.1 instructions. You should be getting the relevant warnings during assembly... See libavcodec/x86/hevc_sao_10bit.asm for an example of SSE2 emulation of pminuw, which you can move to x86util so it can be used here. > + mova [dstq+xq], m0 > + add xq, mmsize > + jl .loop > + > + add srcq, slinesizeq > + add dstq, dlinesizeq > + sub hd, 1 > + jg .nextrow > + ret > + > + ret > diff --git a/libavfilter/x86/vf_limiter_init.c > b/libavfilter/x86/vf_limiter_init.c > new file mode 100644 > index 0000000..348bb23 > --- /dev/null > +++ b/libavfilter/x86/vf_limiter_init.c > @@ -0,0 +1,41 @@ > +/* > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. 
> + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > + */ > + > +#include "libavutil/x86/cpu.h" > + > +#include "libavfilter/limiter.h" > + > +void ff_limiter_8bit_sse2(const uint8_t *src, uint8_t *dst, > + ptrdiff_t slinesize, ptrdiff_t dlinesize, > + int w, int h, int min, int max); > +void ff_limiter_16bit_sse2(const uint8_t *src, uint8_t *dst, > + ptrdiff_t slinesize, ptrdiff_t dlinesize, > + int w, int h, int min, int max); > + > +void ff_limiter_init_x86(LimiterDSPContext *dsp, int bpp) > +{ > + int cpu_flags = av_get_cpu_flags(); > + > + if (EXTERNAL_SSE2(cpu_flags)) { > + if (bpp <= 8) { > + dsp->limiter = ff_limiter_8bit_sse2; > + } else if (bpp <= 16) { > + dsp->limiter = ff_limiter_16bit_sse2; > + } > + } > +} > _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel