On 7/11/15, Ronald S. Bultje <rsbul...@gmail.com> wrote: > Both are 2-2.5x faster than their C counterpart. > --- > libavfilter/ssim.h | 36 ++++++++ > libavfilter/vf_ssim.c | 26 ++++-- > libavfilter/x86/Makefile | 2 + > libavfilter/x86/vf_ssim.asm | 190 > +++++++++++++++++++++++++++++++++++++++++ > libavfilter/x86/vf_ssim_init.c | 38 +++++++++ > 5 files changed, 283 insertions(+), 9 deletions(-) > create mode 100644 libavfilter/ssim.h > create mode 100644 libavfilter/x86/vf_ssim.asm > create mode 100644 libavfilter/x86/vf_ssim_init.c > > diff --git a/libavfilter/ssim.h b/libavfilter/ssim.h > new file mode 100644 > index 0000000..cd3a6ee > --- /dev/null > +++ b/libavfilter/ssim.h > @@ -0,0 +1,36 @@ > +/* > + * Copyright (c) 2015 Ronald S. Bultje <rsbul...@gmail.com> > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. 
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > + */ > + > +#ifndef LIBAVFILTER_SSIM_H > +#define LIBAVFILTER_SSIM_H > + > +#include <stddef.h> > +#include <stdint.h> > + > +typedef struct SSIMDSPContext { > + void (*ssim_4x4_line)(const uint8_t *buf, ptrdiff_t buf_stride, > + const uint8_t *ref, ptrdiff_t ref_stride, > + int (*sums)[4], int w); > + float (*ssim_end_line)(const int (*sum0)[4], const int (*sum1)[4], int > w); > +} SSIMDSPContext; > + > +void ff_ssim_init_x86(SSIMDSPContext *dsp); > + > +#endif /* LIBAVFILTER_SSIM_H */ > diff --git a/libavfilter/vf_ssim.c b/libavfilter/vf_ssim.c > index f7a259e..b5a61ee 100644 > --- a/libavfilter/vf_ssim.c > +++ b/libavfilter/vf_ssim.c > @@ -42,6 +42,7 @@ > #include "drawutils.h" > #include "formats.h" > #include "internal.h" > +#include "ssim.h" > #include "video.h" > > typedef struct SSIMContext { > @@ -59,6 +60,7 @@ typedef struct SSIMContext { > int planeheight[4]; > int *temp; > int is_rgb; > + SSIMDSPContext dsp; > } SSIMContext; > > #define OFFSET(x) offsetof(SSIMContext, x) > @@ -85,8 +87,8 @@ static void set_meta(AVDictionary **metadata, const char > *key, char comp, float > } > } > > -static void ssim_4x4xn(const uint8_t *main, int main_stride, > - const uint8_t *ref, int ref_stride, > +static void ssim_4x4xn(const uint8_t *main, ptrdiff_t main_stride, > + const uint8_t *ref, ptrdiff_t ref_stride, > int (*sums)[4], int width) > { > int x, y, z; > @@ -132,7 +134,7 @@ static float ssim_end1(int s1, int s2, int ss, int s12) > / ((float)(fs1 * fs1 + fs2 * fs2 + ssim_c1) * (float)(vars + > ssim_c2)); > } > > -static float ssim_endn(int (*sum0)[4], int (*sum1)[4], int width) > +static float ssim_endn(const int (*sum0)[4], const int (*sum1)[4], int > width) > { > float ssim = 0.0; > int i; > @@ -145,7 +147,8 @@ static float 
ssim_endn(int (*sum0)[4], int (*sum1)[4], > int width) > return ssim; > } > > -static float ssim_plane(uint8_t *main, int main_stride, > +static float ssim_plane(SSIMDSPContext *dsp, > + uint8_t *main, int main_stride, > uint8_t *ref, int ref_stride, > int width, int height, void *temp) > { > @@ -160,12 +163,12 @@ static float ssim_plane(uint8_t *main, int > main_stride, > for (y = 1; y < height; y++) { > for (; z <= y; z++) { > FFSWAP(void*, sum0, sum1); > - ssim_4x4xn(&main[4 * z * main_stride], main_stride, > - &ref[4 * z * ref_stride], ref_stride, > - sum0, width); > + dsp->ssim_4x4_line(&main[4 * z * main_stride], main_stride, > + &ref[4 * z * ref_stride], ref_stride, > + sum0, width); > } > > - ssim += ssim_endn(sum0, sum1, width - 1); > + ssim += dsp->ssim_end_line(sum0, sum1, width - 1); > } > > return ssim / ((height - 1) * (width - 1)); > @@ -187,7 +190,7 @@ static AVFrame *do_ssim(AVFilterContext *ctx, AVFrame > *main, > s->nb_frames++; > > for (i = 0; i < s->nb_components; i++) { > - c[i] = ssim_plane(main->data[i], main->linesize[i], > + c[i] = ssim_plane(&s->dsp, main->data[i], main->linesize[i], > ref->data[i], ref->linesize[i], > s->planewidth[i], s->planeheight[i], s->temp); > ssimv += s->coefs[i] * c[i]; > @@ -294,6 +297,11 @@ static int config_input_ref(AVFilterLink *inlink) > if (!s->temp) > return AVERROR(ENOMEM); > > + s->dsp.ssim_4x4_line = ssim_4x4xn; > + s->dsp.ssim_end_line = ssim_endn; > + if (ARCH_X86) > + ff_ssim_init_x86(&s->dsp); > + > return 0; > } > > diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile > index 89d3ca1..230e879 100644 > --- a/libavfilter/x86/Makefile > +++ b/libavfilter/x86/Makefile > @@ -9,6 +9,7 @@ OBJS-$(CONFIG_PP7_FILTER) += > x86/vf_pp7_init.o > OBJS-$(CONFIG_PSNR_FILTER) += x86/vf_psnr_init.o > OBJS-$(CONFIG_PULLUP_FILTER) += x86/vf_pullup_init.o > OBJS-$(CONFIG_SPP_FILTER) += x86/vf_spp.o > +OBJS-$(CONFIG_SSIM_FILTER) += x86/vf_ssim_init.o > OBJS-$(CONFIG_TINTERLACE_FILTER) += 
x86/vf_tinterlace_init.o > OBJS-$(CONFIG_VOLUME_FILTER) += x86/af_volume_init.o > OBJS-$(CONFIG_YADIF_FILTER) += x86/vf_yadif_init.o > @@ -21,6 +22,7 @@ YASM-OBJS-$(CONFIG_INTERLACE_FILTER) += > x86/vf_interlace.o > YASM-OBJS-$(CONFIG_PP7_FILTER) += x86/vf_pp7.o > YASM-OBJS-$(CONFIG_PSNR_FILTER) += x86/vf_psnr.o > YASM-OBJS-$(CONFIG_PULLUP_FILTER) += x86/vf_pullup.o > +YASM-OBJS-$(CONFIG_SSIM_FILTER) += x86/vf_ssim.o > YASM-OBJS-$(CONFIG_TINTERLACE_FILTER) += x86/vf_interlace.o > YASM-OBJS-$(CONFIG_VOLUME_FILTER) += x86/af_volume.o > YASM-OBJS-$(CONFIG_YADIF_FILTER) += x86/vf_yadif.o > x86/yadif-16.o x86/yadif-10.o > diff --git a/libavfilter/x86/vf_ssim.asm b/libavfilter/x86/vf_ssim.asm > new file mode 100644 > index 0000000..55bb645 > --- /dev/null > +++ b/libavfilter/x86/vf_ssim.asm > @@ -0,0 +1,190 @@ > +;***************************************************************************** > +;* x86-optimized functions for interlace filter
Besides the above, the patch LGTM, unless someone has comments on the asm part. _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel