On Sun, May 06, 2018 at 01:40:53PM +0200, Clément Bœsch wrote: > SIMD code will not have to deal with padding itself. Overwriting in that > function may have been possible but involve large overreading of the > sources. Instead, we simply make sure the width to process is always a > multiple of 16. Additionally, there must be some actual area to process > so the SIMD code can have its boundary checks after processing the first > pixels. > --- > libavfilter/vf_nlmeans.c | 25 ++++++++++++++++++------- > 1 file changed, 18 insertions(+), 7 deletions(-) > > diff --git a/libavfilter/vf_nlmeans.c b/libavfilter/vf_nlmeans.c > index d222d3913e..21f981a605 100644 > --- a/libavfilter/vf_nlmeans.c > +++ b/libavfilter/vf_nlmeans.c > @@ -157,6 +157,9 @@ static void compute_safe_ssd_integral_image_c(uint32_t > *dst, int dst_linesize_32 > { > int x, y; > > + /* SIMD-friendly assumptions allowed here */ > + av_assert2(!(w & 0xf) && w >= 16 && h >= 1); > + > for (y = 0; y < h; y++) { > uint32_t acc = dst[-1] - dst[-dst_linesize_32 - 1]; > > @@ -257,9 +260,16 @@ static void compute_ssd_integral_image(uint32_t *ii, int > ii_linesize_32, > // to compare the 2 sources pixels > const int startx_safe = FFMAX(s1x, s2x); > const int starty_safe = FFMAX(s1y, s2y); > - const int endx_safe = FFMIN(s1x + w, s2x + w); > + const int u_endx_safe = FFMIN(s1x + w, s2x + w); // unaligned > const int endy_safe = FFMIN(s1y + h, s2y + h); > > + // deduce the safe area width and height > + const int safe_pw = (u_endx_safe - startx_safe) & ~0xf; > + const int safe_ph = endy_safe - starty_safe; > + > + // adjusted end x position of the safe area after width of the safe area > gets aligned > + const int endx_safe = startx_safe + safe_pw; > + > // top part where only one of s1 and s2 is still readable, or none at all > compute_unsafe_ssd_integral_image(ii, ii_linesize_32, > 0, 0, > @@ -273,24 +283,25 @@ static void compute_ssd_integral_image(uint32_t *ii, > int ii_linesize_32, > 0, starty_safe, > src, 
linesize, > offx, offy, e, w, h, > - startx_safe, endy_safe - starty_safe); > + startx_safe, safe_ph); > > // main and safe part of the integral > av_assert1(startx_safe - s1x >= 0); av_assert1(startx_safe - s1x < w); > av_assert1(starty_safe - s1y >= 0); av_assert1(starty_safe - s1y < h); > av_assert1(startx_safe - s2x >= 0); av_assert1(startx_safe - s2x < w); > av_assert1(starty_safe - s2y >= 0); av_assert1(starty_safe - s2y < h); > - compute_safe_ssd_integral_image_c(ii + starty_safe*ii_linesize_32 + > startx_safe, ii_linesize_32, > - src + (starty_safe - s1y) * linesize + > (startx_safe - s1x), linesize, > - src + (starty_safe - s2y) * linesize + > (startx_safe - s2x), linesize, > - endx_safe - startx_safe, endy_safe - > starty_safe); > + if (safe_pw && safe_ph) > + dsp->compute_safe_ssd_integral_image(ii + starty_safe*ii_linesize_32 > + startx_safe, ii_linesize_32, > + src + (starty_safe - s1y) * > linesize + (startx_safe - s1x), linesize, > + src + (starty_safe - s2y) * > linesize + (startx_safe - s2x), linesize, > + safe_pw, safe_ph);
I think this patch is incomplete, or I am missing some change; the build fails with: libavfilter/vf_nlmeans.c: In function ‘compute_ssd_integral_image’: libavfilter/vf_nlmeans.c:294:9: error: ‘dsp’ undeclared (first use in this function) dsp->compute_safe_ssd_integral_image(ii + starty_safe*ii_linesize_32 + startx_safe, ii_linesize_32, ^ libavfilter/vf_nlmeans.c:294:9: note: each undeclared identifier is reported only once for each function it appears in libavfilter/vf_nlmeans.c: At top level: libavfilter/vf_nlmeans.c:153:13: warning: ‘compute_safe_ssd_integral_image_c’ defined but not used [-Wunused-function] static void compute_safe_ssd_integral_image_c(uint32_t *dst, int dst_linesize_32, ^ make: *** [libavfilter/vf_nlmeans.o] Error 1 make: *** Waiting for unfinished jobs.... [...] -- Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB Everything should be made as simple as possible, but not simpler. -- Albert Einstein
signature.asc
Description: PGP signature
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel