Overall, the filter appears to be 1.1x faster, with no noticeable quality impact. --- libavfilter/vf_nlmeans.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/libavfilter/vf_nlmeans.c b/libavfilter/vf_nlmeans.c index f37f1183f7..201e4feb41 100644 --- a/libavfilter/vf_nlmeans.c +++ b/libavfilter/vf_nlmeans.c @@ -40,8 +40,8 @@ #include "video.h" struct weighted_avg { - double total_weight; - double sum; + float total_weight; + float sum; }; #define WEIGHT_LUT_NBITS 9 @@ -63,8 +63,8 @@ typedef struct NLMeansContext { ptrdiff_t ii_lz_32; // linesize in 32-bit units of the integral image struct weighted_avg *wa; // weighted average of every pixel ptrdiff_t wa_linesize; // linesize for wa in struct size unit - double weight_lut[WEIGHT_LUT_SIZE]; // lookup table mapping (scaled) patch differences to their associated weights - double pdiff_lut_scale; // scale factor for patch differences before looking into the LUT + float weight_lut[WEIGHT_LUT_SIZE]; // lookup table mapping (scaled) patch differences to their associated weights + float pdiff_lut_scale; // scale factor for patch differences before looking into the LUT int max_meaningful_diff; // maximum difference considered (if the patch difference is too high we ignore the pixel) NLMeansDSPContext dsp; } NLMeansContext; @@ -206,7 +206,7 @@ static void compute_safe_ssd_integral_image_c(uint32_t *dst, ptrdiff_t dst_lines * @param w width to compute * @param h height to compute */ -static inline void compute_unsafe_ssd_integral_image(uint32_t *dst, ptrdiff_t dst_linesize_32, +static void compute_unsafe_ssd_integral_image(uint32_t *dst, ptrdiff_t dst_linesize_32, int startx, int starty, const uint8_t *src, ptrdiff_t linesize, int offx, int offy, int r, int sw, int sh, @@ -402,7 +402,7 @@ static int nlmeans_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs const int patch_diff_sq = get_integral_patch_value(td->ii_start, s->ii_lz_32, x, y, td->p); if (patch_diff_sq < s->max_meaningful_diff) { const int weight_lut_idx = patch_diff_sq * s->pdiff_lut_scale; - const double weight = s->weight_lut[weight_lut_idx]; // exp(-patch_diff_sq * s->pdiff_scale) + const 
float weight = s->weight_lut[weight_lut_idx]; // exp(-patch_diff_sq * s->pdiff_scale) wa[x].total_weight += weight; wa[x].sum += weight * src[x]; } -- 2.17.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel