On 9/12/18, James Almer <jamr...@gmail.com> wrote: > On 9/12/2018 7:42 AM, Paul B Mahol wrote: >> Signed-off-by: Paul B Mahol <one...@gmail.com> >> --- >> libavutil/float_dsp.c | 9 +++++++++ >> libavutil/float_dsp.h | 16 ++++++++++++++++ >> 2 files changed, 25 insertions(+) > > Ah, i had a patch like this lying around but never got to send it. > I still have the x86 simd implementation, though, so I'll send that later. > >> >> diff --git a/libavutil/float_dsp.c b/libavutil/float_dsp.c >> index 1d4911d815..6e28d71b57 100644 >> --- a/libavutil/float_dsp.c >> +++ b/libavutil/float_dsp.c >> @@ -32,6 +32,14 @@ static void vector_fmul_c(float *dst, const float >> *src0, const float *src1, >> dst[i] = src0[i] * src1[i]; >> } >> >> +static void vector_dmul_c(double *dst, const double *src0, const double >> *src1, >> + int len) >> +{ >> + int i; >> + for (i = 0; i < len; i++) >> + dst[i] = src0[i] * src1[i]; >> +} >> + >> static void vector_fmac_scalar_c(float *dst, const float *src, float mul, >> int len) >> { >> @@ -131,6 +139,7 @@ av_cold AVFloatDSPContext *avpriv_float_dsp_alloc(int >> bit_exact) >> return NULL; >> >> fdsp->vector_fmul = vector_fmul_c; >> + fdsp->vector_dmul = vector_dmul_c; >> fdsp->vector_fmac_scalar = vector_fmac_scalar_c; >> fdsp->vector_fmul_scalar = vector_fmul_scalar_c; >> fdsp->vector_dmac_scalar = vector_dmac_scalar_c; >> diff --git a/libavutil/float_dsp.h b/libavutil/float_dsp.h >> index 2c24d93471..9c664592bd 100644 >> --- a/libavutil/float_dsp.h >> +++ b/libavutil/float_dsp.h >> @@ -173,6 +173,22 @@ typedef struct AVFloatDSPContext { >> * @return sum of elementwise products >> */ >> float (*scalarproduct_float)(const float *v1, const float *v2, int >> len); >> + >> + /** >> + * Calculate the entry wise product of two vectors of doubles and >> store the result in >> + * a vector of doubles. 
>> + * >> + * @param dst output vector >> + * constraints: 32-byte aligned >> + * @param src0 first input vector >> + * constraints: 32-byte aligned >> + * @param src1 second input vector >> + * constraints: 32-byte aligned >> + * @param len number of elements in the input >> + * constraints: multiple of 16 > > Why not 8?
It is what the float variant uses, and for good reason. > >> + */ >> + void (*vector_dmul)(double *dst, const double *src0, const double >> *src1, >> + int len); >> } AVFloatDSPContext; >> >> /** >> > > LGTM. > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel > _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel