Use double-precision accumulators when summing the float SSIM values; update the FATE reference files to match. Signed-off-by: Paul B Mahol <one...@gmail.com> --- libavfilter/ssim.h | 2 +- libavfilter/vf_ssim.c | 18 +++++------ libavfilter/x86/vf_ssim.asm | 36 ++++++++++++++-------- libavfilter/x86/vf_ssim_init.c | 2 +- tests/ref/fate/filter-refcmp-ssim-rgb | 44 +++++++++++++-------------- tests/ref/fate/filter-refcmp-ssim-yuv | 26 ++++++++-------- 6 files changed, 70 insertions(+), 58 deletions(-)
diff --git a/libavfilter/ssim.h b/libavfilter/ssim.h index ac0395a22a..a6a41aabe6 100644 --- a/libavfilter/ssim.h +++ b/libavfilter/ssim.h @@ -28,7 +28,7 @@ typedef struct SSIMDSPContext { void (*ssim_4x4_line)(const uint8_t *buf, ptrdiff_t buf_stride, const uint8_t *ref, ptrdiff_t ref_stride, int (*sums)[4], int w); - float (*ssim_end_line)(const int (*sum0)[4], const int (*sum1)[4], int w); + double (*ssim_end_line)(const int (*sum0)[4], const int (*sum1)[4], int w); } SSIMDSPContext; void ff_ssim_init_x86(SSIMDSPContext *dsp); diff --git a/libavfilter/vf_ssim.c b/libavfilter/vf_ssim.c index c08fbcdcc2..17dce8e8e8 100644 --- a/libavfilter/vf_ssim.c +++ b/libavfilter/vf_ssim.c @@ -55,13 +55,13 @@ typedef struct SSIMContext { uint64_t nb_frames; double ssim[4], ssim_total; char comps[4]; - float coefs[4]; + double coefs[4]; uint8_t rgba_map[4]; int planewidth[4]; int planeheight[4]; int *temp; int is_rgb; - float (*ssim_plane)(SSIMDSPContext *dsp, + double (*ssim_plane)(SSIMDSPContext *dsp, uint8_t *main, int main_stride, uint8_t *ref, int ref_stride, int width, int height, void *temp, @@ -206,9 +206,9 @@ static float ssim_endn_16bit(const int64_t (*sum0)[4], const int64_t (*sum1)[4], return ssim; } -static float ssim_endn_8bit(const int (*sum0)[4], const int (*sum1)[4], int width) +static double ssim_endn_8bit(const int (*sum0)[4], const int (*sum1)[4], int width) { - float ssim = 0.0; + double ssim = 0.0; int i; for (i = 0; i < width; i++) @@ -221,14 +221,14 @@ static float ssim_endn_8bit(const int (*sum0)[4], const int (*sum1)[4], int widt #define SUM_LEN(w) (((w) >> 2) + 3) -static float ssim_plane_16bit(SSIMDSPContext *dsp, +static double ssim_plane_16bit(SSIMDSPContext *dsp, uint8_t *main, int main_stride, uint8_t *ref, int ref_stride, int width, int height, void *temp, int max) { int z = 0, y; - float ssim = 0.0; + double ssim = 0.0; int64_t (*sum0)[4] = temp; int64_t (*sum1)[4] = sum0 + SUM_LEN(width); @@ -249,14 +249,14 @@ static float 
ssim_plane_16bit(SSIMDSPContext *dsp, return ssim / ((height - 1) * (width - 1)); } -static float ssim_plane(SSIMDSPContext *dsp, +static double ssim_plane(SSIMDSPContext *dsp, uint8_t *main, int main_stride, uint8_t *ref, int ref_stride, int width, int height, void *temp, int max) { int z = 0, y; - float ssim = 0.0; + double ssim = 0.0; int (*sum0)[4] = temp; int (*sum1)[4] = sum0 + SUM_LEN(width); @@ -288,7 +288,7 @@ static int do_ssim(FFFrameSync *fs) SSIMContext *s = ctx->priv; AVFrame *master, *ref; AVDictionary **metadata; - float c[4], ssimv = 0.0; + double c[4], ssimv = 0.0; int ret, i; ret = ff_framesync_dualinput_get(fs, &master, &ref); diff --git a/libavfilter/x86/vf_ssim.asm b/libavfilter/x86/vf_ssim.asm index 3293e66701..4cd6293b59 100644 --- a/libavfilter/x86/vf_ssim.asm +++ b/libavfilter/x86/vf_ssim.asm @@ -169,8 +169,9 @@ SSIM_4X4_LINE 8 %endif INIT_XMM sse4 -cglobal ssim_end_line, 3, 3, 6, sum0, sum1, w +cglobal ssim_end_line, 3, 3, 7, sum0, sum1, w pxor m0, m0 + pxor m6, m6 .loop: mova m1, [sum0q+mmsize*0] mova m2, [sum0q+mmsize*1] @@ -214,34 +215,45 @@ cglobal ssim_end_line, 3, 3, 6, sum0, sum1, w mulps m4, m5 mulps m3, m1 divps m4, m3 ; ssim_endl - addps m0, m4 ; ssim + mova m5, m4 + cvtps2pd m3, m5 + movhlps m5, m5 + cvtps2pd m5, m5 + addpd m0, m3 ; ssim + addpd m6, m5 ; ssim add sum0q, mmsize*4 add sum1q, mmsize*4 sub wd, 4 jg .loop - ; subps the ones we added too much + ; subpd the ones we added too much test wd, wd jz .end add wd, 4 + test wd, 3 + jz .skip3 test wd, 2 jz .skip2 - psrldq m4, 8 -.skip2: test wd, 1 jz .skip1 - psrldq m4, 4 +.skip3: + psrldq m5, 8 + subpd m6, m5 + jmp .end +.skip2: + psrldq m3, 8 + subpd m0, m3 + jmp .end .skip1: - subps m0, m4 + psrldq m5, 8 + subpd m6, m5 .end: + addpd m0, m6 movhlps m4, m0 - addps m0, m4 - movss m4, m0 - shufps m0, m0, 1 - addss m0, m4 + addpd m0, m4 %if ARCH_X86_32 - movss r0m, m0 + movsd r0m, m0 fld r0mp %endif RET diff --git a/libavfilter/x86/vf_ssim_init.c b/libavfilter/x86/vf_ssim_init.c 
index 599c928403..cbaa20ef16 100644 --- a/libavfilter/x86/vf_ssim_init.c +++ b/libavfilter/x86/vf_ssim_init.c @@ -28,7 +28,7 @@ void ff_ssim_4x4_line_ssse3(const uint8_t *buf, ptrdiff_t buf_stride, void ff_ssim_4x4_line_xop (const uint8_t *buf, ptrdiff_t buf_stride, const uint8_t *ref, ptrdiff_t ref_stride, int (*sums)[4], int w); -float ff_ssim_end_line_sse4(const int (*sum0)[4], const int (*sum1)[4], int w); +double ff_ssim_end_line_sse4(const int (*sum0)[4], const int (*sum1)[4], int w); void ff_ssim_init_x86(SSIMDSPContext *dsp) { diff --git a/tests/ref/fate/filter-refcmp-ssim-rgb b/tests/ref/fate/filter-refcmp-ssim-rgb index 8c23c60b37..cb3b5f2e12 100644 --- a/tests/ref/fate/filter-refcmp-ssim-rgb +++ b/tests/ref/fate/filter-refcmp-ssim-rgb @@ -1,30 +1,30 @@ frame:0 pts:0 pts_time:0 +lavfi.ssim.R=0.73 +lavfi.ssim.G=0.77 +lavfi.ssim.B=0.90 +lavfi.ssim.All=0.80 +lavfi.ssim.dB=7.01 +frame:1 pts:1 pts_time:1 lavfi.ssim.R=0.72 lavfi.ssim.G=0.76 -lavfi.ssim.B=0.89 -lavfi.ssim.All=0.79 -lavfi.ssim.dB=6.74 -frame:1 pts:1 pts_time:1 -lavfi.ssim.R=0.70 -lavfi.ssim.G=0.74 -lavfi.ssim.B=0.85 -lavfi.ssim.All=0.77 -lavfi.ssim.dB=6.31 +lavfi.ssim.B=0.86 +lavfi.ssim.All=0.78 +lavfi.ssim.dB=6.56 frame:2 pts:2 pts_time:2 -lavfi.ssim.R=0.71 +lavfi.ssim.R=0.72 +lavfi.ssim.G=0.76 +lavfi.ssim.B=0.85 +lavfi.ssim.All=0.78 +lavfi.ssim.dB=6.53 +frame:3 pts:3 pts_time:3 +lavfi.ssim.R=0.72 lavfi.ssim.G=0.75 lavfi.ssim.B=0.84 +lavfi.ssim.All=0.77 +lavfi.ssim.dB=6.35 +frame:4 pts:4 pts_time:4 +lavfi.ssim.R=0.72 +lavfi.ssim.G=0.75 +lavfi.ssim.B=0.82 lavfi.ssim.All=0.76 lavfi.ssim.dB=6.29 -frame:3 pts:3 pts_time:3 -lavfi.ssim.R=0.70 -lavfi.ssim.G=0.73 -lavfi.ssim.B=0.83 -lavfi.ssim.All=0.76 -lavfi.ssim.dB=6.11 -frame:4 pts:4 pts_time:4 -lavfi.ssim.R=0.71 -lavfi.ssim.G=0.74 -lavfi.ssim.B=0.80 -lavfi.ssim.All=0.75 -lavfi.ssim.dB=6.05 diff --git a/tests/ref/fate/filter-refcmp-ssim-yuv b/tests/ref/fate/filter-refcmp-ssim-yuv index 5c8ffb9483..209c8bd600 100644 --- 
a/tests/ref/fate/filter-refcmp-ssim-yuv +++ b/tests/ref/fate/filter-refcmp-ssim-yuv @@ -1,30 +1,30 @@ frame:0 pts:0 pts_time:0 -lavfi.ssim.Y=0.80 +lavfi.ssim.Y=0.82 lavfi.ssim.U=0.76 lavfi.ssim.V=0.69 -lavfi.ssim.All=0.76 -lavfi.ssim.dB=6.25 +lavfi.ssim.All=0.77 +lavfi.ssim.dB=6.37 frame:1 pts:1 pts_time:1 -lavfi.ssim.Y=0.80 +lavfi.ssim.Y=0.81 lavfi.ssim.U=0.73 lavfi.ssim.V=0.68 -lavfi.ssim.All=0.75 -lavfi.ssim.dB=6.08 +lavfi.ssim.All=0.76 +lavfi.ssim.dB=6.20 frame:2 pts:2 pts_time:2 -lavfi.ssim.Y=0.80 +lavfi.ssim.Y=0.82 lavfi.ssim.U=0.73 lavfi.ssim.V=0.68 -lavfi.ssim.All=0.75 -lavfi.ssim.dB=6.10 +lavfi.ssim.All=0.76 +lavfi.ssim.dB=6.22 frame:3 pts:3 pts_time:3 -lavfi.ssim.Y=0.79 +lavfi.ssim.Y=0.81 lavfi.ssim.U=0.72 lavfi.ssim.V=0.68 lavfi.ssim.All=0.75 -lavfi.ssim.dB=5.94 +lavfi.ssim.dB=6.06 frame:4 pts:4 pts_time:4 -lavfi.ssim.Y=0.80 +lavfi.ssim.Y=0.81 lavfi.ssim.U=0.72 lavfi.ssim.V=0.68 lavfi.ssim.All=0.75 -lavfi.ssim.dB=5.97 +lavfi.ssim.dB=6.05 -- 2.17.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".