Hi! As the enhanced testcase shows, ix86_expand_floorceildf_32 doesn't emit correct ceil when honoring signed zeros, because it performs copysign, then comparison and then based on the comparison an optional addition of 1. The comment claims that it is essential to use x2 -= -1.0 instead of x2 += 1.0, but in reality we emit x2 += 1.0 anyway and it makes no difference on the sign of result if it is zero, as (unless rounding to negative infinity) both -1.0 - (-1.0) and -1.0 + 1.0 evaluate to +0.0 rather than -0.0 we need in this case.
I have no better ideas than to use another copysign, but it should be just one extra instruction, as we've already previously done copysign from the same source and thus hve already computed the x & signmask and so this patch just adds a {,v}por of that previously computed x & signmask into the result again. In this case we aren't really copying it always to positive, but all we want is handle the single problematic case when we compute positive 0 and need negative 0, for all others the oring won't really change the sign. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? Or does anybody have some smarter idea? Note, not sure if floor isn't incorrect in the round to negative infinity case (but that would be preexisting issue). 2019-03-15 Jakub Jelinek <ja...@redhat.com> PR target/89726 * config/i386/i386.c (ix86_expand_floorceildf_32): In ceil compensation use x2 += 1 instead of x2 -= -1 and when honoring signed zeros, do another copysign after the compensation. * gcc.target/i386/fpprec-1.c (x): Add 6 new constants. (expect_round, expect_rint, expect_floor, expect_ceil, expect_trunc): Add expected results for them. --- gcc/config/i386/i386.c.jj 2019-03-14 23:44:27.862560139 +0100 +++ gcc/config/i386/i386.c 2019-03-15 15:07:54.607453430 +0100 @@ -45563,8 +45563,10 @@ ix86_expand_floorceildf_32 (rtx operand0 x2 -= 1; Compensate. Ceil: if (x2 < x) - x2 -= -1; - return x2; + x2 += 1; + if (HONOR_SIGNED_ZEROS (mode)) + x2 = copysign (x2, x); + return x2; */ machine_mode mode = GET_MODE (operand0); rtx xa, TWO52, tmp, one, res, mask; @@ -45590,17 +45592,16 @@ ix86_expand_floorceildf_32 (rtx operand0 /* xa = copysign (xa, operand1) */ ix86_sse_copysign_to_positive (xa, xa, res, mask); - /* generate 1.0 or -1.0 */ - one = force_reg (mode, - const_double_from_real_value (do_floor - ? dconst1 : dconstm1, mode)); + /* generate 1.0 */ + one = force_reg (mode, const_double_from_real_value (dconst1, mode)); /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */ tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor); emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp))); - /* We always need to subtract here to preserve signed zero. */ - tmp = expand_simple_binop (mode, MINUS, + tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS, xa, tmp, NULL_RTX, 0, OPTAB_DIRECT); + if (!do_floor && HONOR_SIGNED_ZEROS (mode)) + ix86_sse_copysign_to_positive (tmp, tmp, res, mask); emit_move_insn (res, tmp); emit_label (label); --- gcc/testsuite/gcc.target/i386/fpprec-1.c.jj 2010-05-21 11:46:22.000000000 +0200 +++ gcc/testsuite/gcc.target/i386/fpprec-1.c 2019-03-15 15:01:51.430345113 +0100 @@ -11,6 +11,9 @@ double x[] = { __builtin_nan(""), __buil 0x1.0000000000001p-1, 0x1.fffffffffffffp-2, 0x1.0000000000001p+0, 0x1.fffffffffffffp-1, 0x1.8000000000001p+0, 0x1.7ffffffffffffp+0, + -0x1.0000000000001p-1, -0x1.fffffffffffffp-2, + -0x1.0000000000001p+0, -0x1.fffffffffffffp-1, + -0x1.8000000000001p+0, -0x1.7ffffffffffffp+0, -0.0, 0.0, -0.5, 0.5, -1.0, 1.0, -1.5, 1.5, -2.0, 2.0, -2.5, 2.5 }; #define NUM (sizeof(x)/sizeof(double)) @@ -19,6 +22,7 @@ double expect_round[] = { __builtin_nan( -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+1023, -0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, + -1.0, -0.0, -1.0, -1.0, -2.0, -1.0, -0.0, 0.0, -1.0, 1.0, -1.0, 1.0, -2.0, 2.0, -2.0, 2.0, -3.0, 3.0 }; @@ -26,6 +30,7 @@ double expect_rint[] = { __builtin_nan(" -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+1023, -0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, + -1.0, -0.0, -1.0, -1.0, -2.0, -1.0, -0.0, 0.0, -0.0, 0.0, -1.0, 1.0, -2.0, 2.0, -2.0, 2.0, -2.0, 2.0 }; @@ -33,6 +38,7 @@ double expect_floor[] = { __builtin_nan( -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+1023, -1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, + -1.0, -1.0, -2.0, -1.0, -2.0, -2.0, -0.0, 0.0, -1.0, 0.0, -1.0, 1.0, -2.0, 1.0, -2.0, 2.0, -3.0, 2.0 }; @@ -40,6 +46,7 @@ double expect_ceil[] = { __builtin_nan(" -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+1023, -0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 2.0, + -0.0, -0.0, -1.0, -0.0, -1.0, -1.0, -0.0, 0.0, -0.0, 1.0, -1.0, 1.0, -1.0, 2.0, -2.0, 2.0, -2.0, 3.0 }; @@ -47,6 +54,7 @@ double expect_trunc[] = { __builtin_nan( -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+1023, -0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, + -0.0, -0.0, -1.0, -0.0, -1.0, -1.0, -0.0, 0.0, -0.0, 0.0, -1.0, 1.0, -1.0, 1.0, -2.0, 2.0, -2.0, 2.0 }; Jakub