Hi! This patch fixes the other bug I mentioned. Subtracting 0x1p32 doesn't work correctly in all cases, so this patch changes the expansion to do what we already do for scalar double -> uint and float -> uint conversions at -O2 -m32 -msse2 -mfpmath=sse: subtract just 0x1p31 instead of 0x1p32, do the comparison using a signalling instead of a non-signalling insn (after all, trying to convert a qNaN to unsigned int is undefined behavior), which means we can emit it even with just -msse2, and at the end xor 0x80000000 into the integer result if the float/double was greater than or equal to 0x1p31.
Ok for trunk? 2011-11-03 Jakub Jelinek <ja...@redhat.com> * config/i386/i386.c (ix86_expand_adjust_ufix_to_sfix_si): Add XORP argument. Subtract 0x1p31 instead of 0x1p32. Use normal signalling comparison instead of non-signalling. Store into *XORP pseudo holding 0x80000000 integers if 0x1p31 has been subtracted and 0 otherwise. * config/i386/i386-protos.h (ix86_expand_adjust_ufix_to_sfix_si): Adjust prototype. * config/i386/sse.md (fixuns_trunc<mode><sseintvecmodelower>2): Enable already for TARGET_SSE2. Xor in vector initialized by ix86_expand_adjust_ufix_to_sfix_si at the end. (vec_pack_ufix_trunc_<mode>): Likewise. * gcc.dg/torture/vec-cvt-1.c: Enable flttointtestui test. --- gcc/config/i386/i386.c.jj 2011-11-03 17:11:27.000000000 +0100 +++ gcc/config/i386/i386.c 2011-11-03 21:59:21.000000000 +0100 @@ -17018,16 +17018,17 @@ ix86_expand_convert_uns_sisf_sse (rtx ta /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc* pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*. - This is done by subtracting 0x1p32 from VAL if VAL is greater or equal - (non-signalling) than 0x1p31. */ + This is done by doing just signed conversion if < 0x1p31, and otherwise by + subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */ rtx -ix86_expand_adjust_ufix_to_sfix_si (rtx val) +ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp) { - REAL_VALUE_TYPE MTWO32r, TWO31r; - rtx two31r, mtwo32r, tmp[3]; + REAL_VALUE_TYPE TWO31r; + rtx two31r, tmp[4]; enum machine_mode mode = GET_MODE (val); enum machine_mode scalarmode = GET_MODE_INNER (mode); + enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? 
V8SImode : V4SImode; rtx (*cmp) (rtx, rtx, rtx, rtx); int i; @@ -17037,22 +17038,33 @@ ix86_expand_adjust_ufix_to_sfix_si (rtx two31r = const_double_from_real_value (TWO31r, scalarmode); two31r = ix86_build_const_vector (mode, 1, two31r); two31r = force_reg (mode, two31r); - real_ldexp (&MTWO32r, &dconstm1, 32); - mtwo32r = const_double_from_real_value (MTWO32r, scalarmode); - mtwo32r = ix86_build_const_vector (mode, 1, mtwo32r); - mtwo32r = force_reg (mode, mtwo32r); switch (mode) { - case V8SFmode: cmp = gen_avx_cmpv8sf3; break; - case V4SFmode: cmp = gen_avx_cmpv4sf3; break; - case V4DFmode: cmp = gen_avx_cmpv4df3; break; - case V2DFmode: cmp = gen_avx_cmpv2df3; break; + case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break; + case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break; + case V4DFmode: cmp = gen_avx_maskcmpv4df3; break; + case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break; default: gcc_unreachable (); } - emit_insn (cmp (tmp[0], val, two31r, GEN_INT (29))); - tmp[1] = expand_simple_binop (mode, AND, tmp[0], mtwo32r, tmp[1], + tmp[3] = gen_rtx_LE (mode, two31r, val); + emit_insn (cmp (tmp[0], two31r, val, tmp[3])); + tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1], 0, OPTAB_DIRECT); - return expand_simple_binop (mode, PLUS, val, tmp[1], tmp[2], + if (intmode == V4SImode || TARGET_AVX2) + *xorp = expand_simple_binop (intmode, ASHIFT, + gen_lowpart (intmode, tmp[0]), + GEN_INT (31), NULL_RTX, 0, + OPTAB_DIRECT); + else + { + rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31); + two31 = ix86_build_const_vector (intmode, 1, two31); + *xorp = expand_simple_binop (intmode, AND, + gen_lowpart (intmode, tmp[0]), + two31, NULL_RTX, 0, + OPTAB_DIRECT); + } + return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2], 0, OPTAB_DIRECT); } --- gcc/config/i386/i386-protos.h.jj 2011-11-03 16:11:20.000000000 +0100 +++ gcc/config/i386/i386-protos.h 2011-11-03 20:48:48.000000000 +0100 @@ -109,7 +109,7 @@ extern void ix86_expand_convert_uns_sixf extern void 
ix86_expand_convert_uns_sidf_sse (rtx, rtx); extern void ix86_expand_convert_uns_sisf_sse (rtx, rtx); extern void ix86_expand_convert_sign_didf_sse (rtx, rtx); -extern rtx ix86_expand_adjust_ufix_to_sfix_si (rtx); +extern rtx ix86_expand_adjust_ufix_to_sfix_si (rtx, rtx *); extern enum ix86_fpcmp_strategy ix86_fp_comparison_strategy (enum rtx_code); extern void ix86_expand_fp_absneg_operator (enum rtx_code, enum machine_mode, rtx[]); --- gcc/config/i386/sse.md.jj 2011-11-03 17:25:22.000000000 +0100 +++ gcc/config/i386/sse.md 2011-11-03 22:09:54.000000000 +0100 @@ -2330,10 +2330,13 @@ (define_insn "fix_truncv4sfv4si2" (define_expand "fixuns_trunc<mode><sseintvecmodelower>2" [(match_operand:<sseintvecmode> 0 "register_operand" "") (match_operand:VF1 1 "register_operand" "")] - "TARGET_AVX" + "TARGET_SSE2" { - rtx tmp = ix86_expand_adjust_ufix_to_sfix_si (operands[1]); - emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp)); + rtx tmp[3]; + tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]); + tmp[1] = gen_reg_rtx (<sseintvecmode>mode); + emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0])); + emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2])); DONE; }) @@ -3120,12 +3123,29 @@ (define_expand "vec_pack_ufix_trunc_<mod [(match_operand:<ssepackfltmode> 0 "register_operand" "") (match_operand:VF2 1 "register_operand" "") (match_operand:VF2 2 "register_operand" "")] - "TARGET_AVX" + "TARGET_SSE2" { - rtx tmp[2]; - tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1]); - tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2]); - emit_insn (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp[0], tmp[1])); + rtx tmp[7]; + tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]); + tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]); + tmp[4] = gen_reg_rtx (<ssepackfltmode>mode); + emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1])); + if (<ssepackfltmode>mode == 
V4SImode || TARGET_AVX2) + { + tmp[5] = gen_reg_rtx (<ssepackfltmode>mode); + ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0); + } + else + { + tmp[5] = gen_reg_rtx (V8SFmode); + ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]), + gen_lowpart (V8SFmode, tmp[3]), 0); + tmp[5] = gen_lowpart (V8SImode, tmp[5]); + } + tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5], + operands[0], 0, OPTAB_DIRECT); + if (tmp[6] != operands[0]) + emit_move_insn (operands[0], tmp[6]); DONE; }) --- gcc/testsuite/gcc.dg/torture/vec-cvt-1.c.jj 2011-11-03 17:43:11.000000000 +0100 +++ gcc/testsuite/gcc.dg/torture/vec-cvt-1.c 2011-11-03 21:39:54.000000000 +0100 @@ -197,7 +197,7 @@ main () flttointtestsl (); flttointtestuc (); flttointtestus (); -// flttointtestui (); + flttointtestui (); flttointtestul (); inttoflttestsc (); inttoflttestss (); Jakub