On Wed, Jan 8, 2020 at 9:09 AM Jakub Jelinek <ja...@redhat.com> wrote: > > Hi! > > This is very similar to the previous PR93141 addv<mode>4 half and > improves signed __builtin_sub_overflow on double-words rather than > __builtin_add_overflow. > > I have left out the uaddv<mode>4 double-word stuff, because I ran into > issues with it - as the pattern uses (set (reg:CC flags) (compare:CC (reg:TI) > (reg:TI))) > as the first set, ifcvt.c (canonicalize_condition it calls) extracts > the TImode comparison arguments from it, but as we don't have a cmpti3 > pattern, noce_try_store_flag fails and we don't manage to convert branchy > code into setc. Even for smaller modes ifcvt in that case does weird > things, after the sub instruction which sets the flags too it emits > a cmp with the same arguments and only postreload we notice the compare is > redundant and remove it. So, dunno if we want to improve somehow ifcvt > and for conditions where we'd look through a multiple set instructions > somehow try harder to reuse the original instruction, or if usub<dwi>4 > should not use compare:CC of the operands and instead use some obfuscation > to only set CCCmode to prevent ifcvt from punting on it. > > Anyway, this part works, bootstrapped/regtested on x86_64-linux and > i686-linux, ok for trunk? > > 2020-01-07 Jakub Jelinek <ja...@redhat.com> > > PR target/93141 > (subv<mode>4): Use SWIDWI iterator instead of SWI. Use > <general_hilo_operand> instead of <general_operand>. Use > CONST_SCALAR_INT_P instead of CONST_INT_P. > (*subv<mode>4_1): Rename to ... > (subv<mode>4_1): ... this. > (*subv<dwi>4_doubleword, *addv<dwi>4_doubleword_1): New > define_insn_and_split patterns. > (*subv<mode>4_overflow_1, *addv<mode>4_overflow_2): New define_insn > patterns. > > * gcc.target/i386/pr93141-1.c: Add tests with constants that have MSB > of the low half of the constant set. > * gcc.target/i386/pr93141-2.c: New test.
LGTM. Thanks, Uros. > --- gcc/config/i386/i386.md.jj 2020-01-07 08:01:22.618613702 +0100 > +++ gcc/config/i386/i386.md 2020-01-07 10:07:01.770587027 +0100 > @@ -6569,16 +6569,17 @@ (define_insn "*subsi_2_zext" > ;; Subtract with jump on overflow. > (define_expand "subv<mode>4" > [(parallel [(set (reg:CCO FLAGS_REG) > - (eq:CCO (minus:<DWI> > - (sign_extend:<DWI> > - (match_operand:SWI 1 "nonimmediate_operand")) > - (match_dup 4)) > - (sign_extend:<DWI> > - (minus:SWI (match_dup 1) > - (match_operand:SWI 2 > - "<general_operand>"))))) > - (set (match_operand:SWI 0 "register_operand") > - (minus:SWI (match_dup 1) (match_dup 2)))]) > + (eq:CCO > + (minus:<DWI> > + (sign_extend:<DWI> > + (match_operand:SWIDWI 1 "nonimmediate_operand")) > + (match_dup 4)) > + (sign_extend:<DWI> > + (minus:SWIDWI (match_dup 1) > + (match_operand:SWIDWI 2 > + "<general_hilo_operand>"))))) > + (set (match_operand:SWIDWI 0 "register_operand") > + (minus:SWIDWI (match_dup 1) (match_dup 2)))]) > (set (pc) (if_then_else > (eq (reg:CCO FLAGS_REG) (const_int 0)) > (label_ref (match_operand 3)) > @@ -6586,7 +6587,7 @@ (define_expand "subv<mode>4" > "" > { > ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands); > - if (CONST_INT_P (operands[2])) > + if (CONST_SCALAR_INT_P (operands[2])) > operands[4] = operands[2]; > else > operands[4] = gen_rtx_SIGN_EXTEND (<DWI>mode, operands[2]); > @@ -6608,7 +6609,7 @@ (define_insn "*subv<mode>4" > [(set_attr "type" "alu") > (set_attr "mode" "<MODE>")]) > > -(define_insn "*subv<mode>4_1" > +(define_insn "subv<mode>4_1" > [(set (reg:CCO FLAGS_REG) > (eq:CCO (minus:<DWI> > (sign_extend:<DWI> > @@ -6633,6 +6634,162 @@ (define_insn "*subv<mode>4_1" > (const_string "4")] > (const_string "<MODE_SIZE>")))]) > > +(define_insn_and_split "*subv<dwi>4_doubleword" > + [(set (reg:CCO FLAGS_REG) > + (eq:CCO > + (minus:<QWI> > + (sign_extend:<QWI> > + (match_operand:<DWI> 1 "nonimmediate_operand" "0,0")) > + (sign_extend:<QWI> > + (match_operand:<DWI> 2 "x86_64_hilo_general_operand" > "r<di>,o"))) > + (sign_extend:<QWI> > + (minus:<DWI> (match_dup 1) (match_dup 2))))) > + (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r") > + (minus:<DWI> (match_dup 1) (match_dup 2)))] > + "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)" > + "#" > + "reload_completed" > + [(parallel [(set (reg:CC FLAGS_REG) > + (compare:CC (match_dup 1) (match_dup 2))) > + (set (match_dup 0) > + (minus:DWIH (match_dup 1) (match_dup 2)))]) > + (parallel [(set (reg:CCO FLAGS_REG) > + (eq:CCO > + (minus:<DWI> > + (minus:<DWI> > + (sign_extend:<DWI> (match_dup 4)) > + (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0))) > + (sign_extend:<DWI> (match_dup 5))) > + (sign_extend:<DWI> > + (minus:DWIH > + (minus:DWIH > + (match_dup 4) > + (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))) > + (match_dup 5))))) > + (set (match_dup 3) > + (minus:DWIH > + (minus:DWIH > + (match_dup 4) > + (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))) > + (match_dup 5)))])] > +{ > + split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]); > +}) > + > +(define_insn_and_split "*subv<dwi>4_doubleword_1" > + [(set (reg:CCO FLAGS_REG) > + (eq:CCO > + (minus:<QWI> > + (sign_extend:<QWI> > + (match_operand:<DWI> 1 "nonimmediate_operand" "0")) > + (match_operand:<QWI> 3 "const_scalar_int_operand" "")) > + (sign_extend:<QWI> > + (minus:<DWI> > + (match_dup 1) > + (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "<di>"))))) > + (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro") > + (minus:<DWI> (match_dup 1) (match_dup 2)))] > + "ix86_binary_operator_ok (MINUS, <MODE>mode, operands) > + && CONST_SCALAR_INT_P (operands[2]) > + && rtx_equal_p (operands[2], operands[3])" > + "#" > + "reload_completed" > + [(parallel [(set (reg:CC FLAGS_REG) > + (compare:CC (match_dup 1) (match_dup 2))) > + (set (match_dup 0) > + (minus:DWIH (match_dup 1) (match_dup 2)))]) > + (parallel [(set (reg:CCO FLAGS_REG) > + (eq:CCO > + (minus:<DWI> > + (minus:<DWI> > + (sign_extend:<DWI> (match_dup 4)) > + (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0))) > + (match_dup 5)) > + (sign_extend:<DWI> > + (minus:DWIH > + (minus:DWIH > + (match_dup 4) > + (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))) > + (match_dup 5))))) > + (set (match_dup 3) > + (minus:DWIH > + (minus:DWIH > + (match_dup 4) > + (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))) > + (match_dup 5)))])] > +{ > + split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]); > + if (operands[2] == const0_rtx) > + { > + emit_insn (gen_subv<mode>4_1 (operands[3], operands[4], operands[5], > + operands[5])); > + DONE; > + } > +}) > + > +(define_insn "*subv<mode>4_overflow_1" > + [(set (reg:CCO FLAGS_REG) > + (eq:CCO > + (minus:<DWI> > + (minus:<DWI> > + (sign_extend:<DWI> > + (match_operand:SWI 1 "nonimmediate_operand" "%0,0")) > + (match_operator:<DWI> 4 "ix86_carry_flag_operator" > + [(match_operand 3 "flags_reg_operand") (const_int 0)])) > + (sign_extend:<DWI> > + (match_operand:SWI 2 "<general_sext_operand>" "rWe,m"))) > + (sign_extend:<DWI> > + (minus:SWI > + (minus:SWI > + (match_dup 1) > + (match_operator:SWI 5 "ix86_carry_flag_operator" > + [(match_dup 3) (const_int 0)])) > + (match_dup 2))))) > + (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r") > + (minus:SWI > + (minus:SWI > + (match_dup 1) > + (match_op_dup 5 [(match_dup 3) (const_int 0)])) > + (match_dup 2)))] > + "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)" > + "sbb{<imodesuffix>}\t{%2, %0|%0, %2}" > + [(set_attr "type" "alu") > + (set_attr "mode" "<MODE>")]) > + > +(define_insn "*subv<mode>4_overflow_2" > + [(set (reg:CCO FLAGS_REG) > + (eq:CCO > + (minus:<DWI> > + (minus:<DWI> > + (sign_extend:<DWI> > + (match_operand:SWI 1 "nonimmediate_operand" "%0")) > + (match_operator:<DWI> 4 "ix86_carry_flag_operator" > + [(match_operand 3 "flags_reg_operand") (const_int 0)])) > + (match_operand:<DWI> 6 "const_int_operand" "")) > + (sign_extend:<DWI> > + (minus:SWI > + (minus:SWI > + (match_dup 1) > + (match_operator:SWI 5 "ix86_carry_flag_operator" > + [(match_dup 3) (const_int 0)])) > + (match_operand:SWI 2 "x86_64_immediate_operand" "e"))))) > + (set (match_operand:SWI 0 "nonimmediate_operand" "=rm") > + (minus:SWI > + (minus:SWI > + (match_dup 1) > + (match_op_dup 5 [(match_dup 3) (const_int 0)])) > + (match_dup 2)))] > + "ix86_binary_operator_ok (MINUS, <MODE>mode, operands) > + && CONST_INT_P (operands[2]) > + && INTVAL (operands[2]) == INTVAL (operands[6])" > + "sbb{<imodesuffix>}\t{%2, %0|%0, %2}" > + [(set_attr "type" "alu") > + (set_attr "mode" "<MODE>") > + (set (attr "length_immediate") > + (if_then_else (match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)") > + (const_string "1") > + (const_string "4")))]) > + > (define_expand "usubv<mode>4" > [(parallel [(set (reg:CC FLAGS_REG) > (compare:CC > --- gcc/testsuite/gcc.target/i386/pr93141-1.c.jj 2020-01-05 > 13:51:34.452627004 +0100 > +++ gcc/testsuite/gcc.target/i386/pr93141-1.c 2020-01-07 10:45:36.213489037 > +0100 > @@ -2,15 +2,17 @@ > /* { dg-do compile } */ > /* { dg-options "-O2 -masm=att" } */ > /* { dg-final { scan-assembler-not "cmp\[lq]\t" } } */ > -/* { dg-final { scan-assembler-times "setc\t%" 3 } } */ > -/* { dg-final { scan-assembler-times "seto\t%" 5 } } */ > -/* { dg-final { scan-assembler-times "adc\[lq]\t" 5 } } */ > +/* { dg-final { scan-assembler-times "setc\t%" 5 } } */ > +/* { dg-final { scan-assembler-times "seto\t%" 7 } } */ > +/* { dg-final { scan-assembler-times "adc\[lq]\t" 9 } } */ > > #ifdef __x86_64__ > typedef unsigned __int128 U; > +typedef unsigned long long HU; > typedef signed __int128 S; > #else > typedef unsigned long long U; > +typedef unsigned int HU; > typedef signed long long S; > #endif > int o; > @@ -81,3 +83,39 @@ garply (S x) > | (S) 0xbeedead, &z); > return z; > } > + > +S > +waldo (S x) > +{ > + S z; > + o = __builtin_add_overflow (x, (S) ((((S) 0xdeadbee) << (sizeof (S) * > __CHAR_BIT__ / 2)) > + | -(HU) 0xbeedead), &z); > + return z; > +} > + > +S > +fred (S x) > +{ > + S z; > + o = __builtin_add_overflow (x, (S) ((-(((S) 0xdeadbee) << (sizeof (S) * > __CHAR_BIT__ / 2))) > + | -(HU) 0xbeedead), &z); > + return z; > +} > + > +U > +plugh (U x) > +{ > + U z; > + o = __builtin_add_overflow (x, (U) ((((U) 0xdeadbee) << (sizeof (U) * > __CHAR_BIT__ / 2)) > + | -(HU) 0xbeedead), &z); > + return z; > +} > + > +U > +xyzzy (U x) > +{ > + U z; > + o = __builtin_add_overflow (x, (U) ((-(((U) 0xdeadbee) << (sizeof (U) * > __CHAR_BIT__ / 2))) > + | -(HU) 0xbeedead), &z); > + return z; > +} > --- gcc/testsuite/gcc.target/i386/pr93141-2.c.jj 2020-01-07 > 10:30:53.966831451 +0100 > +++ gcc/testsuite/gcc.target/i386/pr93141-2.c 2020-01-07 10:43:27.602433611 > +0100 > @@ -0,0 +1,78 @@ > +/* PR target/93141 */ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -masm=att" } */ > +/* { dg-final { scan-assembler-not "cmp\[lq]\t" } } */ > +/* { dg-final { scan-assembler-not "adc\[lq]\t" } } */ > +/* { dg-final { scan-assembler-times "seto\t%" 7 } } */ > +/* { dg-final { scan-assembler-times "sbb\[lq]\t" 5 } } */ > + > +#ifdef __x86_64__ > +typedef unsigned __int128 U; > +typedef unsigned long long HU; > +typedef signed __int128 S; > +#else > +typedef unsigned long long U; > +typedef signed int HU; > +typedef signed long long S; > +#endif > +int o; > + > +S > +qux (S x, S y) > +{ > + S z; > + o = __builtin_sub_overflow (x, y, &z); > + return z; > +} > + > +S > +quux (S x) > +{ > + S z; > + o = __builtin_sub_overflow (x, ((S) 0xdeadbee) << (sizeof (S) * > __CHAR_BIT__ / 2), &z); > + return z; > +} > + > +S > +corge (S x) > +{ > + S z; > + o = __builtin_sub_overflow (x, (((S) 0xdeadbee) << (sizeof (S) * > __CHAR_BIT__ / 2)) > + | (S) 0xbeedead, &z); > + return z; > +} > + > +S > +grault (S x) > +{ > + S z; > + o = __builtin_sub_overflow (x, -((S) 0xdeadbee) << (sizeof (S) * > __CHAR_BIT__ / 2), &z); > + return z; > +} > + > +S > +garply (S x) > +{ > + S z; > + o = __builtin_sub_overflow (x, (-(((S) 0xdeadbee) << (sizeof (S) * > __CHAR_BIT__ / 2))) > + | (S) 0xbeedead, &z); > + return z; > +} > + > +S > +waldo (S x) > +{ > + S z; > + o = __builtin_sub_overflow (x, (S) ((((S) 0xdeadbee) << (sizeof (S) * > __CHAR_BIT__ / 2)) > + | -(HU) 0xbeedead), &z); > + return z; > +} > + > +S > +fred (S x) > +{ > + S z; > + o = __builtin_sub_overflow (x, (S) ((-(((S) 0xdeadbee) << (sizeof (S) * > __CHAR_BIT__ / 2))) > + | -(HU) 0xbeedead), &z); > + return z; > +} > > Jakub >