On Sat, Jan 4, 2020 at 12:50 AM Jakub Jelinek <ja...@redhat.com> wrote: > > Hi! > > As the following testcase shows, we generate quite bad code for > double-word __builtin_add_overflow on x86, we have add[dt]i3_doubleword > pattern and emit add[ql]; adc[ql];, but then we could just use setc/seto > or adc etc., but we instead perform a double-word comparison to set compute > the carry from the addition (or overflow). > The following patch on the testcase results in the attached changes for -m64 > (and -m32). Also, when the middle-end notices the optabs, it pattern > recognizes hand-written code, like e.g. in the #c0 testcase in the PR where > it does sum += prod; overflow += sum < prod; which the GIMPLE opts can > turn into overflow += __builtin_add_overflow (sum, prod, &sum);. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > Haven't done yet subv<mode>4 and usubv<mode>4 for the double-word modes, but > it ought to work similarly, just it will be more work. > > 2020-01-04 Jakub Jelinek <ja...@redhat.com> > > PR target/93141 > * config/i386/i386.md (SWIDWI): New mode iterator. > (DWI, dwi): Add TImode variants. > (addv<mode>4): Use SWIDWI iterator instead of SWI. Use > <general_hilo_operand> instead of <general_operand>. Use > CONST_SCALAR_INT_P instead of CONST_INT_P. > (*addv<mode>4_1): Rename to ... > (addv<mode>4_1): ... this. > (QWI): New mode attribute. > (*addv<dwi>4_doubleword, *addv<dwi>4_doubleword_1): New > define_insn_and_split patterns. > (*addv<mode>4_overflow_1, *addv<mode>4_overflow_2): New define_insn > patterns. > (uaddv<mode>4): Use SWIDWI iterator instead of SWI. Use > <general_hilo_operand> instead of <general_operand>. > (*addcarry<mode>_1): New define_insn. > (*add<dwi>3_doubleword_cc_overflow_1): New define_insn_and_split. > > * gcc.target/i386/pr93141-1.c: New test. > * gcc.dg/pr67089-6.c: Expect 16 ADD_OVERFLOW calls even on ia32.
LGTM, but I wonder if *addcarry<mode>_1 gets overmacroized, the insn condition is really hard to comprehend. Perhaps it should be written as a separate pattern for SImode and DImode instead? Thanks, Uros. > --- gcc/config/i386/i386.md.jj 2020-01-03 11:10:43.839511446 +0100 > +++ gcc/config/i386/i386.md 2020-01-03 20:55:39.152577178 +0100 > @@ -1036,6 +1036,9 @@ (define_mode_iterator SWIM248 [(HI "TARG > (define_mode_iterator DWI [(DI "!TARGET_64BIT") > (TI "TARGET_64BIT")]) > > +;; SWI and DWI together. > +(define_mode_iterator SWIDWI [QI HI SI DI (TI "TARGET_64BIT")]) > + > ;; GET_MODE_SIZE for selected modes. As GET_MODE_SIZE is not > ;; compile time constant, it is faster to use <MODE_SIZE> than > ;; GET_MODE_SIZE (<MODE>mode). For XFmode which depends on > @@ -1051,8 +1054,8 @@ (define_mode_attr MODE_SIZE [(QI "1") (H > (V4SF "16") (V8SF "32") (V16SF "64")]) > > ;; Double word integer modes as mode attribute. > -(define_mode_attr DWI [(QI "HI") (HI "SI") (SI "DI") (DI "TI")]) > -(define_mode_attr dwi [(QI "hi") (HI "si") (SI "di") (DI "ti")]) > +(define_mode_attr DWI [(QI "HI") (HI "SI") (SI "DI") (DI "TI") (TI "OI")]) > +(define_mode_attr dwi [(QI "hi") (HI "si") (SI "di") (DI "ti") (TI "oi")]) > > ;; LEA mode corresponding to an integer mode > (define_mode_attr LEAMODE [(QI "SI") (HI "SI") (SI "SI") (DI "DI")]) > @@ -6054,16 +6057,17 @@ (define_insn "*addqi_ext_2" > ;; Add with jump on overflow. > (define_expand "addv<mode>4" > [(parallel [(set (reg:CCO FLAGS_REG) > - (eq:CCO (plus:<DWI> > - (sign_extend:<DWI> > - (match_operand:SWI 1 "nonimmediate_operand")) > - (match_dup 4)) > - (sign_extend:<DWI> > - (plus:SWI (match_dup 1) > - (match_operand:SWI 2 > - "<general_operand>"))))) > - (set (match_operand:SWI 0 "register_operand") > - (plus:SWI (match_dup 1) (match_dup 2)))]) > + (eq:CCO > + (plus:<DWI> > + (sign_extend:<DWI> > + (match_operand:SWIDWI 1 "nonimmediate_operand")) > + (match_dup 4)) > + (sign_extend:<DWI> > + (plus:SWIDWI (match_dup 1) > + (match_operand:SWIDWI 2 > + "<general_hilo_operand>"))))) > + (set (match_operand:SWIDWI 0 "register_operand") > + (plus:SWIDWI (match_dup 1) (match_dup 2)))]) > (set (pc) (if_then_else > (eq (reg:CCO FLAGS_REG) (const_int 0)) > (label_ref (match_operand 3)) > @@ -6071,7 +6075,7 @@ (define_expand "addv<mode>4" > "" > { > ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands); > - if (CONST_INT_P (operands[2])) > + if (CONST_SCALAR_INT_P (operands[2])) > operands[4] = operands[2]; > else > operands[4] = gen_rtx_SIGN_EXTEND (<DWI>mode, operands[2]); > @@ -6093,7 +6097,7 @@ (define_insn "*addv<mode>4" > [(set_attr "type" "alu") > (set_attr "mode" "<MODE>")]) > > -(define_insn "*addv<mode>4_1" > +(define_insn "addv<mode>4_1" > [(set (reg:CCO FLAGS_REG) > (eq:CCO (plus:<DWI> > (sign_extend:<DWI> > @@ -6118,15 +6122,178 @@ (define_insn "*addv<mode>4_1" > (const_string "4")] > (const_string "<MODE_SIZE>")))]) > > +;; Quad word integer modes as mode attribute. > +(define_mode_attr QWI [(SI "TI") (DI "OI")]) > + > +(define_insn_and_split "*addv<dwi>4_doubleword" > + [(set (reg:CCO FLAGS_REG) > + (eq:CCO > + (plus:<QWI> > + (sign_extend:<QWI> > + (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0")) > + (sign_extend:<QWI> > + (match_operand:<DWI> 2 "x86_64_hilo_general_operand" > "r<di>,o"))) > + (sign_extend:<QWI> > + (plus:<DWI> (match_dup 1) (match_dup 2))))) > + (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r") > + (plus:<DWI> (match_dup 1) (match_dup 2)))] > + "ix86_binary_operator_ok (PLUS, <DWI>mode, operands)" > + "#" > + "reload_completed" > + [(parallel [(set (reg:CCC FLAGS_REG) > + (compare:CCC > + (plus:DWIH (match_dup 1) (match_dup 2)) > + (match_dup 1))) > + (set (match_dup 0) > + (plus:DWIH (match_dup 1) (match_dup 2)))]) > + (parallel [(set (reg:CCO FLAGS_REG) > + (eq:CCO > + (plus:<DWI> > + (plus:<DWI> > + (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0)) > + (sign_extend:<DWI> (match_dup 4))) > + (sign_extend:<DWI> (match_dup 5))) > + (sign_extend:<DWI> > + (plus:DWIH > + (plus:DWIH > + (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)) > + (match_dup 4)) > + (match_dup 5))))) > + (set (match_dup 3) > + (plus:DWIH > + (plus:DWIH > + (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)) > + (match_dup 4)) > + (match_dup 5)))])] > +{ > + split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]); > +}) > + > +(define_insn_and_split "*addv<dwi>4_doubleword_1" > + [(set (reg:CCO FLAGS_REG) > + (eq:CCO > + (plus:<QWI> > + (sign_extend:<QWI> > + (match_operand:<DWI> 1 "nonimmediate_operand" "%0")) > + (match_operand:<QWI> 3 "const_scalar_int_operand" "")) > + (sign_extend:<QWI> > + (plus:<DWI> > + (match_dup 1) > + (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "<di>"))))) > + (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro") > + (plus:<DWI> (match_dup 1) (match_dup 2)))] > + "ix86_binary_operator_ok (PLUS, <DWI>mode, operands) > + && CONST_SCALAR_INT_P (operands[2]) > + && rtx_equal_p (operands[2], operands[3])" > + "#" > + "reload_completed" > + [(parallel [(set (reg:CCC FLAGS_REG) > + (compare:CCC > + (plus:DWIH (match_dup 1) (match_dup 2)) > + (match_dup 1))) > + (set (match_dup 0) > + (plus:DWIH (match_dup 1) (match_dup 2)))]) > + (parallel [(set (reg:CCO FLAGS_REG) > + (eq:CCO > + (plus:<DWI> > + (plus:<DWI> > + (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0)) > + (sign_extend:<DWI> (match_dup 4))) > + (match_dup 5)) > + (sign_extend:<DWI> > + (plus:DWIH > + (plus:DWIH > + (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)) > + (match_dup 4)) > + (match_dup 5))))) > + (set (match_dup 3) > + (plus:DWIH > + (plus:DWIH > + (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)) > + (match_dup 4)) > + (match_dup 5)))])] > +{ > + split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]); > + if (operands[2] == const0_rtx) > + { > + emit_insn (gen_addv<mode>4_1 (operands[3], operands[4], operands[5], > + operands[5])); > + DONE; > + } > +}) > + > +(define_insn "*addv<mode>4_overflow_1" > + [(set (reg:CCO FLAGS_REG) > + (eq:CCO > + (plus:<DWI> > + (plus:<DWI> > + (match_operator:<DWI> 4 "ix86_carry_flag_operator" > + [(match_operand 3 "flags_reg_operand") (const_int 0)]) > + (sign_extend:<DWI> > + (match_operand:SWI 1 "nonimmediate_operand" "%0,0"))) > + (sign_extend:<DWI> > + (match_operand:SWI 2 "<general_sext_operand>" "rWe,m"))) > + (sign_extend:<DWI> > + (plus:SWI > + (plus:SWI > + (match_operator:SWI 5 "ix86_carry_flag_operator" > + [(match_dup 3) (const_int 0)]) > + (match_dup 1)) > + (match_dup 2))))) > + (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r") > + (plus:SWI > + (plus:SWI > + (match_op_dup 5 [(match_dup 3) (const_int 0)]) > + (match_dup 1)) > + (match_dup 2)))] > + "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)" > + "adc{<imodesuffix>}\t{%2, %0|%0, %2}" > + [(set_attr "type" "alu") > + (set_attr "mode" "<MODE>")]) > + > +(define_insn "*addv<mode>4_overflow_2" > + [(set (reg:CCO FLAGS_REG) > + (eq:CCO > + (plus:<DWI> > + (plus:<DWI> > + (match_operator:<DWI> 4 "ix86_carry_flag_operator" > + [(match_operand 3 "flags_reg_operand") (const_int 0)]) > + (sign_extend:<DWI> > + (match_operand:SWI 1 "nonimmediate_operand" "%0"))) > + (match_operand:<DWI> 6 "const_int_operand" "")) > + (sign_extend:<DWI> > + (plus:SWI > + (plus:SWI > + (match_operator:SWI 5 "ix86_carry_flag_operator" > + [(match_dup 3) (const_int 0)]) > + (match_dup 1)) > + (match_operand:SWI 2 "x86_64_immediate_operand" "e"))))) > + (set (match_operand:SWI 0 "nonimmediate_operand" "=rm") > + (plus:SWI > + (plus:SWI > + (match_op_dup 5 [(match_dup 3) (const_int 0)]) > + (match_dup 1)) > + (match_dup 2)))] > + "ix86_binary_operator_ok (PLUS, <MODE>mode, operands) > + && CONST_INT_P (operands[2]) > + && INTVAL (operands[2]) == INTVAL (operands[6])" > + "adc{<imodesuffix>}\t{%2, %0|%0, %2}" > + [(set_attr "type" "alu") > + (set_attr "mode" "<MODE>") > + (set (attr "length_immediate") > + (if_then_else (match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)") > + (const_string "1") > + (const_string "4")))]) > + > (define_expand "uaddv<mode>4" > [(parallel [(set (reg:CCC FLAGS_REG) > (compare:CCC > - (plus:SWI > - (match_operand:SWI 1 "nonimmediate_operand") > - (match_operand:SWI 2 "<general_operand>")) > + (plus:SWIDWI > + (match_operand:SWIDWI 1 "nonimmediate_operand") > + (match_operand:SWIDWI 2 "<general_hilo_operand>")) > (match_dup 1))) > - (set (match_operand:SWI 0 "register_operand") > - (plus:SWI (match_dup 1) (match_dup 2)))]) > + (set (match_operand:SWIDWI 0 "register_operand") > + (plus:SWIDWI (match_dup 1) (match_dup 2)))]) > (set (pc) (if_then_else > (ltu (reg:CCC FLAGS_REG) (const_int 0)) > (label_ref (match_operand 3)) > @@ -6649,6 +6816,48 @@ (define_expand "addcarry<mode>_0" > (plus:SWI48 (match_dup 1) (match_dup 2)))])] > "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)") > > +(define_insn "*addcarry<mode>_1" > + [(set (reg:CCC FLAGS_REG) > + (compare:CCC > + (zero_extend:<DWI> > + (plus:SWI48 > + (plus:SWI48 > + (match_operator:SWI48 5 "ix86_carry_flag_operator" > + [(match_operand 3 "flags_reg_operand") (const_int 0)]) > + (match_operand:SWI48 1 "nonimmediate_operand" "%0")) > + (match_operand:SWI48 2 "x86_64_immediate_operand" "e"))) > + (plus:<DWI> > + (match_operand:<DWI> 6 "const_scalar_int_operand" "") > + (match_operator:<DWI> 4 "ix86_carry_flag_operator" > + [(match_dup 3) (const_int 0)])))) > + (set (match_operand:SWI48 0 "register_operand" "=r") > + (plus:SWI48 (plus:SWI48 (match_op_dup 5 > + [(match_dup 3) (const_int 0)]) > + (match_dup 1)) > + (match_dup 2)))] > + "ix86_binary_operator_ok (PLUS, <MODE>mode, operands) > + && CONST_INT_P (operands[2]) > + /* Check that operands[6] is operands[2] zero extended from > + <MODE>mode to <DWI>mode. */ > + && ((<MODE>mode == SImode || INTVAL (operands[2]) >= 0) > + ? (CONST_INT_P (operands[6]) > + && UINTVAL (operands[6]) == (UINTVAL (operands[2]) > + & GET_MODE_MASK (<MODE>mode))) > + : (CONST_WIDE_INT_P (operands[6]) > + && CONST_WIDE_INT_NUNITS (operands[6]) == 2 > + && ((unsigned HOST_WIDE_INT) CONST_WIDE_INT_ELT (operands[6], 0) > + == UINTVAL (operands[2])) > + && CONST_WIDE_INT_ELT (operands[6], 1) == 0))" > + "adc{<imodesuffix>}\t{%2, %0|%0, %2}" > + [(set_attr "type" "alu") > + (set_attr "use_carry" "1") > + (set_attr "pent_pair" "pu") > + (set_attr "mode" "<MODE>") > + (set (attr "length_immediate") > + (if_then_else (match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)") > + (const_string "1") > + (const_string "4")))]) > + > (define_insn "@sub<mode>3_carry" > [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>") > (minus:SWI > @@ -6885,6 +7094,54 @@ (define_insn "*addsi3_zext_cc_overflow_2 > [(set_attr "type" "alu") > (set_attr "mode" "SI")]) > > +(define_insn_and_split "*add<dwi>3_doubleword_cc_overflow_1" > + [(set (reg:CCC FLAGS_REG) > + (compare:CCC > + (plus:<DWI> > + (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0") > + (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o")) > + (match_dup 1))) > + (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r") > + (plus:<DWI> (match_dup 1) (match_dup 2)))] > + "ix86_binary_operator_ok (PLUS, <DWI>mode, operands)" > + "#" > + "reload_completed" > + [(parallel [(set (reg:CCC FLAGS_REG) > + (compare:CCC > + (plus:DWIH (match_dup 1) (match_dup 2)) > + (match_dup 1))) > + (set (match_dup 0) > + (plus:DWIH (match_dup 1) (match_dup 2)))]) > + (parallel [(set (reg:CCC FLAGS_REG) > + (compare:CCC > + (zero_extend:<DWI> > + (plus:DWIH > + (plus:DWIH > + (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)) > + (match_dup 4)) > + (match_dup 5))) > + (plus:<DWI> > + (match_dup 6) > + (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0))))) > + (set (match_dup 3) > + (plus:DWIH > + (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)) > + (match_dup 4)) > + (match_dup 5)))])] > +{ > + split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]); > + if (operands[2] == const0_rtx) > + { > + emit_insn (gen_addcarry<mode>_0 (operands[3], operands[4], > operands[5])); > + DONE; > + } > + if (CONST_INT_P (operands[5])) > + operands[6] = simplify_unary_operation (ZERO_EXTEND, <DWI>mode, > + operands[5], <MODE>mode); > + else > + operands[6] = gen_rtx_ZERO_EXTEND (<DWI>mode, operands[5]); > +}) > + > ;; x == 0 with zero flag test can be done also as x < 1U with carry flag > ;; test, where the latter is preferrable if we have some carry consuming > ;; instruction. > --- gcc/testsuite/gcc.target/i386/pr93141-1.c.jj 2020-01-03 > 20:59:02.053542989 +0100 > +++ gcc/testsuite/gcc.target/i386/pr93141-1.c 2020-01-03 21:02:33.573379636 > +0100 > @@ -0,0 +1,83 @@ > +/* PR target/93141 */ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -masm=att" } */ > +/* { dg-final { scan-assembler-not "cmp\[lq]\t" } } */ > +/* { dg-final { scan-assembler-times "setc\t%" 3 } } */ > +/* { dg-final { scan-assembler-times "seto\t%" 5 } } */ > +/* { dg-final { scan-assembler-times "adc\[lq]\t" 5 } } */ > + > +#ifdef __x86_64__ > +typedef unsigned __int128 U; > +typedef signed __int128 S; > +#else > +typedef unsigned long long U; > +typedef signed long long S; > +#endif > +int o; > + > +U > +foo (U x, U y) > +{ > + U z; > + o = __builtin_add_overflow (x, y, &z); > + return z; > +} > + > +U > +bar (U x) > +{ > + U z; > + o = __builtin_add_overflow (x, ((U) 0xdeadbee) << (sizeof (U) * > __CHAR_BIT__ / 2), &z); > + return z; > +} > + > +U > +baz (U x) > +{ > + U z; > + o = __builtin_add_overflow (x, (((U) 0xdeadbee) << (sizeof (U) * > __CHAR_BIT__ / 2)) > + | (U) 0xbeedead, &z); > + return z; > +} > + > +S > +qux (S x, S y) > +{ > + S z; > + o = __builtin_add_overflow (x, y, &z); > + return z; > +} > + > +S > +quux (S x) > +{ > + S z; > + o = __builtin_add_overflow (x, ((S) 0xdeadbee) << (sizeof (S) * > __CHAR_BIT__ / 2), &z); > + return z; > +} > + > +S > +corge (S x) > +{ > + S z; > + o = __builtin_add_overflow (x, (((S) 0xdeadbee) << (sizeof (S) * > __CHAR_BIT__ / 2)) > + | (S) 0xbeedead, &z); > + return z; > +} > + > +S > +grault (S x) > +{ > + S z; > + o = __builtin_add_overflow (x, -((S) 0xdeadbee) << (sizeof (S) * > __CHAR_BIT__ / 2), &z); > + return z; > +} > + > +S > +garply (S x) > +{ > + S z; > + o = __builtin_add_overflow (x, (-(((S) 0xdeadbee) << (sizeof (S) * > __CHAR_BIT__ / 2))) > + | (S) 0xbeedead, &z); > + return z; > +} > --- gcc/testsuite/gcc.dg/pr67089-6.c.jj 2015-11-25 09:57:47.986410019 +0100 > +++ gcc/testsuite/gcc.dg/pr67089-6.c 2020-01-04 00:24:48.944285746 +0100 > @@ -56,7 +56,6 @@ T (24, unsigned long long, x + y, if (d > T (25, unsigned short, 2U - x, if (r > 2U) foo (0)) > T (26, unsigned char, 2U - x, if (r <= 2U) foo (0)) > > -/* { dg-final { scan-tree-dump-times "ADD_OVERFLOW" 16 "widening_mul" { > target { { i?86-*-* x86_64-*-* } && { ! ia32 } } } } } */ > +/* { dg-final { scan-tree-dump-times "ADD_OVERFLOW" 16 "widening_mul" { > target { i?86-*-* x86_64-*-* } } } } */ > /* { dg-final { scan-tree-dump-times "SUB_OVERFLOW" 11 "widening_mul" { > target { { i?86-*-* x86_64-*-* } && { ! ia32 } } } } } */ > -/* { dg-final { scan-tree-dump-times "ADD_OVERFLOW" 12 "widening_mul" { > target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ > /* { dg-final { scan-tree-dump-times "SUB_OVERFLOW" 9 "widening_mul" { > target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ > > Jakub