The full round of testing from v1 turned up a couple of problems.
One of these I've worked around in the i386 backend, but I believe it
to be a latent problem within combine.
With the following patch applied, disable the add<mode>3_*_overflow_2
patterns, then compile c-c++-common/torture/builtin-arith-overflow-4.c
with -O2 and you'll see
t151_2add:
        testb   %dil, %dil
        leal    -1(%rdi), %eax
        jne     .L644
which is incorrect: combine has looked through two comparisons, seen the
NE in the second comparison, and converted a CCCmode compare (which needs
just the carry flag) into a CCZmode compare (which tests only the zero
flag).

I wonder whether the code in combine.c's simplify_set that calls
SELECT_CC_MODE ought to be throwing away the existing MODE_CC data.
Should we verify that the newly selected CC mode is cc_modes_compatible
with the existing one?  That would certainly work when we begin with the
"usual" fully general CCmode compare and optimize to CCZmode, but it
would then reject moving between CCCmode and CCZmode.
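To make that concrete, here's a rough sketch of the guard I have in mind
(purely illustrative, not part of this patch; NEW_CODE, OP0, OP1 and X
are hypothetical stand-ins for whatever simplify_set has in scope around
its SELECT_CC_MODE call, and I'm reading the cc_modes_compatible hook as
"return a mode in which both comparisons can validly be done, else
VOIDmode"):

    /* Illustrative only -- variable names are stand-ins.  */
    machine_mode new_mode = SELECT_CC_MODE (new_code, op0, op1);
    machine_mode old_mode = GET_MODE (SET_DEST (x));

    if (old_mode != new_mode
        && GET_MODE_CLASS (old_mode) == MODE_CC
        && targetm.cc_modes_compatible (old_mode, new_mode) != old_mode)
      /* The existing mode does not already supply every flag the new
         mode would test (e.g. CCCmode has the carry but not the zero
         flag), so switching would lose information; keep the old mode.  */
      new_mode = old_mode;

If I read ix86_cc_modes_compatible right, that accepts CCmode -> CCZmode
but rejects CCCmode -> CCZmode, which is the behavior described above.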
That said, the addition of the new _overflow_2 patterns allows CSE to simplify
the expressions earlier, which avoids the problem in combine.
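For reference, the kind of source this is all about is simply (my own
minimal example, not one from the testsuite):

    extern void abort (void);

    unsigned int
    f (unsigned int a, unsigned int b)
    {
      unsigned int r;
      if (__builtin_add_overflow (a, b, &r))
        abort ();
      return r;
    }

With the uaddvsi4 expander below this turns into roughly an addl followed
by a jc to the abort path, instead of the generic sequence that
re-compares the result against one of the operands.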
Ok?
r~
* optabs.def (uaddv4_optab, usubv4_optab): New.
* internal-fn.c (expand_addsub_overflow): Use them.
* doc/md.texi (Standard Names): Add uaddv<m>4, usubv<m>4.
* config/i386/i386.c (ix86_cc_mode): Extend add overflow check
to reversed operands.
* config/i386/i386.md (uaddv<SWI>4, usubv<SWI>4): New.
(*add<SWI>3_cconly_overflow_1): Rename *add<SWI>3_cconly_overflow.
(*add<SWI>3_cc_overflow_1): Rename *add<SWI>3_cc_overflow.
(*addsi3_zext_cc_overflow_1): Rename *addsi3_zext_cc_overflow.
(*add<SWI>3_cconly_overflow_2): New.
(*add<SWI>3_cc_overflow_2): New.
(*addsi3_zext_cc_overflow_2): New.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 83749d5..cc42544 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -21137,7 +21137,8 @@ ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
case LTU: /* CF=1 */
/* Detect overflow checks. They need just the carry flag. */
if (GET_CODE (op0) == PLUS
- && rtx_equal_p (op1, XEXP (op0, 0)))
+ && (rtx_equal_p (op1, XEXP (op0, 0))
+ || rtx_equal_p (op1, XEXP (op0, 1))))
return CCCmode;
else
return CCmode;
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 4c5e22a..95b598b 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -6156,6 +6156,24 @@
(const_string "4")]
(const_string "<MODE_SIZE>")))])
+(define_expand "uaddv<mode>4"
+ [(parallel [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (plus:SWI
+ (match_operand:SWI 1 "nonimmediate_operand")
+ (match_operand:SWI 2 "<general_operand>"))
+ (match_dup 1)))
+ (set (match_operand:SWI 0 "register_operand")
+ (plus:SWI (match_dup 1) (match_dup 2)))])
+ (set (pc) (if_then_else
+ (ltu (reg:CCC FLAGS_REG) (const_int 0))
+ (label_ref (match_operand 3))
+ (pc)))]
+ ""
+{
+ ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
+})
+
;; The lea patterns for modes less than 32 bits need to be matched by
;; several insns converted to real lea by splitters.
@@ -6461,6 +6479,22 @@
(const_string "4")]
(const_string "<MODE_SIZE>")))])
+(define_expand "usubv<mode>4"
+ [(parallel [(set (reg:CC FLAGS_REG)
+ (compare:CC
+ (match_operand:SWI 1 "nonimmediate_operand")
+ (match_operand:SWI 2 "<general_operand>")))
+ (set (match_operand:SWI 0 "register_operand")
+ (minus:SWI (match_dup 1) (match_dup 2)))])
+ (set (pc) (if_then_else
+ (ltu (reg:CC FLAGS_REG) (const_int 0))
+ (label_ref (match_operand 3))
+ (pc)))]
+ ""
+{
+ ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);
+})
+
(define_insn "*sub<mode>_3"
[(set (reg FLAGS_REG)
(compare (match_operand:SWI 1 "nonimmediate_operand" "0,0")
@@ -6611,7 +6645,7 @@
(clobber (match_scratch:QI 2))])]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))")
-(define_insn "*add<mode>3_cconly_overflow"
+(define_insn "*add<mode>3_cconly_overflow_1"
[(set (reg:CCC FLAGS_REG)
(compare:CCC
(plus:SWI
@@ -6624,7 +6658,20 @@
[(set_attr "type" "alu")
(set_attr "mode" "<MODE>")])
-(define_insn "*add<mode>3_cc_overflow"
+(define_insn "*add<mode>3_cconly_overflow_2"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (plus:SWI
+ (match_operand:SWI 1 "nonimmediate_operand" "%0")
+ (match_operand:SWI 2 "<general_operand>" "<g>"))
+ (match_dup 2)))
+ (clobber (match_scratch:SWI 0 "=<r>"))]
+ "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "add{<imodesuffix>}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*add<mode>3_cc_overflow_1"
[(set (reg:CCC FLAGS_REG)
(compare:CCC
(plus:SWI
@@ -6638,7 +6685,21 @@
[(set_attr "type" "alu")
(set_attr "mode" "<MODE>")])
-(define_insn "*addsi3_zext_cc_overflow"
+(define_insn "*add<mode>3_cc_overflow_2"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (plus:SWI
+ (match_operand:SWI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:SWI 2 "<general_operand>" "<r><i>,<r>m"))
+ (match_dup 2)))
+ (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+ (plus:SWI (match_dup 1) (match_dup 2)))]
+ "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
+ "add{<imodesuffix>}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*addsi3_zext_cc_overflow_1"
[(set (reg:CCC FLAGS_REG)
(compare:CCC
(plus:SI
@@ -6652,6 +6713,20 @@
[(set_attr "type" "alu")
(set_attr "mode" "SI")])
+(define_insn "*addsi3_zext_cc_overflow_2"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (plus:SI
+ (match_operand:SI 1 "nonimmediate_operand" "%0")
+ (match_operand:SI 2 "x86_64_general_operand" "rme"))
+ (match_dup 2)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
+ "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
+ "add{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
;; The patterns that match these are at the end of this file.
(define_expand "<plusminus_insn>xf3"
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 79f3cf1..de1b58a 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -4913,12 +4913,17 @@ signed integer addition with overflow checking.
@item @samp{subv@var{m}4}, @samp{mulv@var{m}4}
Similar, for other signed arithmetic operations.
-@cindex @code{umulv@var{m}4} instruction pattern
-@item @samp{umulv@var{m}4}
-Like @code{mulv@var{m}4} but for unsigned multiplication. That is to
-say, the operation is the same as signed multiplication but the jump
+@cindex @code{uaddv@var{m}4} instruction pattern
+@item @samp{uaddv@var{m}4}
+Like @code{addv@var{m}4} but for unsigned addition. That is to
+say, the operation is the same as signed addition but the jump
is taken only on unsigned overflow.
+@cindex @code{usubv@var{m}4} instruction pattern
+@cindex @code{umulv@var{m}4} instruction pattern
+@item @samp{usubv@var{m}4}, @samp{umulv@var{m}4}
+Similar, for other unsigned arithmetic operations.
+
@cindex @code{addptr@var{m}3} instruction pattern
@item @samp{addptr@var{m}3}
Like @code{add@var{m}3} but is guaranteed to only be used for address
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index bc77bdc..b15657f 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -546,6 +546,33 @@ expand_addsub_overflow (location_t loc, tree_code code, tree lhs,
/* u1 +- u2 -> ur */
if (uns0_p && uns1_p && unsr_p)
{
+ insn_code icode = optab_handler (code == PLUS_EXPR ? uaddv4_optab
+ : usubv4_optab, mode);
+ if (icode != CODE_FOR_nothing)
+ {
+ struct expand_operand ops[4];
+ rtx_insn *last = get_last_insn ();
+
+ res = gen_reg_rtx (mode);
+ create_output_operand (&ops[0], res, mode);
+ create_input_operand (&ops[1], op0, mode);
+ create_input_operand (&ops[2], op1, mode);
+ create_fixed_operand (&ops[3], do_error);
+ if (maybe_expand_insn (icode, 4, ops))
+ {
+ last = get_last_insn ();
+ if (profile_status_for_fn (cfun) != PROFILE_ABSENT
+ && JUMP_P (last)
+ && any_condjump_p (last)
+ && !find_reg_note (last, REG_BR_PROB, 0))
+ add_int_reg_note (last, REG_BR_PROB, PROB_VERY_UNLIKELY);
+ emit_jump (done_label);
+ goto do_error_label;
+ }
+
+ delete_insns_since (last);
+ }
+
/* Compute the operation. On RTL level, the addition is always
unsigned. */
res = expand_binop (mode, code == PLUS_EXPR ? add_optab : sub_optab,
@@ -737,92 +764,88 @@ expand_addsub_overflow (location_t loc, tree_code code, tree lhs,
gcc_assert (!uns0_p && !uns1_p && !unsr_p);
/* s1 +- s2 -> sr */
- do_signed: ;
- enum insn_code icode;
- icode = optab_handler (code == PLUS_EXPR ? addv4_optab : subv4_optab, mode);
- if (icode != CODE_FOR_nothing)
- {
- struct expand_operand ops[4];
- rtx_insn *last = get_last_insn ();
-
- res = gen_reg_rtx (mode);
- create_output_operand (&ops[0], res, mode);
- create_input_operand (&ops[1], op0, mode);
- create_input_operand (&ops[2], op1, mode);
- create_fixed_operand (&ops[3], do_error);
- if (maybe_expand_insn (icode, 4, ops))
- {
- last = get_last_insn ();
- if (profile_status_for_fn (cfun) != PROFILE_ABSENT
- && JUMP_P (last)
- && any_condjump_p (last)
- && !find_reg_note (last, REG_BR_PROB, 0))
- add_int_reg_note (last, REG_BR_PROB, PROB_VERY_UNLIKELY);
- emit_jump (done_label);
- }
- else
- {
- delete_insns_since (last);
- icode = CODE_FOR_nothing;
- }
- }
-
- if (icode == CODE_FOR_nothing)
- {
- rtx_code_label *sub_check = gen_label_rtx ();
- int pos_neg = 3;
-
- /* Compute the operation. On RTL level, the addition is always
- unsigned. */
- res = expand_binop (mode, code == PLUS_EXPR ? add_optab : sub_optab,
- op0, op1, NULL_RTX, false, OPTAB_LIB_WIDEN);
-
- /* If we can prove one of the arguments (for MINUS_EXPR only
- the second operand, as subtraction is not commutative) is always
- non-negative or always negative, we can do just one comparison
- and conditional jump instead of 2 at runtime, 3 present in the
- emitted code. If one of the arguments is CONST_INT, all we
- need is to make sure it is op1, then the first
- do_compare_rtx_and_jump will be just folded. Otherwise try
- to use range info if available. */
- if (code == PLUS_EXPR && CONST_INT_P (op0))
- std::swap (op0, op1);
- else if (CONST_INT_P (op1))
- ;
- else if (code == PLUS_EXPR && TREE_CODE (arg0) == SSA_NAME)
- {
- pos_neg = get_range_pos_neg (arg0);
- if (pos_neg != 3)
- std::swap (op0, op1);
- }
- if (pos_neg == 3 && !CONST_INT_P (op1) && TREE_CODE (arg1) == SSA_NAME)
- pos_neg = get_range_pos_neg (arg1);
-
- /* If the op1 is negative, we have to use a different check. */
- if (pos_neg == 3)
- do_compare_rtx_and_jump (op1, const0_rtx, LT, false, mode, NULL_RTX,
- NULL, sub_check, PROB_EVEN);
-
- /* Compare the result of the operation with one of the operands. */
- if (pos_neg & 1)
- do_compare_rtx_and_jump (res, op0, code == PLUS_EXPR ? GE : LE,
- false, mode, NULL_RTX, NULL, done_label,
- PROB_VERY_LIKELY);
-
- /* If we get here, we have to print the error. */
- if (pos_neg == 3)
- {
- emit_jump (do_error);
+ do_signed:
+ {
+ insn_code icode = optab_handler (code == PLUS_EXPR ? addv4_optab
+ : subv4_optab, mode);
+ if (icode != CODE_FOR_nothing)
+ {
+ struct expand_operand ops[4];
+ rtx_insn *last = get_last_insn ();
+
+ res = gen_reg_rtx (mode);
+ create_output_operand (&ops[0], res, mode);
+ create_input_operand (&ops[1], op0, mode);
+ create_input_operand (&ops[2], op1, mode);
+ create_fixed_operand (&ops[3], do_error);
+ if (maybe_expand_insn (icode, 4, ops))
+ {
+ last = get_last_insn ();
+ if (profile_status_for_fn (cfun) != PROFILE_ABSENT
+ && JUMP_P (last)
+ && any_condjump_p (last)
+ && !find_reg_note (last, REG_BR_PROB, 0))
+ add_int_reg_note (last, REG_BR_PROB, PROB_VERY_UNLIKELY);
+ emit_jump (done_label);
+ goto do_error_label;
+ }
+
+ delete_insns_since (last);
+ }
+
+ rtx_code_label *sub_check = gen_label_rtx ();
+ int pos_neg = 3;
+
+ /* Compute the operation. On RTL level, the addition is always
+ unsigned. */
+ res = expand_binop (mode, code == PLUS_EXPR ? add_optab : sub_optab,
+ op0, op1, NULL_RTX, false, OPTAB_LIB_WIDEN);
+
+ /* If we can prove one of the arguments (for MINUS_EXPR only
+ the second operand, as subtraction is not commutative) is always
+ non-negative or always negative, we can do just one comparison
+ and conditional jump instead of 2 at runtime, 3 present in the
+ emitted code. If one of the arguments is CONST_INT, all we
+ need is to make sure it is op1, then the first
+ do_compare_rtx_and_jump will be just folded. Otherwise try
+ to use range info if available. */
+ if (code == PLUS_EXPR && CONST_INT_P (op0))
+ std::swap (op0, op1);
+ else if (CONST_INT_P (op1))
+ ;
+ else if (code == PLUS_EXPR && TREE_CODE (arg0) == SSA_NAME)
+ {
+ pos_neg = get_range_pos_neg (arg0);
+ if (pos_neg != 3)
+ std::swap (op0, op1);
+ }
+ if (pos_neg == 3 && !CONST_INT_P (op1) && TREE_CODE (arg1) == SSA_NAME)
+ pos_neg = get_range_pos_neg (arg1);
+
+ /* If the op1 is negative, we have to use a different check. */
+ if (pos_neg == 3)
+ do_compare_rtx_and_jump (op1, const0_rtx, LT, false, mode, NULL_RTX,
+ NULL, sub_check, PROB_EVEN);
+
+ /* Compare the result of the operation with one of the operands. */
+ if (pos_neg & 1)
+ do_compare_rtx_and_jump (res, op0, code == PLUS_EXPR ? GE : LE,
+ false, mode, NULL_RTX, NULL, done_label,
+ PROB_VERY_LIKELY);
- emit_label (sub_check);
- }
+ /* If we get here, we have to print the error. */
+ if (pos_neg == 3)
+ {
+ emit_jump (do_error);
+ emit_label (sub_check);
+ }
- /* We have k = a + b for b < 0 here. k <= a must hold. */
- if (pos_neg & 2)
- do_compare_rtx_and_jump (res, op0, code == PLUS_EXPR ? LE : GE,
- false, mode, NULL_RTX, NULL, done_label,
- PROB_VERY_LIKELY);
- }
+ /* We have k = a + b for b < 0 here. k <= a must hold. */
+ if (pos_neg & 2)
+ do_compare_rtx_and_jump (res, op0, code == PLUS_EXPR ? LE : GE,
+ false, mode, NULL_RTX, NULL, done_label,
+ PROB_VERY_LIKELY);
+ }
do_error_label:
emit_label (do_error);
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 0ca2333..c141a3c 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -197,6 +197,8 @@ OPTAB_D (ctrap_optab, "ctrap$a4")
OPTAB_D (addv4_optab, "addv$I$a4")
OPTAB_D (subv4_optab, "subv$I$a4")
OPTAB_D (mulv4_optab, "mulv$I$a4")
+OPTAB_D (uaddv4_optab, "uaddv$I$a4")
+OPTAB_D (usubv4_optab, "usubv$I$a4")
OPTAB_D (umulv4_optab, "umulv$I$a4")
OPTAB_D (negv3_optab, "negv$I$a3")
OPTAB_D (addptr3_optab, "addptr$a3")