	* config/aarch64/aarch64-modes.def (CC_NV): New.
	* config/aarch64/aarch64.c (aarch64_gen_compare_reg): Expand all
	of the comparisons for TImode, not just NE.
	(aarch64_select_cc_mode): Recognize <su>cmp<GPI>_carryin.
	(aarch64_get_condition_code_1): Handle CC_NVmode.
	* config/aarch64/aarch64.md (cbranchti4, cstoreti4): New.
	(ccmp_iorne<GPI>): New.
	(<su_optab>cmp<GPI>_carryin): New.
	(*<su_optab>cmp<GPI>_carryin): New.
	(*<su_optab>cmp<GPI>_carryin_z1): New.
	(*<su_optab>cmp<GPI>_carryin_z2): New.
	(*cmp<GPI>_carryin_m2, *ucmp<GPI>_carryin_m2): New.
	* config/aarch64/iterators.md (CC_EXTEND): New.
	* config/aarch64/predicates.md (const_dword_umax): New.
---
 gcc/config/aarch64/aarch64.c         | 164 ++++++++++++++++++++++++---
 gcc/config/aarch64/aarch64-modes.def |   1 +
 gcc/config/aarch64/aarch64.md        | 113 ++++++++++++++++++
 gcc/config/aarch64/iterators.md      |   3 +
 gcc/config/aarch64/predicates.md     |   9 ++
 5 files changed, 277 insertions(+), 13 deletions(-)
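Not part of the patch, but as reviewing context: the intended lowering for a
double-word comparison is a CMP of the low words followed by an SBCS of the
high words, with the branch or CSET taken from the N/V flags (signed) or the
C flag (unsigned); equality against zero reduces to an ORR of the two halves.
A minimal sketch, assuming the lo:hi halves of each __int128 argument arrive
in x0:x1 and x2:x3; the function names are illustrative and the exact
registers and instruction selection may differ:

  /* example.c -- hypothetical test, not included in this patch.  */
  int lt128 (__int128 a, __int128 b)
  {
    /* expected: cmp x0, x2; sbcs xzr, x1, x3; cset w0, lt */
    return a < b;
  }

  int eqz128 (__int128 a)
  {
    /* expected: orr x0, x0, x1; cmp x0, 0; cset w0, eq (cbz when branching) */
    return a == 0;
  }
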
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 837ee6a5e37..6c825b341a0 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2731,32 +2731,143 @@ rtx
 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
 {
   machine_mode cmp_mode = GET_MODE (x);
-  machine_mode cc_mode;
   rtx cc_reg;
 
   if (cmp_mode == TImode)
     {
-      gcc_assert (code == NE);
+      rtx x_lo, x_hi, y_lo, y_hi, tmp;
+      struct expand_operand ops[2];
 
-      cc_mode = CCmode;
-      cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
+      x_lo = operand_subword (x, 0, 0, TImode);
+      x_hi = operand_subword (x, 1, 0, TImode);
 
-      rtx x_lo = operand_subword (x, 0, 0, TImode);
-      rtx y_lo = operand_subword (y, 0, 0, TImode);
-      emit_set_insn (cc_reg, gen_rtx_COMPARE (cc_mode, x_lo, y_lo));
+      if (CONST_SCALAR_INT_P (y))
+        {
+          wide_int y_wide = rtx_mode_t (y, TImode);
 
-      rtx x_hi = operand_subword (x, 1, 0, TImode);
-      rtx y_hi = operand_subword (y, 1, 0, TImode);
-      emit_insn (gen_ccmpccdi (cc_reg, cc_reg, x_hi, y_hi,
-                               gen_rtx_EQ (cc_mode, cc_reg, const0_rtx),
-                               GEN_INT (AARCH64_EQ)));
+          switch (code)
+            {
+            case EQ:
+            case NE:
+              /* For equality, IOR the two halves together.  If this gets
+                 used for a branch, we expect this to fold to cbz/cbnz;
+                 otherwise it's no larger than the cmp+ccmp below.  Beware
+                 of the compare-and-swap post-reload split and use ccmp.  */
+              if (y_wide == 0 && can_create_pseudo_p ())
+                {
+                  tmp = gen_reg_rtx (DImode);
+                  emit_insn (gen_iordi3 (tmp, x_hi, x_lo));
+                  emit_insn (gen_cmpdi (tmp, const0_rtx));
+                  cc_reg = gen_rtx_REG (CCmode, CC_REGNUM);
+                  goto done;
+                }
+              break;
+
+            case LE:
+            case GT:
+              /* Add 1 to Y to convert to LT/GE, which avoids the swap and
+                 keeps the constant operand.  */
+              if (wi::cmps (y_wide, wi::max_value (TImode, SIGNED)) < 0)
+                {
+                  y = immed_wide_int_const (wi::add (y_wide, 1), TImode);
+                  code = (code == LE ? LT : GE);
+                }
+              break;
+
+            case LEU:
+            case GTU:
+              /* Add 1 to Y to convert to LTU/GEU, which avoids the swap and
+                 keeps the constant operand.  */
+              if (wi::cmpu (y_wide, wi::max_value (TImode, UNSIGNED)) < 0)
+                {
+                  y = immed_wide_int_const (wi::add (y_wide, 1), TImode);
+                  code = (code == LEU ? LTU : GEU);
+                }
+              break;
+
+            default:
+              break;
+            }
+        }
+
+      y_lo = simplify_gen_subreg (DImode, y, TImode,
+                                  subreg_lowpart_offset (DImode, TImode));
+      y_hi = simplify_gen_subreg (DImode, y, TImode,
+                                  subreg_highpart_offset (DImode, TImode));
+
+      switch (code)
+        {
+        case LEU:
+        case GTU:
+        case LE:
+        case GT:
+          std::swap (x_lo, y_lo);
+          std::swap (x_hi, y_hi);
+          code = swap_condition (code);
+          break;
+
+        case LTU:
+        case GEU:
+        case LT:
+        case GE:
+          /* If the low word of y is 0, then this is simply a normal
+             compare of the upper words.  */
+          if (y_lo == const0_rtx)
+            {
+              if (!aarch64_plus_operand (y_hi, DImode))
+                y_hi = force_reg (DImode, y_hi);
+              return aarch64_gen_compare_reg (code, x_hi, y_hi);
+            }
+          break;
+
+        default:
+          break;
+        }
+
+      /* Emit cmpdi, forcing operands into registers as required.  */
+      create_input_operand (&ops[0], x_lo, DImode);
+      create_input_operand (&ops[1], y_lo, DImode);
+      expand_insn (CODE_FOR_cmpdi, 2, ops);
+
+      switch (code)
+        {
+        case EQ:
+        case NE:
+          /* For NE, (x_lo != y_lo) || (x_hi != y_hi).  */
+          create_input_operand (&ops[0], x_hi, DImode);
+          create_input_operand (&ops[1], y_hi, DImode);
+          expand_insn (CODE_FOR_ccmp_iornedi, 2, ops);
+          cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
+          break;
+
+        case LTU:
+        case GEU:
+          create_input_operand (&ops[0], x_hi, DImode);
+          create_input_operand (&ops[1], y_hi, DImode);
+          expand_insn (CODE_FOR_ucmpdi_carryin, 2, ops);
+          cc_reg = gen_rtx_REG (CC_NOTCmode, CC_REGNUM);
+          break;
+
+        case LT:
+        case GE:
+          create_input_operand (&ops[0], x_hi, DImode);
+          create_input_operand (&ops[1], y_hi, DImode);
+          expand_insn (CODE_FOR_cmpdi_carryin, 2, ops);
+          cc_reg = gen_rtx_REG (CC_NVmode, CC_REGNUM);
+          break;
+
+        default:
+          gcc_unreachable ();
+        }
     }
   else
     {
-      cc_mode = SELECT_CC_MODE (code, x, y);
+      machine_mode cc_mode = SELECT_CC_MODE (code, x, y);
       cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
       emit_set_insn (cc_reg, gen_rtx_COMPARE (cc_mode, x, y));
     }
+
+ done:
   return gen_rtx_fmt_ee (code, VOIDmode, cc_reg, const0_rtx);
 }
@@ -9551,6 +9662,24 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
       && GET_CODE (XEXP (y, 0)) == GET_CODE (x))
     return CC_Vmode;
 
+  /* A test for signed GE/LT comparison with borrow.  */
+  if ((mode_x == DImode || mode_x == TImode)
+      && (code == GE || code == LT)
+      && (code_x == SIGN_EXTEND || x == const0_rtx)
+      && ((GET_CODE (y) == PLUS
+           && aarch64_borrow_operation (XEXP (y, 0), mode_x))
+          || aarch64_borrow_operation (y, mode_x)))
+    return CC_NVmode;
+
+  /* A test for unsigned GEU/LTU comparison with borrow.  */
+  if ((mode_x == DImode || mode_x == TImode)
+      && (code == GEU || code == LTU)
+      && (code_x == ZERO_EXTEND || x == const0_rtx)
+      && ((GET_CODE (y) == PLUS
+           && aarch64_borrow_operation (XEXP (y, 0), mode_x))
+          || aarch64_borrow_operation (y, mode_x)))
+    return CC_NOTCmode;
+
   /* For everything else, return CCmode.  */
   return CCmode;
 }
@@ -9690,6 +9819,15 @@ aarch64_get_condition_code_1 (machine_mode mode, enum rtx_code comp_code)
 	}
       break;
 
+    case E_CC_NVmode:
+      switch (comp_code)
+        {
+        case GE: return AARCH64_GE;
+        case LT: return AARCH64_LT;
+        default: return -1;
+        }
+      break;
+
     default:
       return -1;
     }
diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def
index 181b7b30dcd..beb5919ab01 100644
--- a/gcc/config/aarch64/aarch64-modes.def
+++ b/gcc/config/aarch64/aarch64-modes.def
@@ -41,6 +41,7 @@ CC_MODE (CC_C);    /* C represents unsigned overflow of a simple addition.  */
 CC_MODE (CC_NOTC); /* !C represents unsigned overflow of subtraction,
 		      as well as our representation of add-with-carry.  */
 CC_MODE (CC_V);    /* Only V bit of condition flags is valid.  */
+CC_MODE (CC_NV);   /* N and V bits set for signed GE/LT comparison.  */
 
 /* Half-precision floating point for __fp16.  */
 FLOAT_MODE (HF, 2, 0);
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 2b5a6eb510d..e62f79ed6f1 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -471,6 +471,20 @@
   operands[2] = const0_rtx;
 })
 
+(define_expand "cbranchti4"
+  [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator"
+			    [(match_operand:TI 1 "register_operand")
+			     (match_operand:TI 2 "aarch64_reg_or_imm")])
+			   (label_ref (match_operand 3 "" ""))
+			   (pc)))]
+  ""
+{
+  operands[0] = aarch64_gen_compare_reg (GET_CODE (operands[0]), operands[1],
+					 operands[2]);
+  operands[1] = XEXP (operands[0], 0);
+  operands[2] = const0_rtx;
+})
+
 (define_expand "cbranch<mode>4"
   [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator"
			    [(match_operand:GPF 1 "register_operand")
@@ -569,6 +583,25 @@
   [(set_attr "type" "fccmp<s>")]
 )
 
+;; This specialization has the advantage of being able to swap operands.
+;; Use CC_NZ because SELECT_CC_MODE uses that for comparisons against 0.
+(define_insn "ccmp_iorne<mode>"
+  [(set (reg:CC_NZ CC_REGNUM)
+	(compare:CC_NZ
+	  (ior:SI
+	    (ne:SI (reg:CC CC_REGNUM)
+		   (const_int 0))
+	    (ne:SI (match_operand:GPI 0 "register_operand" "%r,r,r")
+		   (match_operand:GPI 1 "aarch64_ccmp_operand" "r,Uss,Usn")))
+	  (const_int 0)))]
+  ""
+  "@
+   ccmp\\t%<w>0, %<w>1, 0, eq
+   ccmp\\t%<w>0, %1, 0, eq
+   ccmn\\t%<w>0, #%n1, 0, eq"
+  [(set_attr "type" "alus_sreg,alus_imm,alus_imm")]
+)
+
 ;; Expansion of signed mod by a power of 2 using CSNEG.
 ;; For x0 % n where n is a power of 2 produce:
 ;; negs   x1, x0
@@ -3364,6 +3397,72 @@
   [(set_attr "type" "adc_reg")]
 )
 
+(define_expand "<su_optab>cmp<mode>_carryin"
+  [(set (reg:<CC_EXTEND> CC_REGNUM)
+	(compare:<CC_EXTEND>
+	  (ANY_EXTEND:<DWI> (match_operand:GPI 0 "register_operand"))
+	  (plus:<DWI>
+	    (geu:<DWI> (reg:CC_C CC_REGNUM) (const_int 0))
+	    (ANY_EXTEND:<DWI> (match_operand:GPI 1 "register_operand")))))]
+  ""
+)
+
+(define_insn "*<su_optab>cmp<mode>_carryin"
+  [(set (reg:<CC_EXTEND> CC_REGNUM)
+	(compare:<CC_EXTEND>
+	  (ANY_EXTEND:<DWI> (match_operand:GPI 0 "register_operand" "r"))
+	  (plus:<DWI>
+	    (match_operand:<DWI> 2 "aarch64_borrow_operation" "")
+	    (ANY_EXTEND:<DWI> (match_operand:GPI 1 "register_operand" "r")))))]
+  ""
+  "sbcs\\t<w>zr, %<w>0, %<w>1"
+  [(set_attr "type" "adc_reg")]
+)
+
+(define_insn "*<su_optab>cmp<mode>_carryin_z1"
+  [(set (reg:<CC_EXTEND> CC_REGNUM)
+	(compare:<CC_EXTEND>
+	  (const_int 0)
+	  (plus:<DWI>
+	    (match_operand:<DWI> 1 "aarch64_borrow_operation" "")
+	    (ANY_EXTEND:<DWI> (match_operand:GPI 0 "register_operand" "r")))))]
+  ""
+  "sbcs\\t<w>zr, <w>zr, %<w>0"
+  [(set_attr "type" "adc_reg")]
+)
+
+(define_insn "*<su_optab>cmp<mode>_carryin_z2"
+  [(set (reg:<CC_EXTEND> CC_REGNUM)
+	(compare:<CC_EXTEND>
+	  (ANY_EXTEND:<DWI> (match_operand:GPI 0 "register_operand" "r"))
+	  (match_operand:<DWI> 1 "aarch64_borrow_operation" "")))]
+  ""
+  "sbcs\\t<w>zr, %<w>0, <w>zr"
+  [(set_attr "type" "adc_reg")]
+)
+
+(define_insn "*cmp<mode>_carryin_m2"
+  [(set (reg:<CC_EXTEND> CC_REGNUM)
+	(compare:<CC_EXTEND>
+	  (ANY_EXTEND:<DWI> (match_operand:GPI 0 "register_operand" "r"))
+	  (neg:<DWI> (match_operand:<DWI> 1 "aarch64_carry_operation" ""))))]
+  ""
+  "adcs\\t<w>zr, %<w>0, <w>zr"
+  [(set_attr "type" "adc_reg")]
+)
+
+(define_insn "*ucmp<mode>_carryin_m2"
+  [(set (reg:<CC_EXTEND> CC_REGNUM)
+	(compare:<CC_EXTEND>
+	  (ANY_EXTEND:<DWI> (match_operand:GPI 0 "register_operand" "r"))
+	  (plus:<DWI>
+	    (match_operand:<DWI> 1 "aarch64_borrow_operation" "")
+	    (match_operand:<DWI> 2 "const_dword_umax" ""))))]
+  ""
+  "adcs\\t<w>zr, %<w>0, <w>zr"
+  [(set_attr "type" "adc_reg")]
+)
+
 (define_expand "usub<GPI:mode>3_carryinC"
   [(parallel
     [(set (reg:CC_NOTC CC_REGNUM)
@@ -3985,6 +4084,20 @@
   operands[3] = const0_rtx;
 })
 
+(define_expand "cstoreti4"
+  [(set (match_operand:SI 0 "register_operand")
+	(match_operator:SI 1 "aarch64_comparison_operator"
+	 [(match_operand:TI 2 "register_operand")
+	  (match_operand:TI 3 "aarch64_reg_or_imm")]))]
+  ""
+{
+  operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2],
+					 operands[3]);
+  PUT_MODE (operands[1], SImode);
+  operands[2] = XEXP (operands[1], 0);
+  operands[3] = const0_rtx;
+})
+
 (define_expand "cstorecc4"
   [(set (match_operand:SI 0 "register_operand")
	(match_operator 1 "aarch64_comparison_operator_mode"
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 8e434389e59..f6f2e9cefd5 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1907,6 +1907,9 @@
 (define_code_attr fix_trunc_optab [(fix "fix_trunc")
				   (unsigned_fix "fixuns_trunc")])
 
+;; For double-word comparisons
+(define_code_attr CC_EXTEND [(sign_extend "CC_NV") (zero_extend "CC_NOTC")])
+
 ;; Optab prefix for sign/zero-extending operations
 (define_code_attr su_optab [(sign_extend "") (zero_extend "u")
			    (div "") (udiv "u")
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index e3572d2f60d..93d068cc69c 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -55,6 +55,15 @@
   return rtx_mode_t (op, mode) == (wi::shwi (1, mode) << bits);
 })
 
+;; True for (1 << (GET_MODE_BITSIZE (mode) / 2)) - 1,
+;; i.e. UINT_MAX for the word mode, in the double-word mode.
+(define_predicate "const_dword_umax"
+  (match_code "const_int,const_wide_int")
+{
+  unsigned bits = GET_MODE_BITSIZE (mode).to_constant () / 2;
+  return rtx_mode_t (op, mode) == wi::sub (wi::shwi (1, mode) << bits, 1);
+})
+
 (define_predicate "subreg_lowpart_operator"
   (ior (match_code "truncate")
        (and (match_code "subreg")
-- 
2.20.1