	* config/aarch64/aarch64-modes.def (CC_NV): New.
	* config/aarch64/aarch64.c (aarch64_gen_compare_reg): Expand all
	of the comparisons for TImode, not just NE.
	(aarch64_select_cc_mode): Recognize <su>cmp<GPI>_carryin.
	(aarch64_get_condition_code_1): Handle CC_NVmode.
	* config/aarch64/aarch64.md (cbranchti4, cstoreti4): New.
	(ccmp_iorne<GPI>): New.
	(<su_optab>cmp<GPI>_carryin): New.
	(*<su_optab>cmp<GPI>_carryin): New.
	(*<su_optab>cmp<GPI>_carryin_z1): New.
	(*<su_optab>cmp<GPI>_carryin_z2): New.
	(*cmp<GPI>_carryin_m2, *ucmp<GPI>_carryin_m2): New.
	* config/aarch64/iterators.md (CC_EXTEND): New.
	* config/aarch64/predicates.md (const_dword_umax): New.
---
 gcc/config/aarch64/aarch64.c         | 164 ++++++++++++++++++++++++---
 gcc/config/aarch64/aarch64-modes.def |   1 +
 gcc/config/aarch64/aarch64.md        | 113 ++++++++++++++++++
 gcc/config/aarch64/iterators.md      |   3 +
 gcc/config/aarch64/predicates.md     |   9 ++
 5 files changed, 277 insertions(+), 13 deletions(-)
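Not part of the patch, but as reviewing context: the intended lowering for a
double-word comparison is a CMP of the low words followed by an SBCS of the
high words, with the branch or CSET taken from the N/V flags (signed) or the
C flag (unsigned); equality against zero reduces to an ORR of the two halves.
A minimal sketch, assuming the lo:hi halves of each __int128 argument arrive
in x0:x1 and x2:x3; the function names are illustrative and the exact
registers and instruction selection may differ:

  /* example.c -- hypothetical test, not included in this patch.  */
  int lt128 (__int128 a, __int128 b)
  {
    /* expected: cmp x0, x2; sbcs xzr, x1, x3; cset w0, lt */
    return a < b;
  }

  int eqz128 (__int128 a)
  {
    /* expected: orr x0, x0, x1; cmp x0, 0; cset w0, eq (cbz when branching) */
    return a == 0;
  }
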
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 837ee6a5e37..6c825b341a0 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2731,32 +2731,143 @@ rtx
 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
 {
   machine_mode cmp_mode = GET_MODE (x);
-  machine_mode cc_mode;
   rtx cc_reg;
 
   if (cmp_mode == TImode)
     {
-      gcc_assert (code == NE);
+      rtx x_lo, x_hi, y_lo, y_hi, tmp;
+      struct expand_operand ops[2];
 
-      cc_mode = CCmode;
-      cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
+      x_lo = operand_subword (x, 0, 0, TImode);
+      x_hi = operand_subword (x, 1, 0, TImode);
 
-      rtx x_lo = operand_subword (x, 0, 0, TImode);
-      rtx y_lo = operand_subword (y, 0, 0, TImode);
-      emit_set_insn (cc_reg, gen_rtx_COMPARE (cc_mode, x_lo, y_lo));
+      if (CONST_SCALAR_INT_P (y))
+        {
+          wide_int y_wide = rtx_mode_t (y, TImode);
 
-      rtx x_hi = operand_subword (x, 1, 0, TImode);
-      rtx y_hi = operand_subword (y, 1, 0, TImode);
-      emit_insn (gen_ccmpccdi (cc_reg, cc_reg, x_hi, y_hi,
-                               gen_rtx_EQ (cc_mode, cc_reg, const0_rtx),
-                               GEN_INT (AARCH64_EQ)));
+          switch (code)
+            {
+            case EQ:
+            case NE:
+              /* For equality, IOR the two halves together.  If this gets
+                 used for a branch, we expect this to fold to cbz/cbnz;
+                 otherwise it's no larger than the cmp+ccmp below.  Beware
+                 of the compare-and-swap post-reload split and use ccmp.  */
+              if (y_wide == 0 && can_create_pseudo_p ())
+                {
+                  tmp = gen_reg_rtx (DImode);
+                  emit_insn (gen_iordi3 (tmp, x_hi, x_lo));
+                  emit_insn (gen_cmpdi (tmp, const0_rtx));
+                  cc_reg = gen_rtx_REG (CCmode, CC_REGNUM);
+                  goto done;
+                }
+              break;
+
+            case LE:
+            case GT:
+              /* Add 1 to Y to convert to LT/GE, which avoids the swap and
+                 keeps the constant operand.  */
+              if (wi::cmps (y_wide, wi::max_value (TImode, SIGNED)) < 0)
+                {
+                  y = immed_wide_int_const (wi::add (y_wide, 1), TImode);
+                  code = (code == LE ? LT : GE);
+                }
+              break;
+
+            case LEU:
+            case GTU:
+              /* Add 1 to Y to convert to LTU/GEU, which avoids the swap and
+                 keeps the constant operand.  */
+              if (wi::cmpu (y_wide, wi::max_value (TImode, UNSIGNED)) < 0)
+                {
+                  y = immed_wide_int_const (wi::add (y_wide, 1), TImode);
+                  code = (code == LEU ? LTU : GEU);
+                }
+              break;
+
+            default:
+              break;
+            }
+        }
+
+      y_lo = simplify_gen_subreg (DImode, y, TImode,
+                                  subreg_lowpart_offset (DImode, TImode));
+      y_hi = simplify_gen_subreg (DImode, y, TImode,
+                                  subreg_highpart_offset (DImode, TImode));
+
+      switch (code)
+        {
+        case LEU:
+        case GTU:
+        case LE:
+        case GT:
+          std::swap (x_lo, y_lo);
+          std::swap (x_hi, y_hi);
+          code = swap_condition (code);
+          break;
+
+        case LTU:
+        case GEU:
+        case LT:
+        case GE:
+          /* If the low word of y is 0, then this is simply a normal
+             compare of the upper words.  */
+          if (y_lo == const0_rtx)
+            {
+              if (!aarch64_plus_operand (y_hi, DImode))
+                y_hi = force_reg (DImode, y_hi);
+              return aarch64_gen_compare_reg (code, x_hi, y_hi);
+            }
+          break;
+
+        default:
+          break;
+        }
+
+      /* Emit cmpdi, forcing operands into registers as required.  */
+      create_input_operand (&ops[0], x_lo, DImode);
+      create_input_operand (&ops[1], y_lo, DImode);
+      expand_insn (CODE_FOR_cmpdi, 2, ops);
+
+      switch (code)
+        {
+        case EQ:
+        case NE:
+          /* For NE, (x_lo != y_lo) || (x_hi != y_hi).  */
+          create_input_operand (&ops[0], x_hi, DImode);
+          create_input_operand (&ops[1], y_hi, DImode);
+          expand_insn (CODE_FOR_ccmp_iornedi, 2, ops);
+          cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
+          break;
+
+        case LTU:
+        case GEU:
+          create_input_operand (&ops[0], x_hi, DImode);
+          create_input_operand (&ops[1], y_hi, DImode);
+          expand_insn (CODE_FOR_ucmpdi_carryin, 2, ops);
+          cc_reg = gen_rtx_REG (CC_NOTCmode, CC_REGNUM);
+          break;
+
+        case LT:
+        case GE:
+          create_input_operand (&ops[0], x_hi, DImode);
+          create_input_operand (&ops[1], y_hi, DImode);
+          expand_insn (CODE_FOR_cmpdi_carryin, 2, ops);
+          cc_reg = gen_rtx_REG (CC_NVmode, CC_REGNUM);
+          break;
+
+        default:
+          gcc_unreachable ();
+        }
     }
   else
     {
-      cc_mode = SELECT_CC_MODE (code, x, y);
+      machine_mode cc_mode = SELECT_CC_MODE (code, x, y);
       cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
       emit_set_insn (cc_reg, gen_rtx_COMPARE (cc_mode, x, y));
     }
+
+ done:
   return gen_rtx_fmt_ee (code, VOIDmode, cc_reg, const0_rtx);
 }
@@ -9551,6 +9662,24 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
       && GET_CODE (XEXP (y, 0)) == GET_CODE (x))
     return CC_Vmode;
 
+  /* A test for signed GE/LT comparison with borrow.  */
+  if ((mode_x == DImode || mode_x == TImode)
+      && (code == GE || code == LT)
+      && (code_x == SIGN_EXTEND || x == const0_rtx)
+      && ((GET_CODE (y) == PLUS
+           && aarch64_borrow_operation (XEXP (y, 0), mode_x))
+          || aarch64_borrow_operation (y, mode_x)))
+    return CC_NVmode;
+
+  /* A test for unsigned GEU/LTU comparison with borrow.  */
+  if ((mode_x == DImode || mode_x == TImode)
+      && (code == GEU || code == LTU)
+      && (code_x == ZERO_EXTEND || x == const0_rtx)
+      && ((GET_CODE (y) == PLUS
+           && aarch64_borrow_operation (XEXP (y, 0), mode_x))
+          || aarch64_borrow_operation (y, mode_x)))
+    return CC_NOTCmode;
+
   /* For everything else, return CCmode.  */
   return CCmode;
 }
@@ -9690,6 +9819,15 @@ aarch64_get_condition_code_1 (machine_mode mode, enum rtx_code comp_code)
 	}
       break;
 
+    case E_CC_NVmode:
+      switch (comp_code)
+        {
+        case GE: return AARCH64_GE;
+        case LT: return AARCH64_LT;
+        default: return -1;
+        }
+      break;
+
     default:
       return -1;
     }
diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def
index 181b7b30dcd..beb5919ab01 100644
--- a/gcc/config/aarch64/aarch64-modes.def
+++ b/gcc/config/aarch64/aarch64-modes.def
@@ -41,6 +41,7 @@ CC_MODE (CC_C);    /* C represents unsigned overflow of a simple addition.  */
 CC_MODE (CC_NOTC); /* !C represents unsigned overflow of subtraction,
 		      as well as our representation of add-with-carry.  */
 CC_MODE (CC_V);    /* Only V bit of condition flags is valid.  */
+CC_MODE (CC_NV);   /* N and V bits set for signed GE/LT comparison.  */
 
 /* Half-precision floating point for __fp16.  */
 FLOAT_MODE (HF, 2, 0);
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 2b5a6eb510d..e62f79ed6f1 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -471,6 +471,20 @@
   operands[2] = const0_rtx;
 })
 
+(define_expand "cbranchti4"
+  [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator"
+			    [(match_operand:TI 1 "register_operand")
+			     (match_operand:TI 2 "aarch64_reg_or_imm")])
+			   (label_ref (match_operand 3 "" ""))
+			   (pc)))]
+  ""
+{
+  operands[0] = aarch64_gen_compare_reg (GET_CODE (operands[0]), operands[1],
+					 operands[2]);
+  operands[1] = XEXP (operands[0], 0);
+  operands[2] = const0_rtx;
+})
+
 (define_expand "cbranch<mode>4"
   [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator"
			    [(match_operand:GPF 1 "register_operand")
@@ -569,6 +583,25 @@
   [(set_attr "type" "fccmp<s>")]
 )
 
+;; This specialization has the advantage of being able to swap operands.
+;; Use CC_NZ because SELECT_CC_MODE uses that for comparisons against 0.
+(define_insn "ccmp_iorne<mode>"
+  [(set (reg:CC_NZ CC_REGNUM)
+	(compare:CC_NZ
+	  (ior:SI
+	    (ne:SI (reg:CC CC_REGNUM)
+		   (const_int 0))
+	    (ne:SI (match_operand:GPI 0 "register_operand" "%r,r,r")
+		   (match_operand:GPI 1 "aarch64_ccmp_operand" "r,Uss,Usn")))
+	  (const_int 0)))]
+  ""
+  "@
+   ccmp\\t%<w>0, %<w>1, 0, eq
+   ccmp\\t%<w>0, %1, 0, eq
+   ccmn\\t%<w>0, #%n1, 0, eq"
+  [(set_attr "type" "alus_sreg,alus_imm,alus_imm")]
+)
+
 ;; Expansion of signed mod by a power of 2 using CSNEG.
 ;; For x0 % n where n is a power of 2 produce:
 ;; negs   x1, x0
@@ -3364,6 +3397,72 @@
   [(set_attr "type" "adc_reg")]
 )
 
+(define_expand "<su_optab>cmp<mode>_carryin"
+  [(set (reg:<CC_EXTEND> CC_REGNUM)
+	(compare:<CC_EXTEND>
+	  (ANY_EXTEND:<DWI> (match_operand:GPI 0 "register_operand"))
+	  (plus:<DWI>
+	    (geu:<DWI> (reg:CC_C CC_REGNUM) (const_int 0))
+	    (ANY_EXTEND:<DWI> (match_operand:GPI 1 "register_operand")))))]
+  ""
+)
+
+(define_insn "*<su_optab>cmp<mode>_carryin"
+  [(set (reg:<CC_EXTEND> CC_REGNUM)
+	(compare:<CC_EXTEND>
+	  (ANY_EXTEND:<DWI> (match_operand:GPI 0 "register_operand" "r"))
+	  (plus:<DWI>
+	    (match_operand:<DWI> 2 "aarch64_borrow_operation" "")
+	    (ANY_EXTEND:<DWI> (match_operand:GPI 1 "register_operand" "r")))))]
+  ""
+  "sbcs\\t<w>zr, %<w>0, %<w>1"
+  [(set_attr "type" "adc_reg")]
+)
+
+(define_insn "*<su_optab>cmp<mode>_carryin_z1"
+  [(set (reg:<CC_EXTEND> CC_REGNUM)
+	(compare:<CC_EXTEND>
+	  (const_int 0)
+	  (plus:<DWI>
+	    (match_operand:<DWI> 1 "aarch64_borrow_operation" "")
+	    (ANY_EXTEND:<DWI> (match_operand:GPI 0 "register_operand" "r")))))]
+  ""
+  "sbcs\\t<w>zr, <w>zr, %<w>0"
+  [(set_attr "type" "adc_reg")]
+)
+
+(define_insn "*<su_optab>cmp<mode>_carryin_z2"
+  [(set (reg:<CC_EXTEND> CC_REGNUM)
+	(compare:<CC_EXTEND>
+	  (ANY_EXTEND:<DWI> (match_operand:GPI 0 "register_operand" "r"))
+	  (match_operand:<DWI> 1 "aarch64_borrow_operation" "")))]
+  ""
+  "sbcs\\t<w>zr, %<w>0, <w>zr"
+  [(set_attr "type" "adc_reg")]
+)
+
+(define_insn "*cmp<mode>_carryin_m2"
+  [(set (reg:<CC_EXTEND> CC_REGNUM)
+	(compare:<CC_EXTEND>
+	  (ANY_EXTEND:<DWI> (match_operand:GPI 0 "register_operand" "r"))
+	  (neg:<DWI> (match_operand:<DWI> 1 "aarch64_carry_operation" ""))))]
+  ""
+  "adcs\\t<w>zr, %<w>0, <w>zr"
+  [(set_attr "type" "adc_reg")]
+)
+
+(define_insn "*ucmp<mode>_carryin_m2"
+  [(set (reg:<CC_EXTEND> CC_REGNUM)
+	(compare:<CC_EXTEND>
+	  (ANY_EXTEND:<DWI> (match_operand:GPI 0 "register_operand" "r"))
+	  (plus:<DWI>
+	    (match_operand:<DWI> 1 "aarch64_borrow_operation" "")
+	    (match_operand:<DWI> 2 "const_dword_umax" ""))))]
+  ""
+  "adcs\\t<w>zr, %<w>0, <w>zr"
+  [(set_attr "type" "adc_reg")]
+)
+
 (define_expand "usub<GPI:mode>3_carryinC"
   [(parallel
     [(set (reg:CC_NOTC CC_REGNUM)
@@ -3985,6 +4084,20 @@
   operands[3] = const0_rtx;
 })
 
+(define_expand "cstoreti4"
+  [(set (match_operand:SI 0 "register_operand")
+	(match_operator:SI 1 "aarch64_comparison_operator"
+	 [(match_operand:TI 2 "register_operand")
+	  (match_operand:TI 3 "aarch64_reg_or_imm")]))]
+  ""
+{
+  operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2],
+					 operands[3]);
+  PUT_MODE (operands[1], SImode);
+  operands[2] = XEXP (operands[1], 0);
+  operands[3] = const0_rtx;
+})
+
 (define_expand "cstorecc4"
   [(set (match_operand:SI 0 "register_operand")
	(match_operator 1 "aarch64_comparison_operator_mode"
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 8e434389e59..f6f2e9cefd5 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1907,6 +1907,9 @@
 (define_code_attr fix_trunc_optab [(fix "fix_trunc")
				   (unsigned_fix "fixuns_trunc")])
 
+;; For double-word comparisons
+(define_code_attr CC_EXTEND [(sign_extend "CC_NV") (zero_extend "CC_NOTC")])
+
 ;; Optab prefix for sign/zero-extending operations
 (define_code_attr su_optab [(sign_extend "") (zero_extend "u")
			    (div "") (udiv "u")
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index e3572d2f60d..93d068cc69c 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -55,6 +55,15 @@
   return rtx_mode_t (op, mode) == (wi::shwi (1, mode) << bits);
 })
 
+;; True for (1 << (GET_MODE_BITSIZE (mode) / 2)) - 1,
+;; i.e. UINT_MAX for the word mode, in the double-word mode.
+(define_predicate "const_dword_umax"
+  (match_code "const_int,const_wide_int")
+{
+  unsigned bits = GET_MODE_BITSIZE (mode).to_constant () / 2;
+  return rtx_mode_t (op, mode) == wi::sub (wi::shwi (1, mode) << bits, 1);
+})
+
 (define_predicate "subreg_lowpart_operator"
   (ior (match_code "truncate")
        (and (match_code "subreg")
-- 
2.20.1