Hi Richard, On 10/09/2020 19:18, Richard Sandiford wrote: > Alex Coplan <alex.cop...@arm.com> writes: > > Hello, > > > > Since r11-2903-g6b3034eaba83935d9f6dfb20d2efbdb34b5b00bf introduced a > > canonicalization from mult to shift on address reloads, a missing > > pattern in the AArch64 backend was exposed. > > > > In particular, we were missing the ashift variant of > > *add_<optab><mode>_multp2 (this mult variant is redundant and was > > removed in r11-3033-g2f8ae301f6a125f50b0a758047fcddae7b68daa8). > > > > This patch adds that missing pattern (fixing PR96998), updates > > aarch64_is_extend_from_extract() to work for the shift pattern (instead > > of the redundant mult variant) and updates callers in the cost > > calculations to apply the costing for the shift variant instead. > > Hmm. I think we should instead fix this in combine.
Ok, thanks for the review. Please find a revised patch attached which fixes this in combine instead. > If we do that, we should be able to remove the handling of > extract-based addresses in aarch64_classify_index & co. Nice. I've tried to remove all the redundant extract-based address handling in the AArch64 backend as a result. The revised patch also fixes up a comment in combine which appears to have suffered from bitrot. Testing: * Bootstrap and regtest on aarch64-none-linux-gnu, arm-none-linux-gnueabihf, and x86_64-pc-linux-gnu. * Regtested an x64 -> aarch64-none-elf cross with -mabi=ilp32. * Checked that we no longer ICE when building linux-next on AArch64. OK for trunk? Thanks, Alex --- gcc/ChangeLog: * combine.c (expand_compound_operation): Tweak variable name in comment to match source. (make_extraction): Handle mult by power of two in addition to ashift. * config/aarch64/aarch64.c (aarch64_is_extend_from_extract): Delete. (aarch64_classify_index): Remove redundant extend-as-extract handling. (aarch64_strip_extend): Likewise. (aarch64_rtx_arith_op_extract_p): Likewise, remove now-unused parameter. Update callers... (aarch64_rtx_costs): ... here. gcc/testsuite/ChangeLog: * gcc.c-torture/compile/pr96998.c: New test.
diff --git a/gcc/combine.c b/gcc/combine.c index c88382e..cd8e544 100644 --- a/gcc/combine.c +++ b/gcc/combine.c @@ -7419,8 +7419,8 @@ expand_compound_operation (rtx x) } /* If we reach here, we want to return a pair of shifts. The inner - shift is a left shift of BITSIZE - POS - LEN bits. The outer - shift is a right shift of BITSIZE - LEN bits. It is arithmetic or + shift is a left shift of MODEWIDTH - POS - LEN bits. The outer + shift is a right shift of MODEWIDTH - LEN bits. It is arithmetic or logical depending on the value of UNSIGNEDP. If this was a ZERO_EXTEND or ZERO_EXTRACT, this pair of shifts will be @@ -7650,20 +7650,29 @@ make_extraction (machine_mode mode, rtx inner, HOST_WIDE_INT pos, is_mode = GET_MODE (SUBREG_REG (inner)); inner = SUBREG_REG (inner); } - else if (GET_CODE (inner) == ASHIFT + else if ((GET_CODE (inner) == ASHIFT || GET_CODE (inner) == MULT) && CONST_INT_P (XEXP (inner, 1)) - && pos_rtx == 0 && pos == 0 - && len > UINTVAL (XEXP (inner, 1))) - { - /* We're extracting the least significant bits of an rtx - (ashift X (const_int C)), where LEN > C. Extract the - least significant (LEN - C) bits of X, giving an rtx - whose mode is MODE, then shift it left C times. */ - new_rtx = make_extraction (mode, XEXP (inner, 0), - 0, 0, len - INTVAL (XEXP (inner, 1)), - unsignedp, in_dest, in_compare); - if (new_rtx != 0) - return gen_rtx_ASHIFT (mode, new_rtx, XEXP (inner, 1)); + && pos_rtx == 0 && pos == 0) + { + const HOST_WIDE_INT ci = INTVAL (XEXP (inner, 1)); + const bool mult = GET_CODE (inner) == MULT; + const int shift_amt = mult ? exact_log2 (ci) : ci; + + if (shift_amt > 0 && len > (unsigned)shift_amt) + { + /* We're extracting the least significant bits of an rtx + (ashift X (const_int C)) or (mult X (const_int (2^C))), + where LEN > C. Extract the least significant (LEN - C) bits + of X, giving an rtx whose mode is MODE, then shift it left + C times. */ + new_rtx = make_extraction (mode, XEXP (inner, 0), + 0, 0, len - shift_amt, + unsignedp, in_dest, in_compare); + if (new_rtx) + return mult + ? gen_rtx_MULT (mode, new_rtx, XEXP (inner, 1)) + : gen_rtx_ASHIFT (mode, new_rtx, XEXP (inner, 1)); + } } else if (GET_CODE (inner) == TRUNCATE /* If trying or potentionally trying to extract diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index b251f39..56738ae 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -2811,33 +2811,6 @@ aarch64_is_noplt_call_p (rtx sym) return false; } -/* Return true if the offsets to a zero/sign-extract operation - represent an expression that matches an extend operation. The - operands represent the parameters from - - (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */ -bool -aarch64_is_extend_from_extract (scalar_int_mode mode, rtx mult_imm, - rtx extract_imm) -{ - HOST_WIDE_INT mult_val, extract_val; - - if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm)) - return false; - - mult_val = INTVAL (mult_imm); - extract_val = INTVAL (extract_imm); - - if (extract_val > 8 - && extract_val < GET_MODE_BITSIZE (mode) - && exact_log2 (extract_val & ~7) > 0 - && (extract_val & 7) <= 4 - && mult_val == (1 << (extract_val & 7))) - return true; - - return false; -} - /* Emit an insn that's a simple single-set. Both the operands must be known to be valid. */ inline static rtx_insn * @@ -8837,22 +8810,6 @@ aarch64_classify_index (struct aarch64_address_info *info, rtx x, index = XEXP (XEXP (x, 0), 0); shift = INTVAL (XEXP (x, 1)); } - /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */ - else if ((GET_CODE (x) == SIGN_EXTRACT - || GET_CODE (x) == ZERO_EXTRACT) - && GET_MODE (x) == DImode - && GET_CODE (XEXP (x, 0)) == MULT - && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode - && CONST_INT_P (XEXP (XEXP (x, 0), 1))) - { - type = (GET_CODE (x) == SIGN_EXTRACT) - ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW; - index = XEXP (XEXP (x, 0), 0); - shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1))); - if (INTVAL (XEXP (x, 1)) != 32 + shift - || INTVAL (XEXP (x, 2)) != 0) - shift = -1; - } /* (and:DI (mult:DI (reg:DI) (const_int scale)) (const_int 0xffffffff<<shift)) */ else if (GET_CODE (x) == AND @@ -8868,22 +8825,6 @@ aarch64_classify_index (struct aarch64_address_info *info, rtx x, if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift) shift = -1; } - /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */ - else if ((GET_CODE (x) == SIGN_EXTRACT - || GET_CODE (x) == ZERO_EXTRACT) - && GET_MODE (x) == DImode - && GET_CODE (XEXP (x, 0)) == ASHIFT - && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode - && CONST_INT_P (XEXP (XEXP (x, 0), 1))) - { - type = (GET_CODE (x) == SIGN_EXTRACT) - ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW; - index = XEXP (XEXP (x, 0), 0); - shift = INTVAL (XEXP (XEXP (x, 0), 1)); - if (INTVAL (XEXP (x, 1)) != 32 + shift - || INTVAL (XEXP (x, 2)) != 0) - shift = -1; - } /* (and:DI (ashift:DI (reg:DI) (const_int shift)) (const_int 0xffffffff<<shift)) */ else if (GET_CODE (x) == AND @@ -11259,16 +11200,6 @@ aarch64_strip_extend (rtx x, bool strip_shift) if (!is_a <scalar_int_mode> (GET_MODE (op), &mode)) return op; - /* Zero and sign extraction of a widened value. */ - if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT) - && XEXP (op, 2) == const0_rtx - && GET_CODE (XEXP (op, 0)) == MULT - && aarch64_is_extend_from_extract (mode, XEXP (XEXP (op, 0), 1), - XEXP (op, 1))) - return XEXP (XEXP (op, 0), 0); - - /* It can also be represented (for zero-extend) as an AND with an - immediate. */ if (GET_CODE (op) == AND && GET_CODE (XEXP (op, 0)) == MULT && CONST_INT_P (XEXP (XEXP (op, 0), 1)) @@ -11606,31 +11537,11 @@ aarch64_branch_cost (bool speed_p, bool predictable_p) /* Return true if the RTX X in mode MODE is a zero or sign extract usable in an ADD or SUB (extended register) instruction. */ static bool -aarch64_rtx_arith_op_extract_p (rtx x, scalar_int_mode mode) -{ - /* Catch add with a sign extract. - This is add_<optab><mode>_multp2. */ - if (GET_CODE (x) == SIGN_EXTRACT - || GET_CODE (x) == ZERO_EXTRACT) - { - rtx op0 = XEXP (x, 0); - rtx op1 = XEXP (x, 1); - rtx op2 = XEXP (x, 2); - - if (GET_CODE (op0) == MULT - && CONST_INT_P (op1) - && op2 == const0_rtx - && CONST_INT_P (XEXP (op0, 1)) - && aarch64_is_extend_from_extract (mode, - XEXP (op0, 1), - op1)) - { - return true; - } - } +aarch64_rtx_arith_op_extract_p (rtx x) +{ /* The simple case <ARITH>, XD, XN, XM, [us]xt. No shift. */ - else if (GET_CODE (x) == SIGN_EXTEND + if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND) return REG_P (XEXP (x, 0)); @@ -12319,7 +12230,7 @@ cost_minus: /* Look for SUB (extended register). */ if (is_a <scalar_int_mode> (mode, &int_mode) - && aarch64_rtx_arith_op_extract_p (op1, int_mode)) + && aarch64_rtx_arith_op_extract_p (op1)) { if (speed) *cost += extra_cost->alu.extend_arith; @@ -12399,7 +12310,7 @@ cost_plus: /* Look for ADD (extended register). */ if (is_a <scalar_int_mode> (mode, &int_mode) - && aarch64_rtx_arith_op_extract_p (op0, int_mode)) + && aarch64_rtx_arith_op_extract_p (op0)) { if (speed) *cost += extra_cost->alu.extend_arith; diff --git a/gcc/testsuite/gcc.c-torture/compile/pr96998.c b/gcc/testsuite/gcc.c-torture/compile/pr96998.c new file mode 100644 index 0000000..a75d5dc --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/compile/pr96998.c @@ -0,0 +1,15 @@ +/* { dg-do compile { target arm*-*-* aarch64*-*-* } } */ + +int h(void); +struct c d; +struct c { + int e[1]; +}; + +void f(void) { + int g; + for (;; g = h()) { + int *i = &d.e[g]; + asm("" : "=Q"(*i)); + } +}