Alex Coplan <alex.cop...@arm.com> writes: > Hi Richard, > > On 10/09/2020 19:18, Richard Sandiford wrote: >> Alex Coplan <alex.cop...@arm.com> writes: >> > Hello, >> > >> > Since r11-2903-g6b3034eaba83935d9f6dfb20d2efbdb34b5b00bf introduced a >> > canonicalization from mult to shift on address reloads, a missing >> > pattern in the AArch64 backend was exposed. >> > >> > In particular, we were missing the ashift variant of >> > *add_<optab><mode>_multp2 (this mult variant is redundant and was >> > removed in r11-3033-g2f8ae301f6a125f50b0a758047fcddae7b68daa8). >> > >> > This patch adds that missing pattern (fixing PR96998), updates >> > aarch64_is_extend_from_extract() to work for the shift pattern (instead >> > of the redundant mult variant) and updates callers in the cost >> > calculations to apply the costing for the shift variant instead. >> >> Hmm. I think we should instead fix this in combine. > > Ok, thanks for the review. Please find a revised patch attached which > fixes this in combine instead.
Thanks for doing this, looks like a really nice clean-up. > @@ -7650,20 +7650,29 @@ make_extraction (machine_mode mode, rtx inner, > HOST_WIDE_INT pos, > is_mode = GET_MODE (SUBREG_REG (inner)); > inner = SUBREG_REG (inner); > } > - else if (GET_CODE (inner) == ASHIFT > + else if ((GET_CODE (inner) == ASHIFT || GET_CODE (inner) == MULT) > && CONST_INT_P (XEXP (inner, 1)) > - && pos_rtx == 0 && pos == 0 > - && len > UINTVAL (XEXP (inner, 1))) > - { > - /* We're extracting the least significant bits of an rtx > - (ashift X (const_int C)), where LEN > C. Extract the > - least significant (LEN - C) bits of X, giving an rtx > - whose mode is MODE, then shift it left C times. */ > - new_rtx = make_extraction (mode, XEXP (inner, 0), > - 0, 0, len - INTVAL (XEXP (inner, 1)), > - unsignedp, in_dest, in_compare); > - if (new_rtx != 0) > - return gen_rtx_ASHIFT (mode, new_rtx, XEXP (inner, 1)); > + && pos_rtx == 0 && pos == 0) > + { > + const HOST_WIDE_INT ci = INTVAL (XEXP (inner, 1)); > + const bool mult = GET_CODE (inner) == MULT; > + const int shift_amt = mult ? exact_log2 (ci) : ci; Not going to be a problem in practice but: HOST_WIDE_INT would be better than int here, so that we don't truncate the value for ASHIFT before it has been tested. Similarly: > + > + if (shift_amt > 0 && len > (unsigned)shift_amt) unsigned HOST_WIDE_INT here. > + { > + /* We're extracting the least significant bits of an rtx > + (ashift X (const_int C)) or (mult X (const_int (2^C))), > + where LEN > C. Extract the least significant (LEN - C) bits > + of X, giving an rtx whose mode is MODE, then shift it left > + C times. */ > + new_rtx = make_extraction (mode, XEXP (inner, 0), > + 0, 0, len - shift_amt, > + unsignedp, in_dest, in_compare); > + if (new_rtx) > + return mult > + ? gen_rtx_MULT (mode, new_rtx, XEXP (inner, 1)) > + : gen_rtx_ASHIFT (mode, new_rtx, XEXP (inner, 1)); Easier as gen_rtx_fmt_ee (GET_CODE (inner), mode, …); The combine parts LGTM otherwise, but Segher should have the final say. > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c > index b251f39..56738ae 100644 > --- a/gcc/config/aarch64/aarch64.c > +++ b/gcc/config/aarch64/aarch64.c > @@ -2811,33 +2811,6 @@ aarch64_is_noplt_call_p (rtx sym) > return false; > } > > -/* Return true if the offsets to a zero/sign-extract operation > - represent an expression that matches an extend operation. The > - operands represent the parameters from > - > - (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */ > -bool > -aarch64_is_extend_from_extract (scalar_int_mode mode, rtx mult_imm, > - rtx extract_imm) > -{ > - HOST_WIDE_INT mult_val, extract_val; > - > - if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm)) > - return false; > - > - mult_val = INTVAL (mult_imm); > - extract_val = INTVAL (extract_imm); > - > - if (extract_val > 8 > - && extract_val < GET_MODE_BITSIZE (mode) > - && exact_log2 (extract_val & ~7) > 0 > - && (extract_val & 7) <= 4 > - && mult_val == (1 << (extract_val & 7))) > - return true; > - > - return false; > -} > - > /* Emit an insn that's a simple single-set. Both the operands must be > known to be valid. */ > inline static rtx_insn * > @@ -8837,22 +8810,6 @@ aarch64_classify_index (struct aarch64_address_info > *info, rtx x, > index = XEXP (XEXP (x, 0), 0); > shift = INTVAL (XEXP (x, 1)); > } > - /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */ > - else if ((GET_CODE (x) == SIGN_EXTRACT > - || GET_CODE (x) == ZERO_EXTRACT) > - && GET_MODE (x) == DImode > - && GET_CODE (XEXP (x, 0)) == MULT > - && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode > - && CONST_INT_P (XEXP (XEXP (x, 0), 1))) > - { > - type = (GET_CODE (x) == SIGN_EXTRACT) > - ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW; > - index = XEXP (XEXP (x, 0), 0); > - shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1))); > - if (INTVAL (XEXP (x, 1)) != 32 + shift > - || INTVAL (XEXP (x, 2)) != 0) > - shift = -1; > - } > /* (and:DI (mult:DI (reg:DI) (const_int scale)) > (const_int 0xffffffff<<shift)) */ > else if (GET_CODE (x) == AND > @@ -8868,22 +8825,6 @@ aarch64_classify_index (struct aarch64_address_info > *info, rtx x, > if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift) > shift = -1; > } > - /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */ > - else if ((GET_CODE (x) == SIGN_EXTRACT > - || GET_CODE (x) == ZERO_EXTRACT) > - && GET_MODE (x) == DImode > - && GET_CODE (XEXP (x, 0)) == ASHIFT > - && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode > - && CONST_INT_P (XEXP (XEXP (x, 0), 1))) > - { > - type = (GET_CODE (x) == SIGN_EXTRACT) > - ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW; > - index = XEXP (XEXP (x, 0), 0); > - shift = INTVAL (XEXP (XEXP (x, 0), 1)); > - if (INTVAL (XEXP (x, 1)) != 32 + shift > - || INTVAL (XEXP (x, 2)) != 0) > - shift = -1; > - } > /* (and:DI (ashift:DI (reg:DI) (const_int shift)) > (const_int 0xffffffff<<shift)) */ > else if (GET_CODE (x) == AND > @@ -11259,16 +11200,6 @@ aarch64_strip_extend (rtx x, bool strip_shift) > if (!is_a <scalar_int_mode> (GET_MODE (op), &mode)) > return op; > > - /* Zero and sign extraction of a widened value. */ > - if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT) > - && XEXP (op, 2) == const0_rtx > - && GET_CODE (XEXP (op, 0)) == MULT > - && aarch64_is_extend_from_extract (mode, XEXP (XEXP (op, 0), 1), > - XEXP (op, 1))) > - return XEXP (XEXP (op, 0), 0); > - > - /* It can also be represented (for zero-extend) as an AND with an > - immediate. */ > if (GET_CODE (op) == AND > && GET_CODE (XEXP (op, 0)) == MULT > && CONST_INT_P (XEXP (XEXP (op, 0), 1)) > @@ -11606,31 +11537,11 @@ aarch64_branch_cost (bool speed_p, bool > predictable_p) > /* Return true if the RTX X in mode MODE is a zero or sign extract > usable in an ADD or SUB (extended register) instruction. */ > static bool > -aarch64_rtx_arith_op_extract_p (rtx x, scalar_int_mode mode) > -{ > - /* Catch add with a sign extract. > - This is add_<optab><mode>_multp2. */ > - if (GET_CODE (x) == SIGN_EXTRACT > - || GET_CODE (x) == ZERO_EXTRACT) > - { > - rtx op0 = XEXP (x, 0); > - rtx op1 = XEXP (x, 1); > - rtx op2 = XEXP (x, 2); > - > - if (GET_CODE (op0) == MULT > - && CONST_INT_P (op1) > - && op2 == const0_rtx > - && CONST_INT_P (XEXP (op0, 1)) > - && aarch64_is_extend_from_extract (mode, > - XEXP (op0, 1), > - op1)) > - { > - return true; > - } > - } > +aarch64_rtx_arith_op_extract_p (rtx x) > +{ > /* The simple case <ARITH>, XD, XN, XM, [us]xt. > No shift. */ > - else if (GET_CODE (x) == SIGN_EXTEND > + if (GET_CODE (x) == SIGN_EXTEND > || GET_CODE (x) == ZERO_EXTEND) Should reindent this line too. > return REG_P (XEXP (x, 0)); > > @@ -12319,7 +12230,7 @@ cost_minus: > > /* Look for SUB (extended register). */ > if (is_a <scalar_int_mode> (mode, &int_mode) > - && aarch64_rtx_arith_op_extract_p (op1, int_mode)) > + && aarch64_rtx_arith_op_extract_p (op1)) > { > if (speed) > *cost += extra_cost->alu.extend_arith; > @@ -12399,7 +12310,7 @@ cost_plus: > > /* Look for ADD (extended register). */ > if (is_a <scalar_int_mode> (mode, &int_mode) > - && aarch64_rtx_arith_op_extract_p (op0, int_mode)) > + && aarch64_rtx_arith_op_extract_p (op0)) > { > if (speed) > *cost += extra_cost->alu.extend_arith; int_mode is now unused in both hunks, so can just remove the “, &int_mode”s. The aarch64 changes are OK with those (incredibly inconsequential) comments fixed. Thanks, Richard