================ @@ -4825,6 +4827,113 @@ SDValue AArch64TargetLowering::getPStateSM(SelectionDAG &DAG, SDValue Chain, Mask); } +// Lower an SME LDR/STR ZA intrinsic to LDR_ZA_PSEUDO or STR_ZA. +// Case 1: If the vector number (vecnum) is an immediate in range, it gets +// folded into the instruction +// ldr(%tileslice, %ptr, 11) -> ldr [%tileslice, 11], [%ptr, 11] +// Case 2: If the vecnum is not an immediate, then it is used to modify the base +// and tile slice registers +// ldr(%tileslice, %ptr, %vecnum) +// -> +// %svl = rdsvl +// %ptr2 = %ptr + %svl * %vecnum +// %tileslice2 = %tileslice + %vecnum +// ldr [%tileslice2, 0], [%ptr2, 0] +// Case 3: If the vecnum is an immediate out of range, then the same is done as +// case 2, but the base and slice registers are modified by the greatest +// multiple of 16 not greater than the vecnum and the remainder is folded into +// the instruction. This means that successive loads and stores that are offset +// from each other can share the same base and slice register updates. +// ldr(%tileslice, %ptr, 22) +// ldr(%tileslice, %ptr, 23) +// -> +// %svl = rdsvl +// %ptr2 = %ptr + %svl * 16 +// %tileslice2 = %tileslice + 16 +// ldr [%tileslice2, 6], [%ptr2, 6] +// ldr [%tileslice2, 7], [%ptr2, 7] +// Case 4: If the vecnum is an add of an immediate, then the non-immediate +// operand is used to modify the base and tile slice registers, like case 2, +// and the immediate is folded into the instruction. +// ldr(%tileslice, %ptr, %vecnum + 7) +// ldr(%tileslice, %ptr, %vecnum + 8) +// -> +// %svl = rdsvl +// %ptr2 = %ptr + %svl * %vecnum +// %tileslice2 = %tileslice + %vecnum +// ldr [%tileslice2, 7], [%ptr2, 7] +// ldr [%tileslice2, 8], [%ptr2, 8] +// Case 5: The vecnum being an add of an immediate out of range is also handled, +// in which case the same remainder logic as case 3 is used. 
+SDValue LowerSMELdrStr(SDValue N, SelectionDAG &DAG, bool IsLoad) { + SDLoc DL(N); + + SDValue TileSlice = N->getOperand(2); + SDValue Base = N->getOperand(3); + SDValue VecNum = N->getOperand(4); + int Addend = 0; + + // If the vnum is an add, we can fold that add into the instruction if the + // operand is an immediate. The range check is performed below. + if (VecNum.getOpcode() == ISD::ADD) { + if (auto ImmNode = dyn_cast<ConstantSDNode>(VecNum.getOperand(1))) { + Addend = ImmNode->getSExtValue(); + VecNum = VecNum.getOperand(0); + } + } + + SDValue Remainder = DAG.getTargetConstant(Addend, DL, MVT::i32); + + // true if the base and slice registers need to be modified + bool NeedsAdd = true; + auto ImmNode = dyn_cast<ConstantSDNode>(VecNum); + if (ImmNode || Addend != 0) { + int Imm = ImmNode ? ImmNode->getSExtValue() + Addend : Addend; + Remainder = DAG.getTargetConstant(Imm % 16, DL, MVT::i32); + if (Imm >= 0 && Imm <= 15) { + // If vnum is an immediate in range then we don't need to modify the tile + // slice and base register. We could also get here because Addend != 0 but + // vecnum is not an immediate, in which case we still want the base and + // slice register to be modified + NeedsAdd = !ImmNode; ---------------- SamTebbs33 wrote:
I actually didn't realise that `SDValue()` is a falsey value. That certainly does eliminate the need for the `NeedsAdd` boolean. Thank you! https://github.com/llvm/llvm-project/pull/68565 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits