================ @@ -9665,18 +9665,34 @@ Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags, Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags, SmallVectorImpl<Value *> &Ops, unsigned IntID) { - if (Ops.size() == 3) { - Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb); - llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb"); - llvm::Value *MulVL = Builder.CreateMul( - CntsbCall, - Builder.getInt64(cast<llvm::ConstantInt>(Ops[2])->getZExtValue()), - "mulvl"); - - Ops[1] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL); - Ops[0] = EmitTileslice(Ops[0], Ops[2]); - Ops.erase(&Ops[2]); - } + if (Ops.size() == 2) { + // Intrinsics without a vecnum also use this function, so just provide 0 + Ops.push_back(Ops[1]); + Ops[1] = Builder.getInt32(0); + } else { + int Imm = -1; + if (ConstantInt* C = dyn_cast<ConstantInt>(Ops[2])) + if (C->getZExtValue() <= 15) + Imm = C->getZExtValue(); + + if (Imm != -1) { ---------------- sdesmalen-arm wrote:
Rather than checking this in Clang, I think it makes more sense to handle this in SelectionDAG. The reason is that when someone uses this intrinsic inside a loop like this: ``` for(int i=0; i<N; ++i) svldr_vnum_za(slice, ptr, i); ``` and the compiler chooses to unroll the loop in LLVM IR, we'd end up with something like this: ``` for(int i=0; i<N; i+=4) { svldr_vnum_za(slice, ptr, i); svldr_vnum_za(slice, ptr, i+1); svldr_vnum_za(slice, ptr, i+2); svldr_vnum_za(slice, ptr, i+3); } ``` It would be good if we could add `i` to `slice` and use the immediate forms of the instructions. If we have an intrinsic like: ```void @llvm.aarch64.sme.ldr(i32 %tileslice, ptr %base, i64 %vnum)``` Then for the instruction which takes the following inputs: ```(ins MatrixIndexGPR32Op12_15:$slice_base, sme_elm_idx0_15:$slice_idx, GPR64sp:$ptr, imm0_15:$ptr_idx), ``` You can do custom matching in ISel by handling three different cases: * `%vnum` is a constant between 0-15. This will leave `$slice_base` and `$ptr` untouched, and will use the matched immediate (let's call it `$imm0_15`) for the two immediates taken by the instruction (`$slice_idx` and `$ptr_idx`). * `%vnum` is an ADD of an opaque value and an immediate between 0-15. This will match to the instruction where the opaque value is added to `$slice_base` and `$ptr`, with the remaining immediate being used for `$slice_idx` and `$ptr_idx`. * `%vnum` is an opaque value. This will match to the instruction where `%vnum` is added to both the `$slice_base` and the `$ptr` parameter, with `$slice_idx` and `$ptr_idx` being `0`. (Note that when adding `%vnum` to `$ptr`, it needs to be scaled by `cntd`.) https://github.com/llvm/llvm-project/pull/68565 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits