LuoYuanke marked an inline comment as done. LuoYuanke added inline comments.
================ Comment at: llvm/lib/IR/DataLayout.cpp:819 + case Type::X86_AMXTyID: + return Align(64); default: ---------------- pengfei wrote: > Should be 512 bits? Yes. It is 512. Thanks. ================ Comment at: llvm/lib/Target/X86/X86LowerAMXType.cpp:72 LLVMContext &Ctx = Builder.getContext(); - Type *Ty = LD->getType(); - EVT VT = EVT::getEVT(Ty); - EVT HalfVT = VT.getHalfNumVectorElementsVT(Ctx); - Type *HalfTy = HalfVT.getTypeForEVT(Ctx); - - Value *Ptr = LD->getPointerOperand(); - PointerType *HalfPtrTy = HalfTy->getPointerTo(LD->getPointerAddressSpace()); - Value *HalfPtr = Builder.CreateBitCast(Ptr, HalfPtrTy); - // The HW require the alignment for AMX tile is 64, but front-end generate - // code for the vector alignment which is the vector size. - uint64_t HalfTySize = HalfTy->getPrimitiveSizeInBits().getFixedSize() / 8; - Align Alignment = std::min(LD->getAlign(), Align(HalfTySize)); - auto *Lo = - Builder.CreateAlignedLoad(HalfTy, HalfPtr, Alignment, LD->isVolatile()); - - HalfPtr = Builder.CreateGEP(HalfTy, HalfPtr, Builder.getInt32(1)); - auto *Hi = - Builder.CreateAlignedLoad(HalfTy, HalfPtr, Alignment, LD->isVolatile()); - - LoadMap[Inst] = std::make_pair(Lo, Hi); -} - -bool X86LowerAMXType::visitLD() { - if (LDSet.empty()) - return false; - for (auto &Inst : LDSet) { - int Count = 0; - Value *NewInst = nullptr; - // The user should be all AMX intrinsics or all LLVM instruction. - // Don't support it is used by both AMX intrinsics and LLVM instructions. 
- for (auto I = Inst->use_begin(), E = Inst->use_end(); I != E;) { - Use &U = *I++; - const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U.getUser()); - if (!II) { - Count++; - continue; - } - if (NewInst) - continue; - Value *Row, *Col; - switch (II->getIntrinsicID()) { - default: - report_fatal_error("Non-AMX intrinsic use tile type."); - break; - case Intrinsic::x86_tdpbssd_internal: { - unsigned OpNo = U.getOperandNo(); - switch (OpNo) { - case 3: - Row = II->getArgOperand(0); - Col = II->getArgOperand(1); - break; - case 4: - Row = II->getArgOperand(0); - Col = II->getArgOperand(2); - break; - case 5: - Row = II->getArgOperand(2); - Col = II->getArgOperand(1); - break; - } - break; - } - case Intrinsic::x86_tilestored64_internal: { - Row = II->getArgOperand(0); - Col = II->getArgOperand(1); - break; - } - } - assert(Count == 0 && "Can NOT mix amx intrinsic and LLVM instruction"); - // FIXME: The shape def should be ahead of load. - IRBuilder<> Builder(Inst); - LLVMContext &Ctx = Builder.getContext(); - // Use the maximun column as stride. - Value *Stride = Builder.getInt64(64); - Value *I8Ptr = - Builder.CreateBitCast(Inst->getOperand(0), Type::getInt8PtrTy(Ctx)); - std::array<Value *, 4> Args = {Row, Col, I8Ptr, Stride}; - - NewInst = Builder.CreateIntrinsic(Intrinsic::x86_tileloadd64_internal, - None, Args); - - Inst->replaceAllUsesWith(NewInst); - } - if (!NewInst) - splitLD(Inst); + AllocaInst *AllocaAddr = CreateAllocaInst(Builder, Bitcast->getParent()); + Value *I8Ptr = ---------------- craig.topper wrote: > Shouldn't this be in the function's entry block? Yes. It is in function's entry block. It is done in line 48 of function CreateAllocaInst(). CreateAllocaInst() is actually copied from your code. :) ================ Comment at: llvm/lib/Target/X86/X86LowerAMXType.cpp:79 + // --> + // %addr = alloca <256 x i32>, align 1024 + // store <256 x i32> %src, <256 x i32>* %addr, align 1024 ---------------- pengfei wrote: > Why the alignment not be 64? 
1024 is conservative, because a vector requires its alignment to be the vector size. Here we generate a vector <256 x i32> load/store. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D91927/new/ https://reviews.llvm.org/D91927 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits