hliao updated this revision to Diff 256321. hliao added a comment. Rebase to trunk.
Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D77777/new/ https://reviews.llvm.org/D77777 Files: clang/lib/CodeGen/TargetInfo.cpp clang/test/CodeGenCUDA/surface.cu clang/test/CodeGenCUDA/texture.cu llvm/lib/Target/NVPTX/CMakeLists.txt llvm/lib/Target/NVPTX/NVPTX.h llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp llvm/lib/Target/NVPTX/NVPTXTexSurfHandleInternalizer.cpp llvm/test/CodeGen/NVPTX/tex-read-cuda.ll
Index: llvm/test/CodeGen/NVPTX/tex-read-cuda.ll =================================================================== --- llvm/test/CodeGen/NVPTX/tex-read-cuda.ll +++ llvm/test/CodeGen/NVPTX/tex-read-cuda.ll @@ -6,6 +6,7 @@ declare { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64, i32) declare i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)*) +declare i64 @llvm.nvvm.texsurf.handle.p1i64(metadata, i64 addrspace(1)*) ; SM20-LABEL: .entry foo ; SM30-LABEL: .entry foo @@ -28,7 +29,7 @@ ; SM20-LABEL: .entry bar ; SM30-LABEL: .entry bar define void @bar(float* %red, i32 %idx) { -; SM30: mov.u64 %rd[[TEXHANDLE:[0-9]+]], tex0 +; SM30: mov.u64 %rd[[TEXHANDLE:[0-9]+]], tex0 %texHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @tex0) ; SM20: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [tex0, {%r{{[0-9]+}}}] ; SM30: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXHANDLE]], {%r{{[0-9]+}}}] @@ -40,7 +41,24 @@ ret void } -!nvvm.annotations = !{!1, !2, !3} +; SM20-LABEL: .entry bax +; SM30-LABEL: .entry bax +define void @bax(float* %red, i32 %idx) { +; SM30: mov.u64 %rd[[TEXHANDLE:[0-9]+]], tex0 + %texHandle = tail call i64 @llvm.nvvm.texsurf.handle.p1i64(metadata !5, i64 addrspace(1)* @tex0) +; SM20: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [tex0, {%r{{[0-9]+}}}] +; SM30: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXHANDLE]], {%r{{[0-9]+}}}] + %val = tail call { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64 %texHandle, i32 %idx) + %ret = extractvalue { float, float, float, float } %val, 0 +; SM20: st.global.f32 [%r{{[0-9]+}}], %f[[RED]] +; SM30: st.global.f32 [%r{{[0-9]+}}], %f[[RED]] + store float %ret, float* %red + ret void +} + +!nvvm.annotations = !{!1, !2, !3, 
!4} !1 = !{void (i64, float*, i32)* @foo, !"kernel", i32 1} !2 = !{void (float*, i32)* @bar, !"kernel", i32 1} -!3 = !{i64 addrspace(1)* @tex0, !"texture", i32 1} +!3 = !{void (float*, i32)* @bax, !"kernel", i32 1} +!4 = !{i64 addrspace(1)* @tex0, !"texture", i32 1} +!5 = !{i64 addrspace(1)* @tex0} Index: llvm/lib/Target/NVPTX/NVPTXTexSurfHandleInternalizer.cpp =================================================================== --- /dev/null +++ llvm/lib/Target/NVPTX/NVPTXTexSurfHandleInternalizer.cpp @@ -0,0 +1,81 @@ +//===- NVPTXTexSurfHandleInternalizer.cpp - --------------------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// \file +// Replace `nvvm.texsurf.handle` intrinsics with their internal version, i.e. +// `nvvm.texsurf.handle.internal`. 
+// +//===----------------------------------------------------------------------===// + +#include "NVPTX.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/IntrinsicsNVPTX.h" +#include "llvm/Pass.h" + +using namespace llvm; + +#define DEBUG_TYPE "nvptx-texsurf-handle-internalizer" + +namespace llvm { +void initializeTexSurfHandleInternalizerPass(PassRegistry &); +} + +namespace { + +class TexSurfHandleInternalizer : public FunctionPass { +public: + static char ID; + + TexSurfHandleInternalizer() : FunctionPass(ID) { + initializeTexSurfHandleInternalizerPass(*PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { + return "Internalize `nvvm.texsurf.handle` intrinsics"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + } + + bool runOnFunction(Function &F) override { + bool Changed = false; + for (auto &BB : F) + for (auto BI = BB.begin(), BE = BB.end(); BI != BE; /*EMPTY*/) { + IntrinsicInst *II = dyn_cast<IntrinsicInst>(&*BI++); + if (!II || II->getIntrinsicID() != Intrinsic::nvvm_texsurf_handle) + continue; + assert(II->getArgOperand(1) == + cast<ValueAsMetadata>( + cast<MetadataAsValue>(II->getArgOperand(0))->getMetadata()) + ->getValue()); + // Replace it with the internal version. 
+ IRBuilder<> Builder(II); + auto *NewII = Builder.CreateUnaryIntrinsic( + Intrinsic::nvvm_texsurf_handle_internal, II->getArgOperand(1)); + II->replaceAllUsesWith(NewII); + II->eraseFromParent(); + Changed = true; + } + return Changed; + } +}; + +} // end of anonymous namespace + +FunctionPass *llvm::createNVPTXTexSurfHandleInternalizerPass() { + return new TexSurfHandleInternalizer(); +} + +char TexSurfHandleInternalizer::ID = 0; + +INITIALIZE_PASS(TexSurfHandleInternalizer, "nvptx-texsurf-handle-internalizer", + "Interalize texsurf-handle intrinsic", false, false) Index: llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp =================================================================== --- llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -161,6 +161,7 @@ } void addIRPasses() override; + bool addPreISel() override; bool addInstSelector() override; void addPreRegAlloc() override; void addPostRegAlloc() override; @@ -300,6 +301,11 @@ } } +bool NVPTXPassConfig::addPreISel() { + addPass(createNVPTXTexSurfHandleInternalizerPass()); + return false; +} + bool NVPTXPassConfig::addInstSelector() { const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl(); Index: llvm/lib/Target/NVPTX/NVPTX.h =================================================================== --- llvm/lib/Target/NVPTX/NVPTX.h +++ llvm/lib/Target/NVPTX/NVPTX.h @@ -47,6 +47,7 @@ FunctionPass *createNVPTXLowerAllocaPass(); MachineFunctionPass *createNVPTXPeephole(); MachineFunctionPass *createNVPTXProxyRegErasurePass(); +FunctionPass *createNVPTXTexSurfHandleInternalizerPass(); namespace NVPTX { enum DrvInterface { Index: llvm/lib/Target/NVPTX/CMakeLists.txt =================================================================== --- llvm/lib/Target/NVPTX/CMakeLists.txt +++ llvm/lib/Target/NVPTX/CMakeLists.txt @@ -19,20 +19,21 @@ NVPTXImageOptimizer.cpp NVPTXInstrInfo.cpp NVPTXLowerAggrCopies.cpp - NVPTXLowerArgs.cpp NVPTXLowerAlloca.cpp - 
NVPTXPeephole.cpp + NVPTXLowerArgs.cpp NVPTXMCExpr.cpp + NVPTXPeephole.cpp NVPTXPrologEpilogPass.cpp + NVPTXProxyRegErasure.cpp NVPTXRegisterInfo.cpp NVPTXReplaceImageHandles.cpp NVPTXSubtarget.cpp NVPTXTargetMachine.cpp NVPTXTargetTransformInfo.cpp + NVPTXTexSurfHandleInternalizer.cpp NVPTXUtilities.cpp NVVMIntrRange.cpp NVVMReflect.cpp - NVPTXProxyRegErasure.cpp ) add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources}) Index: clang/test/CodeGenCUDA/texture.cu =================================================================== --- clang/test/CodeGenCUDA/texture.cu +++ clang/test/CodeGenCUDA/texture.cu @@ -37,9 +37,9 @@ __attribute__((device)) v4f tex2d_ld(texture<float, 2, NormalizedFloat>, int, int) asm("llvm.nvvm.tex.unified.2d.v4f32.s32"); // DEVICE-LABEL: float @_Z3fooff(float %x, float %y) -// DEVICE: call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @tex) +// DEVICE: call i64 @llvm.nvvm.texsurf.handle.p1i64(metadata [[TEX:.*]], [[TEX]]) // DEVICE: call %struct.v4f @llvm.nvvm.tex.unified.2d.v4f32.f32(i64 %{{.*}}, float %{{.*}}, float %{{.*}}) -// DEVICE: call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @norm) +// DEVICE: call i64 @llvm.nvvm.texsurf.handle.p1i64(metadata [[NORM:.*]], [[NORM]]) // DEVICE: call %struct.v4f @llvm.nvvm.tex.unified.2d.v4f32.s32(i64 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) __attribute__((device)) float foo(float x, float y) { return tex2d_ld(tex, x, y).x + tex2d_ld(norm, int(x), int(y)).x; Index: clang/test/CodeGenCUDA/surface.cu =================================================================== --- clang/test/CodeGenCUDA/surface.cu +++ clang/test/CodeGenCUDA/surface.cu @@ -28,7 +28,7 @@ __attribute__((device)) int suld_2d_zero(surface<void, 2>, int, int) asm("llvm.nvvm.suld.2d.i32.zero"); // DEVICE-LABEL: i32 @_Z3fooii(i32 %x, i32 %y) -// DEVICE: call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @surf) +// DEVICE: call i64 @llvm.nvvm.texsurf.handle.p1i64(metadata [[SURF:.*]], 
[[SURF]]) // DEVICE: call i32 @llvm.nvvm.suld.2d.i32.zero(i64 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) __attribute__((device)) int foo(int x, int y) { return suld_2d_zero(surf, x, y); Index: clang/lib/CodeGen/TargetInfo.cpp =================================================================== --- clang/lib/CodeGen/TargetInfo.cpp +++ clang/lib/CodeGen/TargetInfo.cpp @@ -6482,12 +6482,14 @@ if (auto *ASC = llvm::dyn_cast_or_null<llvm::AddrSpaceCastOperator>(C)) C = llvm::cast<llvm::Constant>(ASC->getPointerOperand()); if (auto *GV = llvm::dyn_cast_or_null<llvm::GlobalVariable>(C)) { + llvm::Value *MD = llvm::MetadataAsValue::get( + CGF.getLLVMContext(), llvm::ConstantAsMetadata::get(GV)); // Load the handle from the specific global variable using // `nvvm.texsurf.handle.internal` intrinsic. Handle = CGF.EmitRuntimeCall( - CGF.CGM.getIntrinsic(llvm::Intrinsic::nvvm_texsurf_handle_internal, + CGF.CGM.getIntrinsic(llvm::Intrinsic::nvvm_texsurf_handle, {GV->getType()}), - {GV}, "texsurf_handle"); + {MD, GV}, "texsurf_handle"); } else Handle = CGF.EmitLoadOfScalar(Src, SourceLocation()); CGF.EmitStoreOfScalar(Handle, Dst);
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits