================
@@ -0,0 +1,501 @@
+//===- LowerGPUIntrinsic.cpp ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Lower the llvm.gpu intrinsics to target-specific code sequences.
+// Can be called from clang if building for a specific GPU or from the backend
+// as part of a SPIRV lowering pipeline. The initial pass can lower to amdgcn
+// or nvptx; adding further architectures means adding a column to the lookup
+// table, and adding further intrinsics means adding a row.
+//
+// The idea is for the intrinsics to represent a thin abstraction over the
+// different GPU architectures. In particular, code compiled to spirv-- without
+// specifying a specific target can be specialised at JIT time, at which point
+// this pass will rewrite those intrinsics to ones that the current backend
+// knows.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/LowerGPUIntrinsic.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/IntrinsicsNVPTX.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/TargetParser/Triple.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+#define DEBUG_TYPE "lower-gpu-intrinsic"
+
+using namespace llvm;
+
+namespace {
+
+// For each intrinsic, specify what function to call to lower it.
+typedef bool (*lowerFunction)(Module &M, IRBuilder<> &, Intrinsic::ID from,
+                              CallBase *CI);
+
+// Simple lowering: directly replace the intrinsic with a different one with
+// the same type, and optionally refine range metadata on the return value.
+template <Intrinsic::ID To>
+bool S(Module &M, IRBuilder<> &, Intrinsic::ID from, CallBase *CI) {
+
+  static_assert(To != Intrinsic::not_intrinsic);
+  Intrinsic::ID GenericID = from;
+  Intrinsic::ID SpecificID = To;
+
+  bool Changed = false;
+  Function *Generic = Intrinsic::getDeclarationIfExists(&M, GenericID);
+  auto *Specific = Intrinsic::getOrInsertDeclaration(&M, SpecificID);
+
+  if ((Generic->getType() != Specific->getType()) ||
+      (Generic->getReturnType() != Specific->getReturnType()))
+    report_fatal_error("LowerGPUIntrinsic: Inconsistent types between "
+                       "intrinsics in lookup table");
+
+  CI->setCalledFunction(Specific);
+  Changed = true;
+
+  return Changed;
+}
+
+// Replace an intrinsic call with a linear sequence of instructions.
+typedef Value *(*builder)(Module &M, IRBuilder<> &Builder, Intrinsic::ID from,
+                          CallBase *CI);
+
+template <builder F>
+bool B(Module &M, IRBuilder<> &Builder, Intrinsic::ID from, CallBase *CI) {
+  bool Changed = false;
+
+  Builder.SetInsertPoint(CI);
+
+  Value *replacement = F(M, Builder, from, CI);
+  if (replacement) {
+    CI->replaceAllUsesWith(replacement);
+    CI->eraseFromParent();
+    Changed = true;
+  }
+
+  return Changed;
+}
+
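+// For illustration, a hypothetical lookup table row in the style of the
+// table defined later in this file (the generic intrinsic name is a
+// stand-in; the per-target intrinsics named in the columns are real):
+//
+//   {Intrinsic::gpu_thread_id_x,               // row: one generic intrinsic
+//    S<Intrinsic::amdgcn_workitem_id_x>,       // amdgcn column
+//    S<Intrinsic::nvvm_read_ptx_sreg_tid_x>},  // nvptx column
+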
+template <Intrinsic::ID Numerator, Intrinsic::ID Denominator>
+Value *intrinsicRatio(Module &M, IRBuilder<> &Builder, Intrinsic::ID,
+                      CallBase *) {
+  Value *N = Builder.CreateIntrinsic(Numerator, {}, {});
+  Value *D = Builder.CreateIntrinsic(Denominator, {}, {});
+  return Builder.CreateUDiv(N, D);
+}
+
+namespace amdgpu {
+Value *lane_mask(Module &M, IRBuilder<> &Builder, Intrinsic::ID from,
+                 CallBase *CI) {
+  auto &Ctx = M.getContext();
+  return Builder.CreateIntrinsic(
+      Intrinsic::amdgcn_ballot, {Type::getInt64Ty(Ctx)},
+      {ConstantInt::get(Type::getInt1Ty(Ctx), true)});
+}
+
+Value *lane_id(Module &M, IRBuilder<> &Builder, Intrinsic::ID from,
+               CallBase *CI) {
+  auto &Ctx = M.getContext();
+  Constant *M1 = ConstantInt::get(Type::getInt32Ty(Ctx), -1);
+  Constant *Z = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
+
+  CallInst *Lo =
+      Builder.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_lo, {}, {M1, Z});
+  return Builder.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_hi, {}, {M1, Lo});
+}
+
+Value *first_lane(Module &M, IRBuilder<> &Builder, Intrinsic::ID from,
+                  CallBase *CI) {
+  auto &Ctx = M.getContext();
+  return Builder.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane,
+                                 {Type::getInt32Ty(Ctx)},
+                                 {CI->getArgOperand(1)});
+}
+
+Value *shuffle_idx(Module &M, IRBuilder<> &Builder, Intrinsic::ID from,
+                   CallBase *CI) {
+  auto &Ctx = M.getContext();
+
+  Value *idx = CI->getArgOperand(1);
+  Value *x = CI->getArgOperand(2);
+  Value *width = CI->getArgOperand(3);
+
+  Value *id = Builder.CreateIntrinsic(Intrinsic::gpu_lane_id, {}, {});
+
+  Value *n = Builder.CreateSub(ConstantInt::get(Type::getInt32Ty(Ctx), 0),
+                               width, "not");
+  Value *a = Builder.CreateAnd(id, n, "and");
+  Value *add = Builder.CreateAdd(a, idx, "add");
+  Value *shl =
+      Builder.CreateShl(add, ConstantInt::get(Type::getInt32Ty(Ctx), 2), "shl");
+  return Builder.CreateIntrinsic(Intrinsic::amdgcn_ds_bpermute, {}, {shl, x});
+}
+
+Value *ballot(Module &M, IRBuilder<> &Builder, Intrinsic::ID from,
+              CallBase *CI) {
+  auto &Ctx = M.getContext();
+
+  Value *C =
+      Builder.CreateIntrinsic(Intrinsic::amdgcn_ballot,
+                              {Type::getInt64Ty(Ctx)}, {CI->getArgOperand(1)});
+
+  return Builder.CreateAnd(C, CI->getArgOperand(0));
+}
+
+Value *sync_threads(Module &M, IRBuilder<> &Builder, Intrinsic::ID from,
+                    CallBase *CI) {
+  auto &Ctx = M.getContext();
+  Builder.CreateIntrinsic(Intrinsic::amdgcn_s_barrier, {}, {});
+
+  Value *F = Builder.CreateFence(AtomicOrdering::SequentiallyConsistent,
+                                 Ctx.getOrInsertSyncScopeID("workgroup"));
+
+  return F;
+}
+
+Value *sync_lane(Module &M, IRBuilder<> &Builder, Intrinsic::ID from,
+                 CallBase *CI) {
+  return Builder.CreateIntrinsic(Intrinsic::amdgcn_wave_barrier, {}, {});
+}
+
+Value *thread_suspend(Module &M, IRBuilder<> &Builder, Intrinsic::ID from,
+                      CallBase *CI) {
+
+  auto &Ctx = M.getContext();
+  return Builder.CreateIntrinsic(Intrinsic::amdgcn_s_sleep, {},
+                                 {ConstantInt::get(Type::getInt32Ty(Ctx), 2)});
+}
+
+Value *dispatch_ptr(IRBuilder<> &Builder) {
+  CallInst *Call =
+      Builder.CreateIntrinsic(Intrinsic::amdgcn_dispatch_ptr, {}, {});
+  Call->addRetAttr(
+      Attribute::getWithDereferenceableBytes(Call->getContext(), 64));
+  Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(4)));
+  return Call;
+}
+
+Value *implicit_arg_ptr(IRBuilder<> &Builder) {
+  CallInst *Call =
+      Builder.CreateIntrinsic(Intrinsic::amdgcn_implicitarg_ptr, {}, {});
+  Call->addRetAttr(
+      Attribute::getWithDereferenceableBytes(Call->getContext(), 256));
+  Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(8)));
+  return Call;
+}
+
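+// Rough sketch, for orientation only, of the leading fields of the HSA
+// kernel_dispatch_packet struct that grid_size below reads (field offsets
+// per the HSA spec):
+//
+//   offset  0: uint16_t header;              offset  2: uint16_t setup;
+//   offset  4: uint16_t workgroup_size_x, _y, _z;  // offsets 4, 6, 8
+//   offset 12: uint32_t grid_size_x, _y, _z;       // offsets 12, 16, 20
+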
+template <unsigned Index>
+Value *grid_size(Module &M, IRBuilder<> &Builder, Intrinsic::ID, CallBase *) {
+  auto &Ctx = M.getContext();
+  const unsigned XOffset = 12;
+  auto *DP = dispatch_ptr(Builder);
+
+  // Indexing the HSA kernel_dispatch_packet struct.
+  auto *Offset = ConstantInt::get(Type::getInt32Ty(Ctx), XOffset + Index * 4);
+  auto *GEP = Builder.CreateGEP(Type::getInt8Ty(Ctx), DP, Offset);
+  auto *LD = Builder.CreateLoad(Type::getInt32Ty(Ctx), GEP);
+  llvm::MDBuilder MDB(Ctx);
+  // Known non-zero.
+  LD->setMetadata(llvm::LLVMContext::MD_range,
+                  MDB.createRange(APInt(32, 1), APInt::getZero(32)));
+  LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
+                  llvm::MDNode::get(Ctx, {}));
+  return LD;
+}
+
+template <int Index>
+Value *WGSize(Module &M, IRBuilder<> &Builder, Intrinsic::ID from,
+              CallBase *CI) {
+
+  // Note: "__oclc_ABI_version" is supposed to be emitted and initialized by
+  // clang during compilation of user code.
+  StringRef Name = "__oclc_ABI_version";
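+  // (Background: this global typically comes from clang or the ROCm device
+  // libraries and encodes the code object ABI version, letting one bitcode
+  // library serve several ABIs; under code object v5 the workgroup sizes are
+  // carried in the implicit kernel arguments, cf. implicit_arg_ptr above,
+  // rather than only in the dispatch packet.)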
----------------

jhuber6 wrote:

I hate this thing. https://github.com/llvm/llvm-project/pull/131190