llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-llvm-transforms Author: Matt Arsenault (arsenm) <details> <summary>Changes</summary> --- Full diff: https://github.com/llvm/llvm-project/pull/102007.diff 2 Files Affected: - (modified) llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp (+9-7) - (added) llvm/test/Transforms/InferAddressSpaces/AMDGPU/masked-load-store.ll (+68) ``````````diff diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp index 87b885447cc02..2ddf24be67702 100644 --- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp +++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp @@ -369,13 +369,13 @@ bool InferAddressSpacesImpl::rewriteIntrinsicOperands(IntrinsicInst *II, Value *OldV, Value *NewV) const { Module *M = II->getParent()->getParent()->getParent(); - - switch (II->getIntrinsicID()) { - case Intrinsic::objectsize: { + Intrinsic::ID IID = II->getIntrinsicID(); + switch (IID) { + case Intrinsic::objectsize: + case Intrinsic::masked_load: { Type *DestTy = II->getType(); Type *SrcTy = NewV->getType(); - Function *NewDecl = - Intrinsic::getDeclaration(M, II->getIntrinsicID(), {DestTy, SrcTy}); + Function *NewDecl = Intrinsic::getDeclaration(M, IID, {DestTy, SrcTy}); II->setArgOperand(0, NewV); II->setCalledFunction(NewDecl); return true; @@ -386,12 +386,12 @@ bool InferAddressSpacesImpl::rewriteIntrinsicOperands(IntrinsicInst *II, case Intrinsic::masked_gather: { Type *RetTy = II->getType(); Type *NewPtrTy = NewV->getType(); - Function *NewDecl = - Intrinsic::getDeclaration(M, II->getIntrinsicID(), {RetTy, NewPtrTy}); + Function *NewDecl = Intrinsic::getDeclaration(M, IID, {RetTy, NewPtrTy}); II->setArgOperand(0, NewV); II->setCalledFunction(NewDecl); return true; } + case Intrinsic::masked_store: case Intrinsic::masked_scatter: { Type *ValueTy = II->getOperand(0)->getType(); Type *NewPtrTy = NewV->getType(); @@ -429,11 +429,13 @@ void InferAddressSpacesImpl::collectRewritableIntrinsicOperands( appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(0), PostorderStack, Visited); break; + case Intrinsic::masked_load: case Intrinsic::masked_gather: case Intrinsic::prefetch: appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(0), PostorderStack, Visited); break; + case Intrinsic::masked_store: case Intrinsic::masked_scatter: appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(1), PostorderStack, Visited); diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/masked-load-store.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/masked-load-store.ll new file mode 100644 index 0000000000000..e14dfd055cbe8 --- /dev/null +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/masked-load-store.ll @@ -0,0 +1,68 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=infer-address-spaces %s | FileCheck %s + +define <32 x i32> @masked_load_v32i32_global_to_flat(ptr addrspace(1) %ptr, <32 x i1> %mask) { +; CHECK-LABEL: define <32 x i32> @masked_load_v32i32_global_to_flat( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <32 x i1> [[MASK:%.*]]) { +; CHECK-NEXT: [[LOAD:%.*]] = call <32 x i32> @llvm.masked.load.v32i32.p1(ptr addrspace(1) [[PTR]], i32 8, <32 x i1> [[MASK]], <32 x i32> zeroinitializer) +; CHECK-NEXT: ret <32 x i32> [[LOAD]] +; + %cast = addrspacecast ptr addrspace(1) %ptr to ptr + %load = call <32 x i32> @llvm.masked.load.v32i32.p0(ptr %cast, i32 8, <32 x i1> %mask, <32 x i32> zeroinitializer) + ret <32 x i32> %load +} +define <32 x i32> @masked_load_v32i32_local_to_flat(ptr addrspace(3) %ptr, <32 x i1> %mask) { +; CHECK-LABEL: define <32 x i32> @masked_load_v32i32_local_to_flat( +; CHECK-SAME: ptr addrspace(3) [[PTR:%.*]], <32 x i1> [[MASK:%.*]]) { +; CHECK-NEXT: [[LOAD:%.*]] = call <32 x i32> @llvm.masked.load.v32i32.p3(ptr addrspace(3) [[PTR]], i32 8, <32 x i1> [[MASK]], <32 x i32> zeroinitializer) +; CHECK-NEXT: ret <32 x i32> [[LOAD]] +; + %cast = addrspacecast ptr addrspace(3) %ptr to ptr + %load = call <32 x i32> @llvm.masked.load.v32i32.p0(ptr %cast, i32 8, <32 x i1> %mask, <32 x i32> zeroinitializer) + ret <32 x i32> %load +} + +define <32 x i32> @masked_load_v32i32_private_to_flat(ptr addrspace(5) %ptr, <32 x i1> %mask) { +; CHECK-LABEL: define <32 x i32> @masked_load_v32i32_private_to_flat( +; CHECK-SAME: ptr addrspace(5) [[PTR:%.*]], <32 x i1> [[MASK:%.*]]) { +; CHECK-NEXT: [[LOAD:%.*]] = call <32 x i32> @llvm.masked.load.v32i32.p5(ptr addrspace(5) [[PTR]], i32 8, <32 x i1> [[MASK]], <32 x i32> zeroinitializer) +; CHECK-NEXT: ret <32 x i32> [[LOAD]] +; + %cast = addrspacecast ptr addrspace(5) %ptr to ptr + %load = call <32 x i32> @llvm.masked.load.v32i32.p0(ptr %cast, i32 8, <32 x i1> %mask, <32 x i32> zeroinitializer) + ret <32 x i32> %load +} + +define void @masked_store_v32i32_global_to_flat(ptr addrspace(1) %ptr, <32 x i1> %mask) { +; CHECK-LABEL: define void @masked_store_v32i32_global_to_flat( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <32 x i1> [[MASK:%.*]]) { +; CHECK-NEXT: tail call void @llvm.masked.store.v32i32.p1(<32 x i32> zeroinitializer, ptr addrspace(1) [[PTR]], i32 128, <32 x i1> [[MASK]]) +; CHECK-NEXT: ret void +; + %cast = addrspacecast ptr addrspace(1) %ptr to ptr + tail call void @llvm.masked.store.v32i32.p0(<32 x i32> zeroinitializer, ptr %cast, i32 128, <32 x i1> %mask) + ret void +} + +define void @masked_store_v32i32_local_to_flat(ptr addrspace(3) %ptr, <32 x i1> %mask) { +; CHECK-LABEL: define void @masked_store_v32i32_local_to_flat( +; CHECK-SAME: ptr addrspace(3) [[PTR:%.*]], <32 x i1> [[MASK:%.*]]) { +; CHECK-NEXT: tail call void @llvm.masked.store.v32i32.p3(<32 x i32> zeroinitializer, ptr addrspace(3) [[PTR]], i32 128, <32 x i1> [[MASK]]) +; CHECK-NEXT: ret void +; + %cast = addrspacecast ptr addrspace(3) %ptr to ptr + tail call void @llvm.masked.store.v32i32.p0(<32 x i32> zeroinitializer, ptr %cast, i32 128, <32 x i1> %mask) + ret void +} + +define void @masked_store_v32i32_private_to_flat(ptr addrspace(5) %ptr, <32 x i1> %mask) { +; CHECK-LABEL: define void @masked_store_v32i32_private_to_flat( +; CHECK-SAME: ptr addrspace(5) [[PTR:%.*]], <32 x i1> [[MASK:%.*]]) { +; CHECK-NEXT: tail call void @llvm.masked.store.v32i32.p5(<32 x i32> zeroinitializer, ptr addrspace(5) [[PTR]], i32 128, <32 x i1> [[MASK]]) +; CHECK-NEXT: ret void +; + %cast = addrspacecast ptr addrspace(5) %ptr to ptr + tail call void @llvm.masked.store.v32i32.p0(<32 x i32> zeroinitializer, ptr %cast, i32 128, <32 x i1> %mask) + ret void +} + `````````` </details> https://github.com/llvm/llvm-project/pull/102007 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits