gandhi21299 updated this revision to Diff 374354.
gandhi21299 added a comment.
- Replaced a `cast` with a `dyn_cast` in `getCalleeFunction()`, since the global
returned by `Op.getGlobal()` is not always a `Function`.
- The `RUN` line on line 2 (`-mcpu=tonga`) caused 2 more scalar registers to be
used on tonga because @func_alias is not inlined, so I removed that RUN line.
- The `RUN` line on line 3 (`-march=r600`) generated a call instruction to an
aliased function, which is not supported on r600 (according to @arsenm), so I
removed that RUN line as well.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D109707/new/
https://reviews.llvm.org/D109707
Files:
clang/lib/Driver/ToolChains/Clang.cpp
clang/test/CodeGenCUDA/amdgpu-alias-undef-symbols.cu
llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
llvm/test/CodeGen/AMDGPU/inline-calls.ll
Index: llvm/test/CodeGen/AMDGPU/inline-calls.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/inline-calls.ll
+++ llvm/test/CodeGen/AMDGPU/inline-calls.ll
@@ -1,6 +1,4 @@
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck %s
; ALL-NOT: {{^}}func:
define internal i32 @func(i32 %a) {
@@ -18,8 +16,8 @@
ret void
}
-; CHECK-NOT: func_alias
-; ALL-NOT: func_alias
+; CHECK: func_alias
+; ALL: func_alias
@func_alias = alias i32 (i32), i32 (i32)* @func
; ALL: {{^}}kernel3:
Index: llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
@@ -62,7 +62,7 @@
return nullptr;
}
- return cast<Function>(Op.getGlobal());
+ return dyn_cast<Function>(Op.getGlobal());
}
static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI,
Index: llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
@@ -93,6 +93,8 @@
for (GlobalAlias &A : M.aliases()) {
if (Function* F = dyn_cast<Function>(A.getAliasee())) {
+ if (A.getLinkage() != GlobalValue::InternalLinkage)
+ continue;
A.replaceAllUsesWith(F);
AliasesToRemove.push_back(&A);
}
Index: clang/test/CodeGenCUDA/amdgpu-alias-undef-symbols.cu
===================================================================
--- /dev/null
+++ clang/test/CodeGenCUDA/amdgpu-alias-undef-symbols.cu
@@ -0,0 +1,15 @@
+// RUN: %clang --offload-arch=gfx906 --cuda-device-only -x hip -emit-llvm -S -o - %s \
+// RUN: -fgpu-rdc -O3 -mllvm -amdgpu-early-inline-all=true -mllvm -amdgpu-function-calls=false | \
+// RUN: FileCheck %s
+
+#include "Inputs/cuda.h"
+
+// CHECK: %struct.B = type { i8 }
+struct B {
+
+ // CHECK: @_ZN1BC1Ei = hidden unnamed_addr alias void (%struct.B*, i32), void (%struct.B*, i32)* @_ZN1BC2Ei
+ __device__ B(int x);
+};
+
+__device__ B::B(int x) {
+}
Index: clang/lib/Driver/ToolChains/Clang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -5084,9 +5084,9 @@
}
// Enable -mconstructor-aliases except on darwin, where we have to work around
- // a linker bug (see <rdar://problem/7651567>), and CUDA/AMDGPU device code,
- // where aliases aren't supported.
- if (!RawTriple.isOSDarwin() && !RawTriple.isNVPTX() && !RawTriple.isAMDGPU())
+ // a linker bug (see <rdar://problem/7651567>), and CUDA device code, where
+ // aliases aren't supported.
+ if (!RawTriple.isOSDarwin() && !RawTriple.isNVPTX())
CmdArgs.push_back("-mconstructor-aliases");
// Darwin's kernel doesn't support guard variables; just die if we
Index: llvm/test/CodeGen/AMDGPU/inline-calls.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/inline-calls.ll
+++ llvm/test/CodeGen/AMDGPU/inline-calls.ll
@@ -1,6 +1,4 @@
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck %s
; ALL-NOT: {{^}}func:
define internal i32 @func(i32 %a) {
@@ -18,8 +16,8 @@
ret void
}
-; CHECK-NOT: func_alias
-; ALL-NOT: func_alias
+; CHECK: func_alias
+; ALL: func_alias
@func_alias = alias i32 (i32), i32 (i32)* @func
; ALL: {{^}}kernel3:
Index: llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
@@ -62,7 +62,7 @@
return nullptr;
}
- return cast<Function>(Op.getGlobal());
+ return dyn_cast<Function>(Op.getGlobal());
}
static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI,
Index: llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
@@ -93,6 +93,8 @@
for (GlobalAlias &A : M.aliases()) {
if (Function* F = dyn_cast<Function>(A.getAliasee())) {
+ if (A.getLinkage() != GlobalValue::InternalLinkage)
+ continue;
A.replaceAllUsesWith(F);
AliasesToRemove.push_back(&A);
}
Index: clang/test/CodeGenCUDA/amdgpu-alias-undef-symbols.cu
===================================================================
--- /dev/null
+++ clang/test/CodeGenCUDA/amdgpu-alias-undef-symbols.cu
@@ -0,0 +1,15 @@
+// RUN: %clang --offload-arch=gfx906 --cuda-device-only -x hip -emit-llvm -S -o - %s \
+// RUN: -fgpu-rdc -O3 -mllvm -amdgpu-early-inline-all=true -mllvm -amdgpu-function-calls=false | \
+// RUN: FileCheck %s
+
+#include "Inputs/cuda.h"
+
+// CHECK: %struct.B = type { i8 }
+struct B {
+
+ // CHECK: @_ZN1BC1Ei = hidden unnamed_addr alias void (%struct.B*, i32), void (%struct.B*, i32)* @_ZN1BC2Ei
+ __device__ B(int x);
+};
+
+__device__ B::B(int x) {
+}
Index: clang/lib/Driver/ToolChains/Clang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -5084,9 +5084,9 @@
}
// Enable -mconstructor-aliases except on darwin, where we have to work around
- // a linker bug (see <rdar://problem/7651567>), and CUDA/AMDGPU device code,
- // where aliases aren't supported.
- if (!RawTriple.isOSDarwin() && !RawTriple.isNVPTX() && !RawTriple.isAMDGPU())
+ // a linker bug (see <rdar://problem/7651567>), and CUDA device code, where
+ // aliases aren't supported.
+ if (!RawTriple.isOSDarwin() && !RawTriple.isNVPTX())
CmdArgs.push_back("-mconstructor-aliases");
// Darwin's kernel doesn't support guard variables; just die if we
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits