date:20250211

[clang] Reapply "[Driver][ROCm][OpenMP] Fix default ockl linking for OpenMP."… (PR #126671)

2025-02-11 Thread Amit Kumar Pandey via cfe-commits


https://github.com/ampandey-1995 updated 
https://github.com/llvm/llvm-project/pull/126671

>From 8367c38f7f04273e3ab2451351b6db8d3f7dbc0c Mon Sep 17 00:00:00 2001
From: Amit Pandey 
Date: Tue, 11 Feb 2025 08:06:21 +0530
Subject: [PATCH 1/2] Reapply "[Driver][ROCm][OpenMP] Fix default ockl linking
 for OpenMP." (#126628)

  - This reverts commit 0c6c4a99936d4d39015c8d2332483f8db78f69cf.
  - Add '-mcode-object-version=5' to explicitly use code object
version 5 so that the output matches the 'FAIL' diagnostic.
  - Add a REQUIRES directive so the lit test only runs on platforms
with the x86_64 and amdgpu targets registered.
---
 clang/lib/Driver/ToolChains/AMDGPU.cpp| 15 +++--
 clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp  |  2 +-
 .../Driver/amdgpu-openmp-sanitize-options.c   | 57 +--
 clang/test/Driver/hip-sanitize-options.hip|  2 +-
 4 files changed, 39 insertions(+), 37 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp 
b/clang/lib/Driver/ToolChains/AMDGPU.cpp
index e66e5a32e58acdc..202198e96c01278 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -1014,7 +1014,12 @@ RocmInstallationDetector::getCommonBitcodeLibs(
 bool isOpenMP = false) const {
   llvm::SmallVector BCLibs;
 
-  auto GPUSanEnabled = [GPUSan]() { return std::get(GPUSan); };
+  // GPU Sanitizer currently only supports ASan and is enabled through host
+  // ASan.
+  auto GPUSanEnabled = [GPUSan]() {
+return std::get(GPUSan) &&
+   std::get(GPUSan).needsAsanRt();
+  };
   auto AddBCLib = [&](ToolChain::BitCodeLibraryInfo BCLib,
   bool Internalize = true) {
 BCLib.ShouldInternalize = Internalize;
@@ -1022,9 +1027,7 @@ RocmInstallationDetector::getCommonBitcodeLibs(
   };
   auto AddSanBCLibs = [&]() {
 if (GPUSanEnabled()) {
-  auto SanArgs = std::get(GPUSan);
-  if (SanArgs.needsAsanRt())
-AddBCLib(getAsanRTLPath(), false);
+  AddBCLib(getAsanRTLPath(), false);
 }
   };
 
@@ -1066,7 +1069,7 @@ ROCMToolChain::getCommonDeviceLibNames(const 
llvm::opt::ArgList &DriverArgs,
   // them all?
   std::tuple GPUSan(
   DriverArgs.hasFlag(options::OPT_fgpu_sanitize,
- options::OPT_fno_gpu_sanitize, false),
+ options::OPT_fno_gpu_sanitize, true),
   getSanitizerArgs(DriverArgs));
   bool DAZ = DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
 options::OPT_fno_gpu_flush_denormals_to_zero,
@@ -1099,7 +1102,7 @@ bool AMDGPUToolChain::shouldSkipSanitizeOption(
 return false;
 
   if (!DriverArgs.hasFlag(options::OPT_fgpu_sanitize,
-  options::OPT_fno_gpu_sanitize, false))
+  options::OPT_fno_gpu_sanitize, true))
 return true;
 
   auto &Diags = TC.getDriver().getDiags();
diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp 
b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
index 00bf9c7338edd11..aba79f5fa6fa7b3 100644
--- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -68,7 +68,7 @@ llvm::opt::DerivedArgList 
*AMDGPUOpenMPToolChain::TranslateArgs(
 Action::OffloadKind DeviceOffloadKind) const {
   DerivedArgList *DAL =
   HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
-  if (!DAL)
+  if (!DAL || Args.hasArg(options::OPT_fsanitize_EQ))
 DAL = new DerivedArgList(Args.getBaseArgs());
 
   const OptTable &Opts = getDriver().getOpts();
diff --git a/clang/test/Driver/amdgpu-openmp-sanitize-options.c 
b/clang/test/Driver/amdgpu-openmp-sanitize-options.c
index c28a758bfc0c5e8..f6a8a7dc57cccd9 100644
--- a/clang/test/Driver/amdgpu-openmp-sanitize-options.c
+++ b/clang/test/Driver/amdgpu-openmp-sanitize-options.c
@@ -1,11 +1,11 @@
 // REQUIRES: x86-registered-target, amdgpu-registered-target
 
 // Fail on invalid ROCm Path.
-// RUN:   not %clang -no-canonical-prefixes -### 
--target=x86_64-unknown-linux-gnu -fopenmp=libomp --offload-arch=gfx908:xnack+ 
-fsanitize=address -fgpu-sanitize -nogpuinc --rocm-path=%S/Inputs/rocm-invalid  
%s 2>&1 \
+// RUN:   not %clang -no-canonical-prefixes -### -mcode-object-version=5 
--target=x86_64-unknown-linux-gnu -fopenmp=libomp --offload-arch=gfx908:xnack+ 
-fsanitize=address -fgpu-sanitize -nogpuinc --rocm-path=%S/Inputs/rocm-invalid  
%s 2>&1 \
 // RUN:   | FileCheck --check-prefix=FAIL %s
 
 // Enable multiple sanitizer's apart from ASan with invalid rocm-path.
-// RUN:   not %clang -no-canonical-prefixes -### 
--target=x86_64-unknown-linux-gnu -fopenmp=libomp --offload-arch=gfx908:xnack+ 
-fsanitize=address -fsanitize=leak -fgpu-sanitize 
--rocm-path=%S/Inputs/rocm-invalid -nogpuinc  %s 2>&1 \
+// RUN:   not %clang -no-canonical-prefixes -### -mcode-object-version=5 
--target=x86_64-unknown-linux-gnu -fopenmp=libomp --offload-arch=gfx908:xnack+ 
-fsanitize=address -fsanitize=leak -fgpu-sanitize 
--rocm-path=%S/Inputs/rocm-invalid -nogpuinc  %s 2>&1 \
 // R

[clang] Reapply "[Driver][ROCm][OpenMP] Fix default ockl linking for OpenMP."… (PR #126671)

2025-02-11 Thread Amit Kumar Pandey via cfe-commits


https://github.com/ampandey-1995 updated 
https://github.com/llvm/llvm-project/pull/126671

>From 8367c38f7f04273e3ab2451351b6db8d3f7dbc0c Mon Sep 17 00:00:00 2001
From: Amit Pandey 
Date: Tue, 11 Feb 2025 08:06:21 +0530
Subject: [PATCH 1/2] Reapply "[Driver][ROCm][OpenMP] Fix default ockl linking
 for OpenMP." (#126628)

  - This reverts commit 0c6c4a99936d4d39015c8d2332483f8db78f69cf.
  - Add '-mcode-object-version=5' to explicitly use code object
version 5 so that the output matches the 'FAIL' diagnostic.
  - Add a REQUIRES directive so the lit test only runs on platforms
with the x86_64 and amdgpu targets registered.
---
 clang/lib/Driver/ToolChains/AMDGPU.cpp| 15 +++--
 clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp  |  2 +-
 .../Driver/amdgpu-openmp-sanitize-options.c   | 57 +--
 clang/test/Driver/hip-sanitize-options.hip|  2 +-
 4 files changed, 39 insertions(+), 37 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp 
b/clang/lib/Driver/ToolChains/AMDGPU.cpp
index e66e5a32e58acd..202198e96c0127 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -1014,7 +1014,12 @@ RocmInstallationDetector::getCommonBitcodeLibs(
 bool isOpenMP = false) const {
   llvm::SmallVector BCLibs;
 
-  auto GPUSanEnabled = [GPUSan]() { return std::get(GPUSan); };
+  // GPU Sanitizer currently only supports ASan and is enabled through host
+  // ASan.
+  auto GPUSanEnabled = [GPUSan]() {
+return std::get(GPUSan) &&
+   std::get(GPUSan).needsAsanRt();
+  };
   auto AddBCLib = [&](ToolChain::BitCodeLibraryInfo BCLib,
   bool Internalize = true) {
 BCLib.ShouldInternalize = Internalize;
@@ -1022,9 +1027,7 @@ RocmInstallationDetector::getCommonBitcodeLibs(
   };
   auto AddSanBCLibs = [&]() {
 if (GPUSanEnabled()) {
-  auto SanArgs = std::get(GPUSan);
-  if (SanArgs.needsAsanRt())
-AddBCLib(getAsanRTLPath(), false);
+  AddBCLib(getAsanRTLPath(), false);
 }
   };
 
@@ -1066,7 +1069,7 @@ ROCMToolChain::getCommonDeviceLibNames(const 
llvm::opt::ArgList &DriverArgs,
   // them all?
   std::tuple GPUSan(
   DriverArgs.hasFlag(options::OPT_fgpu_sanitize,
- options::OPT_fno_gpu_sanitize, false),
+ options::OPT_fno_gpu_sanitize, true),
   getSanitizerArgs(DriverArgs));
   bool DAZ = DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
 options::OPT_fno_gpu_flush_denormals_to_zero,
@@ -1099,7 +1102,7 @@ bool AMDGPUToolChain::shouldSkipSanitizeOption(
 return false;
 
   if (!DriverArgs.hasFlag(options::OPT_fgpu_sanitize,
-  options::OPT_fno_gpu_sanitize, false))
+  options::OPT_fno_gpu_sanitize, true))
 return true;
 
   auto &Diags = TC.getDriver().getDiags();
diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp 
b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
index 00bf9c7338edd1..aba79f5fa6fa7b 100644
--- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -68,7 +68,7 @@ llvm::opt::DerivedArgList 
*AMDGPUOpenMPToolChain::TranslateArgs(
 Action::OffloadKind DeviceOffloadKind) const {
   DerivedArgList *DAL =
   HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
-  if (!DAL)
+  if (!DAL || Args.hasArg(options::OPT_fsanitize_EQ))
 DAL = new DerivedArgList(Args.getBaseArgs());
 
   const OptTable &Opts = getDriver().getOpts();
diff --git a/clang/test/Driver/amdgpu-openmp-sanitize-options.c 
b/clang/test/Driver/amdgpu-openmp-sanitize-options.c
index c28a758bfc0c5e..f6a8a7dc57cccd 100644
--- a/clang/test/Driver/amdgpu-openmp-sanitize-options.c
+++ b/clang/test/Driver/amdgpu-openmp-sanitize-options.c
@@ -1,11 +1,11 @@
 // REQUIRES: x86-registered-target, amdgpu-registered-target
 
 // Fail on invalid ROCm Path.
-// RUN:   not %clang -no-canonical-prefixes -### 
--target=x86_64-unknown-linux-gnu -fopenmp=libomp --offload-arch=gfx908:xnack+ 
-fsanitize=address -fgpu-sanitize -nogpuinc --rocm-path=%S/Inputs/rocm-invalid  
%s 2>&1 \
+// RUN:   not %clang -no-canonical-prefixes -### -mcode-object-version=5 
--target=x86_64-unknown-linux-gnu -fopenmp=libomp --offload-arch=gfx908:xnack+ 
-fsanitize=address -fgpu-sanitize -nogpuinc --rocm-path=%S/Inputs/rocm-invalid  
%s 2>&1 \
 // RUN:   | FileCheck --check-prefix=FAIL %s
 
 // Enable multiple sanitizer's apart from ASan with invalid rocm-path.
-// RUN:   not %clang -no-canonical-prefixes -### 
--target=x86_64-unknown-linux-gnu -fopenmp=libomp --offload-arch=gfx908:xnack+ 
-fsanitize=address -fsanitize=leak -fgpu-sanitize 
--rocm-path=%S/Inputs/rocm-invalid -nogpuinc  %s 2>&1 \
+// RUN:   not %clang -no-canonical-prefixes -### -mcode-object-version=5 
--target=x86_64-unknown-linux-gnu -fopenmp=libomp --offload-arch=gfx908:xnack+ 
-fsanitize=address -fsanitize=leak -fgpu-sanitize 
--rocm-path=%S/Inputs/rocm-invalid -nogpuinc  %s 2>&1 \
 // RUN:

[clang] [Offload] Treat an empty packager architecture as 'generic' (PR #126655)

2025-02-11 Thread Joseph Huber via cfe-commits


https://github.com/jhuber6 updated 
https://github.com/llvm/llvm-project/pull/126655

>From 050abedb87283e8ee31a95366866fa5c22d1719e Mon Sep 17 00:00:00 2001
From: Joseph Huber 
Date: Mon, 10 Feb 2025 20:30:41 -0600
Subject: [PATCH] [Offload] Treat an empty packager architecture as 'generic'

Summary:
The `clang-offload-packager` records the architecture of the job.
Currently there are cases where this will be empty: SYCL, CPU, and when
the user manually overrides it to be empty. In these cases we should
always consider it 'generic'. Adding this string both makes it clear how
it behaves and triggers the special handling for this architecture,
allowing it to bind to different architectures.
---
 clang/lib/Driver/ToolChains/Clang.cpp  |  2 +-
 clang/test/Driver/sycl-offload-jit.cpp |  2 +-
 .../clang-linker-wrapper/ClangLinkerWrapper.cpp| 14 --
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/Clang.cpp 
b/clang/lib/Driver/ToolChains/Clang.cpp
index ea376ac00d9108..5deafa2ad0f4a6 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -9163,7 +9163,7 @@ void OffloadPackager::ConstructJob(Compilation &C, const 
JobAction &JA,
 SmallVector Parts{
 "file=" + File.str(),
 "triple=" + TC->getTripleString(),
-"arch=" + Arch.str(),
+"arch=" + (Arch.empty() ? "generic" : Arch.str()),
 "kind=" + Kind.str(),
 };
 
diff --git a/clang/test/Driver/sycl-offload-jit.cpp 
b/clang/test/Driver/sycl-offload-jit.cpp
index eb192e08a3bc0c..e040f4ded18e92 100644
--- a/clang/test/Driver/sycl-offload-jit.cpp
+++ b/clang/test/Driver/sycl-offload-jit.cpp
@@ -27,7 +27,7 @@
 // CHK-DEVICE-TRIPLE-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
 // CHK-DEVICE-TRIPLE-SAME: "-fsycl-is-device"
 // CHK-DEVICE-TRIPLE-SAME: "-O2"
-// CHK-DEVICE-TRIPLE: clang-offload-packager{{.*}} 
"--image=file={{.*}}.bc,triple=spirv64-unknown-unknown,arch=,kind=sycl"
+// CHK-DEVICE-TRIPLE: clang-offload-packager{{.*}} 
"--image=file={{.*}}.bc,triple=spirv64-unknown-unknown,arch=generic,kind=sycl"
 
 /// Check -fsycl-is-device is passed when compiling for the device.
 /// Check -fsycl-is-host is passed when compiling for host.
diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp 
b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
index b189cfee674dd3..aa43b2f5f2a1b3 100644
--- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -474,8 +474,6 @@ Expected clang(ArrayRef InputFiles, 
const ArgList &Args) {
 
   const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ));
   StringRef Arch = Args.getLastArgValue(OPT_arch_EQ);
-  if (Arch.empty())
-Arch = "native";
   // Create a new file to write the linked device image to. Assume that the
   // input filename already has the device and architecture.
   auto TempFileOrErr =
@@ -492,11 +490,14 @@ Expected clang(ArrayRef InputFiles, 
const ArgList &Args) {
   "-o",
   *TempFileOrErr,
   Args.MakeArgString("--target=" + Triple.getTriple()),
-  Triple.isAMDGPU() ? Args.MakeArgString("-mcpu=" + Arch)
-: Args.MakeArgString("-march=" + Arch),
-  Args.MakeArgString("-" + OptLevel),
   };
 
+  if (!Arch.empty())
+Triple.isAMDGPU() ? CmdArgs.push_back(Args.MakeArgString("-mcpu=" + Arch))
+  : CmdArgs.push_back(Args.MakeArgString("-march=" + 
Arch));
+
+  CmdArgs.push_back(Args.MakeArgString("-" + OptLevel));
+
   // Forward all of the `--offload-opt` and similar options to the device.
   CmdArgs.push_back("-flto");
   for (auto &Arg : Args.filtered(OPT_offload_opt_eq_minus, OPT_mllvm))
@@ -826,8 +827,9 @@ DerivedArgList getLinkerArgs(ArrayRef Input,
 
   // Set the subarchitecture and target triple for this compilation.
   const OptTable &Tbl = getOptTable();
+  StringRef Arch = Args.MakeArgString(Input.front().getBinary()->getArch());
   DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_arch_EQ),
-   Args.MakeArgString(Input.front().getBinary()->getArch()));
+   Arch == "generic" ? "" : Arch);
   DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_triple_EQ),
Args.MakeArgString(Input.front().getBinary()->getTriple()));
 

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang] Force AttributedStmtClass to not be scope parents (PR #125370)

2025-02-11 Thread Aaron Ballman via cfe-commits


https://github.com/AaronBallman approved this pull request.

LGTM, thank you for the fix!

https://github.com/llvm/llvm-project/pull/125370
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang] Force AttributedStmtClass to not be scope parents (PR #125370)

2025-02-11 Thread via cfe-commits


github-actions[bot] wrote:



@YutongZhuu Congratulations on having your first Pull Request (PR) merged into 
the LLVM Project!

Your changes will be combined with recent changes from other authors, then 
tested by our [build bots](https://lab.llvm.org/buildbot/). If there is a 
problem with a build, you may receive a report in an email or a comment on this 
PR.

Please check whether problems have been caused by your change specifically, as 
the builds can include changes from many authors. It is not uncommon for your 
change to be included in a build that fails due to someone else's changes, or 
infrastructure issues.

How to do this, and the rest of the post-merge process, is covered in detail 
[here](https://llvm.org/docs/MyFirstTypoFix.html#myfirsttypofix-issues-after-landing-your-pr).

If your change does cause a problem, it may be reverted, or you can revert it 
yourself. This is a normal part of [LLVM 
development](https://llvm.org/docs/DeveloperPolicy.html#patch-reversion-policy).
 You can fix your changes and open a new PR to merge them again.

If you don't get any reports, no action is required from you. Your changes are 
working as expected, well done!


https://github.com/llvm/llvm-project/pull/125370
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] 8d902f2 - [clang] Force AttributedStmtClass to not be scope parents (#125370)

2025-02-11 Thread via cfe-commits


Author: Yutong Zhu
Date: 2025-02-11T08:53:19-05:00
New Revision: 8d902f2cb0bc8825bcde911897e99aadbd5d28e9

URL: 
https://github.com/llvm/llvm-project/commit/8d902f2cb0bc8825bcde911897e99aadbd5d28e9
DIFF: 
https://github.com/llvm/llvm-project/commit/8d902f2cb0bc8825bcde911897e99aadbd5d28e9.diff

LOG: [clang] Force AttributedStmtClass to not be scope parents  (#125370)

Example from the issue:
```c++
void Func(int x) {
switch (x) {
[[likely]] case 0:
case 1:
int i = 3;
case 2:
break;
}
}
```

Clang checks whether ``case 2`` is reachable by checking its scope. The
variable declaration should create a scope containing ``case 2``, but
due to the structure of the AST, ``case 2`` gets the scope of the
``likely`` statement rather than that of ``int i = 3;``. Therefore, I
changed this code to force attributed statements not to be scope parents.

Fixes #84072

Added: 
clang/test/CXX/stmt.stmt/stmt.select/stmt.switch/p4.cpp

Modified: 
clang/docs/ReleaseNotes.rst
clang/lib/Sema/JumpDiagnostics.cpp

Removed: 




diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 71cf8d48d2d57..369d9e9de7d16 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -129,6 +129,8 @@ Improvements to Clang's diagnostics
   which are supposed to only exist once per program, but may get duplicated 
when
   built into a shared library.
 - Fixed a bug where Clang's Analysis did not correctly model the destructor 
behavior of ``union`` members (#GH119415).
+- A statement attribute applied to a ``case`` label no longer suppresses
+  'bypassing variable initialization' diagnostics (#84072).
 
 Improvements to Clang's time-trace
 --

diff  --git a/clang/lib/Sema/JumpDiagnostics.cpp 
b/clang/lib/Sema/JumpDiagnostics.cpp
index ffbb9bc0bfe7c..edcfffa2b3894 100644
--- a/clang/lib/Sema/JumpDiagnostics.cpp
+++ b/clang/lib/Sema/JumpDiagnostics.cpp
@@ -597,15 +597,6 @@ void JumpScopeChecker::BuildScopeInformation(Stmt *S,
 LabelAndGotoScopes[S] = ParentScope;
 break;
 
-  case Stmt::AttributedStmtClass: {
-AttributedStmt *AS = cast(S);
-if (GetMustTailAttr(AS)) {
-  LabelAndGotoScopes[AS] = ParentScope;
-  MustTailStmts.push_back(AS);
-}
-break;
-  }
-
   case Stmt::OpenACCComputeConstructClass: {
 unsigned NewParentScope = Scopes.size();
 OpenACCComputeConstruct *CC = cast(S);
@@ -649,7 +640,7 @@ void JumpScopeChecker::BuildScopeInformation(Stmt *S,
   continue;
 }
 
-// Cases, labels, and defaults aren't "scope parents".  It's also
+// Cases, labels, attributes, and defaults aren't "scope parents".  It's 
also
 // important to handle these iteratively instead of recursively in
 // order to avoid blowing out the stack.
 while (true) {
@@ -658,7 +649,13 @@ void JumpScopeChecker::BuildScopeInformation(Stmt *S,
 Next = SC->getSubStmt();
   else if (LabelStmt *LS = dyn_cast(SubStmt))
 Next = LS->getSubStmt();
-  else
+  else if (AttributedStmt *AS = dyn_cast(SubStmt)) {
+if (GetMustTailAttr(AS)) {
+  LabelAndGotoScopes[AS] = ParentScope;
+  MustTailStmts.push_back(AS);
+}
+Next = AS->getSubStmt();
+  } else
 break;
 
   LabelAndGotoScopes[SubStmt] = ParentScope;

diff  --git a/clang/test/CXX/stmt.stmt/stmt.select/stmt.switch/p4.cpp 
b/clang/test/CXX/stmt.stmt/stmt.select/stmt.switch/p4.cpp
new file mode 100644
index 0..e816da1803694
--- /dev/null
+++ b/clang/test/CXX/stmt.stmt/stmt.select/stmt.switch/p4.cpp
@@ -0,0 +1,11 @@
+// RUN: %clang -fsyntax-only -std=c++20 -Xclang -verify %s
+
+void Func(int x) {
+switch (x) {
+[[likely]] case 0:
+case 1: 
+int i = 3; // expected-note {{jump bypasses variable 
initialization}}
+case 2: // expected-error {{cannot jump from switch statement to this 
case label}}
+break;
+}
+}



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang] Force AttributedStmtClass to not be scope parents (PR #125370)

2025-02-11 Thread Aaron Ballman via cfe-commits


https://github.com/AaronBallman closed 
https://github.com/llvm/llvm-project/pull/125370
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang] Force AttributedStmtClass to not be scope parents (PR #125370)

2025-02-11 Thread LLVM Continuous Integration via cfe-commits


llvm-ci wrote:

LLVM Buildbot has detected a new failure on builder 
`openmp-offload-amdgpu-runtime` running on `omp-vega20-0` while building 
`clang` at step 7 "Add check check-offload".

Full details are available at: 
https://lab.llvm.org/buildbot/#/builders/30/builds/15573


Here is the relevant piece of the build log for the reference

```
Step 7 (Add check check-offload) failure: test (failure)
 TEST 'libomptarget :: amdgcn-amd-amdhsa :: 
api/omp_host_call.c' FAILED 
Exit Code: 2

Command Output (stdout):
--
# RUN: at line 1
/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./bin/clang 
-fopenmp -I 
/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.src/offload/test -I 
/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src
 -L 
/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload
 -L /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./lib -L 
/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src
  -nogpulib 
-Wl,-rpath,/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload
 
-Wl,-rpath,/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src
 -Wl,-rpath,/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./lib 
 -fopenmp-targets=amdgcn-amd-amdhsa 
/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.src/offload/test/api/omp_host_call.c
 -o 
/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload/test/amdgcn-amd-amdhsa/api/Output/omp_host_call.c.tmp
 
/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./lib/libomptarget.devicertl.a
 && 
/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload/test/amdgcn-amd-amdhsa/api/Output/omp_host_call.c.tmp
 | 
/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./bin/FileCheck 
/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.src/offload/test/api/omp_host_call.c
# executed command: 
/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./bin/clang 
-fopenmp -I 
/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.src/offload/test -I 
/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src
 -L 
/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload
 -L /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./lib -L 
/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src
 -nogpulib 
-Wl,-rpath,/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload
 
-Wl,-rpath,/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src
 -Wl,-rpath,/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./lib 
-fopenmp-targets=amdgcn-amd-amdhsa 
/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.src/offload/test/api/omp_host_call.c
 -o 
/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload/test/amdgcn-amd-amdhsa/api/Output/omp_host_call.c.tmp
 
/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./lib/libomptarget.devicertl.a
# note: command had no output on stdout or stderr
# executed command: 
/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload/test/amdgcn-amd-amdhsa/api/Output/omp_host_call.c.tmp
# note: command had no output on stdout or stderr
# error: command failed with exit status: -11
# executed command: 
/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./bin/FileCheck 
/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.src/offload/test/api/omp_host_call.c
# .---command stderr
# | FileCheck error: '' is empty.
# | FileCheck command line:  
/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./bin/FileCheck 
/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.src/offload/test/api/omp_host_call.c
# `-
# error: command failed with exit status: 2

--




```



https://github.com/llvm/llvm-project/pull/125370
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Clang] allow restrict qualifier for array types with pointer types as element types (PR #120896)

2025-02-11 Thread Oleksandr T. via cfe-commits



@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 -std=c23 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -std=c17 -fsyntax-only -pedantic -verify=pedantic %s
+// RUN: %clang_cc1 -std=c17 -fsyntax-only -Wpre-c2x-compat 
-verify=pre-c2x-compat %s
+
+typedef int (*T1)[2];
+restrict T1 t1; // pedantic-warning {{'restrict' qualifier on pointers 
to arrays is a C23 extension}} \

a-tarasyuk wrote:

@efriedma-quic @AaronBallman were you referring to the following 
warning/extension diagnostics? Should these kinds of diagnostics be considered 
in cases where errors occur?




https://github.com/llvm/llvm-project/pull/120896
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] Update TypeSanitizer.rst (PR #126721)

2025-02-11 Thread via cfe-commits


github-actions[bot] wrote:



Thank you for submitting a Pull Request (PR) to the LLVM Project!

This PR will be automatically labeled and the relevant teams will be notified.

If you wish to, you can add reviewers by using the "Reviewers" section on this 
page.

If this is not working for you, it is probably because you do not have write 
permissions for the repository. In which case you can instead tag reviewers by 
name in a comment by using `@` followed by their GitHub username.

If you have received no comments on your PR for a week, you can request a 
review by "ping"ing the PR by adding a comment “Ping”. The common courtesy 
"ping" rate is once a week. Please remember that you are asking for valuable 
time from other developers.

If you have further questions, they may be answered by the [LLVM GitHub User 
Guide](https://llvm.org/docs/GitHub.html).

You can also ask questions in a comment on this PR, on the [LLVM 
Discord](https://discord.com/invite/xS7Z362) or on the 
[forums](https://discourse.llvm.org/).

https://github.com/llvm/llvm-project/pull/126721
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] Reapply "[Driver][ROCm][OpenMP] Fix default ockl linking for OpenMP."… (PR #126671)

2025-02-11 Thread Amit Kumar Pandey via cfe-commits


https://github.com/ampandey-1995 updated 
https://github.com/llvm/llvm-project/pull/126671

>From 8367c38f7f04273e3ab2451351b6db8d3f7dbc0c Mon Sep 17 00:00:00 2001
From: Amit Pandey 
Date: Tue, 11 Feb 2025 08:06:21 +0530
Subject: [PATCH] Reapply "[Driver][ROCm][OpenMP] Fix default ockl linking for
 OpenMP." (#126628)

  - This reverts commit 0c6c4a99936d4d39015c8d2332483f8db78f69cf.
  - Add '-mcode-object-version=5' to explicitly use code object
version 5 so that the output matches the 'FAIL' diagnostic.
  - Add a REQUIRES directive so the lit test only runs on platforms
with the x86_64 and amdgpu targets registered.
---
 clang/lib/Driver/ToolChains/AMDGPU.cpp| 15 +++--
 clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp  |  2 +-
 .../Driver/amdgpu-openmp-sanitize-options.c   | 57 +--
 clang/test/Driver/hip-sanitize-options.hip|  2 +-
 4 files changed, 39 insertions(+), 37 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp 
b/clang/lib/Driver/ToolChains/AMDGPU.cpp
index e66e5a32e58acdc..202198e96c01278 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -1014,7 +1014,12 @@ RocmInstallationDetector::getCommonBitcodeLibs(
 bool isOpenMP = false) const {
   llvm::SmallVector BCLibs;
 
-  auto GPUSanEnabled = [GPUSan]() { return std::get(GPUSan); };
+  // GPU Sanitizer currently only supports ASan and is enabled through host
+  // ASan.
+  auto GPUSanEnabled = [GPUSan]() {
+return std::get(GPUSan) &&
+   std::get(GPUSan).needsAsanRt();
+  };
   auto AddBCLib = [&](ToolChain::BitCodeLibraryInfo BCLib,
   bool Internalize = true) {
 BCLib.ShouldInternalize = Internalize;
@@ -1022,9 +1027,7 @@ RocmInstallationDetector::getCommonBitcodeLibs(
   };
   auto AddSanBCLibs = [&]() {
 if (GPUSanEnabled()) {
-  auto SanArgs = std::get(GPUSan);
-  if (SanArgs.needsAsanRt())
-AddBCLib(getAsanRTLPath(), false);
+  AddBCLib(getAsanRTLPath(), false);
 }
   };
 
@@ -1066,7 +1069,7 @@ ROCMToolChain::getCommonDeviceLibNames(const 
llvm::opt::ArgList &DriverArgs,
   // them all?
   std::tuple GPUSan(
   DriverArgs.hasFlag(options::OPT_fgpu_sanitize,
- options::OPT_fno_gpu_sanitize, false),
+ options::OPT_fno_gpu_sanitize, true),
   getSanitizerArgs(DriverArgs));
   bool DAZ = DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
 options::OPT_fno_gpu_flush_denormals_to_zero,
@@ -1099,7 +1102,7 @@ bool AMDGPUToolChain::shouldSkipSanitizeOption(
 return false;
 
   if (!DriverArgs.hasFlag(options::OPT_fgpu_sanitize,
-  options::OPT_fno_gpu_sanitize, false))
+  options::OPT_fno_gpu_sanitize, true))
 return true;
 
   auto &Diags = TC.getDriver().getDiags();
diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp 
b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
index 00bf9c7338edd11..aba79f5fa6fa7b3 100644
--- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -68,7 +68,7 @@ llvm::opt::DerivedArgList 
*AMDGPUOpenMPToolChain::TranslateArgs(
 Action::OffloadKind DeviceOffloadKind) const {
   DerivedArgList *DAL =
   HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
-  if (!DAL)
+  if (!DAL || Args.hasArg(options::OPT_fsanitize_EQ))
 DAL = new DerivedArgList(Args.getBaseArgs());
 
   const OptTable &Opts = getDriver().getOpts();
diff --git a/clang/test/Driver/amdgpu-openmp-sanitize-options.c 
b/clang/test/Driver/amdgpu-openmp-sanitize-options.c
index c28a758bfc0c5e8..f6a8a7dc57cccd9 100644
--- a/clang/test/Driver/amdgpu-openmp-sanitize-options.c
+++ b/clang/test/Driver/amdgpu-openmp-sanitize-options.c
@@ -1,11 +1,11 @@
 // REQUIRES: x86-registered-target, amdgpu-registered-target
 
 // Fail on invalid ROCm Path.
-// RUN:   not %clang -no-canonical-prefixes -### 
--target=x86_64-unknown-linux-gnu -fopenmp=libomp --offload-arch=gfx908:xnack+ 
-fsanitize=address -fgpu-sanitize -nogpuinc --rocm-path=%S/Inputs/rocm-invalid  
%s 2>&1 \
+// RUN:   not %clang -no-canonical-prefixes -### -mcode-object-version=5 
--target=x86_64-unknown-linux-gnu -fopenmp=libomp --offload-arch=gfx908:xnack+ 
-fsanitize=address -fgpu-sanitize -nogpuinc --rocm-path=%S/Inputs/rocm-invalid  
%s 2>&1 \
 // RUN:   | FileCheck --check-prefix=FAIL %s
 
 // Enable multiple sanitizer's apart from ASan with invalid rocm-path.
-// RUN:   not %clang -no-canonical-prefixes -### 
--target=x86_64-unknown-linux-gnu -fopenmp=libomp --offload-arch=gfx908:xnack+ 
-fsanitize=address -fsanitize=leak -fgpu-sanitize 
--rocm-path=%S/Inputs/rocm-invalid -nogpuinc  %s 2>&1 \
+// RUN:   not %clang -no-canonical-prefixes -### -mcode-object-version=5 
--target=x86_64-unknown-linux-gnu -fopenmp=libomp --offload-arch=gfx908:xnack+ 
-fsanitize=address -fsanitize=leak -fgpu-sanitize 
--rocm-path=%S/Inputs/rocm-invalid -nogpuinc  %s 2>&1 \
 // RUN:

[clang] Update TypeSanitizer.rst (PR #126721)

2025-02-11 Thread via cfe-commits


llvmbot wrote:




@llvm/pr-subscribers-clang

Author: None (sitrin)


Changes

Fixes issue #126719. The word `table` is now in place of the word 
`tale`.

---
Full diff: https://github.com/llvm/llvm-project/pull/126721.diff


1 Files Affected:

- (modified) clang/docs/TypeSanitizer.rst (+1-1) 


``diff
diff --git a/clang/docs/TypeSanitizer.rst b/clang/docs/TypeSanitizer.rst
index 4d1dfc23a6c51e..3c683a6c24bb49 100644
--- a/clang/docs/TypeSanitizer.rst
+++ b/clang/docs/TypeSanitizer.rst
@@ -27,7 +27,7 @@ reduce these impacts.
 The TypeSanitizer Algorithm
 ===
 For each TBAA type-access descriptor, encoded in LLVM IR using TBAA Metadata, 
the instrumentation 
-pass generates descriptor tales. Thus there is a unique pointer to each type 
(and access descriptor).
+pass generates descriptor tables. Thus there is a unique pointer to each type 
(and access descriptor).
 These tables are comdat (except for anonymous-namespace types), so the pointer 
values are unique 
 across the program.
 

``




https://github.com/llvm/llvm-project/pull/126721
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] Reapply "[Driver][ROCm][OpenMP] Fix default ockl linking for OpenMP."… (PR #126671)

2025-02-11 Thread Matt Arsenault via cfe-commits



@@ -1014,17 +1014,20 @@ RocmInstallationDetector::getCommonBitcodeLibs(
 bool isOpenMP = false) const {
   llvm::SmallVector BCLibs;
 
-  auto GPUSanEnabled = [GPUSan]() { return std::get(GPUSan); };
+  // GPU Sanitizer currently only supports ASan and is enabled through host
+  // ASan.
+  auto GPUSanEnabled = [GPUSan]() {
+return std::get(GPUSan) &&
+   std::get(GPUSan).needsAsanRt();
+  };

arsenm wrote:

Just remove the lambda. It's overcomplicating things for a hypothetical future.

https://github.com/llvm/llvm-project/pull/126671
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] Thread Safety Analysis: Support warning on taking address of guarded variables (PR #123063)

2025-02-11 Thread Aaron Puchert via cfe-commits


https://github.com/aaronpuchert commented:

I've taken a brief look at the [Linux kernel 
changes](https://github.com/google/kernel-sanitizers/compare/2014c95afecee3e76ca4a56956a936e23283f05b...cap-analysis).
 I didn't check how many variables are affected by `__rcu_guarded`, but 
otherwise there seem to be maybe two dozen guarded variables. If you really 
didn't encounter any false positives that's not bad.

However, I'd still prefer if we could mark this as experimental, because it's a 
bit coarse:
* The address-of operation isn't interesting by itself, it merely happens to be 
required to do pass-by-reference in C.
* The equivalent of passing a `pt_guarded_by` variable by value doesn't seem to 
warn.
* The actual access itself is not checked, whether explicit or assumed behind a 
function call.

In the long term, a better approach might be:
* Extend alias tracking to pointers, perhaps restricted to those that don't 
change value.
* Warn when passing pointers to guarded variables into other functions.

https://github.com/llvm/llvm-project/pull/123063
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] Thread Safety Analysis: Support warning on taking address of guarded variables (PR #123063)

2025-02-11 Thread Aaron Puchert via cfe-commits


https://github.com/aaronpuchert edited 
https://github.com/llvm/llvm-project/pull/123063
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] Thread Safety Analysis: Support warning on taking address of guarded variables (PR #123063)

2025-02-11 Thread Aaron Puchert via cfe-commits



@@ -515,8 +515,18 @@ Warning flags
   + ``-Wthread-safety-analysis``: The core analysis.
   + ``-Wthread-safety-precise``: Requires that mutex expressions match 
precisely.
This warning can be disabled for code which has a lot of aliases.
-  + ``-Wthread-safety-reference``: Checks when guarded members are passed by 
reference.
-
+  + ``-Wthread-safety-reference``: Checks when guarded variables are passed by 
reference.
+
+* ``-Wthread-safety-addressof``: Warn when the address of guarded variables is
+  taken (``&var``). Since taking the address of a variable does *not
+  necessarily imply a read or write*, the warning is off by default to avoid
+  false positives. In codebases that prefer passing pointers rather than
+  references (for C++ codebases), or passing pointers is ubiquitous (for C
+  codebases), enabling this warning will result in fewer false negatives; for
+  example, where the manipulation of common data structures is done via
+  functions that take pointers to instances of the data structure. Note,
+  however, that the analysis does not track pointers, and false positives *and*
+  negatives are still possible.

aaronpuchert wrote:

I'm not sure if we should still mark this as experimental in case this doesn't 
turn out so well.

https://github.com/llvm/llvm-project/pull/123063
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] Thread Safety Analysis: Support warning on taking address of guarded variables (PR #123063)

2025-02-11 Thread Aaron Puchert via cfe-commits



@@ -1,7 +1,9 @@
 // RUN: %clang_cc1 -fsyntax-only -verify -std=c++11 -Wthread-safety 
-Wthread-safety-beta -Wno-thread-safety-negative -fcxx-exceptions 
-DUSE_CAPABILITY=0 %s
 // RUN: %clang_cc1 -fsyntax-only -verify -std=c++11 -Wthread-safety 
-Wthread-safety-beta -Wno-thread-safety-negative -fcxx-exceptions 
-DUSE_CAPABILITY=1 %s
+// RUN: %clang_cc1 -fsyntax-only -verify -std=c++11 -Wthread-safety 
-Wthread-safety-beta -Wno-thread-safety-negative -Wthread-safety-addressof 
-fcxx-exceptions -DUSE_CAPABILITY=1 -DCHECK_ADDRESSOF %s

aaronpuchert wrote:

Same goes here: the test is really big and I don't want a combinatorial 
explosion. We're also having `-beta` on by default, so there is no reason to 
have `-addressof` separately. (We have `-negative` separately because it's a 
different kind of analysis.)

https://github.com/llvm/llvm-project/pull/123063
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] Thread Safety Analysis: Support warning on taking address of guarded variables (PR #123063)

2025-02-11 Thread Aaron Puchert via cfe-commits



@@ -1,5 +1,6 @@
 // RUN: %clang_cc1 -fsyntax-only -verify -Wthread-safety -Wthread-safety-beta 
%s
 // RUN: %clang_cc1 -fsyntax-only -verify -Wthread-safety -Wthread-safety-beta 
-fexperimental-late-parse-attributes -DLATE_PARSING %s
+// RUN: %clang_cc1 -fsyntax-only -verify -Wthread-safety -Wthread-safety-beta 
-Wthread-safety-addressof -fexperimental-late-parse-attributes -DLATE_PARSING 
-DCHECK_ADDRESSOF %s

aaronpuchert wrote:

I don't like so many run lines. Just add it to the default run line. There is 
no need to check in this test that Clang properly filters diagnostics, because 
it's not the Thread Safety Analysis, but the diagnostics engine that decides 
which warnings to print.

https://github.com/llvm/llvm-project/pull/123063
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] Thread Safety Analysis: Support warning on taking address of guarded variables (PR #123063)

2025-02-11 Thread Aaron Puchert via cfe-commits



@@ -1983,11 +1983,21 @@ class ThreadSafetyReporter : public 
clang::threadSafety::ThreadSafetyHandler {
 
   void handleNoMutexHeld(const NamedDecl *D, ProtectedOperationKind POK,
  AccessKind AK, SourceLocation Loc) override {
-assert((POK == POK_VarAccess || POK == POK_VarDereference) &&
-   "Only works for variables");
-unsigned DiagID = POK == POK_VarAccess?
-diag::warn_variable_requires_any_lock:
-diag::warn_var_deref_requires_any_lock;
+unsigned DiagID = 0;
+switch (POK) {
+case POK_VarAccess:
+  DiagID = diag::warn_variable_requires_any_lock;
+  break;
+case POK_VarDereference:
+  DiagID = diag::warn_var_deref_requires_any_lock;
+  break;
+case POK_AddressOf:
+  DiagID = diag::warn_addressof_requires_any_lock;
+  break;
+default:
+  assert(false && "Only works for variables");
+  break;

aaronpuchert wrote:

Wouldn't `llvm_unreachable` be the standard idiom here?

https://github.com/llvm/llvm-project/pull/123063
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] Thread Safety Analysis: Support warning on taking address of guarded variables (PR #123063)

2025-02-11 Thread Aaron Puchert via cfe-commits



@@ -133,7 +134,12 @@ int main(void) {
 
   Foo_func3(5);
 
+#ifdef CHECK_ADDRESSOF
+  set_value(&a_, 0); // expected-warning{{calling function 'set_value' 
requires holding mutex 'foo_.mu_' exclusively}} \
+expected-warning{{taking address of variable 'a_' 
requires holding mutex 'foo_.mu_'}}

aaronpuchert wrote:

Something like this should also work:
```suggestion
  set_value(&a_, 0); // expected-warning{{calling function 'set_value' requires 
holding mutex 'foo_.mu_' exclusively}}
 // expected-warning@-1{{taking address of variable 'a_' 
requires holding mutex 'foo_.mu_'}}
```

https://github.com/llvm/llvm-project/pull/123063
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [CLANG]Update svget, svset, svcrete, svundef to have FP8 variants (PR #126754)

2025-02-11 Thread Virginia Cangelosi via cfe-commits


https://github.com/virginia-cangelosi created 
https://github.com/llvm/llvm-project/pull/126754

None

>From 725d5cae3ee38201eea257720b53cdae1d0ecea4 Mon Sep 17 00:00:00 2001
From: Virginia Cangelosi 
Date: Tue, 11 Feb 2025 14:42:22 +
Subject: [PATCH] [CLANG]Update svget, svset, svcrete and svundef intrinsics to
 have FP8 variants

---
 clang/include/clang/Basic/arm_sve.td  | 26 +--
 .../AArch64/sve-intrinsics/acle_sve_create2.c | 17 
 .../AArch64/sve-intrinsics/acle_sve_create3.c | 19 ++
 .../AArch64/sve-intrinsics/acle_sve_create4.c | 21 +++
 .../AArch64/sve-intrinsics/acle_sve_get2.c| 19 ++
 .../AArch64/sve-intrinsics/acle_sve_get3.c| 21 +++
 .../AArch64/sve-intrinsics/acle_sve_get4.c| 23 
 .../AArch64/sve-intrinsics/acle_sve_set2.c| 19 ++
 .../AArch64/sve-intrinsics/acle_sve_set3.c| 21 +++
 .../AArch64/sve-intrinsics/acle_sve_set4.c| 23 
 .../AArch64/sve-intrinsics/acle_sve_undef.c   | 13 ++
 .../AArch64/sve-intrinsics/acle_sve_undef2.c  | 13 ++
 .../AArch64/sve-intrinsics/acle_sve_undef3.c  | 13 ++
 .../AArch64/sve-intrinsics/acle_sve_undef4.c  | 13 ++
 14 files changed, 248 insertions(+), 13 deletions(-)

diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index b20383e72e66a..3afbba51bd138 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1303,14 +1303,14 @@ def SVZIP2Q_BF16  : SInst<"svzip2q[_{d}]", 
"ddd",  "b", MergeNone, "aarc
 
 

 // Vector creation
-def SVUNDEF_1 : SInst<"svundef_{d}",  "dv", "csilUcUsUiUlhfd", MergeNone, "", 
[IsUndef, VerifyRuntimeMode]>;
-def SVUNDEF_2 : SInst<"svundef2_{d}", "2v", "csilUcUsUiUlhfd", MergeNone, "", 
[IsUndef, VerifyRuntimeMode]>;
-def SVUNDEF_3 : SInst<"svundef3_{d}", "3v", "csilUcUsUiUlhfd", MergeNone, "", 
[IsUndef, VerifyRuntimeMode]>;
-def SVUNDEF_4 : SInst<"svundef4_{d}", "4v", "csilUcUsUiUlhfd", MergeNone, "", 
[IsUndef, VerifyRuntimeMode]>;
+def SVUNDEF_1 : SInst<"svundef_{d}",  "dv", "csilUcUsUiUlhfdm", MergeNone, "", 
[IsUndef, VerifyRuntimeMode]>;
+def SVUNDEF_2 : SInst<"svundef2_{d}", "2v", "csilUcUsUiUlhfdm", MergeNone, "", 
[IsUndef, VerifyRuntimeMode]>;
+def SVUNDEF_3 : SInst<"svundef3_{d}", "3v", "csilUcUsUiUlhfdm", MergeNone, "", 
[IsUndef, VerifyRuntimeMode]>;
+def SVUNDEF_4 : SInst<"svundef4_{d}", "4v", "csilUcUsUiUlhfdm", MergeNone, "", 
[IsUndef, VerifyRuntimeMode]>;
 
-def SVCREATE_2 : SInst<"svcreate2[_{d}]", "2dd",   "csilUcUsUiUlhfd", 
MergeNone, "", [IsTupleCreate, VerifyRuntimeMode]>;
-def SVCREATE_3 : SInst<"svcreate3[_{d}]", "3ddd",  "csilUcUsUiUlhfd", 
MergeNone, "", [IsTupleCreate, VerifyRuntimeMode]>;
-def SVCREATE_4 : SInst<"svcreate4[_{d}]", "4", "csilUcUsUiUlhfd", 
MergeNone, "", [IsTupleCreate, VerifyRuntimeMode]>;
+def SVCREATE_2 : SInst<"svcreate2[_{d}]", "2dd",   "csilUcUsUiUlhfdm", 
MergeNone, "", [IsTupleCreate, VerifyRuntimeMode]>;
+def SVCREATE_3 : SInst<"svcreate3[_{d}]", "3ddd",  "csilUcUsUiUlhfdm", 
MergeNone, "", [IsTupleCreate, VerifyRuntimeMode]>;
+def SVCREATE_4 : SInst<"svcreate4[_{d}]", "4", "csilUcUsUiUlhfdm", 
MergeNone, "", [IsTupleCreate, VerifyRuntimeMode]>;
 
 let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in {
 def SVUNDEF_1_BF16 : SInst<"svundef_{d}",  "dv", "b", MergeNone, "", [IsUndef, 
VerifyRuntimeMode]>;
@@ -1330,13 +1330,13 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" 
in {
 
 

 // Vector insertion and extraction
-def SVGET_2 : SInst<"svget2[_{d}]", "d2i", "csilUcUsUiUlhfd", MergeNone, "", 
[IsTupleGet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_1>]>;
-def SVGET_3 : SInst<"svget3[_{d}]", "d3i", "csilUcUsUiUlhfd", MergeNone, "", 
[IsTupleGet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_2>]>;
-def SVGET_4 : SInst<"svget4[_{d}]", "d4i", "csilUcUsUiUlhfd", MergeNone, "", 
[IsTupleGet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_3>]>;
+def SVGET_2 : SInst<"svget2[_{d}]", "d2i", "csilUcUsUiUlhfdm", MergeNone, "", 
[IsTupleGet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_1>]>;
+def SVGET_3 : SInst<"svget3[_{d}]", "d3i", "csilUcUsUiUlhfdm", MergeNone, "", 
[IsTupleGet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_2>]>;
+def SVGET_4 : SInst<"svget4[_{d}]", "d4i", "csilUcUsUiUlhfdm", MergeNone, "", 
[IsTupleGet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_3>]>;
 
-def SVSET_2 : SInst<"svset2[_{d}]", "22id", "csilUcUsUiUlhfd", MergeNone, "", 
[IsTupleSet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_1>]>;
-def SVSET_3 : SInst<"svset3[_{d}]", "33id", "csilUcUsUiUlhfd", MergeNone, "", 
[IsTupleSet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_2>]>;
-def SVSET_4 : SInst<"svset4[_{d}]", "44id", "csilUcUsUiUlhfd", MergeNone, "", 
[IsTupl

[clang] [CLANG]Update svget, svset, svcrete, svundef to have FP8 variants (PR #126754)

2025-02-11 Thread via cfe-commits


llvmbot wrote:




@llvm/pr-subscribers-clang

Author: Virginia Cangelosi (virginia-cangelosi)


Changes



---

Patch is 25.73 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/126754.diff


14 Files Affected:

- (modified) clang/include/clang/Basic/arm_sve.td (+13-13) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_create2.c (+17) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_create3.c (+19) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_create4.c (+21) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_get2.c (+19) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_get3.c (+21) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_get4.c (+23) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_set2.c (+19) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_set3.c (+21) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_set4.c (+23) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_undef.c (+13) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_undef2.c (+13) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_undef3.c (+13) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_undef4.c (+13) 


``diff
diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index b20383e72e66a37..3afbba51bd13815 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1303,14 +1303,14 @@ def SVZIP2Q_BF16  : SInst<"svzip2q[_{d}]", 
"ddd",  "b", MergeNone, "aarc
 
 

 // Vector creation
-def SVUNDEF_1 : SInst<"svundef_{d}",  "dv", "csilUcUsUiUlhfd", MergeNone, "", 
[IsUndef, VerifyRuntimeMode]>;
-def SVUNDEF_2 : SInst<"svundef2_{d}", "2v", "csilUcUsUiUlhfd", MergeNone, "", 
[IsUndef, VerifyRuntimeMode]>;
-def SVUNDEF_3 : SInst<"svundef3_{d}", "3v", "csilUcUsUiUlhfd", MergeNone, "", 
[IsUndef, VerifyRuntimeMode]>;
-def SVUNDEF_4 : SInst<"svundef4_{d}", "4v", "csilUcUsUiUlhfd", MergeNone, "", 
[IsUndef, VerifyRuntimeMode]>;
+def SVUNDEF_1 : SInst<"svundef_{d}",  "dv", "csilUcUsUiUlhfdm", MergeNone, "", 
[IsUndef, VerifyRuntimeMode]>;
+def SVUNDEF_2 : SInst<"svundef2_{d}", "2v", "csilUcUsUiUlhfdm", MergeNone, "", 
[IsUndef, VerifyRuntimeMode]>;
+def SVUNDEF_3 : SInst<"svundef3_{d}", "3v", "csilUcUsUiUlhfdm", MergeNone, "", 
[IsUndef, VerifyRuntimeMode]>;
+def SVUNDEF_4 : SInst<"svundef4_{d}", "4v", "csilUcUsUiUlhfdm", MergeNone, "", 
[IsUndef, VerifyRuntimeMode]>;
 
-def SVCREATE_2 : SInst<"svcreate2[_{d}]", "2dd",   "csilUcUsUiUlhfd", 
MergeNone, "", [IsTupleCreate, VerifyRuntimeMode]>;
-def SVCREATE_3 : SInst<"svcreate3[_{d}]", "3ddd",  "csilUcUsUiUlhfd", 
MergeNone, "", [IsTupleCreate, VerifyRuntimeMode]>;
-def SVCREATE_4 : SInst<"svcreate4[_{d}]", "4", "csilUcUsUiUlhfd", 
MergeNone, "", [IsTupleCreate, VerifyRuntimeMode]>;
+def SVCREATE_2 : SInst<"svcreate2[_{d}]", "2dd",   "csilUcUsUiUlhfdm", 
MergeNone, "", [IsTupleCreate, VerifyRuntimeMode]>;
+def SVCREATE_3 : SInst<"svcreate3[_{d}]", "3ddd",  "csilUcUsUiUlhfdm", 
MergeNone, "", [IsTupleCreate, VerifyRuntimeMode]>;
+def SVCREATE_4 : SInst<"svcreate4[_{d}]", "4", "csilUcUsUiUlhfdm", 
MergeNone, "", [IsTupleCreate, VerifyRuntimeMode]>;
 
 let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in {
 def SVUNDEF_1_BF16 : SInst<"svundef_{d}",  "dv", "b", MergeNone, "", [IsUndef, 
VerifyRuntimeMode]>;
@@ -1330,13 +1330,13 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" 
in {
 
 

 // Vector insertion and extraction
-def SVGET_2 : SInst<"svget2[_{d}]", "d2i", "csilUcUsUiUlhfd", MergeNone, "", 
[IsTupleGet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_1>]>;
-def SVGET_3 : SInst<"svget3[_{d}]", "d3i", "csilUcUsUiUlhfd", MergeNone, "", 
[IsTupleGet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_2>]>;
-def SVGET_4 : SInst<"svget4[_{d}]", "d4i", "csilUcUsUiUlhfd", MergeNone, "", 
[IsTupleGet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_3>]>;
+def SVGET_2 : SInst<"svget2[_{d}]", "d2i", "csilUcUsUiUlhfdm", MergeNone, "", 
[IsTupleGet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_1>]>;
+def SVGET_3 : SInst<"svget3[_{d}]", "d3i", "csilUcUsUiUlhfdm", MergeNone, "", 
[IsTupleGet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_2>]>;
+def SVGET_4 : SInst<"svget4[_{d}]", "d4i", "csilUcUsUiUlhfdm", MergeNone, "", 
[IsTupleGet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_3>]>;
 
-def SVSET_2 : SInst<"svset2[_{d}]", "22id", "csilUcUsUiUlhfd", MergeNone, "", 
[IsTupleSet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_1>]>;
-def SVSET_3 : SInst<"svset3[_{d}]", "33id", "csilUcUsUiUlhfd", MergeNone, "", 
[IsTupleSet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_2>]>;
-def SVSET_4 : SInst<"svset4[_{d}]", "44id", "csilU

[clang] [llvm] [NVPTX] Add intrinsics for redux.sync f32 instructions (PR #126664)

2025-02-11 Thread Srinivasa Ravi via cfe-commits


https://github.com/Wolfram70 updated 
https://github.com/llvm/llvm-project/pull/126664

>From 88e076bb9af3b1bc63d76feef1ba842d88fbd95f Mon Sep 17 00:00:00 2001
From: Srinivasa Ravi 
Date: Mon, 10 Feb 2025 14:13:42 +0530
Subject: [PATCH] [NVPTX] Add intrinsics for redux.sync f32 instructions

Adds NVVM intrinsics and NVPTX codegen for redux.sync f32 instructions
introduced in ptx8.6 for sm_100a. Tests added in
CodeGen/NVPTX/redux-sync.ll and verified through ptxas 12.8.0.

PTX Spec Reference:
https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-redux-sync
---
 clang/include/clang/Basic/BuiltinsNVPTX.td   |   8 ++
 clang/test/CodeGenCUDA/redux-f32-builtins.cu |  34 +
 llvm/include/llvm/IR/IntrinsicsNVVM.td   |  12 ++
 llvm/lib/Target/NVPTX/NVPTXIntrinsics.td |  19 +++
 llvm/test/CodeGen/NVPTX/redux-sync-f32.ll| 139 +++
 5 files changed, 212 insertions(+)
 create mode 100644 clang/test/CodeGenCUDA/redux-f32-builtins.cu
 create mode 100644 llvm/test/CodeGen/NVPTX/redux-sync-f32.ll

diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.td 
b/clang/include/clang/Basic/BuiltinsNVPTX.td
index 9d24a992563a450..327dc88cffdb4e6 100644
--- a/clang/include/clang/Basic/BuiltinsNVPTX.td
+++ b/clang/include/clang/Basic/BuiltinsNVPTX.td
@@ -669,6 +669,14 @@ def __nvvm_redux_sync_umax : 
NVPTXBuiltinSMAndPTX<"unsigned int(unsigned int, in
 def __nvvm_redux_sync_and : NVPTXBuiltinSMAndPTX<"int(int, int)", SM_80, 
PTX70>;
 def __nvvm_redux_sync_xor : NVPTXBuiltinSMAndPTX<"int(int, int)", SM_80, 
PTX70>;
 def __nvvm_redux_sync_or : NVPTXBuiltinSMAndPTX<"int(int, int)", SM_80, PTX70>;
+def __nvvm_redux_sync_fmin : NVPTXBuiltinSMAndPTX<"float(float, int)", 
SM_100a, PTX86>;
+def __nvvm_redux_sync_fmin_abs : NVPTXBuiltinSMAndPTX<"float(float, int)", 
SM_100a, PTX86>;
+def __nvvm_redux_sync_fmin_NaN : NVPTXBuiltinSMAndPTX<"float(float, int)", 
SM_100a, PTX86>;
+def __nvvm_redux_sync_fmin_abs_NaN : NVPTXBuiltinSMAndPTX<"float(float, int)", 
SM_100a, PTX86>;
+def __nvvm_redux_sync_fmax : NVPTXBuiltinSMAndPTX<"float(float, int)", 
SM_100a, PTX86>;
+def __nvvm_redux_sync_fmax_abs : NVPTXBuiltinSMAndPTX<"float(float, int)", 
SM_100a, PTX86>;
+def __nvvm_redux_sync_fmax_NaN : NVPTXBuiltinSMAndPTX<"float(float, int)", 
SM_100a, PTX86>;
+def __nvvm_redux_sync_fmax_abs_NaN : NVPTXBuiltinSMAndPTX<"float(float, int)", 
SM_100a, PTX86>;
 
 // Membar
 
diff --git a/clang/test/CodeGenCUDA/redux-f32-builtins.cu 
b/clang/test/CodeGenCUDA/redux-f32-builtins.cu
new file mode 100644
index 000..7359fb000699169
--- /dev/null
+++ b/clang/test/CodeGenCUDA/redux-f32-builtins.cu
@@ -0,0 +1,34 @@
+// RUN: %clang_cc1 "-triple" "nvptx-nvidia-cuda" "-target-feature" "+ptx86" 
"-target-cpu" "sm_100a" -emit-llvm -fcuda-is-device -o - %s | FileCheck %s
+// RUN: %clang_cc1 "-triple" "nvptx64-nvidia-cuda" "-target-feature" "+ptx86" 
"-target-cpu" "sm_100a" -emit-llvm -fcuda-is-device -o - %s | FileCheck %s
+
+// CHECK: define{{.*}} void @_Z6kernelPf(ptr noundef %out_f)
+__attribute__((global)) void kernel(float* out_f) {
+  float a = 3.0;
+  int i = 0;
+
+  out_f[i++] = __nvvm_redux_sync_fmin(a, 0xFF);
+  // CHECK: call contract float @llvm.nvvm.redux.sync.fmin
+
+  out_f[i++] = __nvvm_redux_sync_fmin_abs(a, 0xFF);
+  // CHECK: call contract float @llvm.nvvm.redux.sync.fmin.abs
+
+  out_f[i++] = __nvvm_redux_sync_fmin_NaN(a, 0xF0);
+  // CHECK: call contract float @llvm.nvvm.redux.sync.fmin.NaN
+
+  out_f[i++] = __nvvm_redux_sync_fmin_abs_NaN(a, 0x0F);
+  // CHECK: call contract float @llvm.nvvm.redux.sync.fmin.abs.NaN
+
+  out_f[i++] = __nvvm_redux_sync_fmax(a, 0xFF);
+  // CHECK: call contract float @llvm.nvvm.redux.sync.fmax
+
+  out_f[i++] = __nvvm_redux_sync_fmax_abs(a, 0x01);
+  // CHECK: call contract float @llvm.nvvm.redux.sync.fmax.abs
+
+  out_f[i++] = __nvvm_redux_sync_fmax_NaN(a, 0xF1);
+  // CHECK: call contract float @llvm.nvvm.redux.sync.fmax.NaN
+
+  out_f[i++] = __nvvm_redux_sync_fmax_abs_NaN(a, 0x10);
+  // CHECK: call contract float @llvm.nvvm.redux.sync.fmax.abs.NaN
+
+  // CHECK: ret void
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td 
b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index f299a145ac73b12..5aa7ebf48a6e5e2 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -4823,6 +4823,18 @@ def int_nvvm_redux_sync_xor : 
ClangBuiltin<"__nvvm_redux_sync_xor">,
 def int_nvvm_redux_sync_or : ClangBuiltin<"__nvvm_redux_sync_or">,
   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
 [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>;
+
+// redux.sync.op.{abs}.{NaN}.f32 dst, src, membermask;
+foreach binOp = ["min", "max"] in {
+  foreach abs = ["", "_abs"] in {
+foreach NaN = ["", "_NaN"] in {
+  def int_nvvm_redux_sync_f # binOp # abs # NaN : 
+ClangBuiltin,
+Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_i32_ty],
+  [Int

[clang] [llvm] [NVPTX] Add intrinsics for redux.sync f32 instructions (PR #126664)

2025-02-11 Thread Durgadoss R via cfe-commits


https://github.com/durga4github approved this pull request.

The latest revision looks good to me.

https://github.com/llvm/llvm-project/pull/126664
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang-linker-wrapper][lit] Fix SPIR-V ELF test when spirv-tools feature is available (PR #126756)

2025-02-11 Thread Nick Sarnie via cfe-commits


https://github.com/sarnex created 
https://github.com/llvm/llvm-project/pull/126756

My last change made the test not run when the `spirv-tools` feature is not 
available, which is always the case in CI for clang tests, but it fails if 
`spirv-tools` is available for the following reasons:
1) We didn't build `spirv-link` as part of the internal `SPIRV-Tools` build, 
which is required by the `clang` call in `clang-linker-wrapper`. I already 
fixed that [here](https://github.com/llvm/llvm-project/pull/126319).
2) We didn't depend on the `SPIRV-Tools` CMake targets in clang tests, so 
depending on which CMake targets were built before running `check-clang`, 
`SPIRV-Tools` might not have been built.
3) We didn't check for `llvm-spirv` being available, which is not part of 
`SPIRV-Tools` but is currently required for SPIR-V compilation.

Manually confirmed this works.

>From a758efe88d1e58a5b53dd7a4b7da6e174f645356 Mon Sep 17 00:00:00 2001
From: "Sarnie, Nick" 
Date: Tue, 11 Feb 2025 07:57:11 -0800
Subject: [PATCH] [clang-linker-wrapper][lit] Fix SPIR-V ELF test when
 spirv-tools feature is available

Signed-off-by: Sarnie, Nick 
---
 clang/test/CMakeLists.txt | 9 +
 clang/test/Tooling/clang-linker-wrapper-spirv-elf.cpp | 1 +
 clang/test/Tooling/lit.local.cfg  | 6 ++
 3 files changed, 16 insertions(+)

diff --git a/clang/test/CMakeLists.txt b/clang/test/CMakeLists.txt
index e9eb54a67204cda..b796a51ef600e72 100644
--- a/clang/test/CMakeLists.txt
+++ b/clang/test/CMakeLists.txt
@@ -103,6 +103,15 @@ if(CLANG_BUILD_EXAMPLES AND CLANG_PLUGIN_SUPPORT)
 )
 endif ()
 
+if(LLVM_INCLUDE_SPIRV_TOOLS_TESTS)
+  list(APPEND CLANG_TEST_DEPS
+spirv-dis
+spirv-val
+spirv-as
+spirv-link
+)
+endif()
+
 set(CLANG_TEST_PARAMS
   USE_Z3_SOLVER=0
   )
diff --git a/clang/test/Tooling/clang-linker-wrapper-spirv-elf.cpp 
b/clang/test/Tooling/clang-linker-wrapper-spirv-elf.cpp
index 4f8658064e857d0..9b16727d7419251 100644
--- a/clang/test/Tooling/clang-linker-wrapper-spirv-elf.cpp
+++ b/clang/test/Tooling/clang-linker-wrapper-spirv-elf.cpp
@@ -1,6 +1,7 @@
 // Verify the ELF packaging of OpenMP SPIR-V device images.
 // REQUIRES: system-linux
 // REQUIRES: spirv-tools
+// REQUIRES: llvm-spirv
 // RUN: mkdir -p %t_tmp
 // RUN: cd %t_tmp
 // RUN: %clangxx -fopenmp -fopenmp-targets=spirv64-intel -nogpulib -c -o 
%t_clang-linker-wrapper-spirv-elf.o %s
diff --git a/clang/test/Tooling/lit.local.cfg b/clang/test/Tooling/lit.local.cfg
index bc2a096c8f64f88..9083a48c7bb2a4f 100644
--- a/clang/test/Tooling/lit.local.cfg
+++ b/clang/test/Tooling/lit.local.cfg
@@ -1,3 +1,5 @@
+import shutil
+
 if not config.root.clang_staticanalyzer:
 config.unsupported = True
 
@@ -6,3 +8,7 @@ if config.spirv_tools_tests:
 config.substitutions.append(("spirv-dis", 
os.path.join(config.llvm_tools_dir, "spirv-dis")))
 config.substitutions.append(("spirv-val", 
os.path.join(config.llvm_tools_dir, "spirv-val")))
 config.substitutions.append(("spirv-as", 
os.path.join(config.llvm_tools_dir, "spirv-as")))
+config.substitutions.append(("spirv-link", 
os.path.join(config.llvm_tools_dir, "spirv-link")))
+
+if shutil.which("llvm-spirv"):
+config.available_features.add("llvm-spirv")

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [CLANG]Update svget, svset, svcrete, svundef to have FP8 variants (PR #126754)

2025-02-11 Thread Virginia Cangelosi via cfe-commits


https://github.com/virginia-cangelosi edited 
https://github.com/llvm/llvm-project/pull/126754
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] Reapply "[Driver][ROCm][OpenMP] Fix default ockl linking for OpenMP."… (PR #126671)

2025-02-11 Thread Amit Kumar Pandey via cfe-commits



@@ -68,7 +68,9 @@ llvm::opt::DerivedArgList 
*AMDGPUOpenMPToolChain::TranslateArgs(
 Action::OffloadKind DeviceOffloadKind) const {
   DerivedArgList *DAL =
   HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
-  if (!DAL || Args.hasArg(options::OPT_fsanitize_EQ))
+  // Skip sanitize options passed from the HostTC. The decision to instrument
+  // device code is computed only by 'shouldSkipSanitizeOption'.
+  if (!DAL && DAL->hasArg(options::OPT_fsanitize_EQ))

ampandey-1995 wrote:

Sorry, to clarify: here the DAL computed from the HostTC picks up sanitize 
options like ```-fsanitize=address``` and passes them forward to the ```DAL``` 
of the device toolchain. ```shouldSkipSanitizeOption``` runs over each argument 
of ```Args``` (the user-provided arguments, which are not derived); if 
```-fsanitize=address``` is there, it returns false early, which then avoids 
checking the ```:xnack+``` capability.

So, assume the case where we pass ```-fsanitize=address 
--offload-arch=gfx90a```. This should avoid appending 
```-fsanitize=address``` to the DAL for the device toolchain, but it doesn't, 
since the DAL already has ```-fsanitize=address``` from the host toolchain and, 
due to the early return from ```shouldSkipSanitizeOption```, the check for the 
```:xnack+``` capability is skipped.

https://github.com/llvm/llvm-project/pull/126671
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] Reapply "[Driver][ROCm][OpenMP] Fix default ockl linking for OpenMP."… (PR #126671)

2025-02-11 Thread Amit Kumar Pandey via cfe-commits


https://github.com/ampandey-1995 edited 
https://github.com/llvm/llvm-project/pull/126671
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang-linker-wrapper][lit] Fix SPIR-V ELF test when spirv-tools feature is available (PR #126756)

2025-02-11 Thread Nick Sarnie via cfe-commits


https://github.com/sarnex edited 
https://github.com/llvm/llvm-project/pull/126756
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] Reapply "[Driver][ROCm][OpenMP] Fix default ockl linking for OpenMP."… (PR #126671)

2025-02-11 Thread Joseph Huber via cfe-commits



@@ -68,7 +68,9 @@ llvm::opt::DerivedArgList 
*AMDGPUOpenMPToolChain::TranslateArgs(
 Action::OffloadKind DeviceOffloadKind) const {
   DerivedArgList *DAL =
   HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
-  if (!DAL || Args.hasArg(options::OPT_fsanitize_EQ))
+  // Skip sanitize options passed from the HostTC. The decision to instrument
+  // device code is computed only by 'shouldSkipSanitizeOption'.
+  if (!DAL && DAL->hasArg(options::OPT_fsanitize_EQ))

jhuber6 wrote:

Sorry, I'm still confused. If the user cannot enable `-fsanitize=address` when 
the GPU target doesn't support `xnack`, then that should be a separate driver 
check / error. Unless we're just silently ignoring that?

https://github.com/llvm/llvm-project/pull/126671
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] Reapply "[Driver][ROCm][OpenMP] Fix default ockl linking for OpenMP."… (PR #126671)

2025-02-11 Thread Amit Kumar Pandey via cfe-commits



@@ -68,7 +68,9 @@ llvm::opt::DerivedArgList 
*AMDGPUOpenMPToolChain::TranslateArgs(
 Action::OffloadKind DeviceOffloadKind) const {
   DerivedArgList *DAL =
   HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
-  if (!DAL || Args.hasArg(options::OPT_fsanitize_EQ))
+  // Skip sanitize options passed from the HostTC. The decision to instrument
+  // device code is computed only by 'shouldSkipSanitizeOption'.
+  if (!DAL && DAL->hasArg(options::OPT_fsanitize_EQ))

ampandey-1995 wrote:

From what I have seen for HIPAMDToolChain, the DAL is always constructed 
(dynamically allocated) every time.

https://github.com/llvm/llvm-project/pull/126671
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [HLSL] Appropriately set function attribute optnone (PR #125937)

2025-02-11 Thread Chris B via cfe-commits



@@ -345,6 +345,14 @@ void clang::CodeGen::CGHLSLRuntime::setHLSLEntryAttributes(
 WaveSizeAttr->getPreferred());
 Fn->addFnAttr(WaveSizeKindStr, WaveSizeStr);
   }
+  // HLSL entry functions are materialized for module functions with
+  // HLSLShaderAttr attribute. SetLLVMFunctionAttributesForDefinition called
+  // later in the compiler-flow for such module functions is not aware of and
+  // hence not able to set attributes of the newly materialized entry 
functions.
+  // So, set attributes of entry function here, as appropriate.
+  if (CGM.getCodeGenOpts().OptimizationLevel == 0) {
+Fn->addFnAttr(llvm::Attribute::OptimizeNone);
+  }

llvm-beanz wrote:

nit: 
https://llvm.org/docs/CodingStandards.html#don-t-use-braces-on-simple-single-statement-bodies-of-if-else-loop-statements
```suggestion
  if (CGM.getCodeGenOpts().OptimizationLevel == 0)
Fn->addFnAttr(llvm::Attribute::OptimizeNone);
```

https://github.com/llvm/llvm-project/pull/125937
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] Reapply "[Driver][ROCm][OpenMP] Fix default ockl linking for OpenMP."… (PR #126671)

2025-02-11 Thread Amit Kumar Pandey via cfe-commits


https://github.com/ampandey-1995 edited 
https://github.com/llvm/llvm-project/pull/126671
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [HLSL] Appropriately set function attribute optnone (PR #125937)

2025-02-11 Thread Chris B via cfe-commits


https://github.com/llvm-beanz edited 
https://github.com/llvm/llvm-project/pull/125937
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [HLSL] Appropriately set function attribute optnone (PR #125937)

2025-02-11 Thread Chris B via cfe-commits


https://github.com/llvm-beanz approved this pull request.

One nit, otherwise LGTM.

https://github.com/llvm/llvm-project/pull/125937
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [HLSL] Fix resource wrapper declaration (PR #125718)

2025-02-11 Thread Chris B via cfe-commits


https://github.com/llvm-beanz commented:

I have a couple concerns about this approach. I'm curious for @bogner's 
thoughts since he has approved the PR.

1) If/when we support separate compilation, the user-facing exported "resource" 
object will be the resource wrapper, not the handle, so that indicates to me we 
should probably have the wrappers being public.
2) It feels wrong to do this in CodeGen. It seems like we should be identifying 
an appropriate storage class in Sema such that we assign the correct linkage 
automatically.

@hekota might also have some good insights here because she is dealing with 
some of HLSL's implicit storage class/address space nonsense in her work with 
cbuffers.

https://github.com/llvm/llvm-project/pull/125718
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [HLSL] Change clang Driver Options to not set CXXOperatorNames (PR #126758)

2025-02-11 Thread Farzon Lotfi via cfe-commits


https://github.com/farzonl created 
https://github.com/llvm/llvm-project/pull/126758

- Disable `CXXOperatorNames` for HLSL
- Add tests to confirm we can use the alt names as functions

>From 3b7e458bfeb2abab799789d30ebaa4b214e4168e Mon Sep 17 00:00:00 2001
From: Farzon Lotfi 
Date: Tue, 11 Feb 2025 11:07:23 -0500
Subject: [PATCH] [HLSL] Change clang Driver Options to not set
 CXXOperatorNames

---
 clang/include/clang/Driver/Options.td |  2 +-
 .../use-cxx-alt-operator-names.hlsl   | 41 +++
 2 files changed, 42 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/CodeGenHLSL/use-cxx-alt-operator-names.hlsl

diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index 1cf62ab46613456..618815db2843404 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -3397,7 +3397,7 @@ def fno_objc_weak : Flag<["-"], "fno-objc-weak">, 
Group,
 def fno_omit_frame_pointer : Flag<["-"], "fno-omit-frame-pointer">, 
Group,
   Visibility<[ClangOption, FlangOption]>;
 defm operator_names : BoolFOption<"operator-names",
-  LangOpts<"CXXOperatorNames">, Default,
+  LangOpts<"CXXOperatorNames">, Default,
   NegFlag,
   PosFlag>;
diff --git a/clang/test/CodeGenHLSL/use-cxx-alt-operator-names.hlsl 
b/clang/test/CodeGenHLSL/use-cxx-alt-operator-names.hlsl
new file mode 100644
index 000..8ae253a9f3c06cb
--- /dev/null
+++ b/clang/test/CodeGenHLSL/use-cxx-alt-operator-names.hlsl
@@ -0,0 +1,41 @@
+
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s  \
+// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s
+
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN: spirv-unknown-vulkan-compute %s \
+// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s
+
+// CHECK-LABEL: and
+void and() {}
+
+// CHECK-LABEL: and_eq
+void and_eq() {}
+
+// CHECK-LABEL: bitand
+void bitand() {}
+
+// CHECK-LABEL: bitor
+void bitor() {}
+
+// CHECK-LABEL: compl
+void compl() {}
+
+// CHECK-LABEL: not
+void not() {}
+
+// CHECK-LABEL: not_eq
+void not_eq() {}
+
+// CHECK-LABEL: or
+void or() {}
+
+// CHECK-LABEL: or_eq
+void or_eq() {}
+
+// CHECK-LABEL: xor
+void xor() {}
+
+// CHECK-LABEL: xor_eq
+void xor_eq() {}

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [HLSL][RootSignature] Implement Lexing of DescriptorTables (PR #122981)

2025-02-11 Thread Finn Plummer via cfe-commits



@@ -0,0 +1,171 @@
+#include "clang/Parse/ParseHLSLRootSignature.h"
+
+namespace clang {
+namespace hlsl {
+
+// Lexer Definitions
+
+static bool IsNumberChar(char C) {
+  // TODO(#120472): extend for float support exponents
+  return isdigit(C); // integer support
+}
+
+bool RootSignatureLexer::LexNumber(RootSignatureToken &Result) {
+  // NumericLiteralParser does not handle the sign so we will manually apply it
+  bool Negative = Buffer.front() == '-';
+  bool Signed = Negative || Buffer.front() == '+';
+  if (Signed)
+AdvanceBuffer();
+
+  // Retrieve the possible number
+  StringRef NumSpelling = Buffer.take_while(IsNumberChar);
+
+  // Catch this now as the Literal Parser will accept it as valid
+  if (NumSpelling.empty()) {
+PP.getDiagnostics().Report(Result.TokLoc,
+   diag::err_hlsl_invalid_number_literal);
+return true;
+  }
+
+  // Parse the numeric value and do semantic checks on its specification
+  clang::NumericLiteralParser Literal(NumSpelling, SourceLoc,
+  PP.getSourceManager(), PP.getLangOpts(),
+  PP.getTargetInfo(), PP.getDiagnostics());
+  if (Literal.hadError)
+return true; // Error has already been reported so just return
+
+  // Note: if IsNumberChar allows for hexidecimal we will need to turn this
+  // into a diagnostics for potential fixed-point literals
+  assert(Literal.isIntegerLiteral() && "IsNumberChar will only support 
digits");
+
+  // Retrieve the number value to store into the token
+  Result.Kind = TokenKind::int_literal;
+
+  // NOTE: for compabibility with DXC, we will treat any integer with '+' as an
+  // unsigned integer
+  llvm::APSInt X = llvm::APSInt(32, !Negative);
+  if (Literal.GetIntegerValue(X)) {
+// Report that the value has overflowed
+PP.getDiagnostics().Report(Result.TokLoc,
+   diag::err_hlsl_number_literal_overflow)
+<< (unsigned)Signed << NumSpelling;
+return true;
+  }
+
+  X = Negative ? -X : X;
+  Result.NumLiteral = APValue(X);
+
+  AdvanceBuffer(NumSpelling.size());
+  return false;
+}
+
+bool RootSignatureLexer::Lex(SmallVector &Tokens) {
+  // Discard any leading whitespace
+  AdvanceBuffer(Buffer.take_while(isspace).size());
+
+  while (!Buffer.empty()) {
+// Record where this token is in the text for usage in parser diagnostics
+RootSignatureToken Result(SourceLoc);
+if (LexToken(Result))
+  return true;
+
+// Successfully Lexed the token so we can store it
+Tokens.push_back(Result);
+
+// Discard any trailing whitespace
+AdvanceBuffer(Buffer.take_while(isspace).size());
+  }
+
+  return false;
+}
+
+bool RootSignatureLexer::LexToken(RootSignatureToken &Result) {
+  char C = Buffer.front();
+
+  // Punctuators
+  switch (C) {
+#define PUNCTUATOR(X, Y)   
\
+  case Y: {
\
+Result.Kind = TokenKind::pu_##X;   
\
+AdvanceBuffer();   
\
+return false;  
\
+  }
+#include "clang/Parse/HLSLRootSignatureTokenKinds.def"
+  default:
+break;
+  }
+
+  // Numeric constant
+  if (isdigit(C) || C == '-' || C == '+')
+return LexNumber(Result);
+
+  // All following tokens require at least one additional character
+  if (Buffer.size() <= 1) {
+PP.getDiagnostics().Report(Result.TokLoc, diag::err_hlsl_invalid_token);

inbelic wrote:

Would you also remove the same error message from 
[here](https://github.com/llvm/llvm-project/blob/d648f4ccb8c74060e9e6d75b46f54ac0127e4302/clang/lib/Parse/ParseHLSLRootSignature.cpp#L161)?

They were added just to have _some_ indication that lexing failed.

https://github.com/llvm/llvm-project/pull/122981
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] Reapply "[Driver][ROCm][OpenMP] Fix default ockl linking for OpenMP."… (PR #126671)

2025-02-11 Thread Amit Kumar Pandey via cfe-commits



@@ -68,7 +68,9 @@ llvm::opt::DerivedArgList 
*AMDGPUOpenMPToolChain::TranslateArgs(
 Action::OffloadKind DeviceOffloadKind) const {
   DerivedArgList *DAL =
   HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
-  if (!DAL || Args.hasArg(options::OPT_fsanitize_EQ))
+  // Skip sanitize options passed from the HostTC. The decision to instrument
+  // device code is computed only by 'shouldSkipSanitizeOption'.
+  if (!DAL && DAL->hasArg(options::OPT_fsanitize_EQ))

ampandey-1995 wrote:

Should ```shouldSkipSanitizeOption``` take an extra parameter, the 
```DAL``` allocated by the host, to resolve that issue? @jhuber6 and 
@yxsamliu, is that the correct way? Is there anything I am missing, @yxsamliu?

https://github.com/llvm/llvm-project/pull/126671
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [HLSL] Change clang Driver Options to not set CXXOperatorNames (PR #126758)

2025-02-11 Thread Chris B via cfe-commits





llvm-beanz wrote:

This seems like the wrong place to put this test. We don't need actual codegen 
here, we just need to make sure it parses.

I'd put this under SemaHLSL and instead use a run line like:
```hlsl
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -ast-dump | 
FileCheck %s
```

Restricting test cases to only running the parts of the compiler that are 
strictly required to verify correctness of a change reduces the time it takes 
to run the test. While in isolation for a single test that may only be shaving 
off fractions of a second, in aggregate across the entire test suite it can 
have a huge impact.

https://github.com/llvm/llvm-project/pull/126758
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang-linker-wrapper][lit] Fix SPIR-V ELF test when spirv-tools feature is available (PR #126756)

2025-02-11 Thread Nick Sarnie via cfe-commits


https://github.com/sarnex edited 
https://github.com/llvm/llvm-project/pull/126756
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[libclc] [libclc] Move conversion builtins to the CLC library (PR #124727)

2025-02-11 Thread Fraser Cormack via cfe-commits


https://github.com/frasercrmck updated 
https://github.com/llvm/llvm-project/pull/124727

>From c0252c8da6fb4ae68699a7e6fc38ed7c643338e8 Mon Sep 17 00:00:00 2001
From: Fraser Cormack 
Date: Mon, 11 Nov 2024 15:46:56 +
Subject: [PATCH 1/2] [libclc] Move conversion builtins to the CLC library

This commit moves the implementations of conversion builtins to the CLC
library. It keeps the dichotomy of regular vs. clspv implementations of
the conversions. However, for the sake of a consistent interface all CLC
conversion routines are built, even the ones that clspv opts out of in
the user-facing OpenCL layer.

It simultaneously updates the python script to use f-strings for
formatting.
---
 libclc/CMakeLists.txt |  31 +-
 libclc/clc/include/clc/clc_convert.h  |  98 +
 libclc/clc/include/clc/float/definitions.h|  88 +
 libclc/generic/include/clc/convert.h  |  83 +++--
 .../generic/include/clc/float/definitions.h   |  88 -
 libclc/generic/lib/gen_convert.py | 337 --
 6 files changed, 419 insertions(+), 306 deletions(-)
 create mode 100644 libclc/clc/include/clc/clc_convert.h
 create mode 100644 libclc/clc/include/clc/float/definitions.h
 delete mode 100644 libclc/generic/include/clc/float/definitions.h

diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt
index 2978fadc2c29fc3..c88ea9700d100b8 100644
--- a/libclc/CMakeLists.txt
+++ b/libclc/CMakeLists.txt
@@ -247,11 +247,27 @@ add_custom_target( "generate_convert.cl" DEPENDS 
convert.cl )
 set_target_properties( "generate_convert.cl" PROPERTIES FOLDER 
"libclc/Sourcegenning" )
 
 add_custom_command(
-  OUTPUT clspv-convert.cl
-  COMMAND ${Python3_EXECUTABLE} ${script_loc} --clspv > clspv-convert.cl
+  OUTPUT clc-convert.cl
+  COMMAND ${Python3_EXECUTABLE} ${script_loc} --clc > clc-convert.cl
   DEPENDS ${script_loc} )
-add_custom_target( "clspv-generate_convert.cl" DEPENDS clspv-convert.cl )
-set_target_properties( "clspv-generate_convert.cl" PROPERTIES FOLDER 
"libclc/Sourcegenning" )
+add_custom_target( "clc-generate_convert.cl" DEPENDS clc-convert.cl )
+set_target_properties( "clc-generate_convert.cl" PROPERTIES FOLDER 
"libclc/Sourcegenning" )
+
+if ( clspv-- IN_LIST LIBCLC_TARGETS_TO_BUILD OR clspv64-- IN_LIST 
LIBCLC_TARGETS_TO_BUILD )
+  add_custom_command(
+OUTPUT clspv-convert.cl
+COMMAND ${Python3_EXECUTABLE} ${script_loc} --clspv > clspv-convert.cl
+DEPENDS ${script_loc} )
+  add_custom_target( "clspv-generate_convert.cl" DEPENDS clspv-convert.cl )
+  set_target_properties( "clspv-generate_convert.cl" PROPERTIES FOLDER 
"libclc/Sourcegenning" )
+
+  add_custom_command(
+OUTPUT clc-clspv-convert.cl
+COMMAND ${Python3_EXECUTABLE} ${script_loc} --clc --clspv > 
clc-clspv-convert.cl
+DEPENDS ${script_loc} )
+  add_custom_target( "clc-clspv-generate_convert.cl" DEPENDS 
clc-clspv-convert.cl )
+  set_target_properties( "clc-clspv-generate_convert.cl" PROPERTIES FOLDER 
"libclc/Sourcegenning" )
+endif()
 
 enable_testing()
 
@@ -289,6 +305,12 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
   set( clc_lib_files )
   set( clc_dirs ${dirs} generic )
 
+  if( ARCH STREQUAL clspv OR ARCH STREQUAL clspv64 )
+set( clc_gen_files clc-clspv-convert.cl )
+  else()
+set( clc_gen_files clc-convert.cl )
+  endif()
+
   libclc_configure_lib_source(
 clc_lib_files
 CLC_INTERNAL
@@ -372,6 +394,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
   COMPILE_FLAGS ${clc_build_flags}
   OPT_FLAGS ${opt_flags}
   LIB_FILES ${clc_lib_files}
+  GEN_FILES ${clc_gen_files}
 )
 
 list( APPEND build_flags
diff --git a/libclc/clc/include/clc/clc_convert.h 
b/libclc/clc/include/clc/clc_convert.h
new file mode 100644
index 000..20bbd57540b3064
--- /dev/null
+++ b/libclc/clc/include/clc/clc_convert.h
@@ -0,0 +1,98 @@
+#ifndef __CLC_CLC_CONVERT_H__
+#define __CLC_CLC_CONVERT_H__
+
+#define _CLC_CONVERT_DECL(FROM_TYPE, TO_TYPE, SUFFIX)  
\
+  _CLC_OVERLOAD _CLC_DECL TO_TYPE __clc_convert_##TO_TYPE##SUFFIX(FROM_TYPE x);
+
+#define _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, TO_TYPE, SUFFIX)   
\
+  _CLC_CONVERT_DECL(FROM_TYPE, TO_TYPE, SUFFIX)
\
+  _CLC_CONVERT_DECL(FROM_TYPE##2, TO_TYPE##2, SUFFIX)  
\
+  _CLC_CONVERT_DECL(FROM_TYPE##3, TO_TYPE##3, SUFFIX)  
\
+  _CLC_CONVERT_DECL(FROM_TYPE##4, TO_TYPE##4, SUFFIX)  
\
+  _CLC_CONVERT_DECL(FROM_TYPE##8, TO_TYPE##8, SUFFIX)  
\
+  _CLC_CONVERT_DECL(FROM_TYPE##16, TO_TYPE##16, SUFFIX)
+
+#define _CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX)   
\
+  _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, char, SUFFIX)
\
+  _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, uchar, SUFFIX)   
\
+  _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, int, SUFFIX) 
\
+  _CLC_VECTOR_CONVE

[clang] [clang][analyzer][NFC] Fix typos in comments (PR #126676)

2025-02-11 Thread Ben Shi via cfe-commits


https://github.com/benshi001 closed 
https://github.com/llvm/llvm-project/pull/126676
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[libclc] [libclc] Move sign to the CLC builtins library (PR #115699)

2025-02-11 Thread Fraser Cormack via cfe-commits


https://github.com/frasercrmck closed 
https://github.com/llvm/llvm-project/pull/115699
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[libclc] 64735ad - [libclc] Move sign to the CLC builtins library (#115699)

2025-02-11 Thread via cfe-commits


Author: Fraser Cormack
Date: 2025-02-11T11:14:49Z
New Revision: 64735ad63975c1126f84dbf7921ce4341dfa2419

URL: 
https://github.com/llvm/llvm-project/commit/64735ad63975c1126f84dbf7921ce4341dfa2419
DIFF: 
https://github.com/llvm/llvm-project/commit/64735ad63975c1126f84dbf7921ce4341dfa2419.diff

LOG: [libclc] Move sign to the CLC builtins library (#115699)

This commit moves the sign builtin's implementation to the CLC library.
It simultaneously optimizes it (for vector types) by removing
control-flow from the implementation.

The __CLC_INTERNAL preprocessor definition has been repurposed (without
the leading underscores) to be passed when building the internal CLC
library. It was only used in one other place to guard an extra maths
preprocessor definition, which we can do unconditionally.

Added: 
libclc/clc/include/clc/common/clc_sign.h
libclc/clc/lib/generic/common/clc_sign.cl
libclc/clc/lib/generic/common/clc_sign.inc

Modified: 
libclc/CMakeLists.txt
libclc/clc/include/clc/math/gentype.inc
libclc/clc/lib/generic/SOURCES
libclc/generic/include/clc/float/definitions.h
libclc/generic/lib/common/sign.cl

Removed: 




diff  --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt
index b28da904ef68e15..2978fadc2c29fc3 100644
--- a/libclc/CMakeLists.txt
+++ b/libclc/CMakeLists.txt
@@ -351,7 +351,6 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
 string( TOUPPER "CLC_${MACRO_ARCH}" CLC_TARGET_DEFINE )
 
 list( APPEND build_flags
-  -D__CLC_INTERNAL
   -D${CLC_TARGET_DEFINE}
   # All libclc builtin libraries see CLC headers
   -I${CMAKE_CURRENT_SOURCE_DIR}/clc/include
@@ -363,12 +362,14 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
   list( APPEND build_flags -mcpu=${cpu} )
 endif()
 
+set( clc_build_flags ${build_flags} -DCLC_INTERNAL )
+
 add_libclc_builtin_set(
   CLC_INTERNAL
   ARCH ${ARCH}
   ARCH_SUFFIX clc-${arch_suffix}
   TRIPLE ${clang_triple}
-  COMPILE_FLAGS ${build_flags}
+  COMPILE_FLAGS ${clc_build_flags}
   OPT_FLAGS ${opt_flags}
   LIB_FILES ${clc_lib_files}
 )

diff  --git a/libclc/clc/include/clc/common/clc_sign.h 
b/libclc/clc/include/clc/common/clc_sign.h
new file mode 100644
index 000..9e0984db7bb3be3
--- /dev/null
+++ b/libclc/clc/include/clc/common/clc_sign.h
@@ -0,0 +1,12 @@
+#ifndef __CLC_COMMON_CLC_SIGN_H__
+#define __CLC_COMMON_CLC_SIGN_H__
+
+#define __CLC_FUNCTION __clc_sign
+#define __CLC_BODY 
+
+#include 
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // __CLC_COMMON_CLC_SIGN_H__

diff  --git a/libclc/clc/include/clc/math/gentype.inc 
b/libclc/clc/include/clc/math/gentype.inc
index 87719f2d9bc0e74..3c80f1c6172ad0b 100644
--- a/libclc/clc/include/clc/math/gentype.inc
+++ b/libclc/clc/include/clc/math/gentype.inc
@@ -3,55 +3,69 @@
 
 #define __CLC_SCALAR_GENTYPE float
 #define __CLC_FPSIZE 32
+#define __CLC_FP_LIT(x) x##F
 
 #define __CLC_GENTYPE float
 #define __CLC_INTN int
+#define __CLC_BIT_INTN int
 #define __CLC_SCALAR
 #include __CLC_BODY
 #undef __CLC_GENTYPE
+#undef __CLC_BIT_INTN
 #undef __CLC_INTN
 #undef __CLC_SCALAR
 
 #define __CLC_GENTYPE float2
 #define __CLC_INTN int2
+#define __CLC_BIT_INTN int2
 #define __CLC_VECSIZE 2
 #include __CLC_BODY
 #undef __CLC_VECSIZE
 #undef __CLC_GENTYPE
+#undef __CLC_BIT_INTN
 #undef __CLC_INTN
 
 #define __CLC_GENTYPE float3
 #define __CLC_INTN int3
+#define __CLC_BIT_INTN int3
 #define __CLC_VECSIZE 3
 #include __CLC_BODY
 #undef __CLC_VECSIZE
 #undef __CLC_GENTYPE
+#undef __CLC_BIT_INTN
 #undef __CLC_INTN
 
 #define __CLC_GENTYPE float4
 #define __CLC_INTN int4
+#define __CLC_BIT_INTN int4
 #define __CLC_VECSIZE 4
 #include __CLC_BODY
 #undef __CLC_VECSIZE
 #undef __CLC_GENTYPE
+#undef __CLC_BIT_INTN
 #undef __CLC_INTN
 
 #define __CLC_GENTYPE float8
 #define __CLC_INTN int8
+#define __CLC_BIT_INTN int8
 #define __CLC_VECSIZE 8
 #include __CLC_BODY
 #undef __CLC_VECSIZE
 #undef __CLC_GENTYPE
+#undef __CLC_BIT_INTN
 #undef __CLC_INTN
 
 #define __CLC_GENTYPE float16
 #define __CLC_INTN int16
+#define __CLC_BIT_INTN int16
 #define __CLC_VECSIZE 16
 #include __CLC_BODY
 #undef __CLC_VECSIZE
 #undef __CLC_GENTYPE
+#undef __CLC_BIT_INTN
 #undef __CLC_INTN
 
+#undef __CLC_FP_LIT
 #undef __CLC_FPSIZE
 #undef __CLC_SCALAR_GENTYPE
 
@@ -61,55 +75,69 @@
 
 #define __CLC_SCALAR_GENTYPE double
 #define __CLC_FPSIZE 64
+#define __CLC_FP_LIT(x) (x)
 
 #define __CLC_SCALAR
 #define __CLC_GENTYPE double
 #define __CLC_INTN int
+#define __CLC_BIT_INTN long
 #include __CLC_BODY
 #undef __CLC_GENTYPE
+#undef __CLC_BIT_INTN
 #undef __CLC_INTN
 #undef __CLC_SCALAR
 
 #define __CLC_GENTYPE double2
 #define __CLC_INTN int2
+#define __CLC_BIT_INTN long2
 #define __CLC_VECSIZE 2
 #include __CLC_BODY
 #undef __CLC_VECSIZE
 #undef __CLC_GENTYPE
+#undef __CLC_BIT_INTN
 #undef __CLC_INTN
 
 #define __CLC_GENTYPE double3
 #define __CLC_INTN int3
+#define __CLC

[clang] [clang][bytecode] Fix diagnosing replaceable global allocator functions (PR #126717)

2025-02-11 Thread Timm Baeder via cfe-commits


https://github.com/tbaederr created 
https://github.com/llvm/llvm-project/pull/126717

Don't return true here in InvalidNewDeleteExpr just because we are in C++26 
mode. This is invalid there as well.

Testcase reduced from 
libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.create/make_unique_for_overwrite.pass.cpp

>From 8a271fbd81d5f0c9b13f169a30f7ab3add5cf7ca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= 
Date: Tue, 11 Feb 2025 12:46:27 +0100
Subject: [PATCH] [clang][bytecode] Fix diagnosing replaceable global allocator
 functions

Don't return true here in InvalidNewDeleteExpr just because we are in
C++26 mode. This is invalid there as well.

Testcase reduced from 
libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.create/make_unique_for_overwrite.pass.cpp
---
 clang/lib/AST/ByteCode/Interp.cpp | 32 +--
 clang/test/AST/ByteCode/cxx26.cpp | 23 --
 2 files changed, 39 insertions(+), 16 deletions(-)

diff --git a/clang/lib/AST/ByteCode/Interp.cpp 
b/clang/lib/AST/ByteCode/Interp.cpp
index bf48139f57c0f09..c80be094856b086 100644
--- a/clang/lib/AST/ByteCode/Interp.cpp
+++ b/clang/lib/AST/ByteCode/Interp.cpp
@@ -1564,34 +1564,38 @@ bool CheckNewTypeMismatch(InterpState &S, CodePtr OpPC, 
const Expr *E,
 bool InvalidNewDeleteExpr(InterpState &S, CodePtr OpPC, const Expr *E) {
   assert(E);
 
-  if (S.getLangOpts().CPlusPlus26)
-return true;
-
-  const auto &Loc = S.Current->getSource(OpPC);
-
   if (const auto *NewExpr = dyn_cast(E)) {
 const FunctionDecl *OperatorNew = NewExpr->getOperatorNew();
 
-if (!S.getLangOpts().CPlusPlus26 && NewExpr->getNumPlacementArgs() > 0) {
+if (NewExpr->getNumPlacementArgs() > 0) {
   // This is allowed pre-C++26, but only an std function.
-  if (S.Current->isStdFunction())
+  if (S.getLangOpts().CPlusPlus26 || S.Current->isStdFunction())
 return true;
-  S.FFDiag(Loc, diag::note_constexpr_new_placement)
+  S.FFDiag(S.Current->getSource(OpPC), diag::note_constexpr_new_placement)
   << /*C++26 feature*/ 1 << E->getSourceRange();
-} else if (NewExpr->getNumPlacementArgs() == 1 &&
-   !OperatorNew->isReservedGlobalPlacementOperator()) {
-  S.FFDiag(Loc, diag::note_constexpr_new_placement)
-  << /*Unsupported*/ 0 << E->getSourceRange();
 } else if (!OperatorNew->isReplaceableGlobalAllocationFunction()) {
-  S.FFDiag(Loc, diag::note_constexpr_new_non_replaceable)
+  S.FFDiag(S.Current->getSource(OpPC),
+   diag::note_constexpr_new_non_replaceable)
   << isa(OperatorNew) << OperatorNew;
+  return false;
+} else if (!S.getLangOpts().CPlusPlus26 &&
+   NewExpr->getNumPlacementArgs() == 1 &&
+   !OperatorNew->isReservedGlobalPlacementOperator()) {
+  if (!S.getLangOpts().CPlusPlus26) {
+S.FFDiag(S.Current->getSource(OpPC), 
diag::note_constexpr_new_placement)
+<< /*Unsupported*/ 0 << E->getSourceRange();
+return false;
+  }
+  return true;
 }
   } else {
 const auto *DeleteExpr = cast(E);
 const FunctionDecl *OperatorDelete = DeleteExpr->getOperatorDelete();
 if (!OperatorDelete->isReplaceableGlobalAllocationFunction()) {
-  S.FFDiag(Loc, diag::note_constexpr_new_non_replaceable)
+  S.FFDiag(S.Current->getSource(OpPC),
+   diag::note_constexpr_new_non_replaceable)
   << isa(OperatorDelete) << OperatorDelete;
+  return false;
 }
   }
 
diff --git a/clang/test/AST/ByteCode/cxx26.cpp 
b/clang/test/AST/ByteCode/cxx26.cpp
index 0b0e2b21e8201e7..2ac3b21695ddd80 100644
--- a/clang/test/AST/ByteCode/cxx26.cpp
+++ b/clang/test/AST/ByteCode/cxx26.cpp
@@ -1,10 +1,29 @@
 // RUN: %clang_cc1 -std=c++26 -fsyntax-only -fcxx-exceptions -verify=ref,both 
%s
 // RUN: %clang_cc1 -std=c++26 -fsyntax-only -fcxx-exceptions 
-verify=expected,both %s -fexperimental-new-constant-interpreter
 
-// both-no-diagnostics
-
 namespace VoidCast {
   constexpr void* p = nullptr;
   constexpr int* q = static_cast(p);
   static_assert(q == nullptr);
 }
+
+namespace ReplaceableAlloc {
+  struct F {
+static void* operator new(unsigned long n) {
+  return nullptr; // both-warning {{should not return a null pointer}}
+}
+  };
+
+  constexpr F *createF() {
+return new F(); // both-note {{call to class-specific 'operator new'}}
+  }
+
+  constexpr bool foo() {
+F *f = createF(); // both-note {{in call to}}
+
+delete f;
+return true;
+  }
+  static_assert(foo()); // both-error {{not an integral constant expression}} \
+// both-note {{in call to}}
+}

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang][bytecode] Fix diagnosing replaceable global allocator functions (PR #126717)

2025-02-11 Thread via cfe-commits


llvmbot wrote:




@llvm/pr-subscribers-clang

Author: Timm Baeder (tbaederr)


Changes

Don't return true here in InvalidNewDeleteExpr just because we are in C++26 
mode. This is invalid there as well.

Testcase reduced from 
libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.create/make_unique_for_overwrite.pass.cpp

---
Full diff: https://github.com/llvm/llvm-project/pull/126717.diff


2 Files Affected:

- (modified) clang/lib/AST/ByteCode/Interp.cpp (+18-14) 
- (modified) clang/test/AST/ByteCode/cxx26.cpp (+21-2) 


``diff
diff --git a/clang/lib/AST/ByteCode/Interp.cpp 
b/clang/lib/AST/ByteCode/Interp.cpp
index bf48139f57c0f09..c80be094856b086 100644
--- a/clang/lib/AST/ByteCode/Interp.cpp
+++ b/clang/lib/AST/ByteCode/Interp.cpp
@@ -1564,34 +1564,38 @@ bool CheckNewTypeMismatch(InterpState &S, CodePtr OpPC, 
const Expr *E,
 bool InvalidNewDeleteExpr(InterpState &S, CodePtr OpPC, const Expr *E) {
   assert(E);
 
-  if (S.getLangOpts().CPlusPlus26)
-return true;
-
-  const auto &Loc = S.Current->getSource(OpPC);
-
   if (const auto *NewExpr = dyn_cast(E)) {
 const FunctionDecl *OperatorNew = NewExpr->getOperatorNew();
 
-if (!S.getLangOpts().CPlusPlus26 && NewExpr->getNumPlacementArgs() > 0) {
+if (NewExpr->getNumPlacementArgs() > 0) {
   // This is allowed pre-C++26, but only an std function.
-  if (S.Current->isStdFunction())
+  if (S.getLangOpts().CPlusPlus26 || S.Current->isStdFunction())
 return true;
-  S.FFDiag(Loc, diag::note_constexpr_new_placement)
+  S.FFDiag(S.Current->getSource(OpPC), diag::note_constexpr_new_placement)
   << /*C++26 feature*/ 1 << E->getSourceRange();
-} else if (NewExpr->getNumPlacementArgs() == 1 &&
-   !OperatorNew->isReservedGlobalPlacementOperator()) {
-  S.FFDiag(Loc, diag::note_constexpr_new_placement)
-  << /*Unsupported*/ 0 << E->getSourceRange();
 } else if (!OperatorNew->isReplaceableGlobalAllocationFunction()) {
-  S.FFDiag(Loc, diag::note_constexpr_new_non_replaceable)
+  S.FFDiag(S.Current->getSource(OpPC),
+   diag::note_constexpr_new_non_replaceable)
   << isa(OperatorNew) << OperatorNew;
+  return false;
+} else if (!S.getLangOpts().CPlusPlus26 &&
+   NewExpr->getNumPlacementArgs() == 1 &&
+   !OperatorNew->isReservedGlobalPlacementOperator()) {
+  if (!S.getLangOpts().CPlusPlus26) {
+S.FFDiag(S.Current->getSource(OpPC), 
diag::note_constexpr_new_placement)
+<< /*Unsupported*/ 0 << E->getSourceRange();
+return false;
+  }
+  return true;
 }
   } else {
 const auto *DeleteExpr = cast(E);
 const FunctionDecl *OperatorDelete = DeleteExpr->getOperatorDelete();
 if (!OperatorDelete->isReplaceableGlobalAllocationFunction()) {
-  S.FFDiag(Loc, diag::note_constexpr_new_non_replaceable)
+  S.FFDiag(S.Current->getSource(OpPC),
+   diag::note_constexpr_new_non_replaceable)
   << isa(OperatorDelete) << OperatorDelete;
+  return false;
 }
   }
 
diff --git a/clang/test/AST/ByteCode/cxx26.cpp 
b/clang/test/AST/ByteCode/cxx26.cpp
index 0b0e2b21e8201e7..2ac3b21695ddd80 100644
--- a/clang/test/AST/ByteCode/cxx26.cpp
+++ b/clang/test/AST/ByteCode/cxx26.cpp
@@ -1,10 +1,29 @@
 // RUN: %clang_cc1 -std=c++26 -fsyntax-only -fcxx-exceptions -verify=ref,both 
%s
 // RUN: %clang_cc1 -std=c++26 -fsyntax-only -fcxx-exceptions 
-verify=expected,both %s -fexperimental-new-constant-interpreter
 
-// both-no-diagnostics
-
 namespace VoidCast {
   constexpr void* p = nullptr;
   constexpr int* q = static_cast(p);
   static_assert(q == nullptr);
 }
+
+namespace ReplaceableAlloc {
+  struct F {
+static void* operator new(unsigned long n) {
+  return nullptr; // both-warning {{should not return a null pointer}}
+}
+  };
+
+  constexpr F *createF() {
+return new F(); // both-note {{call to class-specific 'operator new'}}
+  }
+
+  constexpr bool foo() {
+F *f = createF(); // both-note {{in call to}}
+
+delete f;
+return true;
+  }
+  static_assert(foo()); // both-error {{not an integral constant expression}} \
+// both-note {{in call to}}
+}

``




https://github.com/llvm/llvm-project/pull/126717
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang-format] modified goto bool to enum (PR #65140)

2025-02-11 Thread via cfe-commits


mydeveloperday wrote:

"closed due to inactivity"

https://github.com/llvm/llvm-project/pull/65140
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Offload] Treat an empty packager archicture as 'generic' (PR #126655)

2025-02-11 Thread Michael Kruse via cfe-commits


Meinersbur wrote:

What happens with `-march` not specified? I only know about gcc falling back on 
`-march=x86-64`, but depends on the target triple.

https://github.com/llvm/llvm-project/pull/126655
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [TBAA] Refine pointer-tbaa for void pointers by pointer depth (PR #126047)

2025-02-11 Thread Bruno De Fraine via cfe-commits

brunodf-snps wrote:

> I'm not qualified to review this, so please make sure you understand changes 
> to CWG158 test and they are what you expect. Specifically, I'd like to avoid 
> updating the test just to make FileCheck happy.

All the updates to FileCheck patterns match what I expected. The affected test 
case of cwg158.cpp was added by @fhahn in commit 
decb87452d8e3b93b21ab9e4c3dd03d85cbebfa5, I hope he will still chime in.

https://github.com/llvm/llvm-project/pull/126047
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Offload] Treat an empty packager archicture as 'generic' (PR #126655)

2025-02-11 Thread Joseph Huber via cfe-commits

jhuber6 wrote:

> What happens with `-march` not specified? I only know about gcc falling back 
> on `-march=x86-64`, but depends on the target triple.

Same thing that happens when you do `--target=x86_64-unknown-linux-gnu` without 
any `-march` option. For the GPU it defaults to like gfx700 or something, but 
that's not really our fault since the user never specified one.

https://github.com/llvm/llvm-project/pull/126655
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Offload] Treat an empty packager archicture as 'generic' (PR #126655)

2025-02-11 Thread Joseph Huber via cfe-commits


https://github.com/jhuber6 updated 
https://github.com/llvm/llvm-project/pull/126655

>From 32907913dc72a0d314e9ea9a75f0d3191e7fcb7f Mon Sep 17 00:00:00 2001
From: Joseph Huber 
Date: Mon, 10 Feb 2025 20:30:41 -0600
Subject: [PATCH] [Offload] Treat an empty packager archicture as 'generic'

Summary:
The `clang-offload-packager` records the architecture of the job.
Currently there are cases where this will be empty. SYCL, CPU, and when
the user manually overrides it to be empty. In these cases we should
always consider it 'generic'. Adding this string both makes it clear how
it behaves and triggers the special handling for this architecture,
allowing it to bind to different architectures.
---
 clang/lib/Driver/ToolChains/Clang.cpp  |  2 +-
 clang/test/Driver/linker-wrapper.c |  2 +-
 clang/test/Driver/sycl-offload-jit.cpp |  2 +-
 .../clang-linker-wrapper/ClangLinkerWrapper.cpp| 14 --
 4 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/Clang.cpp 
b/clang/lib/Driver/ToolChains/Clang.cpp
index ea376ac00d9108b..5deafa2ad0f4a68 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -9163,7 +9163,7 @@ void OffloadPackager::ConstructJob(Compilation &C, const 
JobAction &JA,
 SmallVector Parts{
 "file=" + File.str(),
 "triple=" + TC->getTripleString(),
-"arch=" + Arch.str(),
+"arch=" + (Arch.empty() ? "generic" : Arch.str()),
 "kind=" + Kind.str(),
 };
 
diff --git a/clang/test/Driver/linker-wrapper.c 
b/clang/test/Driver/linker-wrapper.c
index f416ee5f4463bcc..df0a1d1e9a84de1 100644
--- a/clang/test/Driver/linker-wrapper.c
+++ b/clang/test/Driver/linker-wrapper.c
@@ -59,7 +59,7 @@ __attribute__((visibility("protected"), used)) int x;
 // RUN:   --linker-path=/usr/bin/ld.lld --whole-archive %t.a 
--no-whole-archive \
 // RUN:   %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CPU-LINK
 
-// CPU-LINK: clang{{.*}} -o {{.*}}.img --target=x86_64-unknown-linux-gnu 
-march=native -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o -Wl,-Bsymbolic 
-shared -Wl,--whole-archive {{.*}}.a -Wl,--no-whole-archive
+// CPU-LINK: clang{{.*}} -o {{.*}}.img --target=x86_64-unknown-linux-gnu -O2 
-flto -Wl,--no-undefined {{.*}}.o {{.*}}.o -Wl,-Bsymbolic -shared 
-Wl,--whole-archive {{.*}}.a -Wl,--no-whole-archive
 
 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o
 // RUN: clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu 
-mllvm -openmp-opt-disable \
diff --git a/clang/test/Driver/sycl-offload-jit.cpp 
b/clang/test/Driver/sycl-offload-jit.cpp
index eb192e08a3bc0c5..e040f4ded18e92f 100644
--- a/clang/test/Driver/sycl-offload-jit.cpp
+++ b/clang/test/Driver/sycl-offload-jit.cpp
@@ -27,7 +27,7 @@
 // CHK-DEVICE-TRIPLE-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
 // CHK-DEVICE-TRIPLE-SAME: "-fsycl-is-device"
 // CHK-DEVICE-TRIPLE-SAME: "-O2"
-// CHK-DEVICE-TRIPLE: clang-offload-packager{{.*}} 
"--image=file={{.*}}.bc,triple=spirv64-unknown-unknown,arch=,kind=sycl"
+// CHK-DEVICE-TRIPLE: clang-offload-packager{{.*}} 
"--image=file={{.*}}.bc,triple=spirv64-unknown-unknown,arch=generic,kind=sycl"
 
 /// Check -fsycl-is-device is passed when compiling for the device.
 /// Check -fsycl-is-host is passed when compiling for host.
diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp 
b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
index b189cfee674dd3e..aa43b2f5f2a1b32 100644
--- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -474,8 +474,6 @@ Expected clang(ArrayRef InputFiles, 
const ArgList &Args) {
 
   const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ));
   StringRef Arch = Args.getLastArgValue(OPT_arch_EQ);
-  if (Arch.empty())
-Arch = "native";
   // Create a new file to write the linked device image to. Assume that the
   // input filename already has the device and architecture.
   auto TempFileOrErr =
@@ -492,11 +490,14 @@ Expected clang(ArrayRef InputFiles, 
const ArgList &Args) {
   "-o",
   *TempFileOrErr,
   Args.MakeArgString("--target=" + Triple.getTriple()),
-  Triple.isAMDGPU() ? Args.MakeArgString("-mcpu=" + Arch)
-: Args.MakeArgString("-march=" + Arch),
-  Args.MakeArgString("-" + OptLevel),
   };
 
+  if (!Arch.empty())
+Triple.isAMDGPU() ? CmdArgs.push_back(Args.MakeArgString("-mcpu=" + Arch))
+  : CmdArgs.push_back(Args.MakeArgString("-march=" + 
Arch));
+
+  CmdArgs.push_back(Args.MakeArgString("-" + OptLevel));
+
   // Forward all of the `--offload-opt` and similar options to the device.
   CmdArgs.push_back("-flto");
   for (auto &Arg : Args.filtered(OPT_offload_opt_eq_minus, OPT_mllvm))
@@ -826,8 +827,9 @@ DerivedArgList getLinkerArgs(ArrayRef Input,
 
   // Set the subarchitecture and target triple for this co

[clang] [CodeGen][ObjC] Invalidate cached ObjC class layout information after parsing ObjC class implementations if new ivars are added to the interface (PR #126591)

2025-02-11 Thread via cfe-commits


AZero13 wrote:

> How often does this appear in practice?  Normally, `NSObject` is implemented 
> in a shared library and it is completely valid in a non-fragile ABI for a 
> future version to have an extra ivar added (and, in the wild, we've seen 
> people do this for extra state in debug builds).
> 
> 
> 
> I implemented this optimisation for the GNU runtime 15-20 years ago, but we 
> stopped shipping it around ten or so years ago because the conditions that 
> made it sound were triggered only if people statically linked the Foundation 
> framework and did LTO, and that basically never happened.
> 
> 

WebKit does this so it is worth shipping for the Apple runtime at least.

Also there are cases in foundation where this happens

https://github.com/llvm/llvm-project/pull/126591
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Offload] Treat an empty packager architecture as 'generic' (PR #126655)

2025-02-11 Thread Michael Kruse via cfe-commits


https://github.com/Meinersbur edited 
https://github.com/llvm/llvm-project/pull/126655
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] Reapply "[Driver][ROCm][OpenMP] Fix default ockl linking for OpenMP."… (PR #126671)

2025-02-11 Thread Joseph Huber via cfe-commits



@@ -68,7 +68,9 @@ llvm::opt::DerivedArgList 
*AMDGPUOpenMPToolChain::TranslateArgs(
 Action::OffloadKind DeviceOffloadKind) const {
   DerivedArgList *DAL =
   HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
-  if (!DAL || Args.hasArg(options::OPT_fsanitize_EQ))
+  // Skip sanitize options passed from the HostTC. The decision to instrument
+  // device code is computed only by 'shouldSkipSanitizeOption'.
+  if (!DAL && DAL->hasArg(options::OPT_fsanitize_EQ))

jhuber6 wrote:

Silently ignoring this sounds pretty awful to me, but if that's what ROCm wants 
I guess that's the behavior.

https://github.com/llvm/llvm-project/pull/126671
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] Reapply "[Driver][ROCm][OpenMP] Fix default ockl linking for OpenMP."… (PR #126671)

2025-02-11 Thread Joseph Huber via cfe-commits



@@ -68,7 +68,9 @@ llvm::opt::DerivedArgList 
*AMDGPUOpenMPToolChain::TranslateArgs(
 Action::OffloadKind DeviceOffloadKind) const {
   DerivedArgList *DAL =
   HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
-  if (!DAL || Args.hasArg(options::OPT_fsanitize_EQ))
+  // Skip sanitize options passed from the HostTC. The decision to instrument
+  // device code is computed only by 'shouldSkipSanitizeOption'.
+  if (!DAL && DAL->hasArg(options::OPT_fsanitize_EQ))

jhuber6 wrote:

I just don't really understand what problem this is meant to be solving. We 
don't need to do this for *any* of the other options that the device toolchain 
uses, so what's special about this one? If the user does `-fsanitize=address` 
it will show up in the device toolchain args. If it's not legal for some case, 
then emit an error. If the user doesn't want it then they can do `-Xarch-host 
-fsanitize=address`.

https://github.com/llvm/llvm-project/pull/126671
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [HLSL] Appropriately set function attribute optnone (PR #125937)

2025-02-11 Thread S. Bharadwaj Yadavalli via cfe-commits


https://github.com/bharadwajy updated 
https://github.com/llvm/llvm-project/pull/125937

>From 63a728dd7f5efff32a2f90608adfdc13d540f34f Mon Sep 17 00:00:00 2001
From: "S. Bharadwaj Yadavalli" 
Date: Tue, 4 Feb 2025 12:48:09 -0500
Subject: [PATCH 1/4] [HLSL] Set function optnone attribute appropriately

When optimization is disabled, set optnone attribute
  - for all module functions when targeting Library shaders
  - only for entry function when targeting non-Library shaders

Update tests in accordance with the change.
---
 clang/lib/CodeGen/CGHLSLRuntime.cpp   |  10 +
 .../CodeGenHLSL/GlobalConstructorLib.hlsl |   8 +-
 clang/test/CodeGenHLSL/GlobalDestructors.hlsl |   4 +-
 .../test/CodeGenHLSL/inline-constructors.hlsl |  16 +-
 clang/test/CodeGenHLSL/inline-functions.hlsl  | 189 +-
 .../CodeGenHLSL/this-assignment-overload.hlsl |   6 +-
 6 files changed, 174 insertions(+), 59 deletions(-)

diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp 
b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index 2ce54cc3c52ef..24acc9a559be2 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -345,6 +345,9 @@ void clang::CodeGen::CGHLSLRuntime::setHLSLEntryAttributes(
 WaveSizeAttr->getPreferred());
 Fn->addFnAttr(WaveSizeKindStr, WaveSizeStr);
   }
+  if (CGM.getCodeGenOpts().OptimizationLevel == 0) {
+Fn->addFnAttr(llvm::Attribute::OptimizeNone);
+  }
   Fn->addFnAttr(llvm::Attribute::NoInline);
 }
 
@@ -446,6 +449,13 @@ void CGHLSLRuntime::setHLSLFunctionAttributes(const 
FunctionDecl *FD,
 const StringRef ExportAttrKindStr = "hlsl.export";
 Fn->addFnAttr(ExportAttrKindStr);
   }
+  llvm::Triple T(Fn->getParent()->getTargetTriple());
+  if (T.getEnvironment() == llvm::Triple::EnvironmentType::Library) {
+if (CGM.getCodeGenOpts().OptimizationLevel == 0) {
+  Fn->addFnAttr(llvm::Attribute::OptimizeNone);
+  Fn->addFnAttr(llvm::Attribute::NoInline);
+}
+  }
 }
 
 static void gatherFunctions(SmallVectorImpl &Fns, llvm::Module &M,
diff --git a/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl 
b/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl
index 09c44f6242c53..39d7c73e832a1 100644
--- a/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl
+++ b/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm 
-disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -O0 %s -o 
- | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -O1 %s -o 
- | FileCheck %s --check-prefixes=CHECK,INLINE
 
 // Make sure global variable for ctors exist for lib profile.
 // CHECK:@llvm.global_ctors
@@ -31,12 +31,12 @@ void SecondEntry() {}
 // CHECK: ret void
 
 
-// Verify the constructor is alwaysinline
-// NOINLINE: ; Function Attrs: {{.*}}alwaysinline
+// Verify the constructor is optnone
+// NOINLINE: ; Function Attrs: {{.*}} optnone
 // NOINLINE-NEXT: define linkonce_odr void @_ZN4hlsl8RWBufferIfEC2Ev({{.*}} 
[[CtorAttr:\#[0-9]+]]
 
 // NOINLINE: ; Function Attrs: {{.*}}alwaysinline
 // NOINLINE-NEXT: define internal void 
@_GLOBAL__sub_I_GlobalConstructorLib.hlsl() [[InitAttr:\#[0-9]+]]
 
 // NOINLINE-DAG: attributes [[InitAttr]] = {{.*}} alwaysinline
-// NOINLINE-DAG: attributes [[CtorAttr]] = {{.*}} alwaysinline
+// NOINLINE-DAG: attributes [[CtorAttr]] = {{.*}} optnone
diff --git a/clang/test/CodeGenHLSL/GlobalDestructors.hlsl 
b/clang/test/CodeGenHLSL/GlobalDestructors.hlsl
index f98318601134b..8961e1a7e59cd 100644
--- a/clang/test/CodeGenHLSL/GlobalDestructors.hlsl
+++ b/clang/test/CodeGenHLSL/GlobalDestructors.hlsl
@@ -1,7 +1,7 @@
 // RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm 
-disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CS,NOINLINE,CHECK
 // RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm 
-disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=LIB,NOINLINE,CHECK
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -O0 %s -o 
- | FileCheck %s --check-prefixes=INLINE,CHECK
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -O0 %s -o 
- | FileCheck %s --check-prefixes=INLINE,CHECK
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -O1 %s -o 
- | FileCheck %s --check-prefixes=INLINE,CHECK
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -O1 %s -o 
- | FileCheck %s --check-prefixes=INLINE,CHECK
 
 // Tests that constructors and destructors are appropriately generated for 
globals
 // and that their calls are inlined when AlwaysInline is run
diff --git a/clang/test/CodeGenHLSL/inline-constructors.hlsl 
b/clang/test/CodeGenHLSL/inline-constructors.hlsl
index b0d5a783fb372..298d7d4272678 100644
--- a/clang/test/CodeGenHLSL/inline-constructors.hlsl
+++ b/clang/test/CodeGenHLSL/in

[clang] [HLSL] Change clang Driver Options to not set CXXOperatorNames (PR #126758)

2025-02-11 Thread Farzon Lotfi via cfe-commits





farzonl wrote:

Is it OK to keep the tests as CHECK-LABEL? The tests still pass with LABEL. 
Or should this change to `// CHECK: FunctionDecl {{.*}}  NAME void ()`, which is 
how other tests seem to be set up?

https://github.com/llvm/llvm-project/pull/126758
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [HLSL] Change clang Driver Options to not set CXXOperatorNames (PR #126758)

2025-02-11 Thread Farzon Lotfi via cfe-commits


https://github.com/farzonl updated 
https://github.com/llvm/llvm-project/pull/126758

>From 3b7e458bfeb2abab799789d30ebaa4b214e4168e Mon Sep 17 00:00:00 2001
From: Farzon Lotfi 
Date: Tue, 11 Feb 2025 11:07:23 -0500
Subject: [PATCH 1/2] [HLSL] Change clang Driver Options to not set
 CXXOperatorNames

---
 clang/include/clang/Driver/Options.td |  2 +-
 .../use-cxx-alt-operator-names.hlsl   | 41 +++
 2 files changed, 42 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/CodeGenHLSL/use-cxx-alt-operator-names.hlsl

diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index 1cf62ab4661345..618815db284340 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -3397,7 +3397,7 @@ def fno_objc_weak : Flag<["-"], "fno-objc-weak">, 
Group,
 def fno_omit_frame_pointer : Flag<["-"], "fno-omit-frame-pointer">, 
Group,
   Visibility<[ClangOption, FlangOption]>;
 defm operator_names : BoolFOption<"operator-names",
-  LangOpts<"CXXOperatorNames">, Default,
+  LangOpts<"CXXOperatorNames">, Default,
   NegFlag,
   PosFlag>;
diff --git a/clang/test/CodeGenHLSL/use-cxx-alt-operator-names.hlsl 
b/clang/test/CodeGenHLSL/use-cxx-alt-operator-names.hlsl
new file mode 100644
index 00..8ae253a9f3c06c
--- /dev/null
+++ b/clang/test/CodeGenHLSL/use-cxx-alt-operator-names.hlsl
@@ -0,0 +1,41 @@
+
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s  \
+// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s
+
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN: spirv-unknown-vulkan-compute %s \
+// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s
+
+// CHECK-LABEL: and
+void and() {}
+
+// CHECK-LABEL: and_eq
+void and_eq() {}
+
+// CHECK-LABEL: bitand
+void bitand() {}
+
+// CHECK-LABEL: bitor
+void bitor() {}
+
+// CHECK-LABEL: compl
+void compl() {}
+
+// CHECK-LABEL: not
+void not() {}
+
+// CHECK-LABEL: not_eq
+void not_eq() {}
+
+// CHECK-LABEL: or
+void or() {}
+
+// CHECK-LABEL: or_eq
+void or_eq() {}
+
+// CHECK-LABEL: xor
+void xor() {}
+
+// CHECK-LABEL: xor_eq
+void xor_eq() {}

>From 68b78917adceda57e83e7ebfc68053ed30698635 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi 
Date: Tue, 11 Feb 2025 11:27:10 -0500
Subject: [PATCH 2/2] address pr feedback

---
 .../use-cxx-alt-operator-names.hlsl  | 9 +
 1 file changed, 1 insertion(+), 8 deletions(-)
 rename clang/test/{CodeGenHLSL => SemaHLSL}/use-cxx-alt-operator-names.hlsl 
(55%)

diff --git a/clang/test/CodeGenHLSL/use-cxx-alt-operator-names.hlsl 
b/clang/test/SemaHLSL/use-cxx-alt-operator-names.hlsl
similarity index 55%
rename from clang/test/CodeGenHLSL/use-cxx-alt-operator-names.hlsl
rename to clang/test/SemaHLSL/use-cxx-alt-operator-names.hlsl
index 8ae253a9f3c06c..2ee7ae2a1b1fe5 100644
--- a/clang/test/CodeGenHLSL/use-cxx-alt-operator-names.hlsl
+++ b/clang/test/SemaHLSL/use-cxx-alt-operator-names.hlsl
@@ -1,11 +1,4 @@
-
-// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
-// RUN:   dxil-pc-shadermodel6.3-library %s  \
-// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s
-
-// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
-// RUN: spirv-unknown-vulkan-compute %s \
-// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -ast-dump | 
FileCheck %s
 
 // CHECK-LABEL: and
 void and() {}

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] Reapply "[Driver][ROCm][OpenMP] Fix default ockl linking for OpenMP."… (PR #126671)

2025-02-11 Thread Amit Kumar Pandey via cfe-commits



@@ -68,7 +68,9 @@ llvm::opt::DerivedArgList 
*AMDGPUOpenMPToolChain::TranslateArgs(
 Action::OffloadKind DeviceOffloadKind) const {
   DerivedArgList *DAL =
   HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
-  if (!DAL || Args.hasArg(options::OPT_fsanitize_EQ))
+  // Skip sanitize options passed from the HostTC. The decision to instrument
+  // device code is computed only by 'shouldSkipSanitizeOption'.
+  if (!DAL && DAL->hasArg(options::OPT_fsanitize_EQ))

ampandey-1995 wrote:

> Sorry I'm still confused. If the user cannot enable `-fsanitize=address` if 
> the GPU target doesn't support `xnack` then that should be separate driver 
> check / error. Unless we're just silently ignoring that?

Yes, we are silently ignoring that: ```-fsanitize=address``` is applied to the 
host only, and it is skipped for the device.



https://github.com/llvm/llvm-project/pull/126671
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [HLSL] Change clang Driver Options to not set CXXOperatorNames (PR #126758)

2025-02-11 Thread via cfe-commits


llvmbot wrote:




@llvm/pr-subscribers-clang

Author: Farzon Lotfi (farzonl)


Changes

- Disable `CXXOperatorNames` for HLSL
- Add tests to confirm we can use the alt names as functions

---
Full diff: https://github.com/llvm/llvm-project/pull/126758.diff


2 Files Affected:

- (modified) clang/include/clang/Driver/Options.td (+1-1) 
- (added) clang/test/CodeGenHLSL/use-cxx-alt-operator-names.hlsl (+41) 


``diff
diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index 1cf62ab46613456..618815db2843404 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -3397,7 +3397,7 @@ def fno_objc_weak : Flag<["-"], "fno-objc-weak">, 
Group,
 def fno_omit_frame_pointer : Flag<["-"], "fno-omit-frame-pointer">, 
Group,
   Visibility<[ClangOption, FlangOption]>;
 defm operator_names : BoolFOption<"operator-names",
-  LangOpts<"CXXOperatorNames">, Default,
+  LangOpts<"CXXOperatorNames">, Default,
   NegFlag,
   PosFlag>;
diff --git a/clang/test/CodeGenHLSL/use-cxx-alt-operator-names.hlsl 
b/clang/test/CodeGenHLSL/use-cxx-alt-operator-names.hlsl
new file mode 100644
index 000..8ae253a9f3c06cb
--- /dev/null
+++ b/clang/test/CodeGenHLSL/use-cxx-alt-operator-names.hlsl
@@ -0,0 +1,41 @@
+
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s  \
+// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s
+
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN: spirv-unknown-vulkan-compute %s \
+// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s
+
+// CHECK-LABEL: and
+void and() {}
+
+// CHECK-LABEL: and_eq
+void and_eq() {}
+
+// CHECK-LABEL: bitand
+void bitand() {}
+
+// CHECK-LABEL: bitor
+void bitor() {}
+
+// CHECK-LABEL: compl
+void compl() {}
+
+// CHECK-LABEL: not
+void not() {}
+
+// CHECK-LABEL: not_eq
+void not_eq() {}
+
+// CHECK-LABEL: or
+void or() {}
+
+// CHECK-LABEL: or_eq
+void or_eq() {}
+
+// CHECK-LABEL: xor
+void xor() {}
+
+// CHECK-LABEL: xor_eq
+void xor_eq() {}

``




https://github.com/llvm/llvm-project/pull/126758
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] Reapply "[Driver][ROCm][OpenMP] Fix default ockl linking for OpenMP."… (PR #126671)

2025-02-11 Thread Joseph Huber via cfe-commits



@@ -68,7 +68,9 @@ llvm::opt::DerivedArgList 
*AMDGPUOpenMPToolChain::TranslateArgs(
 Action::OffloadKind DeviceOffloadKind) const {
   DerivedArgList *DAL =
   HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
-  if (!DAL || Args.hasArg(options::OPT_fsanitize_EQ))
+  // Skip sanitize options passed from the HostTC. The decision to instrument
+  // device code is computed only by 'shouldSkipSanitizeOption'.
+  if (!DAL && DAL->hasArg(options::OPT_fsanitize_EQ))

jhuber6 wrote:

I just don't understand the rationale for removing it from the argument list 
instead of just guarding wherever we look up this flag.

https://github.com/llvm/llvm-project/pull/126671
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] Enable -Wunique-object-duplication inside templated code (PR #125902)

2025-02-11 Thread Devon Loehr via cfe-commits



@@ -3669,6 +3669,7 @@ class Sema final : public SemaBase {
   /// cause problems if the variable is mutable, its initialization is
   /// effectful, or its address is taken.
   bool GloballyUniqueObjectMightBeAccidentallyDuplicated(const VarDecl *Dcl);
+  void DiagnoseDangerousUniqueObjectDuplication(const VarDecl *Dcl);

DKLoehr wrote:

Not sure I agree, but not too important so changed it.

https://github.com/llvm/llvm-project/pull/125902
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] Reapply "[Driver][ROCm][OpenMP] Fix default ockl linking for OpenMP."… (PR #126671)

2025-02-11 Thread Amit Kumar Pandey via cfe-commits


https://github.com/ampandey-1995 edited 
https://github.com/llvm/llvm-project/pull/126671
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] Reapply "[Driver][ROCm][OpenMP] Fix default ockl linking for OpenMP."… (PR #126671)

2025-02-11 Thread Joseph Huber via cfe-commits



@@ -68,7 +68,9 @@ llvm::opt::DerivedArgList 
*AMDGPUOpenMPToolChain::TranslateArgs(
 Action::OffloadKind DeviceOffloadKind) const {
   DerivedArgList *DAL =
   HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
-  if (!DAL || Args.hasArg(options::OPT_fsanitize_EQ))
+  // Skip sanitize options passed from the HostTC. The decision to instrument
+  // device code is computed only by 'shouldSkipSanitizeOption'.
+  if (!DAL && DAL->hasArg(options::OPT_fsanitize_EQ))

jhuber6 wrote:

So, is it prohibitively difficult to do something more akin to.

```c
if (Args.hasFlag(OPT_fsanitize_address, OPT_fno_sanitize, false) &&
Args.hasFlag(OPT_fgpu_sanitize, OPT_fno_gpu_sanitize, true)) {
  if (isSanitizerLegal(Args))
// something
  else
Diags.warn("incompatible, ignoring");
}
``` 

https://github.com/llvm/llvm-project/pull/126671
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] Reapply "[Driver][ROCm][OpenMP] Fix default ockl linking for OpenMP."… (PR #126671)

2025-02-11 Thread Joseph Huber via cfe-commits



@@ -68,7 +68,9 @@ llvm::opt::DerivedArgList 
*AMDGPUOpenMPToolChain::TranslateArgs(
 Action::OffloadKind DeviceOffloadKind) const {
   DerivedArgList *DAL =
   HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
-  if (!DAL || Args.hasArg(options::OPT_fsanitize_EQ))
+  // Skip sanitize options passed from the HostTC. The decision to instrument
+  // device code is computed only by 'shouldSkipSanitizeOption'.
+  if (!DAL && DAL->hasArg(options::OPT_fsanitize_EQ))

jhuber6 wrote:

If the goal is to ignore it, why can't we just instead be like `if 
(TCArgs.hasFlag(...) && isValid)` on every use instead of hacking around it in 
the argument list.

https://github.com/llvm/llvm-project/pull/126671
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] Reapply "[Driver][ROCm][OpenMP] Fix default ockl linking for OpenMP."… (PR #126671)

2025-02-11 Thread via cfe-commits



@@ -68,7 +68,9 @@ llvm::opt::DerivedArgList 
*AMDGPUOpenMPToolChain::TranslateArgs(
 Action::OffloadKind DeviceOffloadKind) const {
   DerivedArgList *DAL =
   HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
-  if (!DAL || Args.hasArg(options::OPT_fsanitize_EQ))
+  // Skip sanitize options passed from the HostTC. The decision to instrument
+  // device code is computed only by 'shouldSkipSanitizeOption'.
+  if (!DAL && DAL->hasArg(options::OPT_fsanitize_EQ))

b-sumner wrote:

> Silently ignoring this sounds pretty awful to me, but if that's what ROCm 
> wants I guess that's the behavior.

We're not (supposed to be) silently ignoring it.  We should be producing a 
warning that the device code has not been instrumented.

https://github.com/llvm/llvm-project/pull/126671
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] Reapply "[Driver][ROCm][OpenMP] Fix default ockl linking for OpenMP."… (PR #126671)

2025-02-11 Thread Amit Kumar Pandey via cfe-commits



@@ -68,7 +68,9 @@ llvm::opt::DerivedArgList 
*AMDGPUOpenMPToolChain::TranslateArgs(
 Action::OffloadKind DeviceOffloadKind) const {
   DerivedArgList *DAL =
   HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
-  if (!DAL || Args.hasArg(options::OPT_fsanitize_EQ))
+  // Skip sanitize options passed from the HostTC. The decision to instrument
+  // device code is computed only by 'shouldSkipSanitizeOption'.
+  if (!DAL && DAL->hasArg(options::OPT_fsanitize_EQ))

ampandey-1995 wrote:

> if it's not legal for some case, then emit an error. If the user doesn't want 
> it then they can do `-Xarch-host -fsanitize=address`.

We actually emit a warning, not an error, since it's legal for the host to be 
ASan-instrumented, but for the device it depends on whether ```--offload-arch=``` 
contains :xnack+. If :xnack+ is present we allow it; otherwise we emit a warning 
and skip it.

https://github.com/llvm/llvm-project/pull/126671
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] Reapply "[Driver][ROCm][OpenMP] Fix default ockl linking for OpenMP."… (PR #126671)

2025-02-11 Thread Amit Kumar Pandey via cfe-commits



@@ -68,7 +68,9 @@ llvm::opt::DerivedArgList 
*AMDGPUOpenMPToolChain::TranslateArgs(
 Action::OffloadKind DeviceOffloadKind) const {
   DerivedArgList *DAL =
   HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
-  if (!DAL || Args.hasArg(options::OPT_fsanitize_EQ))
+  // Skip sanitize options passed from the HostTC. The decision to instrument
+  // device code is computed only by 'shouldSkipSanitizeOption'.
+  if (!DAL && DAL->hasArg(options::OPT_fsanitize_EQ))

ampandey-1995 wrote:

We actually check it in this part of the code 
https://github.com/llvm/llvm-project/blob/752ebec054f8b0bf7c221813724984f10096355a/clang/lib/Driver/ToolChains/AMDGPU.cpp#L1118

https://github.com/llvm/llvm-project/pull/126671
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [lld] [llvm] Integrated Distributed ThinLTO (DTLTO): Initial support (PR #126654)

2025-02-11 Thread Tobias Hieta via cfe-commits


https://github.com/tru edited https://github.com/llvm/llvm-project/pull/126654
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [lld] [llvm] Integrated Distributed ThinLTO (DTLTO): Initial support (PR #126654)

2025-02-11 Thread Tobias Hieta via cfe-commits



@@ -116,7 +117,18 @@ BitcodeCompiler::BitcodeCompiler(COFFLinkerContext &c) : 
ctx(c) {
 
   // Initialize ltoObj.
   lto::ThinBackend backend;
-  if (ctx.config.thinLTOIndexOnly) {
+  if (!ctx.config.DTLTODistributor.empty()) {
+StringRef version = getenv("LLD_VERSION"); // For testing only.
+if (version.empty())
+  version = ctx.saver.save(getLLDVersion());
+backend = lto::createOutOfProcessThinBackend(

tru wrote:

When I read this code at first I was confused why it needed to have the 
arguments for the parallelization, I looked at the comment in LTO.h later and 
realized that the arguments were not used for the actual codegen, but for 
generating the index. But I wonder if we even need to pass that as an argument 
then - it reads to me like it's controlling the external process in some way.

https://github.com/llvm/llvm-project/pull/126654
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [lld] [llvm] Integrated Distributed ThinLTO (DTLTO): Initial support (PR #126654)

2025-02-11 Thread Tobias Hieta via cfe-commits


https://github.com/tru commented:

Hello! I had a quick glance at this and while I am no expert in the actual LTO 
internal code, I was totally able to understand how we could integrate this 
with our build system (fastbuild) so I am really happy you have decided to 
upstream this. I am planning on doing an internal prototype of this soon to 
understand how it would affect our applications.

That said - I hope that someone who's more qualified in the code can review the 
actual implementation.

https://github.com/llvm/llvm-project/pull/126654
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [lld] [llvm] Integrated Distributed ThinLTO (DTLTO): Initial support (PR #126654)

2025-02-11 Thread Tobias Hieta via cfe-commits



@@ -969,6 +969,10 @@ def Xlinker : Separate<["-"], "Xlinker">, 
Flags<[LinkerInput, RenderAsInput]>,
   Visibility<[ClangOption, CLOption, FlangOption]>,
   HelpText<"Pass  to the linker">, MetaVarName<"">,
   Group;
+def Xdist : Separate<["-"], "Xdist">, Flags<[LinkOption]>,

tru wrote:

Not sure I am a fan of Xdist here. Since the feature is called DTLTO, why not 
Xdtlto? "dist" is a bit ambiguous. 

https://github.com/llvm/llvm-project/pull/126654
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [lld] [llvm] Integrated Distributed ThinLTO (DTLTO): Initial support (PR #126654)

2025-02-11 Thread Tobias Hieta via cfe-commits


tru wrote:

> > ⚠️ C/C++ code formatter, clang-format found issues in your code. ⚠️
> > You can test this locally with the following command:
> > View the diff from clang-format here.
> 
> I believe this failure is OK as I have followed the (non-standard) formatting 
> in the flagged file which the code for the other ThinLTO backends use.

I agree.

https://github.com/llvm/llvm-project/pull/126654
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] Enable -Wunique-object-duplication inside templated code (PR #125902)

2025-02-11 Thread Devon Loehr via cfe-commits



@@ -3669,6 +3669,7 @@ class Sema final : public SemaBase {
   /// cause problems if the variable is mutable, its initialization is
   /// effectful, or its address is taken.
   bool GloballyUniqueObjectMightBeAccidentallyDuplicated(const VarDecl *Dcl);
+  void DiagnoseDangerousUniqueObjectDuplication(const VarDecl *Dcl);

DKLoehr wrote:

I think it's meaningful because it's possible for objects to be duplicated 
"harmlessly", in which case we don't warn because the only problem is a little 
extra memory usage. This is the case for constants whose initializers don't 
have side effects.

https://github.com/llvm/llvm-project/pull/125902
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] Enable -Wunique-object-duplication inside templated code (PR #125902)

2025-02-11 Thread Devon Loehr via cfe-commits


https://github.com/DKLoehr updated 
https://github.com/llvm/llvm-project/pull/125902

>From d95344cf393bcf0a8580e81f4848f5f72c67a652 Mon Sep 17 00:00:00 2001
From: Devon Loehr 
Date: Tue, 4 Feb 2025 16:47:01 +
Subject: [PATCH 1/5] Move into separate function, call in
 CheckCompleteVariableDeclaration

---
 clang/include/clang/Sema/Sema.h |  1 +
 clang/lib/Sema/SemaDecl.cpp | 94 +
 2 files changed, 50 insertions(+), 45 deletions(-)

diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 1870d1271c556..4aa5d82a7b535 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -3669,6 +3669,7 @@ class Sema final : public SemaBase {
   /// cause problems if the variable is mutable, its initialization is
   /// effectful, or its address is taken.
   bool GloballyUniqueObjectMightBeAccidentallyDuplicated(const VarDecl *Dcl);
+  void DiagnoseDangerousUniqueObjectDuplication(const VarDecl *Dcl);
 
   /// AddInitializerToDecl - Adds the initializer Init to the
   /// declaration dcl. If DirectInit is true, this is C++ direct
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 74e0fcec2d911..eb8fbf424d264 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -13436,6 +13436,53 @@ bool 
Sema::GloballyUniqueObjectMightBeAccidentallyDuplicated(
   return true;
 }
 
+void Sema::DiagnoseDangerousUniqueObjectDuplication(const VarDecl* VD) {
+  // If this object has external linkage and hidden visibility, it might be
+  // duplicated when built into a shared library, which causes problems if it's
+  // mutable (since the copies won't be in sync) or its initialization has side
+  // effects (since it will run once per copy instead of once globally)
+  // FIXME: Windows uses dllexport/dllimport instead of visibility, and we 
don't
+  // handle that yet. Disable the warning on Windows for now.
+  // FIXME: Checking templates can cause false positives if the template in
+  // question is never instantiated (e.g. only specialized templates are used).
+  if (!Context.getTargetInfo().shouldDLLImportComdatSymbols() &&
+  !VD->isTemplated() &&
+  GloballyUniqueObjectMightBeAccidentallyDuplicated(VD)) {
+// Check mutability. For pointers, ensure that both the pointer and the
+// pointee are (recursively) const.
+QualType Type = VD->getType().getNonReferenceType();
+if (!Type.isConstant(VD->getASTContext())) {
+  Diag(VD->getLocation(), diag::warn_possible_object_duplication_mutable)
+  << VD;
+} else {
+  while (Type->isPointerType()) {
+Type = Type->getPointeeType();
+if (Type->isFunctionType())
+  break;
+if (!Type.isConstant(VD->getASTContext())) {
+  Diag(VD->getLocation(),
+   diag::warn_possible_object_duplication_mutable)
+  << VD;
+  break;
+}
+  }
+}
+
+// To keep false positives low, only warn if we're certain that the
+// initializer has side effects. Don't warn on operator new, since a 
mutable
+// pointer will trigger the previous warning, and an immutable pointer
+// getting duplicated just results in a little extra memory usage.
+const Expr *Init = VD->getAnyInitializer();
+if (Init &&
+Init->HasSideEffects(VD->getASTContext(),
+ /*IncludePossibleEffects=*/false) &&
+!isa(Init->IgnoreParenImpCasts())) {
+  Diag(Init->getExprLoc(), diag::warn_possible_object_duplication_init)
+  << VD;
+}
+  }
+}
+
 void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, bool DirectInit) {
   // If there is no declaration, there was an error parsing it.  Just ignore
   // the initializer.
@@ -14655,6 +14702,8 @@ void Sema::CheckCompleteVariableDeclaration(VarDecl 
*var) {
 return;
   }
 
+  DiagnoseDangerousUniqueObjectDuplication(var);
+
   // Require the destructor.
   if (!type->isDependentType())
 if (const RecordType *recordType = baseType->getAs())
@@ -14842,51 +14891,6 @@ void Sema::FinalizeDeclaration(Decl *ThisDecl) {
   if (DC->getRedeclContext()->isFileContext() && VD->isExternallyVisible())
 AddPushedVisibilityAttribute(VD);
 
-  // If this object has external linkage and hidden visibility, it might be
-  // duplicated when built into a shared library, which causes problems if it's
-  // mutable (since the copies won't be in sync) or its initialization has side
-  // effects (since it will run once per copy instead of once globally)
-  // FIXME: Windows uses dllexport/dllimport instead of visibility, and we 
don't
-  // handle that yet. Disable the warning on Windows for now.
-  // FIXME: Checking templates can cause false positives if the template in
-  // question is never instantiated (e.g. only specialized templates are used).
-  if (!Context.getTargetInfo().shouldDLLImportComdatSymbols() &&
-  !VD->isTemplated() &&
-  GloballyUniqueObjectMig

[clang] [Clang] Add __has_target_builtin macro (PR #126324)

2025-02-11 Thread Nick Sarnie via cfe-commits


https://github.com/sarnex edited 
https://github.com/llvm/llvm-project/pull/126324
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [analyzer][docs] Document how to use perf and uftrace to debug performance issues (PR #126724)

2025-02-11 Thread Arseniy Zaostrovnykh via cfe-commits



@@ -45,3 +47,91 @@ Note: Both Chrome-tracing and speedscope tools might 
struggle with time traces a
 Luckily, in most cases the default max-steps boundary of 225 000 produces the 
traces of approximately that size
 for a single entry point.
 You can use ``-analyze-function=get_global_options`` together with 
``-ftime-trace`` to narrow down analysis to a specific entry point.
+
+
+Performance analysis using ``perf``
+===
+
+`Perf `_ is a tool for conducting 
sampling-based profiling.
+It's easy to start profiling, you only have 2 prerequisites.
+Build with ``-fno-omit-frame-pointer`` and debug info (``-g``).
+You can use release builds, but probably the easiest is to set the 
``CMAKE_BUILD_TYPE=RelWithDebInfo``
+along with ``CMAKE_CXX_FLAGS="-fno-omit-frame-pointer"`` when configuring 
``llvm``.
+Here is how to `get started `_ 
if you are in trouble.
+
+.. code-block:: bash
+   :caption: Running the Clang Static Analyzer through ``perf`` to gather 
samples of the execution.
+
+   # -F: Sampling frequency, use `-F max` for maximal frequency
+   # -g: Enable call-graph recording for both kernel and user space
+   perf record -F 99 -g --  clang -cc1 -analyze -verify 
clang/test/Analysis/string.c \
+ -analyzer-checker=core,unix,alpha.unix.cstring,debug.ExprInspection
+
+Once you have the profile data, you can use it to produce a Flame graph.
+A Flame graph is a visual representation of the stack frames of the samples.
+Common stack frame prefixes are squashed together, making up a wider bar.
+The wider the bar, the more time was spent under that particular stack frame,
+giving a sense of how the overall execution time was spent.
+
+Clone the `FlameGraph `_ git 
repository,
+as we will use some scripts from there to convert the ``perf`` samples into a 
Flame graph.
+It's also useful to check out Brendan Gregg's (the author of FlameGraph)
+`homepage `_.
+
+
+.. code-block:: bash
+   :caption: Converting the ``perf`` profile into a Flamegraph, then opening 
it in Firefox.
+
+   perf script | /path/to/FlameGraph/stackcollapse-perf.pl > perf.folded
+   /path/to/FlameGraph/flamegraph.pl perf.folded  > perf.svg
+   firefox perf.svg
+
+.. image:: ../images/flamegraph.png
+
+
+Performance analysis using ``uftrace``
+==
+
+`uftrace `_ 
is a great tool to generate rich profile data
+that you can use to focus and drill down into the timeline of your application.
+We will use it to generate Chromium trace JSON.
+In contrast to ``perf``, this approach statically instruments every function, 
so it should be more precise and thorough than the sampling-based approaches 
like ``perf``.
+In contrast to using `-ftime-trace`, functions don't need to opt-in to be 
profiled using ``llvm::TimeTraceScope``.

necto wrote:

```suggestion
In contrast to using ``-ftime-trace``, functions don't need to opt-in to be 
profiled using ``llvm::TimeTraceScope``.
```

https://github.com/llvm/llvm-project/pull/126724
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [analyzer][docs] Document how to use perf and uftrace to debug performance issues (PR #126724)

2025-02-11 Thread Arseniy Zaostrovnykh via cfe-commits



@@ -45,3 +47,91 @@ Note: Both Chrome-tracing and speedscope tools might 
struggle with time traces a
 Luckily, in most cases the default max-steps boundary of 225 000 produces the 
traces of approximately that size
 for a single entry point.
 You can use ``-analyze-function=get_global_options`` together with 
``-ftime-trace`` to narrow down analysis to a specific entry point.
+
+
+Performance analysis using ``perf``
+===
+
+`Perf `_ is a tool for conducting 
sampling-based profiling.
+It's easy to start profiling, you only have 2 prerequisites.
+Build with ``-fno-omit-frame-pointer`` and debug info (``-g``).
+You can use release builds, but probably the easiest is to set the 
``CMAKE_BUILD_TYPE=RelWithDebInfo``
+along with ``CMAKE_CXX_FLAGS="-fno-omit-frame-pointer"`` when configuring 
``llvm``.
+Here is how to `get started `_ 
if you are in trouble.
+
+.. code-block:: bash
+   :caption: Running the Clang Static Analyzer through ``perf`` to gather 
samples of the execution.
+
+   # -F: Sampling frequency, use `-F max` for maximal frequency
+   # -g: Enable call-graph recording for both kernel and user space
+   perf record -F 99 -g --  clang -cc1 -analyze -verify 
clang/test/Analysis/string.c \
+ -analyzer-checker=core,unix,alpha.unix.cstring,debug.ExprInspection
+
+Once you have the profile data, you can use it to produce a Flame graph.
+A Flame graph is a visual representation of the stack frames of the samples.
+Common stack frame prefixes are squashed together, making up a wider bar.
+The wider the bar, the more time was spent under that particular stack frame,
+giving a sense of how the overall execution time was spent.
+
+Clone the `FlameGraph `_ git 
repository,
+as we will use some scripts from there to convert the ``perf`` samples into a 
Flame graph.
+It's also useful to check out Brendan Gregg's (the author of FlameGraph)
+`homepage `_.
+
+
+.. code-block:: bash
+   :caption: Converting the ``perf`` profile into a Flamegraph, then opening 
it in Firefox.
+
+   perf script | /path/to/FlameGraph/stackcollapse-perf.pl > perf.folded
+   /path/to/FlameGraph/flamegraph.pl perf.folded  > perf.svg
+   firefox perf.svg
+
+.. image:: ../images/flamegraph.png
+
+
+Performance analysis using ``uftrace``
+==
+
+`uftrace `_ 
is a great tool to generate rich profile data
+that you can use to focus and drill down into the timeline of your application.
+We will use it to generate Chromium trace JSON.
+In contrast to ``perf``, this approach statically instruments every function, 
so it should be more precise and thorough than the sampling-based approaches 
like ``perf``.
+In contrast to using `-ftime-trace`, functions don't need to opt-in to be 
profiled using ``llvm::TimeTraceScope``.
+All functions are profiled due to static instrumentation.

necto wrote:

`-ftime-trace` also uses static instrumentation, but a *manual* one
```suggestion
All functions are profiled due to automatic static instrumentation.
```

https://github.com/llvm/llvm-project/pull/126724
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [analyzer][docs] Document how to use perf and uftrace to debug performance issues (PR #126724)

2025-02-11 Thread Arseniy Zaostrovnykh via cfe-commits


https://github.com/necto approved this pull request.


https://github.com/llvm/llvm-project/pull/126724
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] 2f54223 - [Docs] Fix typo in TypeSanitizer.rst "tale" -> "table" (NFC) (#126721)

2025-02-11 Thread via cfe-commits


Author: sitrin
Date: 2025-02-11T08:58:56-08:00
New Revision: 2f54223247e8f9f0fc006b944de8351f376814af

URL: 
https://github.com/llvm/llvm-project/commit/2f54223247e8f9f0fc006b944de8351f376814af
DIFF: 
https://github.com/llvm/llvm-project/commit/2f54223247e8f9f0fc006b944de8351f376814af.diff

LOG: [Docs] Fix typo in TypeSanitizer.rst "tale" -> "table" (NFC) (#126721)

The word `table` is now in place of the word `tale`.

Fixes #126719.

Co-authored-by: sitrin 

Added: 


Modified: 
clang/docs/TypeSanitizer.rst

Removed: 




diff  --git a/clang/docs/TypeSanitizer.rst b/clang/docs/TypeSanitizer.rst
index 4d1dfc23a6c51..3c683a6c24bb4 100644
--- a/clang/docs/TypeSanitizer.rst
+++ b/clang/docs/TypeSanitizer.rst
@@ -27,7 +27,7 @@ reduce these impacts.
 The TypeSanitizer Algorithm
 ===
 For each TBAA type-access descriptor, encoded in LLVM IR using TBAA Metadata, 
the instrumentation 
-pass generates descriptor tales. Thus there is a unique pointer to each type 
(and access descriptor).
+pass generates descriptor tables. Thus there is a unique pointer to each type 
(and access descriptor).
 These tables are comdat (except for anonymous-namespace types), so the pointer 
values are unique 
 across the program.
 



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Docs] Fix typo in TypeSanitizer.rst "tale" -> "table" (NFC) (PR #126721)

2025-02-11 Thread Jon Roelofs via cfe-commits


https://github.com/jroelofs closed 
https://github.com/llvm/llvm-project/pull/126721
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Docs] Fix typo in TypeSanitizer.rst "tale" -> "table" (NFC) (PR #126721)

2025-02-11 Thread via cfe-commits


github-actions[bot] wrote:



@sitrin Congratulations on having your first Pull Request (PR) merged into the 
LLVM Project!

Your changes will be combined with recent changes from other authors, then 
tested by our [build bots](https://lab.llvm.org/buildbot/). If there is a 
problem with a build, you may receive a report in an email or a comment on this 
PR.

Please check whether problems have been caused by your change specifically, as 
the builds can include changes from many authors. It is not uncommon for your 
change to be included in a build that fails due to someone else's changes, or 
infrastructure issues.

How to do this, and the rest of the post-merge process, is covered in detail 
[here](https://llvm.org/docs/MyFirstTypoFix.html#myfirsttypofix-issues-after-landing-your-pr).

If your change does cause a problem, it may be reverted, or you can revert it 
yourself. This is a normal part of [LLVM 
development](https://llvm.org/docs/DeveloperPolicy.html#patch-reversion-policy).
 You can fix your changes and open a new PR to merge them again.

If you don't get any reports, no action is required from you. Your changes are 
working as expected, well done!


https://github.com/llvm/llvm-project/pull/126721
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Docs] Fix typo in TypeSanitizer.rst "tale" -> "table" (NFC) (PR #126721)

2025-02-11 Thread Jon Roelofs via cfe-commits


https://github.com/jroelofs approved this pull request.


https://github.com/llvm/llvm-project/pull/126721
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [HLSL] Appropriately set function attribute optnone (PR #125937)

2025-02-11 Thread S. Bharadwaj Yadavalli via cfe-commits


https://github.com/bharadwajy edited 
https://github.com/llvm/llvm-project/pull/125937
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[libclc] [libclc] Move sign to the CLC builtins library (PR #115699)

2025-02-11 Thread Fraser Cormack via cfe-commits



@@ -0,0 +1,21 @@
+// TYPE sign(TYPE x) {
+//   if (isnan(x)) {
+// return 0.0F;
+//   }
+//   if (x > 0.0F) {
+// return 1.0F;
+//   }
+//   if (x < 0.0F) {
+// return -1.0F;
+//   }
+//   return x; /* -0.0 or +0.0 */
+// }
+_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE __clc_sign(__CLC_GENTYPE x) {
+  __CLC_BIT_INTN x_isnan = __clc_isnan(x);
+  __CLC_BIT_INTN x_isgreater_zero = x > __CLC_FP_LIT(0.0);
+  __CLC_BIT_INTN x_isless_zero = x < __CLC_FP_LIT(0.0);
+  __CLC_GENTYPE sel0 = __clc_select(x, __CLC_FP_LIT(1.0), x_isgreater_zero);
+  __CLC_GENTYPE sel1 = __clc_select(sel0, __CLC_FP_LIT(-1.0), x_isless_zero);
+  __CLC_GENTYPE sel2 = __clc_select(sel1, __CLC_FP_LIT(0.0), x_isnan);
+  return sel2;
+}

frasercrmck wrote:

Makes sense, yeah. I'll go with your suggestion. We can fix it up later if 
needed.

https://github.com/llvm/llvm-project/pull/115699
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] Enable -Wunique-object-duplication inside templated code (PR #125902)

2025-02-11 Thread Hans Wennborg via cfe-commits



@@ -154,4 +154,89 @@ namespace GlobalTest {
   };
 
   inline float Test::disallowedStaticMember2 = 2.3; // hidden-warning 
{{'disallowedStaticMember2' may be duplicated when built into a shared library: 
it is mutable, has hidden visibility, and external linkage}}
-} // namespace GlobalTest
\ No newline at end of file
+} // namespace GlobalTest
+
+/**
+ * Case three: Inside templates
+ 
**/
+
+namespace TemplateTest {
+
+template 
+int disallowedTemplate1 = 0; // hidden-warning {{'disallowedTemplate1' 
may be duplicated when built into a shared library: it is mutable, has hidden 
visibility, and external linkage}}
+
+template int disallowedTemplate1; // hidden-note {{in instantiation of}}
+
+
+// Should work for implicit instantiation as well
+template 
+int disallowedTemplate2 = 0; // hidden-warning {{'disallowedTemplate2' 
may be duplicated when built into a shared library: it is mutable, has hidden 
visibility, and external linkage}}
+
+int implicit_instantiate() {
+  return disallowedTemplate2; // hidden-note {{in instantiation of}}
+}
+
+
+// Ensure we only get warnings for templates that are actually instantiated
+template 
+int maybeAllowedTemplate = 0; // Not instantiated, so no warning here
+
+template 
+int maybeAllowedTemplate = 1; // hidden-warning 
{{'maybeAllowedTemplate' may be duplicated when built into a shared 
library: it is mutable, has hidden visibility, and external linkage}}
+
+template <>
+int maybeAllowedTemplate = 2; // hidden-warning 
{{'maybeAllowedTemplate' may be duplicated when built into a shared 
library: it is mutable, has hidden visibility, and external linkage}}
+
+template int maybeAllowedTemplate; // hidden-note {{in instantiation of}}
+
+
+
+// Should work the same for static class members
+template 

zmodem wrote:

ultra nit: for the template parameter you used `typename` above and `class` 
here. It doesn't really matter, but I'd pick one throughout for consistency.

https://github.com/llvm/llvm-project/pull/125902
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] Enable -Wunique-object-duplication inside templated code (PR #125902)

2025-02-11 Thread Hans Wennborg via cfe-commits


https://github.com/zmodem edited 
https://github.com/llvm/llvm-project/pull/125902
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] Enable -Wunique-object-duplication inside templated code (PR #125902)

2025-02-11 Thread Hans Wennborg via cfe-commits



@@ -3669,6 +3669,7 @@ class Sema final : public SemaBase {
   /// cause problems if the variable is mutable, its initialization is
   /// effectful, or its address is taken.
   bool GloballyUniqueObjectMightBeAccidentallyDuplicated(const VarDecl *Dcl);
+  void DiagnoseDangerousUniqueObjectDuplication(const VarDecl *Dcl);

zmodem wrote:

nit: The "Dangerous" part seems redundant.

https://github.com/llvm/llvm-project/pull/125902
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] Enable -Wunique-object-duplication inside templated code (PR #125902)

2025-02-11 Thread Hans Wennborg via cfe-commits


https://github.com/zmodem commented:

Seems reasonable to me. Just a few very minor nits.

https://github.com/llvm/llvm-project/pull/125902
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] Reapply "[Driver][ROCm][OpenMP] Fix default ockl linking for OpenMP."… (PR #126671)

2025-02-11 Thread Matt Arsenault via cfe-commits



@@ -1014,17 +1014,20 @@ RocmInstallationDetector::getCommonBitcodeLibs(
 bool isOpenMP = false) const {
   llvm::SmallVector BCLibs;
 
-  auto GPUSanEnabled = [GPUSan]() { return std::get(GPUSan); };
+  // GPU Sanitizer currently only supports ASan and is enabled through host
+  // ASan.
+  auto GPUSanEnabled = [GPUSan]() {
+return std::get(GPUSan) &&
+   std::get(GPUSan).needsAsanRt();
+  };

arsenm wrote:

Why is this using a lambda and capture for a simple and? I'm also surprised 
`get` works instead of `get<0>`. 

https://github.com/llvm/llvm-project/pull/126671
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [analyzer][docs] Document how to use perf and uftrace to debug performance issues (PR #126520)

2025-02-11 Thread Balazs Benics via cfe-commits



@@ -45,3 +48,91 @@ Note: Both Chrome-tracing and speedscope tools might 
struggle with time traces a
 Luckily, in most cases the default max-steps boundary of 225 000 produces the 
traces of approximately that size
 for a single entry point.
 You can use ``-analyze-function=get_global_options`` together with 
``-ftime-trace`` to narrow down analysis to a specific entry point.
+
+
+Performance analysis using ``perf``
+===
+
+`Perf `_ is an excellent tool for 
sampling-based profiling of an application.

steakhal wrote:

Rephrased into `Perf is a tool for conducting sampling-based profiling.`
Fixed in aa5a2855ab041f57c0fe96870fa596970936681e.

https://github.com/llvm/llvm-project/pull/126520
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [analyzer][docs] Document how to use perf and uftrace to debug performance issues (PR #126520)

2025-02-11 Thread Balazs Benics via cfe-commits



@@ -45,3 +48,91 @@ Note: Both Chrome-tracing and speedscope tools might 
struggle with time traces a
 Luckily, in most cases the default max-steps boundary of 225 000 produces the 
traces of approximately that size
 for a single entry point.
 You can use ``-analyze-function=get_global_options`` together with 
``-ftime-trace`` to narrow down analysis to a specific entry point.
+
+
+Performance analysis using ``perf``
+===
+
+`Perf `_ is an excellent tool for 
sampling-based profiling of an application.
+It's easy to start profiling, you only have 2 prerequisites.
+Build with ``-fno-omit-frame-pointer`` and debug info (``-g``).
+You can use release builds, but probably the easiest is to set the 
``CMAKE_BUILD_TYPE=RelWithDebInfo``
+along with ``CMAKE_CXX_FLAGS="-fno-omit-frame-pointer"`` when configuring 
``llvm``.
+Here is how to `get started `_ 
if you are in trouble.
+
+.. code-block:: bash
+   :caption: Running the Clang Static Analyzer through ``perf`` to gather 
samples of the execution.
+
+   # -F: Sampling frequency, use `-F max` for maximal frequency
+   # -g: Enable call-graph recording for both kernel and user space
+   perf record -F 99 -g --  clang -cc1 -nostdsysteminc -analyze 
-analyzer-constraints=range \
+ -setup-static-analyzer 
-analyzer-checker=core,unix,alpha.unix.cstring,debug.ExprInspection \
+ -verify ./clang/test/Analysis/string.c
+
+Once you have the profile data, you can use it to produce a Flame graph.
+A Flame graph is a visual representation of the stack frames of the samples.
+Common stack frame prefixes are squashed together, making up a wider bar.
+The wider the bar, the more time was spent under that particular stack frame,
+giving a sense of how the overall execution time was spent.
+
+Clone the `FlameGraph `_ git 
repository,
+as we will use some scripts from there to convert the ``perf`` samples into a 
Flame graph.
+It's also useful to check out Brendan Gregg's (the author of FlameGraph)
+`homepage `_.
+
+
+.. code-block:: bash
+   :caption: Converting the ``perf`` profile into a Flamegraph, then opening 
it in Firefox.
+
+   perf script | /path/to/FlameGraph/stackcollapse-perf.pl > perf.folded
+   /path/to/FlameGraph/flamegraph.pl perf.folded  > perf.svg
+   firefox perf.svg
+
+.. image:: ../images/flamegraph.svg
+
+
+Performance analysis using ``uftrace``
+==
+
+`uftrace `_ 
is a great tool to generate rich profile data
+that you could use to focus and drill down into the timeline of your 
application.
+We will use it to generate Chromium trace JSON.
+In contrast to ``perf``, this approach statically instruments every function, 
so it should be more precise and through than the sampling-based approaches 
like ``perf``.
+In contrast to using `-ftime-trace`, functions don't need to opt-in to be 
profiled using ``llvm::TimeTraceScope``.
+All functions are profiled due to static instrumentation.
+
+There is only one prerequisite to use this tool.
+You need to build the binary you are about to instrument using ``-pg`` or 
``-finstrument-functions``.
+This will make it run substantially slower but allows rich instrumentation.
+
+.. code-block:: bash
+   :caption: Recording with ``uftrace``, then dumping the result as a Chrome 
trace JSON.
+
+   uftrace record clang -cc1 -nostdsysteminc -analyze 
-analyzer-constraints=range \
+ -setup-static-analyzer 
-analyzer-checker=core,unix,alpha.unix.cstring,debug.ExprInspection \
+ -verify ./clang/test/Analysis/string.c
+   uftrace dump --filter=".*::AnalysisConsumer::HandleTranslationUnit" 
--time-filter=300 --chrome > trace.json
+
+.. image:: ../images/uftrace_detailed.png
+
+In this picture, you can see the functions below the Static Analyzer's entry 
point, which takes at least 300 nanoseconds to run, visualized by Chrome's 
``about:tracing`` page
+You can also see how deep function calls we may have due to AST visitors.
+
+Using different filters can reduce the number of functions to record.
+For the `common options 
`_,
 refer to the ``uftrace`` documentation.
+
+Similar filters could be applied for dumping too. That way you can reuse the 
same (detailed)
+recording to selectively focus on some special part using a refinement of the 
filter flags.
+Remember, the trace JSON needs to fit into Chrome's ``about:tracing`` or 
`speedscope `_,
+thus it needs to be of a limited size.
+In that case though, every dump operation would need to sieve through the 
whole recording if called repeatedly.

steakhal wrote:

Accepted as-is in 196dd507b48d68cf141

[clang] [analyzer][docs] Document how to use perf and uftrace to debug performance issues (PR #126520)

2025-02-11 Thread Balazs Benics via cfe-commits



@@ -45,3 +48,91 @@ Note: Both Chrome-tracing and speedscope tools might 
struggle with time traces a
 Luckily, in most cases the default max-steps boundary of 225 000 produces the 
traces of approximately that size
 for a single entry point.
 You can use ``-analyze-function=get_global_options`` together with 
``-ftime-trace`` to narrow down analysis to a specific entry point.
+
+
+Performance analysis using ``perf``
+===================================
+
+`Perf `_ is an excellent tool for 
sampling-based profiling of an application.
+It's easy to start profiling, you only have 2 prerequisites.
+Build with ``-fno-omit-frame-pointer`` and debug info (``-g``).
+You can use release builds, but probably the easiest is to set the 
``CMAKE_BUILD_TYPE=RelWithDebInfo``
+along with ``CMAKE_CXX_FLAGS="-fno-omit-frame-pointer"`` when configuring 
``llvm``.
+Here is how to `get started `_ 
if you are in trouble.
+
+.. code-block:: bash
+   :caption: Running the Clang Static Analyzer through ``perf`` to gather 
samples of the execution.
+
+   # -F: Sampling frequency, use `-F max` for maximal frequency
+   # -g: Enable call-graph recording for both kernel and user space
+   perf record -F 99 -g --  clang -cc1 -nostdsysteminc -analyze 
-analyzer-constraints=range \
+ -setup-static-analyzer 
-analyzer-checker=core,unix,alpha.unix.cstring,debug.ExprInspection \
+ -verify ./clang/test/Analysis/string.c
+
+Once you have the profile data, you can use it to produce a Flame graph.
+A Flame graph is a visual representation of the stack frames of the samples.
+Common stack frame prefixes are squashed together, making up a wider bar.
+The wider the bar, the more time was spent under that particular stack frame,
+giving a sense of how the overall execution time was spent.
+
+Clone the `FlameGraph `_ git 
repository,
+as we will use some scripts from there to convert the ``perf`` samples into a 
Flame graph.
+It's also useful to check out Brendan Gregg's (the author of FlameGraph)
+`homepage `_.
+
+
+.. code-block:: bash
+   :caption: Converting the ``perf`` profile into a Flamegraph, then opening 
it in Firefox.
+
+   perf script | /path/to/FlameGraph/stackcollapse-perf.pl > perf.folded
+   /path/to/FlameGraph/flamegraph.pl perf.folded  > perf.svg
+   firefox perf.svg
+
+.. image:: ../images/flamegraph.svg
+
+
+Performance analysis using ``uftrace``
+======================================
+
+`uftrace `_ 
is a great tool to generate rich profile data
+that you could use to focus and drill down into the timeline of your 
application.
+We will use it to generate Chromium trace JSON.
+In contrast to ``perf``, this approach statically instruments every function, 
so it should be more precise and thorough than the sampling-based approaches 
like ``perf``.
+In contrast to using ``-ftime-trace``, functions don't need to opt in to be 
profiled using ``llvm::TimeTraceScope``.
+All functions are profiled due to static instrumentation.
+
+There is only one prerequisite to use this tool.
+You need to build the binary you are about to instrument using ``-pg`` or 
``-finstrument-functions``.
+This will make it run substantially slower but allows rich instrumentation.
+
+.. code-block:: bash
+   :caption: Recording with ``uftrace``, then dumping the result as a Chrome 
trace JSON.
+
+   uftrace record clang -cc1 -nostdsysteminc -analyze 
-analyzer-constraints=range \
+ -setup-static-analyzer 
-analyzer-checker=core,unix,alpha.unix.cstring,debug.ExprInspection \
+ -verify ./clang/test/Analysis/string.c
+   uftrace dump --filter=".*::AnalysisConsumer::HandleTranslationUnit" 
--time-filter=300 --chrome > trace.json
+
+.. image:: ../images/uftrace_detailed.png
+
+In this picture, you can see the functions below the Static Analyzer's entry 
point, which take at least 300 nanoseconds to run, visualized by Chrome's 
``about:tracing`` page.
+You can also see how deep the function calls can get due to AST visitors.
+
+Using different filters can reduce the number of functions to record.
+For the `common options 
`_,
 refer to the ``uftrace`` documentation.
+
+Similar filters could be applied for dumping too. That way you can reuse the 
same (detailed)
+recording to selectively focus on some special part using a refinement of the 
filter flags.
+Remember, the trace JSON needs to fit into Chrome's ``about:tracing`` or 
`speedscope `_,
+thus it needs to be of a limited size.
+In that case though, every dump operation would need to sieve through the 
whole recording if called repeatedly.
+
+If the trace JSON is still too large to load, have a look at the dum

[clang] Revert "[analyzer] Remove some false negatives in StackAddrEscapeChec… (PR #126614)

2025-02-11 Thread Gábor Horváth via cfe-commits


Xazax-hun wrote:

Ah, sorry! Will include the log next time. 

https://github.com/llvm/llvm-project/pull/126614
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] Reapply "[Driver][ROCm][OpenMP] Fix default ockl linking for OpenMP."… (PR #126671)

2025-02-11 Thread Amit Kumar Pandey via cfe-commits


https://github.com/ampandey-1995 edited 
https://github.com/llvm/llvm-project/pull/126671
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] Reapply "[Driver][ROCm][OpenMP] Fix default ockl linking for OpenMP."… (PR #126671)

2025-02-11 Thread Amit Kumar Pandey via cfe-commits


https://github.com/ampandey-1995 edited 
https://github.com/llvm/llvm-project/pull/126671
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [analyzer][docs] Document how to use perf and uftrace to debug performance issues (PR #126724)

2025-02-11 Thread via cfe-commits


llvmbot wrote:




@llvm/pr-subscribers-clang-static-analyzer-1

Author: Balazs Benics (steakhal)


Changes



---
Full diff: https://github.com/llvm/llvm-project/pull/126724.diff


3 Files Affected:

- (modified) clang/docs/analyzer/developer-docs/PerformanceInvestigation.rst 
(+93-3) 
- (added) clang/docs/analyzer/images/flamegraph.png () 
- (added) clang/docs/analyzer/images/uftrace_detailed.png () 


``diff
diff --git a/clang/docs/analyzer/developer-docs/PerformanceInvestigation.rst 
b/clang/docs/analyzer/developer-docs/PerformanceInvestigation.rst
index 3ee6e117a846528..6d1a5f126223d93 100644
--- a/clang/docs/analyzer/developer-docs/PerformanceInvestigation.rst
+++ b/clang/docs/analyzer/developer-docs/PerformanceInvestigation.rst
@@ -5,6 +5,9 @@ Performance Investigation
 Multiple factors contribute to the time it takes to analyze a file with Clang 
Static Analyzer.
 A translation unit contains multiple entry points, each of which take multiple 
steps to analyze.
 
+Performance analysis using ``-ftime-trace``
+===========================================
+
 You can add the ``-ftime-trace=file.json`` option to break down the analysis 
time into individual entry points and steps within each entry point.
 You can explore the generated JSON file in a Chromium browser using the 
``chrome://tracing`` URL,
 or using `speedscope `_.
@@ -19,9 +22,8 @@ Here is an example of a time trace produced with
 .. code-block:: bash
:caption: Clang Static Analyzer invocation to generate a time trace of 
string.c analysis.
 
-   clang -cc1 -nostdsysteminc -analyze -analyzer-constraints=range \
- -setup-static-analyzer 
-analyzer-checker=core,unix,alpha.unix.cstring,debug.ExprInspection \
- -verify ./clang/test/Analysis/string.c \
+   clang -cc1 -analyze -verify clang/test/Analysis/string.c \
+ -analyzer-checker=core,unix,alpha.unix.cstring,debug.ExprInspection \
  -ftime-trace=trace.json -ftime-trace-granularity=1
 
 .. image:: ../images/speedscope.png
@@ -45,3 +47,91 @@ Note: Both Chrome-tracing and speedscope tools might 
struggle with time traces a
 Luckily, in most cases the default max-steps boundary of 225 000 produces the 
traces of approximately that size
 for a single entry point.
 You can use ``-analyze-function=get_global_options`` together with 
``-ftime-trace`` to narrow down analysis to a specific entry point.
+
+
+Performance analysis using ``perf``
+===================================
+
+`Perf `_ is a tool for conducting 
sampling-based profiling.
+It's easy to start profiling, you only have 2 prerequisites.
+Build with ``-fno-omit-frame-pointer`` and debug info (``-g``).
+You can use release builds, but probably the easiest is to set the 
``CMAKE_BUILD_TYPE=RelWithDebInfo``
+along with ``CMAKE_CXX_FLAGS="-fno-omit-frame-pointer"`` when configuring 
``llvm``.
+Here is how to `get started `_ 
if you are in trouble.
+
+.. code-block:: bash
+   :caption: Running the Clang Static Analyzer through ``perf`` to gather 
samples of the execution.
+
+   # -F: Sampling frequency, use `-F max` for maximal frequency
+   # -g: Enable call-graph recording for both kernel and user space
+   perf record -F 99 -g --  clang -cc1 -analyze -verify 
clang/test/Analysis/string.c \
+ -analyzer-checker=core,unix,alpha.unix.cstring,debug.ExprInspection
+
+Once you have the profile data, you can use it to produce a Flame graph.
+A Flame graph is a visual representation of the stack frames of the samples.
+Common stack frame prefixes are squashed together, making up a wider bar.
+The wider the bar, the more time was spent under that particular stack frame,
+giving a sense of how the overall execution time was spent.
+
+Clone the `FlameGraph `_ git 
repository,
+as we will use some scripts from there to convert the ``perf`` samples into a 
Flame graph.
+It's also useful to check out Brendan Gregg's (the author of FlameGraph)
+`homepage `_.
+
+
+.. code-block:: bash
+   :caption: Converting the ``perf`` profile into a Flamegraph, then opening 
it in Firefox.
+
+   perf script | /path/to/FlameGraph/stackcollapse-perf.pl > perf.folded
+   /path/to/FlameGraph/flamegraph.pl perf.folded  > perf.svg
+   firefox perf.svg
+
+.. image:: ../images/flamegraph.png
+
+
+Performance analysis using ``uftrace``
+======================================
+
+`uftrace `_ 
is a great tool to generate rich profile data
+that you can use to focus and drill down into the timeline of your application.
+We will use it to generate Chromium trace JSON.
+In contrast to ``perf``, this approach statically instruments every function, 
so it should be more precise and thorough than the sampling-based approaches 
like ``perf``.
+In contrast to using

[clang] 560149b - [analyzer] Reapply recent stack addr escape checker changes + buildbot fix (#126620)

2025-02-11 Thread via cfe-commits


Author: Michael Flanders
Date: 2025-02-11T12:54:30Z
New Revision: 560149b5e3c891c64899e9912e29467a69dc3a4c

URL: 
https://github.com/llvm/llvm-project/commit/560149b5e3c891c64899e9912e29467a69dc3a4c
DIFF: 
https://github.com/llvm/llvm-project/commit/560149b5e3c891c64899e9912e29467a69dc3a4c.diff

LOG: [analyzer] Reapply recent stack addr escape checker changes + buildbot fix 
(#126620)

Reapplying changes from https://github.com/llvm/llvm-project/pull/125638
after buildbot failures.

Buildbot failures fixed in 029e7e98dc9956086adc6c1dfb0c655a273fbee6,
latest commit on this PR. It was a problem with a declared class member
with same name as its type. Sorry!

Added: 


Modified: 
clang/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp
clang/test/Analysis/copy-elision.cpp
clang/test/Analysis/stack-addr-ps.cpp
clang/test/Analysis/stackaddrleak.c
clang/test/Analysis/stackaddrleak.cpp

Removed: 




diff  --git a/clang/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp 
b/clang/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp
index f4de3b500499c4..c9df15ceb3b409 100644
--- a/clang/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp
@@ -54,8 +54,8 @@ class StackAddrEscapeChecker
   CheckerContext &C) const;
   void checkAsyncExecutedBlockCaptures(const BlockDataRegion &B,
CheckerContext &C) const;
-  void EmitStackError(CheckerContext &C, const MemRegion *R,
-  const Expr *RetE) const;
+  void EmitReturnLeakError(CheckerContext &C, const MemRegion *LeakedRegion,
+   const Expr *RetE) const;
   bool isSemaphoreCaptured(const BlockDecl &B) const;
   static SourceRange genName(raw_ostream &os, const MemRegion *R,
  ASTContext &Ctx);
@@ -147,9 +147,22 @@ StackAddrEscapeChecker::getCapturedStackRegions(const 
BlockDataRegion &B,
   return Regions;
 }
 
-void StackAddrEscapeChecker::EmitStackError(CheckerContext &C,
-const MemRegion *R,
-const Expr *RetE) const {
+static void EmitReturnedAsPartOfError(llvm::raw_ostream &OS, SVal ReturnedVal,
+  const MemRegion *LeakedRegion) {
+  if (const MemRegion *ReturnedRegion = ReturnedVal.getAsRegion()) {
+if (isa(ReturnedRegion)) {
+  OS << " is captured by a returned block";
+  return;
+}
+  }
+
+  // Generic message
+  OS << " returned to caller";
+}
+
+void StackAddrEscapeChecker::EmitReturnLeakError(CheckerContext &C,
+ const MemRegion *R,
+ const Expr *RetE) const {
   ExplodedNode *N = C.generateNonFatalErrorNode();
   if (!N)
 return;
@@ -157,11 +170,15 @@ void 
StackAddrEscapeChecker::EmitStackError(CheckerContext &C,
 BT_returnstack = std::make_unique(
 CheckNames[CK_StackAddrEscapeChecker],
 "Return of address to stack-allocated memory");
+
   // Generate a report for this bug.
   SmallString<128> buf;
   llvm::raw_svector_ostream os(buf);
+
+  // Error message formatting
   SourceRange range = genName(os, R, C.getASTContext());
-  os << " returned to caller";
+  EmitReturnedAsPartOfError(os, C.getSVal(RetE), R);
+
   auto report =
   std::make_unique(*BT_returnstack, os.str(), N);
   report->addRange(RetE->getSourceRange());
@@ -209,30 +226,6 @@ void 
StackAddrEscapeChecker::checkAsyncExecutedBlockCaptures(
   }
 }
 
-void StackAddrEscapeChecker::checkReturnedBlockCaptures(
-const BlockDataRegion &B, CheckerContext &C) const {
-  for (const MemRegion *Region : getCapturedStackRegions(B, C)) {
-if (isNotInCurrentFrame(Region, C))
-  continue;
-ExplodedNode *N = C.generateNonFatalErrorNode();
-if (!N)
-  continue;
-if (!BT_capturedstackret)
-  BT_capturedstackret = std::make_unique(
-  CheckNames[CK_StackAddrEscapeChecker],
-  "Address of stack-allocated memory is captured");
-SmallString<128> Buf;
-llvm::raw_svector_ostream Out(Buf);
-SourceRange Range = genName(Out, Region, C.getASTContext());
-Out << " is captured by a returned block";
-auto Report = 
std::make_unique(*BT_capturedstackret,
-   Out.str(), N);
-if (Range.isValid())
-  Report->addRange(Range);
-C.emitReport(std::move(Report));
-  }
-}
-
 void StackAddrEscapeChecker::checkPreCall(const CallEvent &Call,
   CheckerContext &C) const {
   if (!ChecksEnabled[CK_StackAddrAsyncEscapeChecker])
@@ -247,45 +240,128 @@ void StackAddrEscapeChecker::checkPreCall(const 
CallEvent &Call,
   }
 }
 
-void StackAddrEscapeChecker::checkPreStmt(const ReturnStmt

[clang] [analyzer][docs] Document how to use perf and uftrace to debug performance issues (PR #126724)

2025-02-11 Thread Balazs Benics via cfe-commits


steakhal wrote:

This is the continuation of #126520
Sorry for the complications again. -.-

https://github.com/llvm/llvm-project/pull/126724
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

1 2 3 4 5 >

1 - 100 of 460 matches

Mail list logo