https://github.com/yxsamliu updated https://github.com/llvm/llvm-project/pull/179701

>From 5efb56479a8cd5e2e32592e8fd6da2c96d6a6a95 Mon Sep 17 00:00:00 2001
From: "Yaxun (Sam) Liu" <[email protected]>
Date: Sat, 28 Feb 2026 21:09:59 -0500
Subject: [PATCH] [Driver] Enable -ftime-trace for CUDA/HIP device compilation

---
 clang/lib/Driver/Driver.cpp               | 50 +++++++++++++++++++----
 clang/test/Driver/ftime-trace-offload.cpp | 37 +++++++++++++++++
 2 files changed, 80 insertions(+), 7 deletions(-)
 create mode 100644 clang/test/Driver/ftime-trace-offload.cpp

diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index e4ec28753c594..7f7ead445c3a4 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -5858,20 +5858,56 @@ static void handleTimeTrace(Compilation &C, const ArgList &Args,
       Args.getLastArg(options::OPT_ftime_trace, options::OPT_ftime_trace_EQ);
   if (!A)
     return;
+
+  std::string OffloadingPrefix;
+  if (JA->getOffloadingDeviceKind() != Action::OFK_None) {
+    const ToolChain *TC = JA->getOffloadingToolChain();
+    OffloadingPrefix = Action::GetOffloadingFileNamePrefix(
+        JA->getOffloadingDeviceKind(), TC ? TC->getTriple().normalize() : "",
+        /*CreatePrefixForHost=*/false);
+    if (const char *Arch = JA->getOffloadingArch()) {
+      OffloadingPrefix += "-";
+      OffloadingPrefix += Arch;
+    }
+  } else if (JA->getOffloadingHostActiveKinds() != Action::OFK_None &&
+             C.getDriver().isSaveTempsEnabled()) {
+    OffloadingPrefix = Action::GetOffloadingFileNamePrefix(
+        Action::OFK_None, C.getDefaultToolChain().getTriple().normalize(),
+        /*CreatePrefixForHost=*/true);
+  }
+
   SmallString<128> Path;
   if (A->getOption().matches(options::OPT_ftime_trace_EQ)) {
     Path = A->getValue();
     if (llvm::sys::fs::is_directory(Path)) {
-      SmallString<128> Tmp(Result.getFilename());
-      llvm::sys::path::replace_extension(Tmp, "json");
-      llvm::sys::path::append(Path, llvm::sys::path::filename(Tmp));
+      // When -ftime-trace=<dir> and it's a directory:
+      // - For host/non-offload: use the output filename stem
+      // - For offload: use input filename stem + offloading prefix
+      SmallString<128> Tmp;
+      if (OffloadingPrefix.empty()) {
+        Tmp = llvm::sys::path::stem(Result.getFilename());
+      } else {
+        Tmp = llvm::sys::path::stem(BaseInput);
+        Tmp += OffloadingPrefix;
+      }
+      Tmp += ".json";
+      llvm::sys::path::append(Path, Tmp);
     }
   } else {
     if (Arg *DumpDir = Args.getLastArgNoClaim(options::OPT_dumpdir)) {
-      // The trace file is ${dumpdir}${basename}.json. Note that dumpdir may not
-      // end with a path separator.
+      // The trace file is ${dumpdir}${basename}${offloadprefix}.json. Note
+      // that dumpdir may not end with a path separator.
       Path = DumpDir->getValue();
-      Path += llvm::sys::path::filename(BaseInput);
+      Path += llvm::sys::path::stem(BaseInput);
+      Path += OffloadingPrefix;
+    } else if (!OffloadingPrefix.empty()) {
+      // For offloading, derive path from -o output directory combined with
+      // the input filename and offload prefix.
+      SmallString<128> TraceName(llvm::sys::path::stem(BaseInput));
+      TraceName += OffloadingPrefix;
+      if (Arg *FinalOutput = Args.getLastArg(options::OPT_o))
+        Path = llvm::sys::path::parent_path(FinalOutput->getValue());
+      llvm::sys::path::append(Path, TraceName);
     } else {
       Path = Result.getFilename();
     }
@@ -6132,7 +6168,7 @@ InputInfoList Driver::BuildJobsForActionNoCache(
                                              AtTopLevel, MultipleArchs,
                                              OffloadingPrefix),
                        BaseInput);
-    if (T->canEmitIR() && OffloadingPrefix.empty())
+    if (T->canEmitIR())
       handleTimeTrace(C, Args, JA, BaseInput, Result);
   }
 
diff --git a/clang/test/Driver/ftime-trace-offload.cpp b/clang/test/Driver/ftime-trace-offload.cpp
new file mode 100644
index 0000000000000..224a21ca2173a
--- /dev/null
+++ b/clang/test/Driver/ftime-trace-offload.cpp
@@ -0,0 +1,37 @@
+// RUN: rm -rf %t && mkdir -p %t && cd %t
+// RUN: mkdir d e f && cp %s d/a.cpp
+
+/// Test HIP offloading: -ftime-trace should generate traces for both host and device.
+// RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 -x hip d/a.cpp 
--offload-arch=gfx906 --offload-arch=gfx90a \
+// RUN:   -nogpulib -nogpuinc -c -o e/a.o --target=x86_64-linux-gnu 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=HIP
+// HIP: -cc1{{.*}} "-triple" "amdgcn-amd-amdhsa"{{.*}} 
"-ftime-trace=e/a-hip-amdgcn-amd-amdhsa-gfx906.json"
+// HIP: -cc1{{.*}} "-triple" "amdgcn-amd-amdhsa"{{.*}} 
"-ftime-trace=e/a-hip-amdgcn-amd-amdhsa-gfx90a.json"
+// HIP: -cc1{{.*}} "-triple" "x86_64{{.*}}"{{.*}} "-ftime-trace=e/a.json"
+
+/// Test HIP offloading with new driver: same output as above.
+// RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 -x hip d/a.cpp 
--offload-arch=gfx906 --offload-arch=gfx90a \
+// RUN:   -nogpulib -nogpuinc -c -o e/a.o --target=x86_64-linux-gnu 
--offload-new-driver 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=HIP
+
+/// Test HIP offloading with -ftime-trace=<dir>: traces go to specified directory.
+// RUN: %clang -### -ftime-trace=f -ftime-trace-granularity=0 -x hip d/a.cpp 
--offload-arch=gfx906 \
+// RUN:   -nogpulib -nogpuinc -c -o e/a.o --target=x86_64-linux-gnu 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=HIP-DIR
+// HIP-DIR: -cc1{{.*}} "-triple" "amdgcn-amd-amdhsa"{{.*}} 
"-ftime-trace=f{{/|\\\\}}a-hip-amdgcn-amd-amdhsa-gfx906.json"
+// HIP-DIR: -cc1{{.*}} "-triple" "x86_64{{.*}}"{{.*}} 
"-ftime-trace=f{{/|\\\\}}a.json"
+
+/// Test HIP offloading with --save-temps: both host and device get unique trace files.
+// RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 -x hip d/a.cpp 
--offload-arch=gfx906 \
+// RUN:   -nogpulib -nogpuinc -c -o e/a.o --target=x86_64-linux-gnu 
--save-temps 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=HIP-SAVE-TEMPS
+// HIP-SAVE-TEMPS: -cc1{{.*}} "-triple" "amdgcn-amd-amdhsa"{{.*}} 
"-ftime-trace=e/a-hip-amdgcn-amd-amdhsa-gfx906.json"
+// HIP-SAVE-TEMPS: -cc1{{.*}} "-triple" "x86_64{{.*}}"{{.*}} 
"-ftime-trace=e/a-host-x86_64-unknown-linux-gnu.json"
+
+/// Test CUDA offloading: -ftime-trace should generate traces for both host and device.
+// RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 -x cuda d/a.cpp 
--offload-arch=sm_70 --offload-arch=sm_80 \
+// RUN:   -c -o e/a.o --target=x86_64-linux-gnu 
--cuda-path=%S/Inputs/CUDA_102/usr/local/cuda 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CUDA
+// CUDA: -cc1{{.*}} "-triple" "nvptx64-nvidia-cuda"{{.*}} 
"-ftime-trace=e/a-cuda-nvptx64-nvidia-cuda-sm_70.json"
+// CUDA: -cc1{{.*}} "-triple" "nvptx64-nvidia-cuda"{{.*}} 
"-ftime-trace=e/a-cuda-nvptx64-nvidia-cuda-sm_80.json"
+// CUDA: -cc1{{.*}} "-triple" "x86_64{{.*}}"{{.*}} "-ftime-trace=e/a.json"

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to