Author: Joseph Huber Date: 2024-07-26T17:21:56-05:00 New Revision: dbb8b7a0f4eea1aa333cec9a38aa6eb7ecf6c1dc
URL: https://github.com/llvm/llvm-project/commit/dbb8b7a0f4eea1aa333cec9a38aa6eb7ecf6c1dc DIFF: https://github.com/llvm/llvm-project/commit/dbb8b7a0f4eea1aa333cec9a38aa6eb7ecf6c1dc.diff LOG: Reapply "[OpenMP][libc] Remove special handling for OpenMP printf (#98940)" This reverts commit fea5914c926e2f013a8b5e27eaa74c7047fb2c71. Added: Modified: clang/lib/CodeGen/CGBuiltin.cpp clang/lib/CodeGen/CGGPUBuiltin.cpp clang/lib/CodeGen/CodeGenFunction.h libc/config/gpu/entrypoints.txt libc/spec/gpu_ext.td libc/src/gpu/CMakeLists.txt llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp offload/DeviceRTL/include/LibC.h offload/DeviceRTL/src/LibC.cpp Removed: clang/test/OpenMP/nvptx_target_printf_codegen.c libc/src/gpu/rpc_fprintf.cpp libc/src/gpu/rpc_fprintf.h ################################################################################ diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 0c4d0efb70ea5..f0651c280ff95 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -5986,8 +5986,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, getTarget().getTriple().isAMDGCN() || (getTarget().getTriple().isSPIRV() && getTarget().getTriple().getVendor() == Triple::VendorType::AMD)) { - if (getLangOpts().OpenMPIsTargetDevice) - return EmitOpenMPDevicePrintfCallExpr(E); if (getTarget().getTriple().isNVPTX()) return EmitNVPTXDevicePrintfCallExpr(E); if ((getTarget().getTriple().isAMDGCN() || diff --git a/clang/lib/CodeGen/CGGPUBuiltin.cpp b/clang/lib/CodeGen/CGGPUBuiltin.cpp index b2340732afeb5..84adf29e8db87 100644 --- a/clang/lib/CodeGen/CGGPUBuiltin.cpp +++ b/clang/lib/CodeGen/CGGPUBuiltin.cpp @@ -42,28 +42,6 @@ llvm::Function *GetVprintfDeclaration(llvm::Module &M) { VprintfFuncType, llvm::GlobalVariable::ExternalLinkage, "vprintf", &M); } -llvm::Function *GetOpenMPVprintfDeclaration(CodeGenModule &CGM) { - const char *Name = "__llvm_omp_vprintf"; - llvm::Module &M = CGM.getModule(); - llvm::Type *ArgTypes[] = {llvm::PointerType::getUnqual(M.getContext()), - llvm::PointerType::getUnqual(M.getContext()), - llvm::Type::getInt32Ty(M.getContext())}; - llvm::FunctionType *VprintfFuncType = llvm::FunctionType::get( - llvm::Type::getInt32Ty(M.getContext()), ArgTypes, false); - - if (auto *F = M.getFunction(Name)) { - if (F->getFunctionType() != VprintfFuncType) { - CGM.Error(SourceLocation(), - "Invalid type declaration for __llvm_omp_vprintf"); - return nullptr; - } - return F; - } - - return llvm::Function::Create( - VprintfFuncType, llvm::GlobalVariable::ExternalLinkage, Name, &M); -} - // Transforms a call to printf into a call to the NVPTX vprintf syscall (which // isn't particularly special; it's invoked just like a regular function). // vprintf takes two args: A format string, and a pointer to a buffer containing @@ -213,10 +191,3 @@ RValue CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E) { Builder.SetInsertPoint(IRB.GetInsertBlock(), IRB.GetInsertPoint()); return RValue::get(Printf); } - -RValue CodeGenFunction::EmitOpenMPDevicePrintfCallExpr(const CallExpr *E) { - assert(getTarget().getTriple().isNVPTX() || - getTarget().getTriple().isAMDGCN()); - return EmitDevicePrintfCallExpr(E, this, GetOpenMPVprintfDeclaration(CGM), - true); -} diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index bd62c65d8cce6..89cc819c43bb5 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4536,7 +4536,6 @@ class CodeGenFunction : public CodeGenTypeCache { RValue EmitNVPTXDevicePrintfCallExpr(const CallExpr *E); RValue EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E); - RValue EmitOpenMPDevicePrintfCallExpr(const CallExpr *E); RValue EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue); diff --git a/clang/test/OpenMP/nvptx_target_printf_codegen.c b/clang/test/OpenMP/nvptx_target_printf_codegen.c deleted file mode 100644 index f53daf65205c9..0000000000000 --- a/clang/test/OpenMP/nvptx_target_printf_codegen.c +++ /dev/null @@ -1,179 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ -// Test target codegen - host bc file has to be created first. -// RUN: %clang_cc1 -verify -fopenmp -x c -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -verify -fopenmp -x c -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-64 -// RUN: %clang_cc1 -verify -fopenmp -x c -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -verify -fopenmp -x c -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK-32 -// expected-no-diagnostics -extern int printf(const char *, ...); - - -// Check a simple call to printf end-to-end. -int CheckSimple(void) { -#pragma omp target - { - // printf in master-only basic block. - const char* fmt = "%d %lld %f"; - - printf(fmt, 1, 2ll, 3.0); - } - - return 0; -} - -void CheckNoArgs(void) { -#pragma omp target - { - // printf in master-only basic block. - printf("hello, world!"); - } -} - -// Check that printf's alloca happens in the entry block, not inside the if -// statement. -int foo; -void CheckAllocaIsInEntryBlock(void) { -#pragma omp target - { - if (foo) { - printf("%d", 42); - } - } -} -// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckSimple_l13 -// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-64-NEXT: [[FMT:%.*]] = alloca ptr, align 8 -// CHECK-64-NEXT: [[TMP:%.*]] = alloca [[PRINTF_ARGS:%.*]], align 8 -// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckSimple_l13_kernel_environment, ptr [[DYN_PTR]]) -// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// CHECK-64: user_code.entry: -// CHECK-64-NEXT: store ptr @.str, ptr [[FMT]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[FMT]], align 8 -// CHECK-64-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PRINTF_ARGS]], ptr [[TMP]], i32 0, i32 0 -// CHECK-64-NEXT: store i32 1, ptr [[TMP2]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PRINTF_ARGS]], ptr [[TMP]], i32 0, i32 1 -// CHECK-64-NEXT: store i64 2, ptr [[TMP3]], align 8 -// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PRINTF_ARGS]], ptr [[TMP]], i32 0, i32 2 -// CHECK-64-NEXT: store double 3.000000e+00, ptr [[TMP4]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = call i32 @__llvm_omp_vprintf(ptr [[TMP1]], ptr [[TMP]], i32 24) -// CHECK-64-NEXT: call void @__kmpc_target_deinit() -// CHECK-64-NEXT: ret void -// CHECK-64: worker.exit: -// CHECK-64-NEXT: ret void -// -// -// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckNoArgs_l25 -// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] { -// CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckNoArgs_l25_kernel_environment, ptr [[DYN_PTR]]) -// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__llvm_omp_vprintf(ptr @.str1, ptr null, i32 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit() -// CHECK-64-NEXT: ret void -// CHECK-64: worker.exit: -// CHECK-64-NEXT: ret void -// -// -// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckAllocaIsInEntryBlock_l36 -// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[FOO:%.*]]) #[[ATTR0]] { -// CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-64-NEXT: [[FOO_ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[TMP:%.*]] = alloca [[PRINTF_ARGS_0:%.*]], align 8 -// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[FOO]], ptr [[FOO_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckAllocaIsInEntryBlock_l36_kernel_environment, ptr [[DYN_PTR]]) -// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[FOO_ADDR]], align 4 -// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 -// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] -// CHECK-64: if.then: -// CHECK-64-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PRINTF_ARGS_0]], ptr [[TMP]], i32 0, i32 0 -// CHECK-64-NEXT: store i32 42, ptr [[TMP2]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = call i32 @__llvm_omp_vprintf(ptr @.str2, ptr [[TMP]], i32 4) -// CHECK-64-NEXT: br label [[IF_END]] -// CHECK-64: worker.exit: -// CHECK-64-NEXT: ret void -// CHECK-64: if.end: -// CHECK-64-NEXT: call void @__kmpc_target_deinit() -// CHECK-64-NEXT: ret void -// -// -// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckSimple_l13 -// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK-32-NEXT: [[FMT:%.*]] = alloca ptr, align 4 -// CHECK-32-NEXT: [[TMP:%.*]] = alloca [[PRINTF_ARGS:%.*]], align 8 -// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckSimple_l13_kernel_environment, ptr [[DYN_PTR]]) -// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// CHECK-32: user_code.entry: -// CHECK-32-NEXT: store ptr @.str, ptr [[FMT]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[FMT]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PRINTF_ARGS]], ptr [[TMP]], i32 0, i32 0 -// CHECK-32-NEXT: store i32 1, ptr [[TMP2]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PRINTF_ARGS]], ptr [[TMP]], i32 0, i32 1 -// CHECK-32-NEXT: store i64 2, ptr [[TMP3]], align 8 -// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PRINTF_ARGS]], ptr [[TMP]], i32 0, i32 2 -// CHECK-32-NEXT: store double 3.000000e+00, ptr [[TMP4]], align 8 -// CHECK-32-NEXT: [[TMP5:%.*]] = call i32 @__llvm_omp_vprintf(ptr [[TMP1]], ptr [[TMP]], i32 24) -// CHECK-32-NEXT: call void @__kmpc_target_deinit() -// CHECK-32-NEXT: ret void -// CHECK-32: worker.exit: -// CHECK-32-NEXT: ret void -// -// -// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckNoArgs_l25 -// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] { -// CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckNoArgs_l25_kernel_environment, ptr [[DYN_PTR]]) -// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__llvm_omp_vprintf(ptr @.str1, ptr null, i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit() -// CHECK-32-NEXT: ret void -// CHECK-32: worker.exit: -// CHECK-32-NEXT: ret void -// -// -// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckAllocaIsInEntryBlock_l36 -// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[FOO:%.*]]) #[[ATTR0]] { -// CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK-32-NEXT: [[FOO_ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP:%.*]] = alloca [[PRINTF_ARGS_0:%.*]], align 8 -// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[FOO]], ptr [[FOO_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckAllocaIsInEntryBlock_l36_kernel_environment, ptr [[DYN_PTR]]) -// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[FOO_ADDR]], align 4 -// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 -// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] -// CHECK-32: if.then: -// CHECK-32-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PRINTF_ARGS_0]], ptr [[TMP]], i32 0, i32 0 -// CHECK-32-NEXT: store i32 42, ptr [[TMP2]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = call i32 @__llvm_omp_vprintf(ptr @.str2, ptr [[TMP]], i32 4) -// CHECK-32-NEXT: br label [[IF_END]] -// CHECK-32: worker.exit: -// CHECK-32-NEXT: ret void -// CHECK-32: if.end: -// CHECK-32-NEXT: call void @__kmpc_target_deinit() -// CHECK-32-NEXT: ret void -// diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt index df7aa9e319624..157f6f8af00a9 100644 --- a/libc/config/gpu/entrypoints.txt +++ b/libc/config/gpu/entrypoints.txt @@ -226,7 +226,6 @@ set(TARGET_LIBC_ENTRYPOINTS # gpu/rpc.h entrypoints libc.src.gpu.rpc_host_call - libc.src.gpu.rpc_fprintf ) set(TARGET_LIBM_ENTRYPOINTS diff --git a/libc/spec/gpu_ext.td b/libc/spec/gpu_ext.td index 5400e0afa7564..dce81ff778620 100644 --- a/libc/spec/gpu_ext.td +++ b/libc/spec/gpu_ext.td @@ -10,14 +10,6 @@ def GPUExtensions : StandardSpec<"GPUExtensions"> { RetValSpec<VoidType>, [ArgSpec<VoidPtr>, ArgSpec<VoidPtr>, ArgSpec<SizeTType>] >, - FunctionSpec< - "rpc_fprintf", - RetValSpec<IntType>, - [ArgSpec<FILERestrictedPtr>, - ArgSpec<ConstCharRestrictedPtr>, - ArgSpec<VoidPtr>, - ArgSpec<SizeTType>] - >, ] >; let Headers = [ diff --git a/libc/src/gpu/CMakeLists.txt b/libc/src/gpu/CMakeLists.txt index 4508abea7a888..e20228516b511 100644 --- a/libc/src/gpu/CMakeLists.txt +++ b/libc/src/gpu/CMakeLists.txt @@ -8,15 +8,3 @@ add_entrypoint_object( libc.src.__support.RPC.rpc_client libc.src.__support.GPU.utils ) - -add_entrypoint_object( - rpc_fprintf - SRCS - rpc_fprintf.cpp - HDRS - rpc_fprintf.h - DEPENDS - libc.src.stdio.gpu.gpu_file - libc.src.__support.RPC.rpc_client - libc.src.__support.GPU.utils -) diff --git a/libc/src/gpu/rpc_fprintf.cpp b/libc/src/gpu/rpc_fprintf.cpp deleted file mode 100644 index 70056daa25e2e..0000000000000 --- a/libc/src/gpu/rpc_fprintf.cpp +++ /dev/null @@ -1,75 +0,0 @@ -//===-- GPU implementation of fprintf -------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "rpc_fprintf.h" - -#include "src/__support/CPP/string_view.h" -#include "src/__support/GPU/utils.h" -#include "src/__support/RPC/rpc_client.h" -#include "src/__support/common.h" -#include "src/__support/macros/config.h" -#include "src/stdio/gpu/file.h" - -namespace LIBC_NAMESPACE_DECL { - -template <uint16_t opcode> -int fprintf_impl(::FILE *__restrict file, const char *__restrict format, - size_t format_size, void *args, size_t args_size) { - uint64_t mask = gpu::get_lane_mask(); - rpc::Client::Port port = rpc::client.open<opcode>(); - - if constexpr (opcode == RPC_PRINTF_TO_STREAM) { - port.send([&](rpc::Buffer *buffer) { - buffer->data[0] = reinterpret_cast<uintptr_t>(file); - }); - } - - port.send_n(format, format_size); - port.recv([&](rpc::Buffer *buffer) { - args_size = static_cast<size_t>(buffer->data[0]); - }); - port.send_n(args, args_size); - - uint32_t ret = 0; - for (;;) { - const char *str = nullptr; - port.recv([&](rpc::Buffer *buffer) { - ret = static_cast<uint32_t>(buffer->data[0]); - str = reinterpret_cast<const char *>(buffer->data[1]); - }); - // If any lanes have a string argument it needs to be copied back. - if (!gpu::ballot(mask, str)) - break; - - uint64_t size = str ? internal::string_length(str) + 1 : 0; - port.send_n(str, size); - } - - port.close(); - return ret; -} - -// TODO: Delete this and port OpenMP to use `printf`. -// place of varargs. Once varargs support is added we will use that to -// implement the real version. -LLVM_LIBC_FUNCTION(int, rpc_fprintf, - (::FILE *__restrict stream, const char *__restrict format, - void *args, size_t size)) { - cpp::string_view str(format); - if (stream == stdout) - return fprintf_impl<RPC_PRINTF_TO_STDOUT>(stream, format, str.size() + 1, - args, size); - else if (stream == stderr) - return fprintf_impl<RPC_PRINTF_TO_STDERR>(stream, format, str.size() + 1, - args, size); - else - return fprintf_impl<RPC_PRINTF_TO_STREAM>(stream, format, str.size() + 1, - args, size); -} - -} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/gpu/rpc_fprintf.h b/libc/src/gpu/rpc_fprintf.h deleted file mode 100644 index 7658b214c07c2..0000000000000 --- a/libc/src/gpu/rpc_fprintf.h +++ /dev/null @@ -1,23 +0,0 @@ -//===-- Implementation header for RPC functions -----------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_SRC_GPU_RPC_HOST_CALL_H -#define LLVM_LIBC_SRC_GPU_RPC_HOST_CALL_H - -#include "hdr/types/FILE.h" -#include "src/__support/macros/config.h" -#include <stddef.h> - -namespace LIBC_NAMESPACE_DECL { - -int rpc_fprintf(::FILE *__restrict stream, const char *__restrict format, - void *argc, size_t size); - -} // namespace LIBC_NAMESPACE_DECL - -#endif // LLVM_LIBC_SRC_GPU_RPC_HOST_CALL_H diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp index 42a6bac4fa6f2..02b0d436451a3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp @@ -437,7 +437,8 @@ bool AMDGPUPrintfRuntimeBindingImpl::run(Module &M) { return false; auto PrintfFunction = M.getFunction("printf"); - if (!PrintfFunction || !PrintfFunction->isDeclaration()) + if (!PrintfFunction || !PrintfFunction->isDeclaration() || + M.getModuleFlag("openmp")) return false; for (auto &U : PrintfFunction->uses()) { diff --git a/offload/DeviceRTL/include/LibC.h b/offload/DeviceRTL/include/LibC.h index dde86af783af9..59a795cc62e0e 100644 --- a/offload/DeviceRTL/include/LibC.h +++ b/offload/DeviceRTL/include/LibC.h @@ -18,7 +18,6 @@ extern "C" { int memcmp(const void *lhs, const void *rhs, size_t count); void memset(void *dst, int C, size_t count); - int printf(const char *format, ...); } diff --git a/offload/DeviceRTL/src/LibC.cpp b/offload/DeviceRTL/src/LibC.cpp index 4bca5d29643fe..291ceb023a69c 100644 --- a/offload/DeviceRTL/src/LibC.cpp +++ b/offload/DeviceRTL/src/LibC.cpp @@ -11,44 +11,33 @@ #pragma omp begin declare target device_type(nohost) namespace impl { -int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t); +int32_t omp_vprintf(const char *Format, __builtin_va_list vlist); } +#ifndef OMPTARGET_HAS_LIBC +namespace impl { #pragma omp begin declare variant match( \ device = {arch(nvptx, nvptx64)}, \ implementation = {extension(match_any)}) -extern "C" int32_t vprintf(const char *, void *); -namespace impl { -int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t) { - return vprintf(Format, Arguments); +extern "C" int vprintf(const char *format, ...); +int omp_vprintf(const char *Format, __builtin_va_list vlist) { + return vprintf(Format, vlist); } -} // namespace impl #pragma omp end declare variant #pragma omp begin declare variant match(device = {arch(amdgcn)}) - -#ifdef OMPTARGET_HAS_LIBC -// TODO: Remove this handling once we have varargs support. -extern "C" struct FILE *stdout; -extern "C" int32_t rpc_fprintf(FILE *, const char *, void *, uint64_t); - -namespace impl { -int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t Size) { - return rpc_fprintf(stdout, Format, Arguments, Size); -} +int omp_vprintf(const char *Format, __builtin_va_list) { return -1; } +#pragma omp end declare variant } // namespace impl -#else -// We do not have a vprintf implementation for AMD GPU so we use a stub. -namespace impl { -int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t) { - return -1; + +extern "C" int printf(const char *Format, ...) { + __builtin_va_list vlist; + __builtin_va_start(vlist, Format); + return impl::omp_vprintf(Format, vlist); } -} // namespace impl -#endif -#pragma omp end declare variant +#endif // OMPTARGET_HAS_LIBC extern "C" { - [[gnu::weak]] int memcmp(const void *lhs, const void *rhs, size_t count) { auto *L = reinterpret_cast<const unsigned char *>(lhs); auto *R = reinterpret_cast<const unsigned char *>(rhs); @@ -65,11 +54,6 @@ extern "C" { for (size_t I = 0; I < count; ++I) dstc[I] = C; } - -/// printf() calls are rewritten by CGGPUBuiltin to __llvm_omp_vprintf -int32_t __llvm_omp_vprintf(const char *Format, void *Arguments, uint32_t Size) { - return impl::omp_vprintf(Format, Arguments, Size); -} } #pragma omp end declare target _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits