sameerds created this revision. Herald added subscribers: llvm-commits, cfe-commits, hiraditya, t-tye, tpr, dstuttard, yaxunl, mgorny, nhaehnle, wdng, jvesely, kzhuravl. Herald added projects: clang, LLVM.
This change implements the expansion in two parts: - Add a utility function emitAMDGPUPrintfCall() in LLVM. - Invoke the above function from Clang CodeGen, when processing a HIP program for the AMDGPU target. The printf expansion has undefined behaviour if the format string is not a compile-time constant. As a sufficient condition, the HIP ToolChain now emits -Werror=format-nonliteral. clang format Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D71365 Files: clang/lib/CodeGen/CGBuiltin.cpp clang/lib/CodeGen/CGGPUBuiltin.cpp clang/lib/CodeGen/CodeGenFunction.h clang/lib/Driver/ToolChains/HIP.cpp clang/test/CodeGenHIP/printf-aggregate.cpp clang/test/CodeGenHIP/printf.cpp clang/test/Driver/hip-printf.hip llvm/include/llvm/Transforms/Utils/AMDGPUEmitPrintf.h llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp llvm/lib/Transforms/Utils/CMakeLists.txt
Index: llvm/lib/Transforms/Utils/CMakeLists.txt =================================================================== --- llvm/lib/Transforms/Utils/CMakeLists.txt +++ llvm/lib/Transforms/Utils/CMakeLists.txt @@ -1,4 +1,5 @@ add_llvm_component_library(LLVMTransformUtils + AMDGPUEmitPrintf.cpp ASanStackFrameLayout.cpp AddDiscriminators.cpp BasicBlockUtils.cpp Index: llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp =================================================================== --- /dev/null +++ llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp @@ -0,0 +1,243 @@ +//===- AMDGPUEmitPrintf.cpp -----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Utility function to lower a printf call into a series of device +// library calls on the AMDGPU target. +// +// WARNING: This file knows about certain library functions. It recognizes them +// by name, and hardwires knowledge of their semantics. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/AMDGPUEmitPrintf.h" +#include "llvm/ADT/SparseBitVector.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/IRBuilder.h" + +#include <iostream> + +using namespace llvm; + +#define DEBUG_TYPE "amdgpu-emit-printf" + +static bool isCString(const Value *Arg) { + auto Ty = Arg->getType(); + auto PtrTy = dyn_cast<PointerType>(Ty); + if (!PtrTy) + return false; + + auto IntTy = dyn_cast<IntegerType>(PtrTy->getElementType()); + if (!IntTy) + return false; + + return IntTy->getBitWidth() == 8; +} + +static Value *fitArgInto64Bits(IRBuilder<> &Builder, Value *Arg) { + auto Int64Ty = Builder.getInt64Ty(); + auto Ty = Arg->getType(); + if (auto IntTy = dyn_cast<IntegerType>(Ty)) { + switch (IntTy->getBitWidth()) { + case 32: + return Builder.CreateZExt(Arg, Int64Ty); + case 64: + return Arg; + } + } else if (Ty->getTypeID() == Type::DoubleTyID) { + return Builder.CreateBitCast(Arg, Int64Ty); + } else if (auto PtrTy = dyn_cast<PointerType>(Ty)) { + return Builder.CreatePtrToInt(Arg, Int64Ty); + } + + llvm_unreachable("unexpected type"); + return Builder.getInt64(0); +} + +static Value *callPrintfBegin(IRBuilder<> &Builder, Value *Version) { + auto Int64Ty = Builder.getInt64Ty(); + auto M = Builder.GetInsertBlock()->getModule(); + auto Fn = M->getOrInsertFunction("__ockl_printf_begin", Int64Ty, Int64Ty); + return Builder.CreateCall(Fn, Version); +} + +static Value *callAppendArgs(IRBuilder<> &Builder, Value *Desc, int NumArgs, + Value *Arg0, Value *Arg1, Value *Arg2, Value *Arg3, + Value *Arg4, Value *Arg5, Value *Arg6, + bool IsLast) { + auto Int64Ty = Builder.getInt64Ty(); + auto Int32Ty = Builder.getInt32Ty(); + auto M = Builder.GetInsertBlock()->getModule(); + auto Fn = M->getOrInsertFunction("__ockl_printf_append_args", Int64Ty, + Int64Ty, Int32Ty, Int64Ty, Int64Ty, Int64Ty, + Int64Ty, Int64Ty, Int64Ty, Int64Ty, Int32Ty); + auto IsLastValue = Builder.getInt32(IsLast); + auto NumArgsValue = Builder.getInt32(NumArgs); + return Builder.CreateCall(Fn, {Desc, NumArgsValue, Arg0, Arg1, Arg2, Arg3, + Arg4, Arg5, Arg6, IsLastValue}); +} + +static Value *appendArg(IRBuilder<> &Builder, Value *Desc, Value *Arg, + bool IsLast) { + auto Arg0 = fitArgInto64Bits(Builder, Arg); + auto Zero = Builder.getInt64(0); + return callAppendArgs(Builder, Desc, 1, Arg0, Zero, Zero, Zero, Zero, Zero, + Zero, IsLast); +} + +// The device library does not provide strlen, so we build our own loop +// here. While we are at it, we also include the terminating null in the length. +static Value *getStrlenWithNull(IRBuilder<> &Builder, Value *Str) { + auto *Prev = Builder.GetInsertBlock(); + Module *M = Prev->getModule(); + + auto CharZero = Builder.getInt8(0); + auto One = Builder.getInt64(1); + auto Zero = Builder.getInt64(0); + auto Int64Ty = Builder.getInt64Ty(); + + // The length is either zero for a null pointer, or the computed value for an + // actual string. We need a join block for a phi that represents the final + // value. + // + // Strictly speaking, the zero does not matter since + // __ockl_printf_append_string_n ignores the length if the pointer is null. + BasicBlock *Join = nullptr; + if (Prev->getTerminator()) { + Join = Prev->splitBasicBlock(Builder.GetInsertPoint(), + Prev->getName() + ".strlen.join"); + Prev->getTerminator()->eraseFromParent(); + } else { + Join = BasicBlock::Create(M->getContext(), Prev->getName() + ".strlen.join", + Prev->getParent()); + } + BasicBlock *While = + BasicBlock::Create(M->getContext(), Prev->getName() + ".strlen.while", + Prev->getParent(), Join); + BasicBlock *WhileDone = BasicBlock::Create( + M->getContext(), Prev->getName() + ".strlen.while.done", + Prev->getParent(), Join); + + // Emit an early return for when the pointer is null. + Builder.SetInsertPoint(Prev); + auto CmpNull = + Builder.CreateICmpEQ(Str, Constant::getNullValue(Str->getType())); + BranchInst::Create(Join, While, CmpNull, Prev); + + // Entry to the while loop. + Builder.SetInsertPoint(While); + auto Ptr = Builder.CreateAddrSpaceCast(Str, Builder.getInt8PtrTy()); + + auto PtrPhi = Builder.CreatePHI(Ptr->getType(), 2); + PtrPhi->addIncoming(Ptr, Prev); + auto PtrNext = Builder.CreateGEP(PtrPhi, One); + PtrPhi->addIncoming(PtrNext, While); + + // Condition for the while loop. + auto Data = Builder.CreateLoad(PtrPhi); + auto Cmp = Builder.CreateICmpEQ(Data, CharZero); + Builder.CreateCondBr(Cmp, WhileDone, While); + + // Add one to the computed length. + Builder.SetInsertPoint(WhileDone, WhileDone->begin()); + auto Begin = Builder.CreatePtrToInt(Ptr, Int64Ty); + auto End = Builder.CreatePtrToInt(PtrPhi, Int64Ty); + auto Len = Builder.CreateSub(End, Begin); + Len = Builder.CreateAdd(Len, One); + + // Final join. + BranchInst::Create(Join, WhileDone); + Builder.SetInsertPoint(Join, Join->begin()); + auto LenPhi = Builder.CreatePHI(Len->getType(), 2); + LenPhi->addIncoming(Len, WhileDone); + LenPhi->addIncoming(Zero, Prev); + + return LenPhi; +} + +static Value *callAppendStringN(IRBuilder<> &Builder, Value *Desc, Value *Str, + Value *Length, bool isLast) { + auto Int64Ty = Builder.getInt64Ty(); + auto CharPtrTy = Builder.getInt8PtrTy(); + auto Int32Ty = Builder.getInt32Ty(); + auto M = Builder.GetInsertBlock()->getModule(); + auto Fn = M->getOrInsertFunction("__ockl_printf_append_string_n", Int64Ty, + Int64Ty, CharPtrTy, Int64Ty, Int32Ty); + auto IsLastInt32 = Builder.getInt32(isLast); + return Builder.CreateCall(Fn, {Desc, Str, Length, IsLastInt32}); +} + +static Value *appendString(IRBuilder<> &Builder, Value *Desc, Value *Arg, + bool IsLast) { + auto Length = getStrlenWithNull(Builder, Arg); + return callAppendStringN(Builder, Desc, Arg, Length, IsLast); +} + +static Value *processArg(IRBuilder<> &Builder, Value *Desc, Value *Arg, + bool SpecIsCString, bool IsLast) { + if (SpecIsCString && isCString(Arg)) { + return appendString(Builder, Desc, Arg, IsLast); + } + // If the format specifies a string but the argument is not, the frontend will + // have printed a warning. We just rely on undefined behaviour and send the + // argument anyway. + return appendArg(Builder, Desc, Arg, IsLast); +} + +// Scan the format string to locate all specifiers, and mark the ones that +// specify a string, i.e, the "%s" specifier with optional '*' characters. +static void locateCStrings(SparseBitVector<8> &BV, Value *Fmt) { + StringRef Str; + if (!getConstantStringInfo(Fmt, Str) || Str.empty()) + return; + + static const char ConvSpecifiers[] = "diouxXfFeEgGaAcspn"; + size_t SpecPos = 0; + // Skip the first argument, the format string. + unsigned ArgIdx = 1; + + while ((SpecPos = Str.find_first_of('%', SpecPos)) != StringRef::npos) { + if (Str[SpecPos + 1] == '%') { + SpecPos += 2; + continue; + } + auto SpecEnd = Str.find_first_of(ConvSpecifiers, SpecPos); + if (SpecEnd == StringRef::npos) + return; + auto Spec = Str.slice(SpecPos, SpecEnd + 1); + ArgIdx += Spec.count('*'); + if (Str[SpecEnd] == 's') { + BV.set(ArgIdx); + } + SpecPos = SpecEnd + 1; + ++ArgIdx; + } +} + +Value *llvm::emitAMDGPUPrintfCall(IRBuilder<> &Builder, + ArrayRef<Value *> Args) { + auto NumOps = Args.size(); + assert(NumOps >= 1); + + auto Fmt = Args[0]; + SparseBitVector<8> SpecIsCString; + locateCStrings(SpecIsCString, Fmt); + + auto Desc = callPrintfBegin(Builder, Builder.getIntN(64, 0)); + Desc = appendString(Builder, Desc, Fmt, NumOps == 1); + + // FIXME: This invokes hostcall once for each argument. We can pack up to + // seven scalar printf arguments in a single hostcall. See the signature of + // callAppendArgs(). + for (unsigned int i = 1; i != NumOps; ++i) { + bool IsLast = i == NumOps - 1; + bool IsCString = SpecIsCString.test(i); + Desc = processArg(Builder, Desc, Args[i], IsCString, IsLast); + } + + return Builder.CreateTrunc(Desc, Builder.getInt32Ty()); +} Index: llvm/include/llvm/Transforms/Utils/AMDGPUEmitPrintf.h =================================================================== --- /dev/null +++ llvm/include/llvm/Transforms/Utils/AMDGPUEmitPrintf.h @@ -0,0 +1,25 @@ +//===- AMDGPUEmitPrintf.h ---------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Utility function to lower a printf call into a series of device +// library calls on the AMDGPU target. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_AMDGPUEMITPRINTF_H +#define LLVM_TRANSFORMS_UTILS_AMDGPUEMITPRINTF_H + +#include "llvm/IR/IRBuilder.h" + +namespace llvm { + +Value *emitAMDGPUPrintfCall(IRBuilder<> &Builder, ArrayRef<Value *> Args); + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_UTILS_AMDGPUEMITPRINTF_H Index: clang/test/Driver/hip-printf.hip =================================================================== --- /dev/null +++ clang/test/Driver/hip-printf.hip @@ -0,0 +1,9 @@ +// REQUIRES: clang-driver +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// RUN: %clang -### -target x86_64-linux-gnu -x hip --cuda-gpu-arch=gfx900 \ +// RUN: %s 2>&1 | FileCheck %s + +// CHECK: [[CLANG:".*clang.*"]] "-cc1" +// CHECK-SAME: "-Werror=format-nonliteral" Index: clang/test/CodeGenHIP/printf.cpp =================================================================== --- /dev/null +++ clang/test/CodeGenHIP/printf.cpp @@ -0,0 +1,26 @@ +// REQUIRES: amdgpu-registered-target + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -emit-llvm \ +// RUN: -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -DBUILTIN -emit-llvm \ +// RUN: -o - %s | FileCheck %s + +#ifdef BUILTIN +#define printf(...) __builtin_printf(__VA_ARGS__) +#else +extern "C" int printf(const char *format, ...); +#endif + +int foo() { + return printf("%d %s %f\n", 42, "hello world", 3.14159); +} + +// CHECK: [[BEGIN:%[0-9]+]] = call i64 @__ockl_printf_begin(i64 0) +// CHECK: [[STRLEN1:%[0-9]+]] = phi i64 [ %{{[^,]*}}, %{{[^ ]*}} ], [ 0, %{{[^ ]*}} ] +// CHECK: [[APPEND1:%[0-9]+]] = call i64 @__ockl_printf_append_string_n(i64 [[BEGIN]], {{.*}}, i64 [[STRLEN1]], i32 0) +// CHECK: [[APPEND2:%[0-9]+]] = call i64 @__ockl_printf_append_args(i64 [[APPEND1]], i32 1, i64 42, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i32 0) +// CHECK: [[STRLEN2:%[0-9]+]] = phi i64 [ %{{[^,]*}}, %{{[^ ]*}} ], [ 0, %{{[^ ]*}} ] +// CHECK: [[APPEND3:%[0-9]+]] = call i64 @__ockl_printf_append_string_n(i64 [[APPEND2]], {{.*}}, i64 [[STRLEN2]], i32 0) +// CHECK: [[APPEND4:%[0-9]+]] = call i64 @__ockl_printf_append_args(i64 [[APPEND3]], i32 1, i64 4614256650576692846, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i32 1) +// CHECK: [[RETURN:%[0-9]+]] = trunc i64 [[APPEND4]] to i32 +// CHECK: ret i32 [[RETURN]] Index: clang/test/CodeGenHIP/printf-aggregate.cpp =================================================================== --- /dev/null +++ clang/test/CodeGenHIP/printf-aggregate.cpp @@ -0,0 +1,26 @@ +// REQUIRES: amdgpu-registered-target + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip \ +// RUN: -verify -emit-llvm %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -DBUILTIN \ +// RUN: -verify -emit-llvm %s + +#ifdef BUILTIN +#define printf(...) __builtin_printf(__VA_ARGS__) +#else +extern "C" int printf(const char *format, ...); +#endif + +// Check that we don't crash when asked to printf a non-scalar arg. +struct Struct { + int x; + int y; +}; + +void PrintfNonScalar(const char *fmt) { + printf(fmt, 1); + // Ignore the warning about the %d not matching the struct argument + // expected-warning@+2 {{}} + // expected-error@+1 {{cannot compile this non-scalar arg to printf}} + printf("%d", Struct()); +} Index: clang/lib/Driver/ToolChains/HIP.cpp =================================================================== --- clang/lib/Driver/ToolChains/HIP.cpp +++ clang/lib/Driver/ToolChains/HIP.cpp @@ -428,6 +428,7 @@ void HIPToolChain::addClangWarningOptions(ArgStringList &CC1Args) const { HostTC.addClangWarningOptions(CC1Args); + CC1Args.push_back("-Werror=format-nonliteral"); } ToolChain::CXXStdlibType Index: clang/lib/CodeGen/CodeGenFunction.h =================================================================== --- clang/lib/CodeGen/CodeGenFunction.h +++ clang/lib/CodeGen/CodeGenFunction.h @@ -3722,6 +3722,8 @@ RValue EmitNVPTXDevicePrintfCallExpr(const CallExpr *E, ReturnValueSlot ReturnValue); + RValue EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E, + ReturnValueSlot ReturnValue); RValue EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue); Index: clang/lib/CodeGen/CGGPUBuiltin.cpp =================================================================== --- clang/lib/CodeGen/CGGPUBuiltin.cpp +++ clang/lib/CodeGen/CGGPUBuiltin.cpp @@ -16,6 +16,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/Instruction.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Transforms/Utils/AMDGPUEmitPrintf.h" using namespace clang; using namespace CodeGen; @@ -120,3 +121,36 @@ return RValue::get(Builder.CreateCall( VprintfFunc, {Args[0].getRValue(*this).getScalarVal(), BufferPtr})); } + +RValue +CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E, + ReturnValueSlot ReturnValue) { + assert(getTarget().getTriple().getArch() == llvm::Triple::amdgcn); + assert(E->getBuiltinCallee() == Builtin::BIprintf || + E->getBuiltinCallee() == Builtin::BI__builtin_printf); + assert(E->getNumArgs() >= 1); // printf always has at least one arg. + + CallArgList CallArgs; + EmitCallArgs(CallArgs, + E->getDirectCallee()->getType()->getAs<FunctionProtoType>(), + E->arguments(), E->getDirectCallee(), + /* ParamsToSkip = */ 0); + + SmallVector<llvm::Value *, 8> Args; + for (auto A : CallArgs) { + // We don't know how to emit non-scalar varargs. + if (!A.getRValue(*this).isScalar()) { + CGM.ErrorUnsupported(E, "non-scalar arg to printf"); + return RValue::get(llvm::ConstantInt::get(IntTy, -1)); + } + + llvm::Value *Arg = A.getRValue(*this).getScalarVal(); + Args.push_back(Arg); + } + + llvm::IRBuilder<> IRB(Builder.GetInsertBlock(), Builder.GetInsertPoint()); + IRB.SetCurrentDebugLocation(Builder.getCurrentDebugLocation()); + auto Printf = llvm::emitAMDGPUPrintfCall(IRB, Args); + Builder.SetInsertPoint(IRB.GetInsertBlock(), IRB.GetInsertPoint()); + return RValue::get(Printf); +} Index: clang/lib/CodeGen/CGBuiltin.cpp =================================================================== --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -4106,8 +4106,12 @@ return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy())); } case Builtin::BIprintf: + case Builtin::BI__builtin_printf: if (getTarget().getTriple().isNVPTX()) return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue); + if (getTarget().getTriple().getArch() == Triple::amdgcn && + getLangOpts().HIP) + return EmitAMDGPUDevicePrintfCallExpr(E, ReturnValue); break; case Builtin::BI__builtin_canonicalize: case Builtin::BI__builtin_canonicalizef:
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits