Author: Min-Yih Hsu Date: 2024-12-10T17:57:04-08:00 New Revision: ea76b2d8d83d6885bf5707832cbc4b7655e21b08
URL: https://github.com/llvm/llvm-project/commit/ea76b2d8d83d6885bf5707832cbc4b7655e21b08 DIFF: https://github.com/llvm/llvm-project/commit/ea76b2d8d83d6885bf5707832cbc4b7655e21b08.diff LOG: [XRay][RISCV] RISCV support for XRay (#117368) Add RISC-V support for XRay. The RV64 implementation has been tested in both QEMU and in our hardware environment. Currently this requires D and C extensions, but since both RV64GC and RVA22/RVA23 are becoming mainstream, I don't think this requirement will be a big problem. Based on the previous work by @a-poduval : https://reviews.llvm.org/D117929 --------- Co-authored-by: Ashwin Poduval <ashwin.podu...@gmail.com> Added: compiler-rt/lib/xray/xray_riscv.cpp compiler-rt/lib/xray/xray_trampoline_riscv32.S compiler-rt/lib/xray/xray_trampoline_riscv64.S compiler-rt/lib/xray/xray_trampoline_riscv_common.S llvm/test/CodeGen/RISCV/xray-attribute-instrumentation.ll Modified: clang/lib/Driver/XRayArgs.cpp compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake compiler-rt/lib/xray/CMakeLists.txt compiler-rt/lib/xray/xray_interface.cpp compiler-rt/lib/xray/xray_tsc.h llvm/lib/CodeGen/XRayInstrumentation.cpp llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp llvm/lib/Target/RISCV/RISCVInstrInfo.cpp llvm/lib/Target/RISCV/RISCVSubtarget.h llvm/lib/XRay/InstrumentationMap.cpp Removed: ################################################################################ diff --git a/clang/lib/Driver/XRayArgs.cpp b/clang/lib/Driver/XRayArgs.cpp index de5c38ebc3abbd..f8c213334a2b40 100644 --- a/clang/lib/Driver/XRayArgs.cpp +++ b/clang/lib/Driver/XRayArgs.cpp @@ -51,6 +51,8 @@ XRayArgs::XRayArgs(const ToolChain &TC, const ArgList &Args) { case llvm::Triple::mips64: case llvm::Triple::mips64el: case llvm::Triple::systemz: + case llvm::Triple::riscv32: + case llvm::Triple::riscv64: break; default: D.Diag(diag::err_drv_unsupported_opt_for_target) diff --git a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake 
index b29ae179c2b4f4..5a1e8db61023b0 100644 --- a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake +++ b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake @@ -102,7 +102,7 @@ if(APPLE) set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM64}) else() set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM32} ${ARM64} ${MIPS32} ${MIPS64} - powerpc64le ${HEXAGON} ${LOONGARCH64}) + powerpc64le ${HEXAGON} ${LOONGARCH64} ${RISCV32} ${RISCV64}) endif() set(ALL_XRAY_DSO_SUPPORTED_ARCH ${X86_64} ${ARM64}) set(ALL_SHADOWCALLSTACK_SUPPORTED_ARCH ${ARM64}) diff --git a/compiler-rt/lib/xray/CMakeLists.txt b/compiler-rt/lib/xray/CMakeLists.txt index 7e3f1a0aa616e5..e7f01a2f4f1640 100644 --- a/compiler-rt/lib/xray/CMakeLists.txt +++ b/compiler-rt/lib/xray/CMakeLists.txt @@ -96,6 +96,16 @@ set(hexagon_SOURCES xray_trampoline_hexagon.S ) +set(riscv32_SOURCES + xray_riscv.cpp + xray_trampoline_riscv32.S + ) + +set(riscv64_SOURCES + xray_riscv.cpp + xray_trampoline_riscv64.S + ) + set(XRAY_SOURCE_ARCHS arm armhf @@ -156,6 +166,8 @@ set(XRAY_ALL_SOURCE_FILES ${mips64_SOURCES} ${mips64el_SOURCES} ${powerpc64le_SOURCES} + ${riscv32_SOURCES} + ${riscv64_SOURCES} ${XRAY_IMPL_HEADERS} ) list(REMOVE_DUPLICATES XRAY_ALL_SOURCE_FILES) diff --git a/compiler-rt/lib/xray/xray_interface.cpp b/compiler-rt/lib/xray/xray_interface.cpp index b6f0e6762f1681..4ec492c266d809 100644 --- a/compiler-rt/lib/xray/xray_interface.cpp +++ b/compiler-rt/lib/xray/xray_interface.cpp @@ -57,6 +57,10 @@ static const int16_t cSledLength = 64; static const int16_t cSledLength = 8; #elif defined(__hexagon__) static const int16_t cSledLength = 20; +#elif defined(__riscv) && (__riscv_xlen == 64) +static const int16_t cSledLength = 68; +#elif defined(__riscv) && (__riscv_xlen == 32) +static const int16_t cSledLength = 52; #else #error "Unsupported CPU Architecture" #endif /* CPU architecture */ diff --git a/compiler-rt/lib/xray/xray_riscv.cpp b/compiler-rt/lib/xray/xray_riscv.cpp new file mode 100644 index 00000000000000..e3a7cdb18b6407 
--- /dev/null +++ b/compiler-rt/lib/xray/xray_riscv.cpp @@ -0,0 +1,266 @@ +//===-- xray_riscv.cpp ----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Implementation of RISC-V specific routines (32- and 64-bit). +// +//===----------------------------------------------------------------------===// +#include "sanitizer_common/sanitizer_common.h" +#include "xray_defs.h" +#include "xray_interface_internal.h" +#include <atomic> + +namespace __xray { + +// The machine codes for some instructions used in runtime patching. +enum PatchOpcodes : uint32_t { + PO_ADDI = 0x00000013, // addi rd, rs1, imm + PO_ADD = 0x00000033, // add rd, rs1, rs2 + PO_SW = 0x00002023, // sw rs2, imm(rs1) + PO_SD = 0x00003023, // sd rs2, imm(rs1) + PO_LUI = 0x00000037, // lui rd, imm + PO_OR = 0x00006033, // or rd, rs1, rs2 + PO_SLLI = 0x00001013, // slli rd, rs1, shamt + PO_JALR = 0x00000067, // jalr rd, rs1 + PO_LW = 0x00002003, // lw rd, imm(rs1) + PO_LD = 0x00003003, // ld rd, imm(rs1) + PO_J = 0x0000006f, // jal imm + PO_NOP = PO_ADDI, // addi x0, x0, 0 +}; + +enum RegNum : uint32_t { + RN_X0 = 0, + RN_RA = 1, + RN_SP = 2, + RN_T1 = 6, + RN_A0 = 10, +}; + +static inline uint32_t encodeRTypeInstruction(uint32_t Opcode, uint32_t Rs1, + uint32_t Rs2, uint32_t Rd) { + return Rs2 << 20 | Rs1 << 15 | Rd << 7 | Opcode; +} + +static inline uint32_t encodeITypeInstruction(uint32_t Opcode, uint32_t Rs1, + uint32_t Rd, uint32_t Imm) { + return Imm << 20 | Rs1 << 15 | Rd << 7 | Opcode; +} + +static inline uint32_t encodeSTypeInstruction(uint32_t Opcode, uint32_t Rs1, + uint32_t Rs2, uint32_t Imm) { + uint32_t ImmMSB = (Imm & 
0xfe0) << 20; + uint32_t ImmLSB = (Imm & 0x01f) << 7; + return ImmMSB | Rs2 << 20 | Rs1 << 15 | ImmLSB | Opcode; +} + +static inline uint32_t encodeUTypeInstruction(uint32_t Opcode, uint32_t Rd, + uint32_t Imm) { + return Imm << 12 | Rd << 7 | Opcode; +} + +static inline uint32_t encodeJTypeInstruction(uint32_t Opcode, uint32_t Rd, + uint32_t Imm) { + uint32_t ImmMSB = (Imm & 0x100000) << 11; + uint32_t ImmLSB = (Imm & 0x7fe) << 20; + uint32_t Imm11 = (Imm & 0x800) << 9; + uint32_t Imm1912 = (Imm & 0xff000); + return ImmMSB | ImmLSB | Imm11 | Imm1912 | Rd << 7 | Opcode; +} + +static uint32_t hi20(uint32_t val) { return (val + 0x800) >> 12; } +static uint32_t lo12(uint32_t val) { return val & 0xfff; } + +static inline bool patchSled(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled, + void (*TracingHook)()) XRAY_NEVER_INSTRUMENT { + // When |Enable| == true, + // We replace the following compile-time stub (sled): + // + // xray_sled_n: + // J .tmpN + // 21 or 33 C.NOPs (42 or 66 bytes) + // .tmpN + // + // With one of the following runtime patches: + // + // xray_sled_n (32-bit): + // addi sp, sp, -16 ;create stack frame + // sw ra, 12(sp) ;save return address + // sw a0, 8(sp) ;save register a0 + // lui ra, %hi(__xray_FunctionEntry/Exit) + // addi ra, ra, %lo(__xray_FunctionEntry/Exit) + // lui a0, %hi(function_id) + // addi a0, a0, %lo(function_id) ;pass function id + // jalr ra ;call Tracing hook + // lw a0, 8(sp) ;restore register a0 + // lw ra, 12(sp) ;restore return address + // addi sp, sp, 16 ;delete stack frame + // + // xray_sled_n (64-bit): + // addi sp, sp, -32 ;create stack frame + // sd ra, 24(sp) ;save return address + // sd a0, 16(sp) ;save register a0 + // sd t1, 8(sp) ;save register t1 + // lui t1, %highest(__xray_FunctionEntry/Exit) + // addi t1, t1, %higher(__xray_FunctionEntry/Exit) + // slli t1, t1, 32 + // lui ra, %hi(__xray_FunctionEntry/Exit) + // addi ra, ra, %lo(__xray_FunctionEntry/Exit) + // add ra, t1, ra + // 
lui a0, %hi(function_id) + // addi a0, a0, %lo(function_id) ;pass function id + // jalr ra ;call Tracing hook + // ld t1, 8(sp) ;restore register t1 + // ld a0, 16(sp) ;restore register a0 + // ld ra, 24(sp) ;restore return address + // addi sp, sp, 32 ;delete stack frame + // + // Replacement of the first 4-byte instruction should be the last and atomic + // operation, so that the user code which reaches the sled concurrently + // either jumps over the whole sled, or executes the whole sled when the + // latter is ready. + // + // When |Enable|==false, we set back the first instruction in the sled to be + // J 44 bytes (rv32) + // J 68 bytes (rv64) + + uint32_t *Address = reinterpret_cast<uint32_t *>(Sled.address()); + if (Enable) { +#if __riscv_xlen == 64 + // If the ISA is RV64, the Tracing Hook needs to be typecast to a 64 bit + // value. + uint32_t LoTracingHookAddr = lo12(reinterpret_cast<uint64_t>(TracingHook)); + uint32_t HiTracingHookAddr = hi20(reinterpret_cast<uint64_t>(TracingHook)); + uint32_t HigherTracingHookAddr = + lo12((reinterpret_cast<uint64_t>(TracingHook) + 0x80000000) >> 32); + uint32_t HighestTracingHookAddr = + hi20((reinterpret_cast<uint64_t>(TracingHook) + 0x80000000) >> 32); +#elif __riscv_xlen == 32 + // We typecast the Tracing Hook to a 32 bit value for RV32 + uint32_t LoTracingHookAddr = lo12(reinterpret_cast<uint32_t>(TracingHook)); + uint32_t HiTracingHookAddr = hi20((reinterpret_cast<uint32_t>(TracingHook)); +#endif + uint32_t LoFunctionID = lo12(FuncId); + uint32_t HiFunctionID = hi20(FuncId); + + // The sled that is patched in for RISCV64 defined below. We need the entire + // sleds corresponding to both ISAs to be protected by defines because the + // first few instructions are all different, because we store doubles in + // case of RV64 and store words for RV32. 
Subsequently, we have LUI - and in + // case of RV64, we need extra instructions from this point on, so we see + // differences in addresses to which instructions are stored. + size_t Idx = 1U; + const uint32_t XLenBytes = __riscv_xlen / 8; +#if __riscv_xlen == 64 + const uint32_t LoadOp = PatchOpcodes::PO_LD; + const uint32_t StoreOp = PatchOpcodes::PO_SD; +#elif __riscv_xlen == 32 + const uint32_t LoadOp = PatchOpcodes::PO_LW; + const uint32_t StoreOp = PatchOpcodes::PO_SW; +#endif + + Address[Idx++] = encodeSTypeInstruction(StoreOp, RegNum::RN_SP, + RegNum::RN_RA, 3 * XLenBytes); + Address[Idx++] = encodeSTypeInstruction(StoreOp, RegNum::RN_SP, + RegNum::RN_A0, 2 * XLenBytes); + +#if __riscv_xlen == 64 + Address[Idx++] = encodeSTypeInstruction(StoreOp, RegNum::RN_SP, + RegNum::RN_T1, XLenBytes); + Address[Idx++] = encodeUTypeInstruction(PatchOpcodes::PO_LUI, RegNum::RN_T1, + HighestTracingHookAddr); + Address[Idx++] = + encodeITypeInstruction(PatchOpcodes::PO_ADDI, RegNum::RN_T1, + RegNum::RN_T1, HigherTracingHookAddr); + Address[Idx++] = encodeITypeInstruction(PatchOpcodes::PO_SLLI, + RegNum::RN_T1, RegNum::RN_T1, 32); +#endif + Address[Idx++] = encodeUTypeInstruction(PatchOpcodes::PO_LUI, RegNum::RN_RA, + HiTracingHookAddr); + Address[Idx++] = encodeITypeInstruction( + PatchOpcodes::PO_ADDI, RegNum::RN_RA, RegNum::RN_RA, LoTracingHookAddr); +#if __riscv_xlen == 64 + Address[Idx++] = encodeRTypeInstruction(PatchOpcodes::PO_ADD, RegNum::RN_RA, + RegNum::RN_T1, RegNum::RN_RA); +#endif + Address[Idx++] = encodeUTypeInstruction(PatchOpcodes::PO_LUI, RegNum::RN_A0, + HiFunctionID); + Address[Idx++] = encodeITypeInstruction( + PatchOpcodes::PO_ADDI, RegNum::RN_A0, RegNum::RN_A0, LoFunctionID); + Address[Idx++] = encodeITypeInstruction(PatchOpcodes::PO_JALR, + RegNum::RN_RA, RegNum::RN_RA, 0); + +#if __riscv_xlen == 64 + Address[Idx++] = + encodeITypeInstruction(LoadOp, RegNum::RN_SP, RegNum::RN_T1, XLenBytes); +#endif + Address[Idx++] = 
encodeITypeInstruction(LoadOp, RegNum::RN_SP, + RegNum::RN_A0, 2 * XLenBytes); + Address[Idx++] = encodeITypeInstruction(LoadOp, RegNum::RN_SP, + RegNum::RN_RA, 3 * XLenBytes); + Address[Idx++] = encodeITypeInstruction( + PatchOpcodes::PO_ADDI, RegNum::RN_SP, RegNum::RN_SP, 4 * XLenBytes); + + uint32_t CreateStackSpace = encodeITypeInstruction( + PatchOpcodes::PO_ADDI, RegNum::RN_SP, RegNum::RN_SP, -4 * XLenBytes); + + std::atomic_store_explicit( + reinterpret_cast<std::atomic<uint32_t> *>(Address), CreateStackSpace, + std::memory_order_release); + } else { + uint32_t CreateBranch = encodeJTypeInstruction( + // Jump distance is different in both ISAs due to difference in size of + // sleds +#if __riscv_xlen == 64 + PatchOpcodes::PO_J, RegNum::RN_X0, + 68); // jump encodes an offset of 68 +#elif __riscv_xlen == 32 + PatchOpcodes::PO_J, RegNum::RN_X0, + 44); // jump encodes an offset of 44 +#endif + std::atomic_store_explicit( + reinterpret_cast<std::atomic<uint32_t> *>(Address), CreateBranch, + std::memory_order_release); + } + return true; +} + +bool patchFunctionEntry(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled, + const XRayTrampolines &Trampolines, + bool LogArgs) XRAY_NEVER_INSTRUMENT { + // We don't support logging argument at this moment, so we always + // use EntryTrampoline. 
+ return patchSled(Enable, FuncId, Sled, Trampolines.EntryTrampoline); +} + +bool patchFunctionExit( + const bool Enable, const uint32_t FuncId, const XRaySledEntry &Sled, + const XRayTrampolines &Trampolines) XRAY_NEVER_INSTRUMENT { + return patchSled(Enable, FuncId, Sled, Trampolines.ExitTrampoline); +} + +bool patchFunctionTailExit( + const bool Enable, const uint32_t FuncId, const XRaySledEntry &Sled, + const XRayTrampolines &Trampolines) XRAY_NEVER_INSTRUMENT { + return patchSled(Enable, FuncId, Sled, Trampolines.TailExitTrampoline); +} + +bool patchCustomEvent(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + return false; +} + +bool patchTypedEvent(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + return false; +} +} // namespace __xray + +extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT {} diff --git a/compiler-rt/lib/xray/xray_trampoline_riscv32.S b/compiler-rt/lib/xray/xray_trampoline_riscv32.S new file mode 100644 index 00000000000000..05e3d61e5ef71b --- /dev/null +++ b/compiler-rt/lib/xray/xray_trampoline_riscv32.S @@ -0,0 +1,89 @@ +//===-- xray_trampoline_riscv32.s ----------------------------------*- ASM -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// This implements the riscv32-specific assembler for the trampolines. 
+// +//===----------------------------------------------------------------------===// + +#include "../sanitizer_common/sanitizer_asm.h" + +.macro SAVE_ARG_REGISTERS + // Push argument registers to stack + addi sp, sp, -112 + CFI_DEF_CFA_OFFSET(112) + sw ra, 108(sp) + sw a7, 104(sp) + sw a6, 100(sp) + sw a5, 96(sp) + sw a4, 92(sp) + sw a3, 88(sp) + sw a2, 84(sp) + sw a1, 80(sp) + sw a0, 76(sp) + fsd fa7, 64(sp) + fsd fa6, 56(sp) + fsd fa5, 48(sp) + fsd fa4, 40(sp) + fsd fa3, 32(sp) + fsd fa2, 24(sp) + fsd fa1, 16(sp) + fsd fa0, 8(sp) +.endm + +.macro RESTORE_ARG_REGISTERS + // Restore argument registers + fld fa0, 8(sp) + fld fa1, 16(sp) + fld fa2, 24(sp) + fld fa3, 32(sp) + fld fa4, 40(sp) + fld fa5, 48(sp) + fld fa6, 56(sp) + fld fa7, 64(sp) + lw a0, 76(sp) + lw a1, 80(sp) + lw a2, 84(sp) + lw a3, 88(sp) + lw a4, 92(sp) + lw a5, 96(sp) + lw a6, 100(sp) + lw a7, 104(sp) + lw ra, 108(sp) + addi sp, sp, 112 + CFI_DEF_CFA_OFFSET(0) +.endm + +.macro SAVE_RET_REGISTERS + // Push return registers to stack + addi sp, sp, -32 + CFI_DEF_CFA_OFFSET(32) + sw ra, 28(sp) + sw a1, 24(sp) + sw a0, 20(sp) + fsd fa1, 8(sp) + fsd fa0, 0(sp) +.endm + +.macro RESTORE_RET_REGISTERS + // Restore return registers + fld fa0, 0(sp) + fld fa1, 8(sp) + lw a0, 20(sp) + lw a1, 24(sp) + lw ra, 28(sp) + addi sp, sp, 32 + CFI_DEF_CFA_OFFSET(0) +.endm + +.macro LOAD_XLEN, rd, src + lw \rd, \src +.endm + +#include "xray_trampoline_riscv_common.S" diff --git a/compiler-rt/lib/xray/xray_trampoline_riscv64.S b/compiler-rt/lib/xray/xray_trampoline_riscv64.S new file mode 100644 index 00000000000000..692350eaaa38e0 --- /dev/null +++ b/compiler-rt/lib/xray/xray_trampoline_riscv64.S @@ -0,0 +1,89 @@ +//===-- xray_trampoline_riscv64.s ----------------------------------*- ASM -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// This implements the riscv64-specific assembler for the trampolines. +// +//===----------------------------------------------------------------------===// + +#include "../sanitizer_common/sanitizer_asm.h" + +.macro SAVE_ARG_REGISTERS + // Push argument registers to stack + addi sp, sp, -144 + CFI_DEF_CFA_OFFSET(144) + sd ra, 136(sp) + sd a7, 128(sp) + sd a6, 120(sp) + sd a5, 112(sp) + sd a4, 104(sp) + sd a3, 96(sp) + sd a2, 88(sp) + sd a1, 80(sp) + sd a0, 72(sp) + fsd fa7, 64(sp) + fsd fa6, 56(sp) + fsd fa5, 48(sp) + fsd fa4, 40(sp) + fsd fa3, 32(sp) + fsd fa2, 24(sp) + fsd fa1, 16(sp) + fsd fa0, 8(sp) +.endm + +.macro SAVE_RET_REGISTERS + // Push return registers to stack + addi sp, sp, -48 + CFI_DEF_CFA_OFFSET(48) + sd ra, 40(sp) + sd a1, 32(sp) + sd a0, 24(sp) + fsd fa1, 16(sp) + fsd fa0, 8(sp) +.endm + +.macro RESTORE_RET_REGISTERS + // Restore return registers + fld fa0, 8(sp) + fld fa1, 16(sp) + ld a0, 24(sp) + ld a1, 32(sp) + ld ra, 40(sp) + addi sp, sp, 48 + CFI_DEF_CFA_OFFSET(0) +.endm + +.macro RESTORE_ARG_REGISTERS + // Restore argument registers + fld fa0, 8(sp) + fld fa1, 16(sp) + fld fa2, 24(sp) + fld fa3, 32(sp) + fld fa4, 40(sp) + fld fa5, 48(sp) + fld fa6, 56(sp) + fld fa7, 64(sp) + ld a0, 72(sp) + ld a1, 80(sp) + ld a2, 88(sp) + ld a3, 96(sp) + ld a4, 104(sp) + ld a5, 112(sp) + ld a6, 120(sp) + ld a7, 128(sp) + ld ra, 136(sp) + addi sp, sp, 144 + CFI_DEF_CFA_OFFSET(0) +.endm + +.macro LOAD_XLEN, rd, src + ld \rd, \src +.endm + +#include "xray_trampoline_riscv_common.S" diff --git a/compiler-rt/lib/xray/xray_trampoline_riscv_common.S b/compiler-rt/lib/xray/xray_trampoline_riscv_common.S new file mode 100644 index 00000000000000..746d612e982045 --- /dev/null +++ b/compiler-rt/lib/xray/xray_trampoline_riscv_common.S @@ -0,0 
+1,96 @@ +//===-- xray_trampoline_riscv_common.s --------------------------*- ASM -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// This implements the trampolines code shared between riscv32 and riscv64. +// +//===----------------------------------------------------------------------===// + +#include "../builtins/assembly.h" + + .text + .p2align 2 + .global ASM_SYMBOL(__xray_FunctionEntry) + ASM_TYPE_FUNCTION(__xray_FunctionEntry) +ASM_SYMBOL(__xray_FunctionEntry): + CFI_STARTPROC + SAVE_ARG_REGISTERS + + // Load the handler function pointer into a2 + la a2, ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE) + LOAD_XLEN a2, 0(a2) + + // Handler address will be null if it is not set + beq a2, x0, 1f + + // If we reach here, we are tracing an event + // a0 already contains function id + // a1 = 0 means we are tracing an entry event + li a1, 0 + jalr a2 + +1: + RESTORE_ARG_REGISTERS + jr ra + ASM_SIZE(__xray_FunctionEntry) + CFI_ENDPROC + + .text + .p2align 2 + .global ASM_SYMBOL(__xray_FunctionExit) + ASM_TYPE_FUNCTION(__xray_FunctionExit) +ASM_SYMBOL(__xray_FunctionExit): + CFI_STARTPROC + SAVE_RET_REGISTERS + + // Load the handler function pointer into a2 + la a2, ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE) + LOAD_XLEN a2, 0(a2) + + // Handler address will be null if it is not set + beq a2, x0, 1f + + // If we reach here, we are tracing an event + // a0 already contains function id + // a1 = 1 means we are tracing an exit event + li a1, 1 + jalr a2 + +1: + RESTORE_RET_REGISTERS + jr ra + ASM_SIZE(__xray_FunctionExit) + CFI_ENDPROC + + .text + .p2align 2 + .global ASM_SYMBOL(__xray_FunctionTailExit) + 
ASM_TYPE_FUNCTION(__xray_FunctionTailExit) +ASM_SYMBOL(__xray_FunctionTailExit): + CFI_STARTPROC + SAVE_ARG_REGISTERS + + // Load the handler function pointer into a2 + la a2, ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE) + LOAD_XLEN a2, 0(a2) + + // Handler address will be null if it is not set + beq a2, x0, 1f + + // If we reach here, we are tracing an event + // a0 already contains function id + // a1 = 2 means we are tracing a tail exit event + li a1, 2 + jalr a2 + +1: + RESTORE_ARG_REGISTERS + jr ra + ASM_SIZE(__xray_FunctionTailExit) + CFI_ENDPROC diff --git a/compiler-rt/lib/xray/xray_tsc.h b/compiler-rt/lib/xray/xray_tsc.h index e1cafe1bf11d2d..b62a686d6ce0f2 100644 --- a/compiler-rt/lib/xray/xray_tsc.h +++ b/compiler-rt/lib/xray/xray_tsc.h @@ -43,7 +43,7 @@ inline uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT { #elif defined(__powerpc64__) #include "xray_powerpc64.inc" #elif defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \ - defined(__hexagon__) || defined(__loongarch_lp64) + defined(__hexagon__) || defined(__loongarch_lp64) || defined(__riscv) // Emulated TSC. // There is no instruction like RDTSCP in user mode on ARM. ARM's CP15 does // not have a constant frequency like TSC on x86(_64), it may go faster diff --git a/llvm/lib/CodeGen/XRayInstrumentation.cpp b/llvm/lib/CodeGen/XRayInstrumentation.cpp index 8f718d884cd067..8af16fa6249f41 100644 --- a/llvm/lib/CodeGen/XRayInstrumentation.cpp +++ b/llvm/lib/CodeGen/XRayInstrumentation.cpp @@ -233,10 +233,13 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) { case Triple::ArchType::mips: case Triple::ArchType::mipsel: case Triple::ArchType::mips64: - case Triple::ArchType::mips64el: { + case Triple::ArchType::mips64el: + case Triple::ArchType::riscv32: + case Triple::ArchType::riscv64: { // For the architectures which don't have a single return instruction InstrumentationOptions op; - op.HandleTailcall = false; + // RISC-V supports patching tail calls. 
+ op.HandleTailcall = MF.getTarget().getTargetTriple().isRISCV(); op.HandleAllReturns = true; prependRetWithPatchableExit(MF, TII, op); break; diff --git a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp index 0d818bc837fb70..b1990409754b08 100644 --- a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp +++ b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp @@ -113,6 +113,12 @@ class RISCVAsmPrinter : public AsmPrinter { void emitNTLHint(const MachineInstr *MI); + // XRay Support + void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr *MI); + void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr *MI); + void LowerPATCHABLE_TAIL_CALL(const MachineInstr *MI); + void emitSled(const MachineInstr *MI, SledKind Kind); + bool lowerToMCInst(const MachineInstr *MI, MCInst &OutMI); }; } @@ -316,6 +322,22 @@ void RISCVAsmPrinter::emitInstruction(const MachineInstr *MI) { return LowerPATCHPOINT(*OutStreamer, SM, *MI); case TargetOpcode::STATEPOINT: return LowerSTATEPOINT(*OutStreamer, SM, *MI); + case TargetOpcode::PATCHABLE_FUNCTION_ENTER: { + // patchable-function-entry is handled in lowerToMCInst + // Therefore, we break out of the switch statement if we encounter it here. 
+ const Function &F = MI->getParent()->getParent()->getFunction(); + if (F.hasFnAttribute("patchable-function-entry")) + break; + + LowerPATCHABLE_FUNCTION_ENTER(MI); + return; + } + case TargetOpcode::PATCHABLE_FUNCTION_EXIT: + LowerPATCHABLE_FUNCTION_EXIT(MI); + return; + case TargetOpcode::PATCHABLE_TAIL_CALL: + LowerPATCHABLE_TAIL_CALL(MI); + return; } MCInst OutInst; @@ -453,11 +475,71 @@ bool RISCVAsmPrinter::runOnMachineFunction(MachineFunction &MF) { SetupMachineFunction(MF); emitFunctionBody(); + // Emit the XRay table + emitXRayTable(); + if (EmittedOptionArch) RTS.emitDirectiveOptionPop(); return false; } +void RISCVAsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr *MI) { + emitSled(MI, SledKind::FUNCTION_ENTER); +} + +void RISCVAsmPrinter::LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr *MI) { + emitSled(MI, SledKind::FUNCTION_EXIT); +} + +void RISCVAsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr *MI) { + emitSled(MI, SledKind::TAIL_CALL); +} + +void RISCVAsmPrinter::emitSled(const MachineInstr *MI, SledKind Kind) { + // We want to emit the jump instruction and the nops constituting the sled. + // The format is as follows: + // .Lxray_sled_N + // ALIGN + // J .tmpN + // 21 or 33 C.NOP instructions + // .tmpN + + // The following variable holds the count of the number of NOPs to be patched + // in for XRay instrumentation during compilation. + // Note that RV64 and RV32 each has a sled of 68 and 44 bytes, respectively. + // Assuming we're using JAL to jump to .tmpN, then we only need + // (68 - 4)/2 = 32 NOPs for RV64 and (44 - 4)/2 = 20 for RV32. However, there + // is a chance that we'll use C.JAL instead, so an additional NOP is needed. + const uint8_t NoopsInSledCount = + MI->getParent()->getParent()->getSubtarget<RISCVSubtarget>().is64Bit() + ? 
33 + : 21; + + OutStreamer->emitCodeAlignment(Align(4), &getSubtargetInfo()); + auto CurSled = OutContext.createTempSymbol("xray_sled_", true); + OutStreamer->emitLabel(CurSled); + auto Target = OutContext.createTempSymbol(); + + const MCExpr *TargetExpr = MCSymbolRefExpr::create( + Target, MCSymbolRefExpr::VariantKind::VK_None, OutContext); + + // Emit "J bytes" instruction, which jumps over the nop sled to the actual + // start of function. + EmitToStreamer( + *OutStreamer, + MCInstBuilder(RISCV::JAL).addReg(RISCV::X0).addExpr(TargetExpr)); + + // Emit NOP instructions + for (int8_t I = 0; I < NoopsInSledCount; ++I) + EmitToStreamer(*OutStreamer, MCInstBuilder(RISCV::ADDI) + .addReg(RISCV::X0) + .addReg(RISCV::X0) + .addImm(0)); + + OutStreamer->emitLabel(Target); + recordSled(CurSled, *MI, Kind, 2); +} + void RISCVAsmPrinter::emitStartOfAsmFile(Module &M) { RISCVTargetStreamer &RTS = static_cast<RISCVTargetStreamer &>(*OutStreamer->getTargetStreamer()); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 47273d6bc06d65..6a3a89371b57a0 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -1576,6 +1576,26 @@ unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { // No patch bytes means at most a PseudoCall is emitted return std::max(NumBytes, 8U); } + case TargetOpcode::PATCHABLE_FUNCTION_ENTER: + case TargetOpcode::PATCHABLE_FUNCTION_EXIT: + case TargetOpcode::PATCHABLE_TAIL_CALL: { + const MachineFunction &MF = *MI.getParent()->getParent(); + const Function &F = MF.getFunction(); + if (Opcode == TargetOpcode::PATCHABLE_FUNCTION_ENTER && + F.hasFnAttribute("patchable-function-entry")) { + unsigned Num; + if (F.getFnAttribute("patchable-function-entry") + .getValueAsString() + .getAsInteger(10, Num)) + return get(Opcode).getSize(); + + // Number of C.NOP or NOP + return (STI.hasStdExtCOrZca() ? 
2 : 4) * Num; + } + // XRay uses C.JAL + 21 or 33 C.NOP for each sled in RV32 and RV64, + // respectively. + return STI.is64Bit() ? 68 : 44; + } default: return get(Opcode).getSize(); } diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h index 5e775d2f87bd94..9a1881c2d39837 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -236,6 +236,9 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo { return UserReservedRegister[i]; } + // XRay support - require D and C extensions. + bool isXRaySupported() const override { return hasStdExtD() && hasStdExtC(); } + // Vector codegen related methods. bool hasVInstructions() const { return HasStdExtZve32x; } bool hasVInstructionsI64() const { return HasStdExtZve64x; } diff --git a/llvm/lib/XRay/InstrumentationMap.cpp b/llvm/lib/XRay/InstrumentationMap.cpp index 800f0a0f47e425..0ebdcd5bac7526 100644 --- a/llvm/lib/XRay/InstrumentationMap.cpp +++ b/llvm/lib/XRay/InstrumentationMap.cpp @@ -63,7 +63,8 @@ loadObj(StringRef Filename, object::OwningBinary<object::ObjectFile> &ObjFile, ObjFile.getBinary()->getArch() == Triple::loongarch64 || ObjFile.getBinary()->getArch() == Triple::ppc64le || ObjFile.getBinary()->getArch() == Triple::arm || - ObjFile.getBinary()->getArch() == Triple::aarch64)) + ObjFile.getBinary()->getArch() == Triple::aarch64 || + ObjFile.getBinary()->getArch() == Triple::riscv64)) return make_error<StringError>( "File format not supported (only does ELF and Mach-O little endian " "64-bit).", diff --git a/llvm/test/CodeGen/RISCV/xray-attribute-instrumentation.ll b/llvm/test/CodeGen/RISCV/xray-attribute-instrumentation.ll new file mode 100644 index 00000000000000..ec2b986d174d91 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/xray-attribute-instrumentation.ll @@ -0,0 +1,24 @@ +; RUN: llc -mtriple=riscv32-unknown-linux-gnu -mattr=+d,+c < %s | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=riscv64-unknown-linux-gnu 
-mattr=+d,+c < %s | FileCheck --check-prefix=CHECK --check-prefix=CHECK-RISCV64 %s + +define i32 @foo() nounwind "function-instrument"="xray-always" { +; CHECK: .p2align 2 +; CHECK-LABEL: .Lxray_sled_0: +; CHECK-NEXT: j .Ltmp0 +; CHECK-COUNT-21: nop +; CHECK-RISCV64-COUNT-12: nop +; CHECK-LABEL: .Ltmp0: + ret i32 0 +; CHECK: .p2align 2 +; CHECK-LABEL: .Lxray_sled_1: +; CHECK-NEXT: j .Ltmp1 +; CHECK-COUNT-21: nop +; CHECK-RISCV64-COUNT-12: nop +; CHECK-LABEL: .Ltmp1: +; CHECK-NEXT: ret +} +; CHECK: .section xray_instr_map,"ao",@progbits,foo +; CHECK-LABEL: .Lxray_sleds_start0: +; CHECK: .Lxray_sled_0-[[TMP:.Ltmp[0-9]+]] +; CHECK: .Lxray_sled_1-[[TMP:.Ltmp[0-9]+]] +; CHECK-LABEL: .Lxray_sleds_end0: _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits