https://github.com/mshockwave updated https://github.com/llvm/llvm-project/pull/117368
>From 599370a06008092f6aa883bf11600d0b66707bc0 Mon Sep 17 00:00:00 2001 From: Min-Yih Hsu <min....@sifive.com> Date: Wed, 20 Nov 2024 14:37:57 -0800 Subject: [PATCH 1/5] [XRay][RISCV] RISCV support for XRay Add RISC-V support for XRay. The RV64 implementation has been tested in both QEMU and in our production environment. Currently this requires D and C extensions, but since both RV64GC and RVA22/RVA23 are becoming mainstream, I don't think this requirement will be a big problem. Based on Ashwin Poduval's previous work: https://reviews.llvm.org/D117929 Co-authored-by: Ashwin Poduval <ashwin.podu...@gmail.com> --- clang/lib/Driver/XRayArgs.cpp | 2 + .../cmake/Modules/AllSupportedArchDefs.cmake | 2 +- compiler-rt/lib/xray/CMakeLists.txt | 12 + compiler-rt/lib/xray/xray_interface.cpp | 4 + compiler-rt/lib/xray/xray_riscv.cpp | 296 ++++++++++++++++++ .../lib/xray/xray_trampoline_riscv32.S | 83 +++++ .../lib/xray/xray_trampoline_riscv64.S | 83 +++++ .../lib/xray/xray_trampoline_riscv_common.S | 97 ++++++ compiler-rt/lib/xray/xray_tsc.h | 2 +- llvm/lib/CodeGen/XRayInstrumentation.cpp | 7 +- llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp | 82 +++++ llvm/lib/Target/RISCV/RISCVSubtarget.h | 3 + llvm/lib/XRay/InstrumentationMap.cpp | 3 +- .../RISCV/xray-attribute-instrumentation.ll | 24 ++ 14 files changed, 695 insertions(+), 5 deletions(-) create mode 100644 compiler-rt/lib/xray/xray_riscv.cpp create mode 100644 compiler-rt/lib/xray/xray_trampoline_riscv32.S create mode 100644 compiler-rt/lib/xray/xray_trampoline_riscv64.S create mode 100644 compiler-rt/lib/xray/xray_trampoline_riscv_common.S create mode 100644 llvm/test/CodeGen/RISCV/xray-attribute-instrumentation.ll diff --git a/clang/lib/Driver/XRayArgs.cpp b/clang/lib/Driver/XRayArgs.cpp index de5c38ebc3abbd..f8c213334a2b40 100644 --- a/clang/lib/Driver/XRayArgs.cpp +++ b/clang/lib/Driver/XRayArgs.cpp @@ -51,6 +51,8 @@ XRayArgs::XRayArgs(const ToolChain &TC, const ArgList &Args) { case llvm::Triple::mips64: case 
llvm::Triple::mips64el: case llvm::Triple::systemz: + case llvm::Triple::riscv32: + case llvm::Triple::riscv64: break; default: D.Diag(diag::err_drv_unsupported_opt_for_target) diff --git a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake index b29ae179c2b4f4..5a1e8db61023b0 100644 --- a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake +++ b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake @@ -102,7 +102,7 @@ if(APPLE) set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM64}) else() set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM32} ${ARM64} ${MIPS32} ${MIPS64} - powerpc64le ${HEXAGON} ${LOONGARCH64}) + powerpc64le ${HEXAGON} ${LOONGARCH64} ${RISCV32} ${RISCV64}) endif() set(ALL_XRAY_DSO_SUPPORTED_ARCH ${X86_64} ${ARM64}) set(ALL_SHADOWCALLSTACK_SUPPORTED_ARCH ${ARM64}) diff --git a/compiler-rt/lib/xray/CMakeLists.txt b/compiler-rt/lib/xray/CMakeLists.txt index 7e3f1a0aa616e5..e7f01a2f4f1640 100644 --- a/compiler-rt/lib/xray/CMakeLists.txt +++ b/compiler-rt/lib/xray/CMakeLists.txt @@ -96,6 +96,16 @@ set(hexagon_SOURCES xray_trampoline_hexagon.S ) +set(riscv32_SOURCES + xray_riscv.cpp + xray_trampoline_riscv32.S + ) + +set(riscv64_SOURCES + xray_riscv.cpp + xray_trampoline_riscv64.S + ) + set(XRAY_SOURCE_ARCHS arm armhf @@ -156,6 +166,8 @@ set(XRAY_ALL_SOURCE_FILES ${mips64_SOURCES} ${mips64el_SOURCES} ${powerpc64le_SOURCES} + ${riscv32_SOURCES} + ${riscv64_SOURCES} ${XRAY_IMPL_HEADERS} ) list(REMOVE_DUPLICATES XRAY_ALL_SOURCE_FILES) diff --git a/compiler-rt/lib/xray/xray_interface.cpp b/compiler-rt/lib/xray/xray_interface.cpp index b6f0e6762f1681..e66736d9a344e1 100644 --- a/compiler-rt/lib/xray/xray_interface.cpp +++ b/compiler-rt/lib/xray/xray_interface.cpp @@ -57,6 +57,10 @@ static const int16_t cSledLength = 64; static const int16_t cSledLength = 8; #elif defined(__hexagon__) static const int16_t cSledLength = 20; +#elif SANITIZER_RISCV64 +static const int16_t cSledLength = 76; +#elif defined(__riscv) && 
(__riscv_xlen == 32) +static const int16_t cSledLength = 60; #else #error "Unsupported CPU Architecture" #endif /* CPU architecture */ diff --git a/compiler-rt/lib/xray/xray_riscv.cpp b/compiler-rt/lib/xray/xray_riscv.cpp new file mode 100644 index 00000000000000..89ce9305ef3dbe --- /dev/null +++ b/compiler-rt/lib/xray/xray_riscv.cpp @@ -0,0 +1,296 @@ +//===-- xray_riscv.cpp ----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Implementation of riscv-specific routines (32- and 64-bit). +// +//===----------------------------------------------------------------------===// +#include "sanitizer_common/sanitizer_common.h" +#include "xray_defs.h" +#include "xray_interface_internal.h" +#include <atomic> + +namespace __xray { + +// The machine codes for some instructions used in runtime patching. 
+enum PatchOpcodes : uint32_t { + PO_ADDI = 0x00000013, // addi rd, rs1, imm + PO_ADD = 0x00000033, // add rd, rs1, rs2 + PO_SW = 0x00002023, // sw rt, base(offset) + PO_SD = 0x00003023, // sd rt, base(offset) + PO_LUI = 0x00000037, // lui rd, imm + PO_ORI = 0x00006013, // ori rd, rs1, imm + PO_OR = 0x00006033, // or rd, rs1, rs2 + PO_SLLI = 0x00001013, // slli rd, rs, shamt + PO_SRLI = 0x00005013, // srli rd, rs, shamt + PO_JALR = 0x00000067, // jalr rs + PO_LW = 0x00002003, // lw rd, base(offset) + PO_LD = 0x00003003, // ld rd, base(offset) + PO_J = 0x0000006f, // jal #n_bytes + PO_NOP = 0x00000013, // nop - pseduo-instruction, same as addi x0, x0, 0 +}; + +enum RegNum : uint32_t { + RN_R0 = 0x0, + RN_RA = 0x1, + RN_SP = 0x2, + RN_T0 = 0x5, + RN_T1 = 0x6, + RN_T2 = 0x7, + RN_A0 = 0xa, +}; + +static inline uint32_t encodeRTypeInstruction(uint32_t Opcode, uint32_t Rs1, + uint32_t Rs2, uint32_t Rd) { + return Rs2 << 20 | Rs1 << 15 | Rd << 7 | Opcode; +} + +static inline uint32_t encodeITypeInstruction(uint32_t Opcode, uint32_t Rs1, + uint32_t Rd, uint32_t Imm) { + return Imm << 20 | Rs1 << 15 | Rd << 7 | Opcode; +} + +static inline uint32_t encodeSTypeInstruction(uint32_t Opcode, uint32_t Rs1, + uint32_t Rs2, uint32_t Imm) { + uint32_t imm_msbs = (Imm & 0xfe0) << 25; + uint32_t imm_lsbs = (Imm & 0x01f) << 7; + return imm_msbs | Rs2 << 20 | Rs1 << 15 | imm_lsbs | Opcode; +} + +static inline uint32_t encodeUTypeInstruction(uint32_t Opcode, uint32_t Rd, + uint32_t Imm) { + return Imm << 12 | Rd << 7 | Opcode; +} + +static inline uint32_t encodeJTypeInstruction(uint32_t Opcode, uint32_t Rd, + uint32_t Imm) { + uint32_t imm_msb = (Imm & 0x80000) << 31; + uint32_t imm_lsbs = (Imm & 0x003ff) << 21; + uint32_t imm_11 = (Imm & 0x00400) << 20; + uint32_t imm_1912 = (Imm & 0x7f800) << 12; + return imm_msb | imm_lsbs | imm_11 | imm_1912 | Rd << 7 | Opcode; +} + +#if SANITIZER_RISCV64 +static uint32_t hi20(uint64_t val) { return (val + 0x800) >> 12; } +static uint32_t 
lo12(uint64_t val) { return val & 0xfff; } +#elif defined(__riscv) && (__riscv_xlen == 32) +static uint32_t hi20(uint32_t val) { return (val + 0x800) >> 12; } +static uint32_t lo12(uint32_t val) { return val & 0xfff; } +#endif + +static inline bool patchSled(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled, + void (*TracingHook)()) XRAY_NEVER_INSTRUMENT { + // When |Enable| == true, + // We replace the following compile-time stub (sled): + // + // xray_sled_n: + // J .tmpN + // 29 or 37 C.NOPs (58 or 74 bytes) + // .tmpN + // + // With one of the following runtime patches: + // + // xray_sled_n (32-bit): + // addi sp, sp, -16 ;create stack frame + // sw ra, 12(sp) ;save return address + // sw t2, 8(sp) ;save register t2 + // sw t1, 4(sp) ;save register t1 + // sw a0, 0(sp) ;save register a0 + // lui t1, %hi(__xray_FunctionEntry/Exit) + // addi t1, t1, %lo(__xray_FunctionEntry/Exit) + // lui a0, %hi(function_id) + // addi a0, a0, %lo(function_id) ;pass function id + // jalr t1 ;call Tracing hook + // lw a0, 0(sp) ;restore register a0 + // lw t1, 4(sp) ;restore register t1 + // lw t2, 8(sp) ;restore register t2 + // lw ra, 12(sp) ;restore return address + // addi sp, sp, 16 ;delete stack frame + // + // xray_sled_n (64-bit): + // addi sp, sp, -32 ;create stack frame + // sd ra, 24(sp) ;save return address + // sd t2, 16(sp) ;save register t2 + // sd t1, 8(sp) ;save register t1 + // sd a0, 0(sp) ;save register a0 + // lui t2, %highest(__xray_FunctionEntry/Exit) + // addi t2, t2, %higher(__xray_FunctionEntry/Exit) + // slli t2, t2, 32 + // lui t1, %hi(__xray_FunctionEntry/Exit) + // addi t1, t1, %lo(__xray_FunctionEntry/Exit) + // add t1, t2, t1 + // lui a0, %hi(function_id) + // addi a0, a0, %lo(function_id) ;pass function id + // jalr t1 ;call Tracing hook + // ld a0, 0(sp) ;restore register a0 + // ld t1, 8(sp) ;restore register t1 + // ld t2, 16(sp) ;restore register t2 + // ld ra, 24(sp) ;restore return address + // addi sp, sp, 32 ;delete
stack frame + // + // Replacement of the first 4-byte instruction should be the last and atomic + // operation, so that the user code which reaches the sled concurrently + // either jumps over the whole sled, or executes the whole sled when the + // latter is ready. + // + // When |Enable|==false, we set back the first instruction in the sled to be + // J 60 bytes (rv32) + // J 76 bytes (rv64) + + uint32_t *Address = reinterpret_cast<uint32_t *>(Sled.address()); + if (Enable) { + // If the ISA is RISCV 64, the Tracing Hook needs to be typecast to a 64 bit + // value +#if SANITIZER_RISCV64 + uint32_t LoTracingHookAddr = lo12(reinterpret_cast<uint64_t>(TracingHook)); + uint32_t HiTracingHookAddr = hi20(reinterpret_cast<uint64_t>(TracingHook)); + uint32_t HigherTracingHookAddr = + lo12((reinterpret_cast<uint64_t>(TracingHook) + 0x80000000) >> 32); + uint32_t HighestTracingHookAddr = + hi20((reinterpret_cast<uint64_t>(TracingHook) + 0x80000000) >> 32); + // We typecast the Tracing Hook to a 32 bit value for RISCV32 +#elif defined(__riscv) && (__riscv_xlen == 32) + uint32_t LoTracingHookAddr = lo12(reinterpret_cast<uint32_t>(TracingHook)); + uint32_t HiTracingHookAddr = hi20((reinterpret_cast<uint32_t>(TracingHook)); +#endif + uint32_t LoFunctionID = lo12(FuncId); + uint32_t HiFunctionID = hi20(FuncId); + // The sled that is patched in for RISCV64 defined below. We need the entire + // sleds corresponding to both ISAs to be protected by defines because the + // first few instructions are all different, because we store doubles in + // case of RV64 and store words for RV32. Subsequently, we have LUI - and in + // case of RV64, we need extra instructions from this point on, so we see + // differences in addresses to which instructions are stored. 
+#if SANITIZER_RISCV64 + Address[1] = encodeSTypeInstruction(PatchOpcodes::PO_SD, RegNum::RN_SP, + RegNum::RN_RA, 0x18); + Address[2] = encodeSTypeInstruction(PatchOpcodes::PO_SD, RegNum::RN_SP, + RegNum::RN_T2, 0x10); + Address[3] = encodeSTypeInstruction(PatchOpcodes::PO_SD, RegNum::RN_SP, + RegNum::RN_T1, 0x8); + Address[4] = encodeSTypeInstruction(PatchOpcodes::PO_SD, RegNum::RN_SP, + RegNum::RN_A0, 0x0); + Address[5] = encodeUTypeInstruction(PatchOpcodes::PO_LUI, RegNum::RN_T2, + HighestTracingHookAddr); + Address[6] = encodeITypeInstruction(PatchOpcodes::PO_ADDI, RegNum::RN_T2, + RegNum::RN_T2, HigherTracingHookAddr); + Address[7] = encodeITypeInstruction(PatchOpcodes::PO_SLLI, RegNum::RN_T2, + RegNum::RN_T2, 0x20); + Address[8] = encodeUTypeInstruction(PatchOpcodes::PO_LUI, RegNum::RN_T1, + HiTracingHookAddr); + Address[9] = encodeITypeInstruction(PatchOpcodes::PO_ADDI, RegNum::RN_T1, + RegNum::RN_T1, LoTracingHookAddr); + Address[10] = encodeRTypeInstruction(PatchOpcodes::PO_ADD, RegNum::RN_T1, + RegNum::RN_T2, RegNum::RN_T1); + Address[11] = encodeUTypeInstruction(PatchOpcodes::PO_LUI, RegNum::RN_A0, + HiFunctionID); + Address[12] = encodeITypeInstruction(PatchOpcodes::PO_ADDI, RegNum::RN_A0, + RegNum::RN_A0, LoFunctionID); + Address[13] = encodeITypeInstruction(PatchOpcodes::PO_JALR, RegNum::RN_T1, + RegNum::RN_RA, 0x0); + Address[14] = encodeITypeInstruction(PatchOpcodes::PO_LD, RegNum::RN_SP, + RegNum::RN_A0, 0x0); + Address[15] = encodeITypeInstruction(PatchOpcodes::PO_LD, RegNum::RN_SP, + RegNum::RN_T1, 0x8); + Address[16] = encodeITypeInstruction(PatchOpcodes::PO_LD, RegNum::RN_SP, + RegNum::RN_T2, 0x10); + Address[17] = encodeITypeInstruction(PatchOpcodes::PO_LD, RegNum::RN_SP, + RegNum::RN_RA, 0x18); + Address[18] = encodeITypeInstruction(PatchOpcodes::PO_ADDI, RegNum::RN_SP, + RegNum::RN_SP, 0x20); + uint32_t CreateStackSpace = encodeITypeInstruction( + PatchOpcodes::PO_ADDI, RegNum::RN_SP, RegNum::RN_SP, 0xffe0); +#elif defined(__riscv) && 
(__riscv_xlen == 32) + Address[1] = encodeSTypeInstruction(PatchOpcodes::PO_SW, RegNum::RN_SP, + RegNum::RN_RA, 0x0c); + Address[2] = encodeSTypeInstruction(PatchOpcodes::PO_SW, RegNum::RN_SP, + RegNum::RN_T2, 0x08); + Address[3] = encodeSTypeInstruction(PatchOpcodes::PO_SW, RegNum::RN_SP, + RegNum::RN_T1, 0x4); + Address[4] = encodeSTypeInstruction(PatchOpcodes::PO_SW, RegNum::RN_SP, + RegNum::RN_A0, 0x0); + Address[5] = encodeUTypeInstruction(PatchOpcodes::PO_LUI, RegNum::RN_T1, + HiTracingHookAddr); + Address[6] = encodeITypeInstruction(PatchOpcodes::PO_ADDI, RegNum::RN_T1, + RegNum::RN_T1, LoTracingHookAddr); + Address[7] = encodeUTypeInstruction(PatchOpcodes::PO_LUI, RegNum::RN_A0, + HiFunctionID); + Address[8] = encodeITypeInstruction(PatchOpcodes::PO_ADDI, RegNum::RN_A0, + RegNum::RN_A0, LoFunctionID); + Address[9] = encodeITypeInstruction(PatchOpcodes::PO_JALR, RegNum::RN_T1, + RegNum::RN_RA, 0x0); + Address[10] = encodeITypeInstruction(PatchOpcodes::PO_LW, RegNum::RN_SP, + RegNum::RN_A0, 0x0); + Address[11] = encodeITypeInstruction(PatchOpcodes::PO_LW, RegNum::RN_SP, + RegNum::RN_T1, 0x4); + Address[12] = encodeITypeInstruction(PatchOpcodes::PO_LW, RegNum::RN_SP, + RegNum::RN_T2, 0x08); + Address[13] = encodeITypeInstruction(PatchOpcodes::PO_LW, RegNum::RN_SP, + RegNum::RN_RA, 0x0c); + Address[14] = encodeITypeInstruction(PatchOpcodes::PO_ADDI, RegNum::RN_SP, + RegNum::RN_SP, 0x10); + uint32_t CreateStackSpace = encodeITypeInstruction( + PatchOpcodes::PO_ADDI, RegNum::RN_SP, RegNum::RN_SP, 0xfff0); +#endif + std::atomic_store_explicit( + reinterpret_cast<std::atomic<uint32_t> *>(Address), CreateStackSpace, + std::memory_order_release); + } else { + uint32_t CreateBranch = encodeJTypeInstruction( + // Jump distance is different in both ISAs due to difference in size of + // sleds +#if SANITIZER_RISCV64 + PatchOpcodes::PO_J, RegNum::RN_R0, + 0x026); // jump encodes an offset in multiples of 2 bytes. 
38*2 = 76 +#elif defined(__riscv) && (__riscv_xlen == 32) + PatchOpcodes::PO_J, RegNum::RN_R0, + 0x01e); // jump encodes an offset in multiples of 2 bytes. 30*2 = 60 +#endif + std::atomic_store_explicit( + reinterpret_cast<std::atomic<uint32_t> *>(Address), CreateBranch, + std::memory_order_release); + } + return true; +} + +bool patchFunctionEntry(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled, + const XRayTrampolines &Trampolines, + bool LogArgs) XRAY_NEVER_INSTRUMENT { + // We don't support Logging argument at this moment, so we always + // use EntryTrampoline. + return patchSled(Enable, FuncId, Sled, Trampolines.EntryTrampoline); +} + +bool patchFunctionExit( + const bool Enable, const uint32_t FuncId, const XRaySledEntry &Sled, + const XRayTrampolines &Trampolines) XRAY_NEVER_INSTRUMENT { + return patchSled(Enable, FuncId, Sled, Trampolines.ExitTrampoline); +} + +bool patchFunctionTailExit( + const bool Enable, const uint32_t FuncId, const XRaySledEntry &Sled, + const XRayTrampolines &Trampolines) XRAY_NEVER_INSTRUMENT { + return patchSled(Enable, FuncId, Sled, Trampolines.TailExitTrampoline); +} + +bool patchCustomEvent(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + return false; +} + +bool patchTypedEvent(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + return false; +} +} // namespace __xray + +extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT {} diff --git a/compiler-rt/lib/xray/xray_trampoline_riscv32.S b/compiler-rt/lib/xray/xray_trampoline_riscv32.S new file mode 100644 index 00000000000000..9916e0321d24fd --- /dev/null +++ b/compiler-rt/lib/xray/xray_trampoline_riscv32.S @@ -0,0 +1,83 @@ +//===-- xray_trampoline_riscv32.s ----------------------------------*- ASM -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// This implements the riscv32-specific assembler for the trampolines. +// +//===----------------------------------------------------------------------===// + +.macro SAVE_ARG_REGISTERS + // Push argument registers to stack + addi sp, sp, -100 + .cfi_def_cfa_offset 100 + sw ra, 96(sp) + .cfi_offset ra, -4 + sw a7, 92(sp) + sw a6, 88(sp) + sw a5, 84(sp) + sw a4, 80(sp) + sw a3, 76(sp) + sw a2, 72(sp) + sw a1, 68(sp) + sw a0, 64(sp) + fsd fa7, 56(sp) + fsd fa6, 48(sp) + fsd fa5, 40(sp) + fsd fa4, 32(sp) + fsd fa3, 24(sp) + fsd fa2, 16(sp) + fsd fa1, 8(sp) + fsd fa0, 0(sp) +.endm + +.macro RESTORE_ARG_REGISTERS + // Restore argument registers + fld fa0, 0(sp) + fld fa1, 8(sp) + fld fa2, 16(sp) + fld fa3, 24(sp) + fld fa4, 32(sp) + fld fa5, 40(sp) + fld fa6, 48(sp) + fld fa7, 56(sp) + lw a0, 64(sp) + lw a1, 68(sp) + lw a2, 72(sp) + lw a3, 76(sp) + lw a4, 80(sp) + lw a5, 84(sp) + lw a6, 88(sp) + lw a7, 92(sp) + lw ra, 96(sp) + addi sp, sp, 100 +.endm + +.macro SAVE_RET_REGISTERS + // Push return registers to stack + addi sp, sp, -28 + .cfi_def_cfa_offset 28 + sw ra, 24(sp) + .cfi_offset ra, -4 + sw a1, 20(sp) + sw a0, 16(sp) + fsd fa1, 8(sp) + fsd fa0, 0(sp) +.endm + +.macro RESTORE_RET_REGISTERS + // Restore return registers + fld fa0, 0(sp) + fld fa1, 8(sp) + lw a0, 16(sp) + lw a1, 20(sp) + lw ra, 24(sp) + addi sp, sp, 28 +.endm + +#include "xray_trampoline_riscv_common.S" diff --git a/compiler-rt/lib/xray/xray_trampoline_riscv64.S b/compiler-rt/lib/xray/xray_trampoline_riscv64.S new file mode 100644 index 00000000000000..102b9881567d9a --- /dev/null +++ b/compiler-rt/lib/xray/xray_trampoline_riscv64.S @@ -0,0 +1,83 @@ +//===-- xray_trampoline_riscv64.s ----------------------------------*- ASM -*-===// +// +// Part of the LLVM Project, under 
the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// This implements the riscv64-specific assembler for the trampolines. +// +//===----------------------------------------------------------------------===// + +.macro SAVE_ARG_REGISTERS + // Push argument registers to stack + addi sp, sp, -136 + .cfi_def_cfa_offset 136 + sd ra, 128(sp) + .cfi_offset ra, -8 + sd a7, 120(sp) + sd a6, 112(sp) + sd a5, 104(sp) + sd a4, 96(sp) + sd a3, 88(sp) + sd a2, 80(sp) + sd a1, 72(sp) + sd a0, 64(sp) + fsd fa7, 56(sp) + fsd fa6, 48(sp) + fsd fa5, 40(sp) + fsd fa4, 32(sp) + fsd fa3, 24(sp) + fsd fa2, 16(sp) + fsd fa1, 8(sp) + fsd fa0, 0(sp) +.endm + +.macro SAVE_RET_REGISTERS + // Push return registers to stack + addi sp, sp, -40 + .cfi_def_cfa_offset 40 + sd ra, 32(sp) + .cfi_offset ra, -8 + sd a1, 24(sp) + sd a0, 16(sp) + fsd fa1, 8(sp) + fsd fa0, 0(sp) +.endm + +.macro RESTORE_RET_REGISTERS + // Restore return registers + fld fa0, 0(sp) + fld fa1, 8(sp) + ld a0, 16(sp) + ld a1, 24(sp) + ld ra, 32(sp) + addi sp, sp, 40 +.endm + +.macro RESTORE_ARG_REGISTERS + // Restore argument registers + fld fa0, 0(sp) + fld fa1, 8(sp) + fld fa2, 16(sp) + fld fa3, 24(sp) + fld fa4, 32(sp) + fld fa5, 40(sp) + fld fa6, 48(sp) + fld fa7, 56(sp) + ld a0, 64(sp) + ld a1, 72(sp) + ld a2, 80(sp) + ld a3, 88(sp) + ld a4, 96(sp) + ld a5, 104(sp) + ld a6, 112(sp) + ld a7, 120(sp) + ld ra, 128(sp) + addi sp, sp, 136 +.endm + +#include "xray_trampoline_riscv_common.S" diff --git a/compiler-rt/lib/xray/xray_trampoline_riscv_common.S b/compiler-rt/lib/xray/xray_trampoline_riscv_common.S new file mode 100644 index 00000000000000..7ce6fa0dcda31b --- /dev/null +++ b/compiler-rt/lib/xray/xray_trampoline_riscv_common.S @@ -0,0 +1,97
@@ +//===-- xray_trampoline_riscv_common.s --------------------------*- ASM -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// This implements the trampolines code shared between riscv32 and riscv64. +// +//===----------------------------------------------------------------------===// + +#include "../builtins/assembly.h" +#include "../sanitizer_common/sanitizer_asm.h" + + .text + .p2align 2 + .global ASM_SYMBOL(__xray_FunctionEntry) + ASM_TYPE_FUNCTION(__xray_FunctionEntry) +ASM_SYMBOL(__xray_FunctionEntry): + CFI_STARTPROC + SAVE_ARG_REGISTERS + + // Load the handler function pointer into a2 + la a2, ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE) + ld a2, 0(a2) + + // Handler address will be null if it is not set + beq a2, x0, 1f + + // If we reach here, we are tracing an event + // a0 already contains function id + // a1 = 0 means we are tracing an entry event + mv a1, x0 + jalr a2 + +1: + RESTORE_ARG_REGISTERS + jr ra + ASM_SIZE(__xray_FunctionEntry) + CFI_ENDPROC + + .text + .p2align 2 + .global ASM_SYMBOL(__xray_FunctionExit) + ASM_TYPE_FUNCTION(__xray_FunctionExit) +ASM_SYMBOL(__xray_FunctionExit): + CFI_STARTPROC + SAVE_RET_REGISTERS + + // Load the handler function pointer into a2 + la a2, ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE) + ld a2, 0(a2) + + // Handler address will be null if it is not set + beq a2, x0, 1f + + // If we reach here, we are tracing an event + // a0 already contains function id + // a1 = 1 means we are tracing an exit event + addi a1, x0, 1 + jalr a2 + +1: + RESTORE_RET_REGISTERS + jr ra + ASM_SIZE(__xray_FunctionExit) + CFI_ENDPROC + + .text + .p2align 2 + .global ASM_SYMBOL(__xray_FunctionTailExit) + 
ASM_TYPE_FUNCTION(__xray_FunctionTailExit) +ASM_SYMBOL(__xray_FunctionTailExit): + CFI_STARTPROC + SAVE_ARG_REGISTERS + + // Load the handler function pointer into a2 + la a2, ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE) + ld a2, 0(a2) + + // Handler address will be null if it is not set + beq a2, x0, 1f + + // If we reach here, we are tracing an event + // a0 already contains function id + // a1 = 2 means we are tracing a tail exit event + addi a1, x0, 2 + jalr a2 + +1: + RESTORE_ARG_REGISTERS + jr ra + ASM_SIZE(__xray_FunctionTailExit) + CFI_ENDPROC diff --git a/compiler-rt/lib/xray/xray_tsc.h b/compiler-rt/lib/xray/xray_tsc.h index e1cafe1bf11d2d..b62a686d6ce0f2 100644 --- a/compiler-rt/lib/xray/xray_tsc.h +++ b/compiler-rt/lib/xray/xray_tsc.h @@ -43,7 +43,7 @@ inline uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT { #elif defined(__powerpc64__) #include "xray_powerpc64.inc" #elif defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \ - defined(__hexagon__) || defined(__loongarch_lp64) + defined(__hexagon__) || defined(__loongarch_lp64) || defined(__riscv) // Emulated TSC. // There is no instruction like RDTSCP in user mode on ARM. ARM's CP15 does // not have a constant frequency like TSC on x86(_64), it may go faster diff --git a/llvm/lib/CodeGen/XRayInstrumentation.cpp b/llvm/lib/CodeGen/XRayInstrumentation.cpp index 8f718d884cd067..1191d70f77f3e0 100644 --- a/llvm/lib/CodeGen/XRayInstrumentation.cpp +++ b/llvm/lib/CodeGen/XRayInstrumentation.cpp @@ -233,10 +233,13 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) { case Triple::ArchType::mips: case Triple::ArchType::mipsel: case Triple::ArchType::mips64: - case Triple::ArchType::mips64el: { + case Triple::ArchType::mips64el: + case Triple::ArchType::riscv32: + case Triple::ArchType::riscv64: { // For the architectures which don't have a single return instruction InstrumentationOptions op; - op.HandleTailcall = false; + // RISC-V supports patching tail calls.
+ op.HandleTailcall = MF.getTarget().getTargetTriple().isRISCV(); op.HandleAllReturns = true; prependRetWithPatchableExit(MF, TII, op); break; diff --git a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp index 0d818bc837fb70..5382751b0d8502 100644 --- a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp +++ b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp @@ -113,6 +113,12 @@ class RISCVAsmPrinter : public AsmPrinter { void emitNTLHint(const MachineInstr *MI); + // XRay Support + void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr *MI); + void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr *MI); + void LowerPATCHABLE_TAIL_CALL(const MachineInstr *MI); + void emitSled(const MachineInstr *MI, SledKind Kind); + bool lowerToMCInst(const MachineInstr *MI, MCInst &OutMI); }; } @@ -316,6 +322,22 @@ void RISCVAsmPrinter::emitInstruction(const MachineInstr *MI) { return LowerPATCHPOINT(*OutStreamer, SM, *MI); case TargetOpcode::STATEPOINT: return LowerSTATEPOINT(*OutStreamer, SM, *MI); + case TargetOpcode::PATCHABLE_FUNCTION_ENTER: { + // patchable-function-entry is handled in lowerToMCInst + // Therefore, we break out of the switch statement if we encounter it here. 
+ const Function &F = MI->getParent()->getParent()->getFunction(); + if (F.hasFnAttribute("patchable-function-entry")) + break; + + LowerPATCHABLE_FUNCTION_ENTER(MI); + return; + } + case TargetOpcode::PATCHABLE_FUNCTION_EXIT: + LowerPATCHABLE_FUNCTION_EXIT(MI); + return; + case TargetOpcode::PATCHABLE_TAIL_CALL: + LowerPATCHABLE_TAIL_CALL(MI); + return; } MCInst OutInst; @@ -453,11 +475,71 @@ bool RISCVAsmPrinter::runOnMachineFunction(MachineFunction &MF) { SetupMachineFunction(MF); emitFunctionBody(); + // Emit the XRay table + emitXRayTable(); + if (EmittedOptionArch) RTS.emitDirectiveOptionPop(); return false; } +void RISCVAsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr *MI) { + emitSled(MI, SledKind::FUNCTION_ENTER); +} + +void RISCVAsmPrinter::LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr *MI) { + emitSled(MI, SledKind::FUNCTION_EXIT); +} + +void RISCVAsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr *MI) { + emitSled(MI, SledKind::TAIL_CALL); +} + +void RISCVAsmPrinter::emitSled(const MachineInstr *MI, SledKind Kind) { + // We want to emit the jump instruction and the nops constituting the sled. + // The format is as follows: + // .Lxray_sled_N + // ALIGN + // J .tmpN + // 29 or 37 C.NOP instructions + // .tmpN + + // The following variable holds the count of the number of NOPs to be patched + // in for XRay instrumentation during compilation. + // Note that RV64 and RV32 each has a sled of 76 and 60 bytes, respectively. + // Assuming we're using JAL to jump to .tmpN, then we only need + // (76 - 4)/2 = 36 NOPs for RV64 and (60 - 4)/2 = 28 for RV32. However, there + // is a chance that we'll use C.JAL instead, so an additional NOP is needed. + const uint8_t NoopsInSledCount = + MI->getParent()->getParent()->getSubtarget<RISCVSubtarget>().is64Bit() + ? 
37 + : 29; + + OutStreamer->emitCodeAlignment(Align(4), &getSubtargetInfo()); + auto CurSled = OutContext.createTempSymbol("xray_sled_", true); + OutStreamer->emitLabel(CurSled); + auto Target = OutContext.createTempSymbol(); + + const MCExpr *TargetExpr = MCSymbolRefExpr::create( + Target, MCSymbolRefExpr::VariantKind::VK_None, OutContext); + + // Emit "J bytes" instruction, which jumps over the nop sled to the actual + // start of function. + EmitToStreamer( + *OutStreamer, + MCInstBuilder(RISCV::JAL).addReg(RISCV::X0).addExpr(TargetExpr)); + + // Emit NOP instructions + for (int8_t I = 0; I < NoopsInSledCount; ++I) + EmitToStreamer(*OutStreamer, MCInstBuilder(RISCV::ADDI) + .addReg(RISCV::X0) + .addReg(RISCV::X0) + .addImm(0)); + + OutStreamer->emitLabel(Target); + recordSled(CurSled, *MI, Kind, 2); +} + void RISCVAsmPrinter::emitStartOfAsmFile(Module &M) { RISCVTargetStreamer &RTS = static_cast<RISCVTargetStreamer &>(*OutStreamer->getTargetStreamer()); diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h index f2c0a3d85c998a..f8a8e03fd28d16 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -223,6 +223,9 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo { return UserReservedRegister[i]; } + // XRay support - require D and C extensions. + bool isXRaySupported() const override { return hasStdExtD() && hasStdExtC(); } + // Vector codegen related methods. 
bool hasVInstructions() const { return HasStdExtZve32x; } bool hasVInstructionsI64() const { return HasStdExtZve64x; } diff --git a/llvm/lib/XRay/InstrumentationMap.cpp b/llvm/lib/XRay/InstrumentationMap.cpp index 800f0a0f47e425..0ebdcd5bac7526 100644 --- a/llvm/lib/XRay/InstrumentationMap.cpp +++ b/llvm/lib/XRay/InstrumentationMap.cpp @@ -63,7 +63,8 @@ loadObj(StringRef Filename, object::OwningBinary<object::ObjectFile> &ObjFile, ObjFile.getBinary()->getArch() == Triple::loongarch64 || ObjFile.getBinary()->getArch() == Triple::ppc64le || ObjFile.getBinary()->getArch() == Triple::arm || - ObjFile.getBinary()->getArch() == Triple::aarch64)) + ObjFile.getBinary()->getArch() == Triple::aarch64 || + ObjFile.getBinary()->getArch() == Triple::riscv64)) return make_error<StringError>( "File format not supported (only does ELF and Mach-O little endian " "64-bit).", diff --git a/llvm/test/CodeGen/RISCV/xray-attribute-instrumentation.ll b/llvm/test/CodeGen/RISCV/xray-attribute-instrumentation.ll new file mode 100644 index 00000000000000..a8fc3bff0b0f5b --- /dev/null +++ b/llvm/test/CodeGen/RISCV/xray-attribute-instrumentation.ll @@ -0,0 +1,24 @@ +; RUN: llc -mtriple=riscv32-unknown-linux-gnu -mattr=+d,+c < %s | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=riscv64-unknown-linux-gnu -mattr=+d,+c < %s | FileCheck --check-prefix=CHECK --check-prefix=CHECK-RISCV64 %s + +define i32 @foo() nounwind "function-instrument"="xray-always" { +; CHECK: .p2align 2 +; CHECK-LABEL: .Lxray_sled_0: +; CHECK-NEXT: j .Ltmp0 +; CHECK-COUNT-29: nop +; CHECK-RISCV64-COUNT-8: nop +; CHECK-LABEL: .Ltmp0: + ret i32 0 +; CHECK: .p2align 2 +; CHECK-LABEL: .Lxray_sled_1: +; CHECK-NEXT: j .Ltmp1 +; CHECK-COUNT-29: nop +; CHECK-RISCV64-COUNT-8: nop +; CHECK-LABEL: .Ltmp1: +; CHECK-NEXT: ret +} +; CHECK: .section xray_instr_map,"ao",@progbits,foo +; CHECK-LABEL: .Lxray_sleds_start0: +; CHECK: .Lxray_sled_0-[[TMP:.Ltmp[0-9]+]] +; CHECK: .Lxray_sled_1-[[TMP:.Ltmp[0-9]+]] +; CHECK-LABEL: 
.Lxray_sleds_end0: >From d43a31ca80ce513a4db06572406ad6f910aec119 Mon Sep 17 00:00:00 2001 From: Min-Yih Hsu <min....@sifive.com> Date: Mon, 25 Nov 2024 14:08:28 -0800 Subject: [PATCH 2/5] Address review comments --- compiler-rt/lib/xray/xray_interface.cpp | 4 +- compiler-rt/lib/xray/xray_riscv.cpp | 234 ++++++++---------- .../lib/xray/xray_trampoline_riscv32.S | 96 +++---- .../lib/xray/xray_trampoline_riscv64.S | 104 ++++---- .../lib/xray/xray_trampoline_riscv_common.S | 6 +- llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp | 10 +- llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 20 ++ .../RISCV/xray-attribute-instrumentation.ll | 4 +- 8 files changed, 238 insertions(+), 240 deletions(-) diff --git a/compiler-rt/lib/xray/xray_interface.cpp b/compiler-rt/lib/xray/xray_interface.cpp index e66736d9a344e1..637972856ecaa7 100644 --- a/compiler-rt/lib/xray/xray_interface.cpp +++ b/compiler-rt/lib/xray/xray_interface.cpp @@ -58,9 +58,9 @@ static const int16_t cSledLength = 8; #elif defined(__hexagon__) static const int16_t cSledLength = 20; #elif SANITIZER_RISCV64 -static const int16_t cSledLength = 76; +static const int16_t cSledLength = 68; #elif defined(__riscv) && (__riscv_xlen == 32) -static const int16_t cSledLength = 60; +static const int16_t cSledLength = 52; #else #error "Unsupported CPU Architecture" #endif /* CPU architecture */ diff --git a/compiler-rt/lib/xray/xray_riscv.cpp b/compiler-rt/lib/xray/xray_riscv.cpp index 89ce9305ef3dbe..644c65029b8c71 100644 --- a/compiler-rt/lib/xray/xray_riscv.cpp +++ b/compiler-rt/lib/xray/xray_riscv.cpp @@ -8,7 +8,7 @@ // // This file is a part of XRay, a dynamic runtime instrumentation system. // -// Implementation of riscv-specific routines (32- and 64-bit). +// Implementation of RISC-V specific routines (32- and 64-bit). 
// //===----------------------------------------------------------------------===// #include "sanitizer_common/sanitizer_common.h" @@ -22,28 +22,24 @@ namespace __xray { enum PatchOpcodes : uint32_t { PO_ADDI = 0x00000013, // addi rd, rs1, imm PO_ADD = 0x00000033, // add rd, rs1, rs2 - PO_SW = 0x00002023, // sw rt, base(offset) - PO_SD = 0x00003023, // sd rt, base(offset) + PO_SW = 0x00002023, // sw rs2, imm(rs1) + PO_SD = 0x00003023, // sd rs2, imm(rs1) PO_LUI = 0x00000037, // lui rd, imm - PO_ORI = 0x00006013, // ori rd, rs1, imm PO_OR = 0x00006033, // or rd, rs1, rs2 - PO_SLLI = 0x00001013, // slli rd, rs, shamt - PO_SRLI = 0x00005013, // srli rd, rs, shamt - PO_JALR = 0x00000067, // jalr rs - PO_LW = 0x00002003, // lw rd, base(offset) - PO_LD = 0x00003003, // ld rd, base(offset) - PO_J = 0x0000006f, // jal #n_bytes - PO_NOP = 0x00000013, // nop - pseduo-instruction, same as addi x0, x0, 0 + PO_SLLI = 0x00001013, // slli rd, rs1, shamt + PO_JALR = 0x00000067, // jalr rd, rs1 + PO_LW = 0x00002003, // lw rd, imm(rs1) + PO_LD = 0x00003003, // ld rd, imm(rs1) + PO_J = 0x0000006f, // jal imm + PO_NOP = PO_ADDI, // addi x0, x0, 0 }; enum RegNum : uint32_t { - RN_R0 = 0x0, - RN_RA = 0x1, - RN_SP = 0x2, - RN_T0 = 0x5, - RN_T1 = 0x6, - RN_T2 = 0x7, - RN_A0 = 0xa, + RN_X0 = 0, + RN_RA = 1, + RN_SP = 2, + RN_T1 = 6, + RN_A0 = 10, }; static inline uint32_t encodeRTypeInstruction(uint32_t Opcode, uint32_t Rs1, @@ -58,9 +54,9 @@ static inline uint32_t encodeITypeInstruction(uint32_t Opcode, uint32_t Rs1, static inline uint32_t encodeSTypeInstruction(uint32_t Opcode, uint32_t Rs1, uint32_t Rs2, uint32_t Imm) { - uint32_t imm_msbs = (Imm & 0xfe0) << 25; - uint32_t imm_lsbs = (Imm & 0x01f) << 7; - return imm_msbs | Rs2 << 20 | Rs1 << 15 | imm_lsbs | Opcode; + uint32_t ImmMSB = (Imm & 0xfe0) << 25; + uint32_t ImmLSB = (Imm & 0x01f) << 7; + return ImmMSB | Rs2 << 20 | Rs1 << 15 | ImmLSB | Opcode; } static inline uint32_t encodeUTypeInstruction(uint32_t Opcode, uint32_t Rd, @@ 
-70,20 +66,15 @@ static inline uint32_t encodeUTypeInstruction(uint32_t Opcode, uint32_t Rd, static inline uint32_t encodeJTypeInstruction(uint32_t Opcode, uint32_t Rd, uint32_t Imm) { - uint32_t imm_msb = (Imm & 0x80000) << 31; - uint32_t imm_lsbs = (Imm & 0x003ff) << 21; - uint32_t imm_11 = (Imm & 0x00400) << 20; - uint32_t imm_1912 = (Imm & 0x7f800) << 12; - return imm_msb | imm_lsbs | imm_11 | imm_1912 | Rd << 7 | Opcode; + uint32_t ImmMSB = (Imm & 0x100000) << 31; + uint32_t ImmLSB = (Imm & 0x7fe) << 21; + uint32_t Imm11 = (Imm & 0x800) << 20; + uint32_t Imm1912 = (Imm & 0xff000) << 12; + return ImmMSB | ImmLSB | Imm11 | Imm1912 | Rd << 7 | Opcode; } -#if SANITIZER_RISCV64 -static uint32_t hi20(uint64_t val) { return (val + 0x800) >> 12; } -static uint32_t lo12(uint64_t val) { return val & 0xfff; } -#elif defined(__riscv) && (__riscv_xlen == 32) static uint32_t hi20(uint32_t val) { return (val + 0x800) >> 12; } static uint32_t lo12(uint32_t val) { return val & 0xfff; } -#endif static inline bool patchSled(const bool Enable, const uint32_t FuncId, const XRaySledEntry &Sled, @@ -93,7 +84,7 @@ static inline bool patchSled(const bool Enable, const uint32_t FuncId, // // xray_sled_n: // J .tmpN - // 29 or 37 C.NOPs (58 or 74 bytes) + // 25 or 33 C.NOPs (50 or 66 bytes) // .tmpN // // With one of the following runtime patches: @@ -101,38 +92,34 @@ static inline bool patchSled(const bool Enable, const uint32_t FuncId, // xray_sled_n (32-bit): // addi sp, sp, -16 ;create stack frame // sw ra, 12(sp) ;save return address - // sw t2, 8(sp) ;save register t2 - // sw t1, 4(sp) ;save register t1 - // sw a0, 0(sp) ;save register a0 - // lui t1, %hi(__xray_FunctionEntry/Exit) - // addi t1, t1, %lo(__xray_FunctionEntry/Exit) + // sw t1, 8(sp) ;save register t1 + // sw a0, 4(sp) ;save register a0 + // lui ra, %hi(__xray_FunctionEntry/Exit) + // addi ra, ra, %lo(__xray_FunctionEntry/Exit) // lui a0, %hi(function_id) // addi a0, a0, %lo(function_id) ;pass function id - // jalr 
t1 ;call Tracing hook - // lw a0, 0(sp) ;restore register a0 - // lw t1, 4(sp) ;restore register t1 - // lw t2, 8(sp) ;restore register t2 + // jalr ra ;call Tracing hook + // lw a0, 4(sp) ;restore register a0 + // lw t1, 8(sp) ;restore register t1 // lw ra, 12(sp) ;restore return address // addi sp, sp, 16 ;delete stack frame // // xray_sled_n (64-bit): // addi sp, sp, -32 ;create stack frame // sd ra, 24(sp) ;save return address - // sd t2, 16(sp) ;save register t2 - // sd t1, 8(sp) ;save register t1 - // sd a0, 0(sp) ;save register a0 - // lui t2, %highest(__xray_FunctionEntry/Exit) - // addi t2, t2, %higher(__xray_FunctionEntry/Exit) - // slli t2, t2, 32 - // lui t1, t1, %hi(__xray_FunctionEntry/Exit) - // addi t1, t1, %lo(__xray_FunctionEntry/Exit) - // add t1, t2, t1 + // sd t1, 16(sp) ;save register t1 + // sd a0, 8(sp) ;save register a0 + // lui t1, %highest(__xray_FunctionEntry/Exit) + // addi t1, t1, %higher(__xray_FunctionEntry/Exit) + // slli t1, t1, 32 + // lui ra, ra, %hi(__xray_FunctionEntry/Exit) + // addi ra, ra, %lo(__xray_FunctionEntry/Exit) + // add ra, t1, ra // lui a0, %hi(function_id) // addi a0, a0, %lo(function_id) ;pass function id - // jalr t1 ;call Tracing hook - // ld a0, 0(sp) ;restore register a0 - // ld t1, 8(sp) ;restore register t1 - // ld t2, 16(sp) ;restore register t2 + // jalr ra ;call Tracing hook + // ld a0, 8(sp) ;restore register a0 + // ld t1, 16(sp) ;restore register t1 // ld ra, 24(sp) ;restore return address // addi sp, sp, 32 ;delete stack frame // @@ -142,104 +129,87 @@ static inline bool patchSled(const bool Enable, const uint32_t FuncId, // latter is ready. 
// // When |Enable|==false, we set back the first instruction in the sled to be - // J 60 bytes (rv32) - // J 76 bytes (rv64) + // J 52 bytes (rv32) + // J 68 bytes (rv64) uint32_t *Address = reinterpret_cast<uint32_t *>(Sled.address()); if (Enable) { - // If the ISA is RISCV 64, the Tracing Hook needs to be typecast to a 64 bit - // value #if SANITIZER_RISCV64 + // If the ISA is RV64, the Tracing Hook needs to be typecast to a 64 bit + // value. uint32_t LoTracingHookAddr = lo12(reinterpret_cast<uint64_t>(TracingHook)); uint32_t HiTracingHookAddr = hi20(reinterpret_cast<uint64_t>(TracingHook)); uint32_t HigherTracingHookAddr = lo12((reinterpret_cast<uint64_t>(TracingHook) + 0x80000000) >> 32); uint32_t HighestTracingHookAddr = hi20((reinterpret_cast<uint64_t>(TracingHook) + 0x80000000) >> 32); - // We typecast the Tracing Hook to a 32 bit value for RISCV32 #elif defined(__riscv) && (__riscv_xlen == 32) + // We typecast the Tracing Hook to a 32 bit value for RV32 uint32_t LoTracingHookAddr = lo12(reinterpret_cast<uint32_t>(TracingHook)); uint32_t HiTracingHookAddr = hi20((reinterpret_cast<uint32_t>(TracingHook)); #endif uint32_t LoFunctionID = lo12(FuncId); uint32_t HiFunctionID = hi20(FuncId); + // The sled that is patched in for RISCV64 defined below. We need the entire // sleds corresponding to both ISAs to be protected by defines because the // first few instructions are all different, because we store doubles in // case of RV64 and store words for RV32. Subsequently, we have LUI - and in // case of RV64, we need extra instructions from this point on, so we see // differences in addresses to which instructions are stored. 
+ size_t Idx = 1U; + const uint32_t XLenBytes = __riscv_xlen / 8; #if SANITIZER_RISCV64 - Address[1] = encodeSTypeInstruction(PatchOpcodes::PO_SD, RegNum::RN_SP, - RegNum::RN_RA, 0x18); - Address[2] = encodeSTypeInstruction(PatchOpcodes::PO_SD, RegNum::RN_SP, - RegNum::RN_T2, 0x10); - Address[3] = encodeSTypeInstruction(PatchOpcodes::PO_SD, RegNum::RN_SP, - RegNum::RN_T1, 0x8); - Address[4] = encodeSTypeInstruction(PatchOpcodes::PO_SD, RegNum::RN_SP, - RegNum::RN_A0, 0x0); - Address[5] = encodeUTypeInstruction(PatchOpcodes::PO_LUI, RegNum::RN_T2, - HighestTracingHookAddr); - Address[6] = encodeITypeInstruction(PatchOpcodes::PO_ADDI, RegNum::RN_T2, - RegNum::RN_T2, HigherTracingHookAddr); - Address[7] = encodeITypeInstruction(PatchOpcodes::PO_SLLI, RegNum::RN_T2, - RegNum::RN_T2, 0x20); - Address[8] = encodeUTypeInstruction(PatchOpcodes::PO_LUI, RegNum::RN_T1, - HiTracingHookAddr); - Address[9] = encodeITypeInstruction(PatchOpcodes::PO_ADDI, RegNum::RN_T1, - RegNum::RN_T1, LoTracingHookAddr); - Address[10] = encodeRTypeInstruction(PatchOpcodes::PO_ADD, RegNum::RN_T1, - RegNum::RN_T2, RegNum::RN_T1); - Address[11] = encodeUTypeInstruction(PatchOpcodes::PO_LUI, RegNum::RN_A0, - HiFunctionID); - Address[12] = encodeITypeInstruction(PatchOpcodes::PO_ADDI, RegNum::RN_A0, - RegNum::RN_A0, LoFunctionID); - Address[13] = encodeITypeInstruction(PatchOpcodes::PO_JALR, RegNum::RN_T1, - RegNum::RN_RA, 0x0); - Address[14] = encodeITypeInstruction(PatchOpcodes::PO_LD, RegNum::RN_SP, - RegNum::RN_A0, 0x0); - Address[15] = encodeITypeInstruction(PatchOpcodes::PO_LD, RegNum::RN_SP, - RegNum::RN_T1, 0x8); - Address[16] = encodeITypeInstruction(PatchOpcodes::PO_LD, RegNum::RN_SP, - RegNum::RN_T2, 0x10); - Address[17] = encodeITypeInstruction(PatchOpcodes::PO_LD, RegNum::RN_SP, - RegNum::RN_RA, 0x18); - Address[18] = encodeITypeInstruction(PatchOpcodes::PO_ADDI, RegNum::RN_SP, - RegNum::RN_SP, 0x20); - uint32_t CreateStackSpace = encodeITypeInstruction( - PatchOpcodes::PO_ADDI, 
RegNum::RN_SP, RegNum::RN_SP, 0xffe0); + const unsigned LoadOp = PatchOpcodes::PO_LD; + const unsigned StoreOp = PatchOpcodes::PO_SD; #elif defined(__riscv) && (__riscv_xlen == 32) - Address[1] = encodeSTypeInstruction(PatchOpcodes::PO_SW, RegNum::RN_SP, - RegNum::RN_RA, 0x0c); - Address[2] = encodeSTypeInstruction(PatchOpcodes::PO_SW, RegNum::RN_SP, - RegNum::RN_T2, 0x08); - Address[3] = encodeSTypeInstruction(PatchOpcodes::PO_SW, RegNum::RN_SP, - RegNum::RN_T1, 0x4); - Address[4] = encodeSTypeInstruction(PatchOpcodes::PO_SW, RegNum::RN_SP, - RegNum::RN_A0, 0x0); - Address[5] = encodeUTypeInstruction(PatchOpcodes::PO_LUI, RegNum::RN_T1, - HiTracingHookAddr); - Address[6] = encodeITypeInstruction(PatchOpcodes::PO_ADDI, RegNum::RN_T1, - RegNum::RN_T1, LoTracingHookAddr); - Address[7] = encodeUTypeInstruction(PatchOpcodes::PO_LUI, RegNum::RN_A0, - HiFunctionID); - Address[8] = encodeITypeInstruction(PatchOpcodes::PO_ADDI, RegNum::RN_A0, - RegNum::RN_A0, LoFunctionID); - Address[9] = encodeITypeInstruction(PatchOpcodes::PO_JALR, RegNum::RN_T1, - RegNum::RN_RA, 0x0); - Address[10] = encodeITypeInstruction(PatchOpcodes::PO_LW, RegNum::RN_SP, - RegNum::RN_A0, 0x0); - Address[11] = encodeITypeInstruction(PatchOpcodes::PO_LW, RegNum::RN_SP, - RegNum::RN_T1, 0x4); - Address[12] = encodeITypeInstruction(PatchOpcodes::PO_LW, RegNum::RN_SP, - RegNum::RN_T2, 0x08); - Address[13] = encodeITypeInstruction(PatchOpcodes::PO_LW, RegNum::RN_SP, - RegNum::RN_RA, 0x0c); - Address[14] = encodeITypeInstruction(PatchOpcodes::PO_ADDI, RegNum::RN_SP, - RegNum::RN_SP, 0x10); - uint32_t CreateStackSpace = encodeITypeInstruction( - PatchOpcodes::PO_ADDI, RegNum::RN_SP, RegNum::RN_SP, 0xfff0); + const unsigned LoadOp = PatchOpcodes::PO_LW; + const unsigned StoreOp = PatchOpcodes::PO_SW; +#endif + + Address[Idx++] = encodeSTypeInstruction(StoreOp, RegNum::RN_SP, + RegNum::RN_RA, 3 * XLenBytes); + Address[Idx++] = encodeSTypeInstruction(StoreOp, RegNum::RN_SP, + RegNum::RN_T1, 2 * XLenBytes); + 
Address[Idx++] = encodeSTypeInstruction(StoreOp, RegNum::RN_SP, + RegNum::RN_A0, XLenBytes); + +#if SANITIZER_RISCV64 + Address[Idx++] = encodeUTypeInstruction(PatchOpcodes::PO_LUI, RegNum::RN_T1, + HighestTracingHookAddr); + Address[Idx++] = + encodeITypeInstruction(PatchOpcodes::PO_ADDI, RegNum::RN_T1, + RegNum::RN_T1, HigherTracingHookAddr); + Address[Idx++] = encodeITypeInstruction(PatchOpcodes::PO_SLLI, + RegNum::RN_T1, RegNum::RN_T1, 32); +#endif + Address[Idx++] = encodeUTypeInstruction(PatchOpcodes::PO_LUI, RegNum::RN_RA, + HiTracingHookAddr); + Address[Idx++] = encodeITypeInstruction( + PatchOpcodes::PO_ADDI, RegNum::RN_RA, RegNum::RN_RA, LoTracingHookAddr); +#if SANITIZER_RISCV64 + Address[Idx++] = encodeRTypeInstruction(PatchOpcodes::PO_ADD, RegNum::RN_RA, + RegNum::RN_T1, RegNum::RN_RA); #endif + Address[Idx++] = encodeUTypeInstruction(PatchOpcodes::PO_LUI, RegNum::RN_A0, + HiFunctionID); + Address[Idx++] = encodeITypeInstruction( + PatchOpcodes::PO_ADDI, RegNum::RN_A0, RegNum::RN_A0, LoFunctionID); + Address[Idx++] = encodeITypeInstruction(PatchOpcodes::PO_JALR, + RegNum::RN_RA, RegNum::RN_RA, 0); + + Address[Idx++] = + encodeITypeInstruction(LoadOp, RegNum::RN_SP, RegNum::RN_A0, XLenBytes); + Address[Idx++] = encodeITypeInstruction(LoadOp, RegNum::RN_SP, + RegNum::RN_T1, 2 * XLenBytes); + Address[Idx++] = encodeITypeInstruction(LoadOp, RegNum::RN_SP, + RegNum::RN_RA, 3 * XLenBytes); + Address[Idx++] = encodeITypeInstruction( + PatchOpcodes::PO_ADDI, RegNum::RN_SP, RegNum::RN_SP, 4 * XLenBytes); + + uint32_t CreateStackSpace = encodeITypeInstruction( + PatchOpcodes::PO_ADDI, RegNum::RN_SP, RegNum::RN_SP, -4 * XLenBytes); + std::atomic_store_explicit( reinterpret_cast<std::atomic<uint32_t> *>(Address), CreateStackSpace, std::memory_order_release); @@ -248,11 +218,11 @@ static inline bool patchSled(const bool Enable, const uint32_t FuncId, // Jump distance is different in both ISAs due to difference in size of // sleds #if SANITIZER_RISCV64 - 
PatchOpcodes::PO_J, RegNum::RN_R0, - 0x026); // jump encodes an offset in multiples of 2 bytes. 38*2 = 76 + PatchOpcodes::PO_J, RegNum::RN_X0, + 68); // jump encodes an offset of 68 #elif defined(__riscv) && (__riscv_xlen == 32) - PatchOpcodes::PO_J, RegNum::RN_R0, - 0x01e); // jump encodes an offset in multiples of 2 bytes. 30*2 = 60 + PatchOpcodes::PO_J, RegNum::RN_X0, + 52); // jump encodes an offset of 52 #endif std::atomic_store_explicit( reinterpret_cast<std::atomic<uint32_t> *>(Address), CreateBranch, @@ -265,7 +235,7 @@ bool patchFunctionEntry(const bool Enable, const uint32_t FuncId, const XRaySledEntry &Sled, const XRayTrampolines &Trampolines, bool LogArgs) XRAY_NEVER_INSTRUMENT { - // We don't support Logging argument at this moment, so we always + // We don't support logging argument at this moment, so we always // use EntryTrampoline. return patchSled(Enable, FuncId, Sled, Trampolines.EntryTrampoline); } diff --git a/compiler-rt/lib/xray/xray_trampoline_riscv32.S b/compiler-rt/lib/xray/xray_trampoline_riscv32.S index 9916e0321d24fd..f23457bb8c2822 100644 --- a/compiler-rt/lib/xray/xray_trampoline_riscv32.S +++ b/compiler-rt/lib/xray/xray_trampoline_riscv32.S @@ -14,58 +14,58 @@ .macro SAVE_ARG_REGISTERS // Push argument registers to stack - addi sp, sp, -100 - .cfi_def_cfa_offset 100 - sw ra, 96(sp) + addi sp, sp, -112 + .cfi_def_cfa_offset 112 + sw ra, 108(sp) .cfi_offset ra, -4 - sw a7, 92(sp) - sw a6, 88(sp) - sw a5, 84(sp) - sw a4, 80(sp) - sw a3, 76(sp) - sw a2, 72(sp) - sw a1, 68(sp) - sw a0, 64(sp) - fsd fa7, 56(sp) - fsd fa6, 48(sp) - fsd fa5, 40(sp) - fsd fa4, 32(sp) - fsd fa3, 24(sp) - fsd fa2, 16(sp) - fsd fa1, 8(sp) - fsd fa0, 0(sp) + sw a7, 104(sp) + sw a6, 100(sp) + sw a5, 96(sp) + sw a4, 92(sp) + sw a3, 88(sp) + sw a2, 84(sp) + sw a1, 80(sp) + sw a0, 76(sp) + fsd fa7, 64(sp) + fsd fa6, 56(sp) + fsd fa5, 48(sp) + fsd fa4, 40(sp) + fsd fa3, 32(sp) + fsd fa2, 24(sp) + fsd fa1, 16(sp) + fsd fa0, 8(sp) .endm .macro RESTORE_ARG_REGISTERS // 
Restore argument registers - fld fa0, 0(sp) - fld fa1, 8(sp) - fld fa2, 16(sp) - fld fa3, 24(sp) - fld fa4, 32(sp) - fld fa5, 40(sp) - fld fa6, 48(sp) - fld fa7, 56(sp) - lw a0, 64(sp) - lw a1, 68(sp) - lw a2, 72(sp) - lw a3, 76(sp) - lw a4, 80(sp) - lw a5, 84(sp) - lw a6, 88(sp) - lw a7, 92(sp) - lw ra, 96(sp) - addi sp, sp, 100 + fld fa0, 8(sp) + fld fa1, 16(sp) + fld fa2, 24(sp) + fld fa3, 32(sp) + fld fa4, 40(sp) + fld fa5, 48(sp) + fld fa6, 56(sp) + fld fa7, 64(sp) + lw a0, 76(sp) + lw a1, 80(sp) + lw a2, 84(sp) + lw a3, 88(sp) + lw a4, 92(sp) + lw a5, 96(sp) + lw a6, 100(sp) + lw a7, 104(sp) + lw ra, 108(sp) + addi sp, sp, 112 .endm .macro SAVE_RET_REGISTERS // Push return registers to stack - addi sp, sp, -28 - .cfi_def_cfa_offset 28 - sw ra, 24(sp) + addi sp, sp, -32 + .cfi_def_cfa_offset 32 + sw ra, 28(sp) .cfi_offset ra, -4 - sw a1, 20(sp) - sw a0, 16(sp) + sw a1, 24(sp) + sw a0, 20(sp) fsd fa1, 8(sp) fsd fa0, 0(sp) .endm @@ -74,10 +74,14 @@ // Restore return registers fld fa0, 0(sp) fld fa1, 8(sp) - lw a0, 16(sp) - lw a1, 20(sp) - lw ra, 24(sp) - addi sp, sp, 28 + lw a0, 20(sp) + lw a1, 24(sp) + lw ra, 28(sp) + addi sp, sp, 32 +.endm + +.macro LOAD_XLEN, rd, src + lw \rd, \src .endm #include "xray_trampoline_riscv_common.S" diff --git a/compiler-rt/lib/xray/xray_trampoline_riscv64.S b/compiler-rt/lib/xray/xray_trampoline_riscv64.S index 102b9881567d9a..90c47a953eb4c0 100644 --- a/compiler-rt/lib/xray/xray_trampoline_riscv64.S +++ b/compiler-rt/lib/xray/xray_trampoline_riscv64.S @@ -14,70 +14,74 @@ .macro SAVE_ARG_REGISTERS // Push return registers to stack - addi sp, sp, -136 - .cfi_def_cfa_offset 136 - sd ra, 128(sp) + addi sp, sp, -144 + .cfi_def_cfa_offset 144 + sd ra, 136(sp) .cfi_offset ra, -8 - sd a7, 120(sp) - sd a6, 112(sp) - sd a5, 104(sp) - sd a4, 96(sp) - sd a3, 88(sp) - sd a2, 80(sp) - sd a1, 72(sp) - sd a0, 64(sp) - fsd fa7, 56(sp) - fsd fa6, 48(sp) - fsd fa5, 40(sp) - fsd fa4, 32(sp) - fsd fa3, 24(sp) - fsd fa2, 16(sp) - fsd fa1, 8(sp) - 
fsd fa0, 0(sp) + sd a7, 128(sp) + sd a6, 120(sp) + sd a5, 112(sp) + sd a4, 104(sp) + sd a3, 96(sp) + sd a2, 88(sp) + sd a1, 80(sp) + sd a0, 72(sp) + fsd fa7, 64(sp) + fsd fa6, 56(sp) + fsd fa5, 48(sp) + fsd fa4, 40(sp) + fsd fa3, 32(sp) + fsd fa2, 24(sp) + fsd fa1, 16(sp) + fsd fa0, 8(sp) .endm .macro SAVE_RET_REGISTERS // Push return registers to stack - addi sp, sp, -40 - .cfi_def_cfa_offset 40 - sd ra, 32(sp) + addi sp, sp, -48 + .cfi_def_cfa_offset 48 + sd ra, 40(sp) .cfi_offset ra, -8 - sd a1, 24(sp) - sd a0, 16(sp) - fsd fa1, 8(sp) - fsd fa0, 0(sp) + sd a1, 32(sp) + sd a0, 24(sp) + fsd fa1, 16(sp) + fsd fa0, 8(sp) .endm .macro RESTORE_RET_REGISTERS // Restore return registers - fld fa0, 0(sp) - fld fa1, 8(sp) - ld a0, 16(sp) - ld a1, 24(sp) - ld ra, 32(sp) - addi sp, sp, 40 + fld fa0, 8(sp) + fld fa1, 16(sp) + ld a0, 24(sp) + ld a1, 32(sp) + ld ra, 40(sp) + addi sp, sp, 48 .endm .macro RESTORE_ARG_REGISTERS // Restore argument registers - fld fa0, 0(sp) - fld fa1, 8(sp) - fld fa2, 16(sp) - fld fa3, 24(sp) - fld fa4, 32(sp) - fld fa5, 40(sp) - fld fa6, 48(sp) - fld fa7, 56(sp) - ld a0, 64(sp) - ld a1, 72(sp) - ld a2, 80(sp) - ld a3, 88(sp) - ld a4, 96(sp) - ld a5, 104(sp) - ld a6, 112(sp) - ld a7, 120(sp) - ld ra, 128(sp) - addi sp, sp, 136 + fld fa0, 8(sp) + fld fa1, 16(sp) + fld fa2, 24(sp) + fld fa3, 32(sp) + fld fa4, 40(sp) + fld fa5, 48(sp) + fld fa6, 56(sp) + fld fa7, 64(sp) + ld a0, 72(sp) + ld a1, 80(sp) + ld a2, 88(sp) + ld a3, 96(sp) + ld a4, 104(sp) + ld a5, 112(sp) + ld a6, 120(sp) + ld a7, 128(sp) + ld ra, 136(sp) + addi sp, sp, 144 +.endm + +.macro LOAD_XLEN, rd, src + ld \rd, \src .endm #include "xray_trampoline_riscv_common.S" diff --git a/compiler-rt/lib/xray/xray_trampoline_riscv_common.S b/compiler-rt/lib/xray/xray_trampoline_riscv_common.S index 7ce6fa0dcda31b..95f5a9b1189a88 100644 --- a/compiler-rt/lib/xray/xray_trampoline_riscv_common.S +++ b/compiler-rt/lib/xray/xray_trampoline_riscv_common.S @@ -25,7 +25,7 @@ 
ASM_SYMBOL(__xray_FunctionEntry): // Load the handler function pointer into a2 la a2, ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE) - ld a2, 0(a2) + LOAD_XLEN a2, 0(a2) // Handler address will be null if it is not set beq a2, x0, 1f @@ -52,7 +52,7 @@ ASM_SYMBOL(__xray_FunctionExit): // Load the handler function pointer into a2 la a2, ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE) - ld a2, 0(a2) + LOAD_XLEN a2, 0(a2) // Handler address will be null if it is not set beq a2, x0, 1f @@ -79,7 +79,7 @@ ASM_SYMBOL(__xray_FunctionTailExit): // Load the handler function pointer into a2 la a2, ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE) - ld a2, 0(a2) + LOAD_XLEN a2, 0(a2) // Handler address will be null if it is not set beq a2, x0, 1f diff --git a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp index 5382751b0d8502..0d1047aba5546d 100644 --- a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp +++ b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp @@ -501,19 +501,19 @@ void RISCVAsmPrinter::emitSled(const MachineInstr *MI, SledKind Kind) { // .Lxray_sled_N // ALIGN // J .tmpN - // 29 or 37 C.NOP instructions + // 25 or 33 C.NOP instructions // .tmpN // The following variable holds the count of the number of NOPs to be patched // in for XRay instrumentation during compilation. - // Note that RV64 and RV32 each has a sled of 76 and 60 bytes, respectively. + // Note that RV64 and RV32 each has a sled of 68 and 52 bytes, respectively. // Assuming we're using JAL to jump to .tmpN, then we only need - // (76 - 4)/2 = 36 NOPs for RV64 and (60 - 4)/2 = 28 for RV32. However, there + // (68 - 4)/2 = 32 NOPs for RV64 and (52 - 4)/2 = 24 for RV32. However, there // is a chance that we'll use C.JAL instead, so an additional NOP is needed. const uint8_t NoopsInSledCount = MI->getParent()->getParent()->getSubtarget<RISCVSubtarget>().is64Bit() - ? 37 - : 29; + ? 
33 + : 25; OutStreamer->emitCodeAlignment(Align(4), &getSubtargetInfo()); auto CurSled = OutContext.createTempSymbol("xray_sled_", true); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 933e776da47404..a1d68bce4a8213 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -1575,6 +1575,26 @@ unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { // No patch bytes means at most a PseudoCall is emitted return std::max(NumBytes, 8U); } + case TargetOpcode::PATCHABLE_FUNCTION_ENTER: + case TargetOpcode::PATCHABLE_FUNCTION_EXIT: + case TargetOpcode::PATCHABLE_TAIL_CALL: { + const MachineFunction &MF = *MI.getParent()->getParent(); + const Function &F = MF.getFunction(); + if (Opcode == TargetOpcode::PATCHABLE_FUNCTION_ENTER && + F.hasFnAttribute("patchable-function-entry")) { + unsigned Num; + if (F.getFnAttribute("patchable-function-entry") + .getValueAsString() + .getAsInteger(10, Num)) + return get(Opcode).getSize(); + + // Number of C.NOP or NOP + return (STI.hasStdExtCOrZca() ? 2 : 4) * Num; + } + // XRay uses C.JAL + 25 or 33 C.NOP for each sled in RV32 and RV64, + // respectively. + return STI.is64Bit() ? 
68 : 52; + } default: return get(Opcode).getSize(); } diff --git a/llvm/test/CodeGen/RISCV/xray-attribute-instrumentation.ll b/llvm/test/CodeGen/RISCV/xray-attribute-instrumentation.ll index a8fc3bff0b0f5b..185dab38b33f87 100644 --- a/llvm/test/CodeGen/RISCV/xray-attribute-instrumentation.ll +++ b/llvm/test/CodeGen/RISCV/xray-attribute-instrumentation.ll @@ -5,14 +5,14 @@ define i32 @foo() nounwind "function-instrument"="xray-always" { ; CHECK: .p2align 2 ; CHECK-LABEL: .Lxray_sled_0: ; CHECK-NEXT: j .Ltmp0 -; CHECK-COUNT-29: nop +; CHECK-COUNT-25: nop ; CHECK-RISCV64-COUNT-8: nop ; CHECK-LABEL: .Ltmp0: ret i32 0 ; CHECK: .p2align 2 ; CHECK-LABEL: .Lxray_sled_1: ; CHECK-NEXT: j .Ltmp1 -; CHECK-COUNT-29: nop +; CHECK-COUNT-25: nop ; CHECK-RISCV64-COUNT-8: nop ; CHECK-LABEL: .Ltmp1: ; CHECK-NEXT: ret >From 8e07007f3207a4b60fd5ec61972d70ac0701a287 Mon Sep 17 00:00:00 2001 From: Min-Yih Hsu <min....@sifive.com> Date: Tue, 3 Dec 2024 15:14:49 -0800 Subject: [PATCH 3/5] Do not use SANITIZER_RISCV64 Use defined (__riscv) && (__riscv_xlen == 32/64) instead. 
--- compiler-rt/lib/xray/xray_interface.cpp | 2 +- compiler-rt/lib/xray/xray_riscv.cpp | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/compiler-rt/lib/xray/xray_interface.cpp b/compiler-rt/lib/xray/xray_interface.cpp index 637972856ecaa7..4ec492c266d809 100644 --- a/compiler-rt/lib/xray/xray_interface.cpp +++ b/compiler-rt/lib/xray/xray_interface.cpp @@ -57,7 +57,7 @@ static const int16_t cSledLength = 64; static const int16_t cSledLength = 8; #elif defined(__hexagon__) static const int16_t cSledLength = 20; -#elif SANITIZER_RISCV64 +#elif defined(__riscv) && (__riscv_xlen == 64) static const int16_t cSledLength = 68; #elif defined(__riscv) && (__riscv_xlen == 32) static const int16_t cSledLength = 52; diff --git a/compiler-rt/lib/xray/xray_riscv.cpp b/compiler-rt/lib/xray/xray_riscv.cpp index 644c65029b8c71..bd71c794b7771e 100644 --- a/compiler-rt/lib/xray/xray_riscv.cpp +++ b/compiler-rt/lib/xray/xray_riscv.cpp @@ -134,7 +134,7 @@ static inline bool patchSled(const bool Enable, const uint32_t FuncId, uint32_t *Address = reinterpret_cast<uint32_t *>(Sled.address()); if (Enable) { -#if SANITIZER_RISCV64 +#if __riscv_xlen == 64 // If the ISA is RV64, the Tracing Hook needs to be typecast to a 64 bit // value. 
uint32_t LoTracingHookAddr = lo12(reinterpret_cast<uint64_t>(TracingHook)); @@ -143,7 +143,7 @@ static inline bool patchSled(const bool Enable, const uint32_t FuncId, lo12((reinterpret_cast<uint64_t>(TracingHook) + 0x80000000) >> 32); uint32_t HighestTracingHookAddr = hi20((reinterpret_cast<uint64_t>(TracingHook) + 0x80000000) >> 32); -#elif defined(__riscv) && (__riscv_xlen == 32) +#elif __riscv_xlen == 32 // We typecast the Tracing Hook to a 32 bit value for RV32 uint32_t LoTracingHookAddr = lo12(reinterpret_cast<uint32_t>(TracingHook)); uint32_t HiTracingHookAddr = hi20((reinterpret_cast<uint32_t>(TracingHook)); @@ -159,10 +159,10 @@ static inline bool patchSled(const bool Enable, const uint32_t FuncId, // differences in addresses to which instructions are stored. size_t Idx = 1U; const uint32_t XLenBytes = __riscv_xlen / 8; -#if SANITIZER_RISCV64 +#if __riscv_xlen == 64 const unsigned LoadOp = PatchOpcodes::PO_LD; const unsigned StoreOp = PatchOpcodes::PO_SD; -#elif defined(__riscv) && (__riscv_xlen == 32) +#elif __riscv_xlen == 32 const unsigned LoadOp = PatchOpcodes::PO_LW; const unsigned StoreOp = PatchOpcodes::PO_SW; #endif @@ -174,7 +174,7 @@ static inline bool patchSled(const bool Enable, const uint32_t FuncId, Address[Idx++] = encodeSTypeInstruction(StoreOp, RegNum::RN_SP, RegNum::RN_A0, XLenBytes); -#if SANITIZER_RISCV64 +#if __riscv_xlen == 64 Address[Idx++] = encodeUTypeInstruction(PatchOpcodes::PO_LUI, RegNum::RN_T1, HighestTracingHookAddr); Address[Idx++] = @@ -187,7 +187,7 @@ static inline bool patchSled(const bool Enable, const uint32_t FuncId, HiTracingHookAddr); Address[Idx++] = encodeITypeInstruction( PatchOpcodes::PO_ADDI, RegNum::RN_RA, RegNum::RN_RA, LoTracingHookAddr); -#if SANITIZER_RISCV64 +#if __riscv_xlen == 64 Address[Idx++] = encodeRTypeInstruction(PatchOpcodes::PO_ADD, RegNum::RN_RA, RegNum::RN_T1, RegNum::RN_RA); #endif @@ -217,10 +217,10 @@ static inline bool patchSled(const bool Enable, const uint32_t FuncId, uint32_t CreateBranch 
= encodeJTypeInstruction( // Jump distance is different in both ISAs due to difference in size of // sleds -#if SANITIZER_RISCV64 +#if __riscv_xlen == 64 PatchOpcodes::PO_J, RegNum::RN_X0, 68); // jump encodes an offset of 68 -#elif defined(__riscv) && (__riscv_xlen == 32) +#elif __riscv_xlen == 32 PatchOpcodes::PO_J, RegNum::RN_X0, 52); // jump encodes an offset of 52 #endif >From bc7f73948278218b80fc3acbeb2f4fa8c9ae358f Mon Sep 17 00:00:00 2001 From: Min-Yih Hsu <min....@sifive.com> Date: Wed, 4 Dec 2024 10:50:27 -0800 Subject: [PATCH 4/5] Remove CFI directives on register locations And use CFA macros provided by sanitizer_common --- compiler-rt/lib/xray/xray_trampoline_riscv32.S | 8 ++++---- compiler-rt/lib/xray/xray_trampoline_riscv64.S | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/compiler-rt/lib/xray/xray_trampoline_riscv32.S b/compiler-rt/lib/xray/xray_trampoline_riscv32.S index f23457bb8c2822..c64476c2a9f496 100644 --- a/compiler-rt/lib/xray/xray_trampoline_riscv32.S +++ b/compiler-rt/lib/xray/xray_trampoline_riscv32.S @@ -15,9 +15,8 @@ .macro SAVE_ARG_REGISTERS // Push argument registers to stack addi sp, sp, -112 - .cfi_def_cfa_offset 112 + CFI_DEF_CFA_OFFSET(112) sw ra, 108(sp) - .cfi_offset ra, -4 sw a7, 104(sp) sw a6, 100(sp) sw a5, 96(sp) @@ -56,14 +55,14 @@ lw a7, 104(sp) lw ra, 108(sp) addi sp, sp, 112 + CFI_DEF_CFA_OFFSET(0) .endm .macro SAVE_RET_REGISTERS // Push return registers to stack addi sp, sp, -32 - .cfi_def_cfa_offset 32 + CFI_DEF_CFA_OFFSET(32) sw ra, 28(sp) - .cfi_offset ra, -4 sw a1, 24(sp) sw a0, 20(sp) fsd fa1, 8(sp) @@ -78,6 +77,7 @@ lw a1, 24(sp) lw ra, 28(sp) addi sp, sp, 32 + CFI_DEF_CFA_OFFSET(0) .endm .macro LOAD_XLEN, rd, src diff --git a/compiler-rt/lib/xray/xray_trampoline_riscv64.S b/compiler-rt/lib/xray/xray_trampoline_riscv64.S index 90c47a953eb4c0..80e30691e44795 100644 --- a/compiler-rt/lib/xray/xray_trampoline_riscv64.S +++ b/compiler-rt/lib/xray/xray_trampoline_riscv64.S @@ -15,9 +15,8 @@ 
.macro SAVE_ARG_REGISTERS // Push return registers to stack addi sp, sp, -144 - .cfi_def_cfa_offset 144 + CFI_DEF_CFA_OFFSET(144) sd ra, 136(sp) - .cfi_offset ra, -8 sd a7, 128(sp) sd a6, 120(sp) sd a5, 112(sp) @@ -39,9 +38,8 @@ .macro SAVE_RET_REGISTERS // Push return registers to stack addi sp, sp, -48 - .cfi_def_cfa_offset 48 + CFI_DEF_CFA_OFFSET(48) sd ra, 40(sp) - .cfi_offset ra, -8 sd a1, 32(sp) sd a0, 24(sp) fsd fa1, 16(sp) @@ -56,6 +54,7 @@ ld a1, 32(sp) ld ra, 40(sp) addi sp, sp, 48 + CFI_DEF_CFA_OFFSET(0) .endm .macro RESTORE_ARG_REGISTERS @@ -78,6 +77,7 @@ ld a7, 128(sp) ld ra, 136(sp) addi sp, sp, 144 + CFI_DEF_CFA_OFFSET(0) .endm .macro LOAD_XLEN, rd, src >From 3e000a804c05641e7646d0c32497feaab643cfdc Mon Sep 17 00:00:00 2001 From: Min-Yih Hsu <min....@sifive.com> Date: Wed, 4 Dec 2024 10:54:18 -0800 Subject: [PATCH 5/5] fixup! Remove CFI directives on register locations --- compiler-rt/lib/xray/xray_trampoline_riscv32.S | 2 ++ compiler-rt/lib/xray/xray_trampoline_riscv64.S | 2 ++ compiler-rt/lib/xray/xray_trampoline_riscv_common.S | 1 - 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/compiler-rt/lib/xray/xray_trampoline_riscv32.S b/compiler-rt/lib/xray/xray_trampoline_riscv32.S index c64476c2a9f496..05e3d61e5ef71b 100644 --- a/compiler-rt/lib/xray/xray_trampoline_riscv32.S +++ b/compiler-rt/lib/xray/xray_trampoline_riscv32.S @@ -12,6 +12,8 @@ // //===----------------------------------------------------------------------===// +#include "../sanitizer_common/sanitizer_asm.h" + .macro SAVE_ARG_REGISTERS // Push argument registers to stack addi sp, sp, -112 diff --git a/compiler-rt/lib/xray/xray_trampoline_riscv64.S b/compiler-rt/lib/xray/xray_trampoline_riscv64.S index 80e30691e44795..692350eaaa38e0 100644 --- a/compiler-rt/lib/xray/xray_trampoline_riscv64.S +++ b/compiler-rt/lib/xray/xray_trampoline_riscv64.S @@ -12,6 +12,8 @@ // //===----------------------------------------------------------------------===// +#include 
"../sanitizer_common/sanitizer_asm.h" + .macro SAVE_ARG_REGISTERS // Push return registers to stack addi sp, sp, -144 diff --git a/compiler-rt/lib/xray/xray_trampoline_riscv_common.S b/compiler-rt/lib/xray/xray_trampoline_riscv_common.S index 95f5a9b1189a88..96739d6a1b8804 100644 --- a/compiler-rt/lib/xray/xray_trampoline_riscv_common.S +++ b/compiler-rt/lib/xray/xray_trampoline_riscv_common.S @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #include "../builtins/assembly.h" -#include "../sanitizer_common/sanitizer_asm.h" .text .p2align 2 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits