https://github.com/SharonXSharon created https://github.com/llvm/llvm-project/pull/142884
The patch has two main parts: 1) add support for memprof on the Darwin platform, and 2) add a new binary access profile, including the format definition, the serialization from the memprof compiler-rt runtime, and the code in llvm-profdata to deserialize it. For 1), @manman-ren tried to upstream this earlier at https://github.com/llvm/llvm-project/pull/69640/files; this patch has some changes compared with it. Currently, in order to properly upstream this patch, we need to resolve 3 issues: a) The Darwin support may have some issues in the interception part, especially in the `llvm-project/compiler-rt/lib/memprof/memprof_interceptors.cpp/.h` files, given that interception on Apple platforms is a bit different. (For our internal testing, we are commenting out a lot of the interceptions.) In theory we should be able to follow what ASan is doing for Apple, but I am not very familiar with ASan interception either, so there is a bit of a learning curve here. b) We need to discuss how to integrate the binary access profile format into the existing raw profile format, rather than creating a new format. c) We had been using `MEM_GRANULARITY` = 8 bytes before `HISTOGRAM_GRANULARITY` was added; now we should be able to just reuse the histogram code? Does the histogram effectively make the profile granularity 8 bytes and then show the access count of every 8-byte memblock in a histogram? Please let me know your thoughts/insights on the above 3 issues, thanks a lot!
@snehasish @teresajohnson @mingmingl-llvm @manman-ren >From 0704f57845fbcd9dac8e8b26a54474f552ee2a60 Mon Sep 17 00:00:00 2001 From: Sharon Xu <sharo...@fb.com> Date: Wed, 4 Jun 2025 17:55:27 -0700 Subject: [PATCH] [memprof][darwin] Support memprof on Darwin platform and add binary access profiles --- clang/lib/Driver/ToolChains/Darwin.cpp | 95 +++--- clang/test/Driver/fmemprof-darwin.c | 11 + .../cmake/Modules/AllSupportedArchDefs.cmake | 2 +- compiler-rt/cmake/config-ix.cmake | 2 +- .../profile/MemProfBinaryAccessData.inc | 41 +++ compiler-rt/include/profile/MemProfData.inc | 2 + compiler-rt/lib/memprof/CMakeLists.txt | 237 ++++++++------- compiler-rt/lib/memprof/memprof_allocator.cpp | 38 ++- compiler-rt/lib/memprof/memprof_allocator.h | 29 +- compiler-rt/lib/memprof/memprof_flags.inc | 2 + .../lib/memprof/memprof_interceptors.cpp | 2 + .../lib/memprof/memprof_interceptors.h | 5 + compiler-rt/lib/memprof/memprof_linux.cpp | 5 +- compiler-rt/lib/memprof/memprof_mac.cpp | 56 ++++ .../lib/memprof/memprof_malloc_linux.cpp | 5 +- .../lib/memprof/memprof_malloc_mac.cpp | 72 +++++ compiler-rt/lib/memprof/memprof_mapping.h | 14 +- .../lib/memprof/memprof_new_delete.cpp | 41 ++- .../lib/memprof/memprof_rawprofile.cpp | 134 ++++++++ compiler-rt/lib/memprof/memprof_rawprofile.h | 6 + compiler-rt/test/memprof/CMakeLists.txt | 89 +++++- compiler-rt/test/memprof/lit.cfg.py | 2 +- .../MemProfBinaryAccessData.inc.cpp | 47 +++ llvm/include/llvm/ProfileData/MemProfData.inc | 2 + llvm/include/module.modulemap | 1 + llvm/tools/llvm-profdata/llvm-profdata.cpp | 286 ++++++++++++++++-- 26 files changed, 1007 insertions(+), 219 deletions(-) create mode 100644 clang/test/Driver/fmemprof-darwin.c create mode 100644 compiler-rt/include/profile/MemProfBinaryAccessData.inc create mode 100644 compiler-rt/lib/memprof/memprof_mac.cpp create mode 100644 compiler-rt/lib/memprof/memprof_malloc_mac.cpp create mode 100644 llvm/include/llvm/ProfileData/MemProfBinaryAccessData.inc.cpp diff --git 
a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp index 26e24ad0ab17c..8f7036c77b1d1 100644 --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -207,8 +207,8 @@ static bool shouldLinkerNotDedup(bool IsLinkerOnlyAction, const ArgList &Args) { return true; if (A->getOption().matches(options::OPT_O)) return llvm::StringSwitch<bool>(A->getValue()) - .Case("1", true) - .Default(false); + .Case("1", true) + .Default(false); return false; // OPT_Ofast & OPT_O4 } @@ -310,8 +310,8 @@ void darwin::Linker::AddLinkArgs(Compilation &C, const ArgList &Args, if ((A = Args.getLastArg(options::OPT_compatibility__version)) || (A = Args.getLastArg(options::OPT_current__version)) || (A = Args.getLastArg(options::OPT_install__name))) - D.Diag(diag::err_drv_argument_only_allowed_with) << A->getAsString(Args) - << "-dynamiclib"; + D.Diag(diag::err_drv_argument_only_allowed_with) + << A->getAsString(Args) << "-dynamiclib"; Args.AddLastArg(CmdArgs, options::OPT_force__flat__namespace); Args.AddLastArg(CmdArgs, options::OPT_keep__private__externs); @@ -326,8 +326,8 @@ void darwin::Linker::AddLinkArgs(Compilation &C, const ArgList &Args, (A = Args.getLastArg(options::OPT_force__flat__namespace)) || (A = Args.getLastArg(options::OPT_keep__private__externs)) || (A = Args.getLastArg(options::OPT_private__bundle))) - D.Diag(diag::err_drv_argument_not_allowed_with) << A->getAsString(Args) - << "-dynamiclib"; + D.Diag(diag::err_drv_argument_not_allowed_with) + << A->getAsString(Args) << "-dynamiclib"; Args.AddAllArgsTranslated(CmdArgs, options::OPT_compatibility__version, "-dylib_compatibility_version"); @@ -1109,7 +1109,7 @@ VersionTuple MachO::getLinkerVersion(const llvm::opt::ArgList &Args) const { if (Arg *A = Args.getLastArg(options::OPT_mlinker_version_EQ)) if (NewLinkerVersion.tryParse(A->getValue())) getDriver().Diag(diag::err_drv_invalid_version_number) - << A->getAsString(Args); + << A->getAsString(Args); 
LinkerVersion = NewLinkerVersion; return *LinkerVersion; @@ -1387,20 +1387,20 @@ std::string Darwin::getCompilerRT(const ArgList &, StringRef Component, StringRef Darwin::getPlatformFamily() const { switch (TargetPlatform) { - case DarwinPlatformKind::MacOS: + case DarwinPlatformKind::MacOS: + return "MacOSX"; + case DarwinPlatformKind::IPhoneOS: + if (TargetEnvironment == MacCatalyst) return "MacOSX"; - case DarwinPlatformKind::IPhoneOS: - if (TargetEnvironment == MacCatalyst) - return "MacOSX"; - return "iPhone"; - case DarwinPlatformKind::TvOS: - return "AppleTV"; - case DarwinPlatformKind::WatchOS: - return "Watch"; - case DarwinPlatformKind::DriverKit: - return "DriverKit"; - case DarwinPlatformKind::XROS: - return "XR"; + return "iPhone"; + case DarwinPlatformKind::TvOS: + return "AppleTV"; + case DarwinPlatformKind::WatchOS: + return "Watch"; + case DarwinPlatformKind::DriverKit: + return "DriverKit"; + case DarwinPlatformKind::XROS: + return "XR"; } llvm_unreachable("Unsupported platform"); } @@ -1522,9 +1522,9 @@ void DarwinClang::AddLinkSanitizerLibArgs(const ArgList &Args, AddLinkRuntimeLib(Args, CmdArgs, Sanitizer, RLO, Shared); } -ToolChain::RuntimeLibType DarwinClang::GetRuntimeLibType( - const ArgList &Args) const { - if (Arg* A = Args.getLastArg(options::OPT_rtlib_EQ)) { +ToolChain::RuntimeLibType +DarwinClang::GetRuntimeLibType(const ArgList &Args) const { + if (Arg *A = Args.getLastArg(options::OPT_rtlib_EQ)) { StringRef Value = A->getValue(); if (Value != "compiler-rt" && Value != "platform") getDriver().Diag(clang::diag::err_drv_unsupported_rtlib_for_platform) @@ -1612,13 +1612,15 @@ void DarwinClang::AddLinkRuntimeLibArgs(const ArgList &Args, if (Sanitize.needsFuzzer() && !Args.hasArg(options::OPT_dynamiclib)) { AddLinkSanitizerLibArgs(Args, CmdArgs, "fuzzer", /*shared=*/false); - // Libfuzzer is written in C++ and requires libcxx. - AddCXXStdlibLibArgs(Args, CmdArgs); + // Libfuzzer is written in C++ and requires libcxx. 
+ AddCXXStdlibLibArgs(Args, CmdArgs); } if (Sanitize.needsStatsRt()) { AddLinkRuntimeLib(Args, CmdArgs, "stats_client", RLO_AlwaysLink); AddLinkSanitizerLibArgs(Args, CmdArgs, "stats"); } + if (Sanitize.needsMemProfRt()) + AddLinkSanitizerLibArgs(Args, CmdArgs, "memprof"); } if (Sanitize.needsMemProfRt()) @@ -1964,8 +1966,9 @@ getDeploymentTargetFromOSVersionArg(DerivedArgList &Args, if (iOSVersion || TvOSVersion || WatchOSVersion) { TheDriver.Diag(diag::err_drv_argument_not_allowed_with) << macOSVersion->getAsString(Args) - << (iOSVersion ? iOSVersion - : TvOSVersion ? TvOSVersion : WatchOSVersion) + << (iOSVersion ? iOSVersion + : TvOSVersion ? TvOSVersion + : WatchOSVersion) ->getAsString(Args); } return DarwinPlatform::createOSVersionArg(Darwin::MacOS, macOSVersion, @@ -2005,13 +2008,9 @@ getDeploymentTargetFromEnvironmentVariables(const Driver &TheDriver, const llvm::Triple &Triple) { std::string Targets[Darwin::LastDarwinPlatform + 1]; const char *EnvVars[] = { - "MACOSX_DEPLOYMENT_TARGET", - "IPHONEOS_DEPLOYMENT_TARGET", - "TVOS_DEPLOYMENT_TARGET", - "WATCHOS_DEPLOYMENT_TARGET", - "DRIVERKIT_DEPLOYMENT_TARGET", - "XROS_DEPLOYMENT_TARGET" - }; + "MACOSX_DEPLOYMENT_TARGET", "IPHONEOS_DEPLOYMENT_TARGET", + "TVOS_DEPLOYMENT_TARGET", "WATCHOS_DEPLOYMENT_TARGET", + "DRIVERKIT_DEPLOYMENT_TARGET", "XROS_DEPLOYMENT_TARGET"}; static_assert(std::size(EnvVars) == Darwin::LastDarwinPlatform + 1, "Missing platform"); for (const auto &I : llvm::enumerate(llvm::ArrayRef(EnvVars))) { @@ -2544,9 +2543,9 @@ void AppleMachO::AddClangSystemIncludeArgs( // Add <sysroot>/usr/local/include if (!NoStdInc && !NoStdlibInc) { - SmallString<128> P(Sysroot); - llvm::sys::path::append(P, "usr", "local", "include"); - addSystemInclude(DriverArgs, CC1Args, P); + SmallString<128> P(Sysroot); + llvm::sys::path::append(P, "usr", "local", "include"); + addSystemInclude(DriverArgs, CC1Args, P); } // Add the Clang builtin headers (<resource>/include) @@ -2597,12 +2596,10 @@ void 
DarwinClang::AddClangSystemIncludeArgs( addSystemFrameworkIncludes(DriverArgs, CC1Args, {P1, P2, P3}); } -bool DarwinClang::AddGnuCPlusPlusIncludePaths(const llvm::opt::ArgList &DriverArgs, - llvm::opt::ArgStringList &CC1Args, - llvm::SmallString<128> Base, - llvm::StringRef Version, - llvm::StringRef ArchDir, - llvm::StringRef BitDir) const { +bool DarwinClang::AddGnuCPlusPlusIncludePaths( + const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, + llvm::SmallString<128> Base, llvm::StringRef Version, + llvm::StringRef ArchDir, llvm::StringRef BitDir) const { llvm::sys::path::append(Base, Version); // Add the base dir @@ -2962,8 +2959,7 @@ DerivedArgList *MachO::TranslateArgs(const DerivedArgList &Args, return DAL; } -void MachO::AddLinkRuntimeLibArgs(const ArgList &Args, - ArgStringList &CmdArgs, +void MachO::AddLinkRuntimeLibArgs(const ArgList &Args, ArgStringList &CmdArgs, bool ForceLinkBuiltinRT) const { // Embedded targets are simple at the moment, not supporting sanitizers and // with different libraries for each member of the product { static, PIC } x @@ -3151,8 +3147,8 @@ void Darwin::addClangTargetOptions( // the _Builtin_ modules. e.g. <inttypes.h> on darwin includes <stdint.h>. // The builtin <stdint.h> include-nexts <stdint.h>. When both of those // darwin headers are in the Darwin module, there's a module cycle Darwin -> - // _Builtin_stdint -> Darwin (i.e. inttypes.h (darwin) -> stdint.h (builtin) -> - // stdint.h (darwin)). This is fixed in later versions of the darwin SDK, + // _Builtin_stdint -> Darwin (i.e. inttypes.h (darwin) -> stdint.h (builtin) + // -> stdint.h (darwin)). This is fixed in later versions of the darwin SDK, // but until then, the builtin headers need to join the system modules. // i.e. when the builtin stdint.h is in the Darwin module too, the cycle // goes away. 
Note that -fbuiltin-headers-in-system-modules does nothing @@ -3271,7 +3267,7 @@ Darwin::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch, // it is set here. if (isTargetWatchOSBased() || isTargetDriverKit() || isTargetXROS() || (isTargetIOSBased() && !isIPhoneOSVersionLT(6, 0))) { - for (ArgList::iterator it = DAL->begin(), ie = DAL->end(); it != ie; ) { + for (ArgList::iterator it = DAL->begin(), ie = DAL->end(); it != ie;) { Arg *A = *it; ++it; if (A->getOption().getID() != options::OPT_mkernel && @@ -3297,7 +3293,8 @@ Darwin::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch, return DAL; } -ToolChain::UnwindTableLevel MachO::getDefaultUnwindTableLevel(const ArgList &Args) const { +ToolChain::UnwindTableLevel +MachO::getDefaultUnwindTableLevel(const ArgList &Args) const { // Unwind tables are not emitted if -fno-exceptions is supplied (except when // targeting x86_64). if (getArch() == llvm::Triple::x86_64 || diff --git a/clang/test/Driver/fmemprof-darwin.c b/clang/test/Driver/fmemprof-darwin.c new file mode 100644 index 0000000000000..2b1ad67d176ff --- /dev/null +++ b/clang/test/Driver/fmemprof-darwin.c @@ -0,0 +1,11 @@ + +// Test sanitizer link flags on Darwin. 
+ +// RUN: %clang -### --target=x86_64-darwin \ +// RUN: -stdlib=platform -fmemory-profile %s 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-MEMPROF %s + +// CHECK-MEMPROF: "{{.*}}ld{{(.exe)?}}" +// CHECK-MEMPROF-SAME: libclang_rt.memprof_osx_dynamic.dylib" +// CHECK-MEMPROF-SAME: "-rpath" "@executable_path" +// CHECK-MEMPROF-SAME: "-rpath" "{{[^"]*}}lib{{[^"]*}}darwin" diff --git a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake index ca45d7bd2af7f..d89c07f690740 100644 --- a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake +++ b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake @@ -78,7 +78,7 @@ else() endif() set(ALL_NSAN_SUPPORTED_ARCH ${X86_64}) set(ALL_HWASAN_SUPPORTED_ARCH ${X86_64} ${ARM64} ${RISCV64}) -set(ALL_MEMPROF_SUPPORTED_ARCH ${X86_64}) +set(ALL_MEMPROF_SUPPORTED_ARCH ${X86_64} ${ARM64}) set(ALL_PROFILE_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${PPC32} ${PPC64} ${MIPS32} ${MIPS64} ${S390X} ${SPARC} ${SPARCV9} ${HEXAGON} ${RISCV32} ${RISCV64} ${LOONGARCH64} ${WASM32}) diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake index e3310b1ff0e2c..305dbadde71a9 100644 --- a/compiler-rt/cmake/config-ix.cmake +++ b/compiler-rt/cmake/config-ix.cmake @@ -829,7 +829,7 @@ else() endif() if (COMPILER_RT_HAS_SANITIZER_COMMON AND MEMPROF_SUPPORTED_ARCH AND - OS_NAME MATCHES "Linux") + OS_NAME MATCHES "Linux|Darwin") set(COMPILER_RT_HAS_MEMPROF TRUE) else() set(COMPILER_RT_HAS_MEMPROF FALSE) diff --git a/compiler-rt/include/profile/MemProfBinaryAccessData.inc b/compiler-rt/include/profile/MemProfBinaryAccessData.inc new file mode 100644 index 0000000000000..25333a8f7fcc5 --- /dev/null +++ b/compiler-rt/include/profile/MemProfBinaryAccessData.inc @@ -0,0 +1,41 @@ +/*===-- MemProfBinaryAccessData.inc - MemProf profiling runtime macros -*- C++ +-*-======== *\ +|* +|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+|* See https://llvm.org/LICENSE.txt for license information. +|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +|* +\*===----------------------------------------------------------------------===*/ +/* + * This file defines the macros for memprof profiling data structures + * for binary access. + * + * This file has two identical copies. The primary copy lives in LLVM and + * the other one sits in compiler-rt/include/profile directory. To make changes + * in this file, first modify the primary copy and copy it over to compiler-rt. + * Testing of any change in this file can start only after the two copies are + * synced up. + * +\*===----------------------------------------------------------------------===*/ + +// A 64-bit magic number to uniquely identify the raw binary memprof binary +// access profile file. +#define MEMPROF_BINARY_ACCESS_RAW_MAGIC_64 \ + ((uint64_t)255 << 56 | (uint64_t)'f' << 48 | (uint64_t)'b' << 40 | \ + (uint64_t)'m' << 32 | (uint64_t)'b' << 24 | (uint64_t)'i' << 16 | \ + (uint64_t)'n' << 8 | (uint64_t)129) + +// The version number of the raw binary format. +#define MEMPROF_BINARY_ACCESS_RAW_VERSION 1ULL + +// A struct describing the header used for the raw binary memprof profile +// format. +PACKED(struct BinaryAccessHeader { + uint64_t Magic; + uint64_t Version; + uint64_t TotalSize; + uint64_t SegmentOffset; + uint64_t NumSegments; + uint64_t MemAddressOffset; + uint64_t NumMemBlockAddresses; +}); diff --git a/compiler-rt/include/profile/MemProfData.inc b/compiler-rt/include/profile/MemProfData.inc index 3f785bd23fce3..f30e95bc51111 100644 --- a/compiler-rt/include/profile/MemProfData.inc +++ b/compiler-rt/include/profile/MemProfData.inc @@ -45,6 +45,8 @@ namespace llvm { namespace memprof { +#include "MemProfBinaryAccessData.inc" + // A struct describing the header used for the raw binary memprof profile format. 
PACKED(struct Header { uint64_t Magic; diff --git a/compiler-rt/lib/memprof/CMakeLists.txt b/compiler-rt/lib/memprof/CMakeLists.txt index e6d99daca6ee7..d1c4d607ef396 100644 --- a/compiler-rt/lib/memprof/CMakeLists.txt +++ b/compiler-rt/lib/memprof/CMakeLists.txt @@ -7,7 +7,9 @@ set(MEMPROF_SOURCES memprof_interceptors.cpp memprof_interceptors_memintrinsics.cpp memprof_linux.cpp + memprof_mac.cpp memprof_malloc_linux.cpp + memprof_malloc_mac.cpp memprof_mibmap.cpp memprof_posix.cpp memprof_rawprofile.cpp @@ -86,124 +88,145 @@ add_compiler_rt_object_libraries(RTMemprof_dynamic DEFS ${MEMPROF_DYNAMIC_DEFINITIONS} DEPS ${MEMPROF_DEPS}) -add_compiler_rt_object_libraries(RTMemprof - ARCHS ${MEMPROF_SUPPORTED_ARCH} - SOURCES ${MEMPROF_SOURCES} - ADDITIONAL_HEADERS ${MEMPROF_HEADERS} - CFLAGS ${MEMPROF_CFLAGS} - DEFS ${MEMPROF_COMMON_DEFINITIONS} - DEPS ${MEMPROF_DEPS}) -add_compiler_rt_object_libraries(RTMemprof_cxx - ARCHS ${MEMPROF_SUPPORTED_ARCH} - SOURCES ${MEMPROF_CXX_SOURCES} - ADDITIONAL_HEADERS ${MEMPROF_HEADERS} - CFLAGS ${MEMPROF_CFLAGS} - DEFS ${MEMPROF_COMMON_DEFINITIONS} - DEPS ${MEMPROF_DEPS}) -add_compiler_rt_object_libraries(RTMemprof_preinit - ARCHS ${MEMPROF_SUPPORTED_ARCH} - SOURCES ${MEMPROF_PREINIT_SOURCES} - ADDITIONAL_HEADERS ${MEMPROF_HEADERS} - CFLAGS ${MEMPROF_CFLAGS} - DEFS ${MEMPROF_COMMON_DEFINITIONS} - DEPS ${MEMPROF_DEPS}) - -file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/dummy.cpp "") -add_compiler_rt_object_libraries(RTMemprof_dynamic_version_script_dummy - ARCHS ${MEMPROF_SUPPORTED_ARCH} - SOURCES ${CMAKE_CURRENT_BINARY_DIR}/dummy.cpp - CFLAGS ${MEMPROF_DYNAMIC_CFLAGS} - DEFS ${MEMPROF_DYNAMIC_DEFINITIONS} - DEPS ${MEMPROF_DEPS}) +if(NOT APPLE) + add_compiler_rt_object_libraries(RTMemprof + ARCHS ${MEMPROF_SUPPORTED_ARCH} + SOURCES ${MEMPROF_SOURCES} + ADDITIONAL_HEADERS ${MEMPROF_HEADERS} + CFLAGS ${MEMPROF_CFLAGS} + DEFS ${MEMPROF_COMMON_DEFINITIONS} + DEPS ${MEMPROF_DEPS}) + add_compiler_rt_object_libraries(RTMemprof_cxx + ARCHS 
${MEMPROF_SUPPORTED_ARCH} + SOURCES ${MEMPROF_CXX_SOURCES} + ADDITIONAL_HEADERS ${MEMPROF_HEADERS} + CFLAGS ${MEMPROF_CFLAGS} + DEFS ${MEMPROF_COMMON_DEFINITIONS} + DEPS ${MEMPROF_DEPS}) + add_compiler_rt_object_libraries(RTMemprof_preinit + ARCHS ${MEMPROF_SUPPORTED_ARCH} + SOURCES ${MEMPROF_PREINIT_SOURCES} + ADDITIONAL_HEADERS ${MEMPROF_HEADERS} + CFLAGS ${MEMPROF_CFLAGS} + DEFS ${MEMPROF_COMMON_DEFINITIONS} + DEPS ${MEMPROF_DEPS}) + + file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/dummy.cpp "") + add_compiler_rt_object_libraries(RTMemprof_dynamic_version_script_dummy + ARCHS ${MEMPROF_SUPPORTED_ARCH} + SOURCES ${CMAKE_CURRENT_BINARY_DIR}/dummy.cpp + CFLAGS ${MEMPROF_DYNAMIC_CFLAGS} + DEFS ${MEMPROF_DYNAMIC_DEFINITIONS} + DEPS ${MEMPROF_DEPS}) +endif() # Build MemProf runtimes shipped with Clang. add_compiler_rt_component(memprof) -# Build separate libraries for each target. - -set(MEMPROF_COMMON_RUNTIME_OBJECT_LIBS - RTInterception - RTSanitizerCommon - RTSanitizerCommonLibc - RTSanitizerCommonCoverage - RTSanitizerCommonSymbolizer - # FIXME: hangs. 
- # RTSanitizerCommonSymbolizerInternal -) - -add_compiler_rt_runtime(clang_rt.memprof - STATIC - ARCHS ${MEMPROF_SUPPORTED_ARCH} - OBJECT_LIBS RTMemprof_preinit - RTMemprof - ${MEMPROF_COMMON_RUNTIME_OBJECT_LIBS} - CFLAGS ${MEMPROF_CFLAGS} - DEFS ${MEMPROF_COMMON_DEFINITIONS} - PARENT_TARGET memprof) - -add_compiler_rt_runtime(clang_rt.memprof_cxx - STATIC - ARCHS ${MEMPROF_SUPPORTED_ARCH} - OBJECT_LIBS RTMemprof_cxx - CFLAGS ${MEMPROF_CFLAGS} - DEFS ${MEMPROF_COMMON_DEFINITIONS} - PARENT_TARGET memprof) - -add_compiler_rt_runtime(clang_rt.memprof-preinit - STATIC - ARCHS ${MEMPROF_SUPPORTED_ARCH} - OBJECT_LIBS RTMemprof_preinit - CFLAGS ${MEMPROF_CFLAGS} - DEFS ${MEMPROF_COMMON_DEFINITIONS} - PARENT_TARGET memprof) - -foreach(arch ${MEMPROF_SUPPORTED_ARCH}) - if (UNIX) - add_sanitizer_rt_version_list(clang_rt.memprof-dynamic-${arch} - LIBS clang_rt.memprof-${arch} clang_rt.memprof_cxx-${arch} - EXTRA memprof.syms.extra) - set(VERSION_SCRIPT_FLAG - -Wl,--version-script,${CMAKE_CURRENT_BINARY_DIR}/clang_rt.memprof-dynamic-${arch}.vers) - set_property(SOURCE - ${CMAKE_CURRENT_BINARY_DIR}/dummy.cpp - APPEND PROPERTY - OBJECT_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/clang_rt.memprof-dynamic-${arch}.vers) - else() - set(VERSION_SCRIPT_FLAG) - endif() - - set(MEMPROF_DYNAMIC_WEAK_INTERCEPTION) - +if(APPLE) + # following asan + add_weak_symbols("memprof" WEAK_SYMBOL_LINK_FLAGS) + add_weak_symbols("sanitizer_common" WEAK_SYMBOL_LINK_FLAGS) add_compiler_rt_runtime(clang_rt.memprof SHARED - ARCHS ${arch} - OBJECT_LIBS ${MEMPROF_COMMON_RUNTIME_OBJECT_LIBS} - RTMemprof_dynamic - # The only purpose of RTMemprof_dynamic_version_script_dummy is to - # carry a dependency of the shared runtime on the version script. - # Replacing it with a straightforward - # add_dependencies(clang_rt.memprof-dynamic-${arch} clang_rt.memprof-dynamic-${arch}-version-list) - # generates an order-only dependency in ninja. 
- RTMemprof_dynamic_version_script_dummy - ${MEMPROF_DYNAMIC_WEAK_INTERCEPTION} + OS ${SANITIZER_COMMON_SUPPORTED_OS} + ARCHS ${MEMPROF_SUPPORTED_ARCH} + OBJECT_LIBS RTMemprof_dynamic + RTInterception + RTSanitizerCommon + RTSanitizerCommonLibc + RTSanitizerCommonCoverage + RTSanitizerCommonSymbolizer CFLAGS ${MEMPROF_DYNAMIC_CFLAGS} - LINK_FLAGS ${MEMPROF_DYNAMIC_LINK_FLAGS} - ${VERSION_SCRIPT_FLAG} - LINK_LIBS ${MEMPROF_DYNAMIC_LIBS} + LINK_FLAGS ${WEAK_SYMBOL_LINK_FLAGS} DEFS ${MEMPROF_DYNAMIC_DEFINITIONS} PARENT_TARGET memprof) +else() + # Build separate libraries for each target. + + set(MEMPROF_COMMON_RUNTIME_OBJECT_LIBS + RTInterception + RTSanitizerCommon + RTSanitizerCommonLibc + RTSanitizerCommonCoverage + RTSanitizerCommonSymbolizer + # FIXME: hangs. + # RTSanitizerCommonSymbolizerInternal + ) - if (SANITIZER_USE_SYMBOLS) - add_sanitizer_rt_symbols(clang_rt.memprof_cxx - ARCHS ${arch}) - add_dependencies(memprof clang_rt.memprof_cxx-${arch}-symbols) - add_sanitizer_rt_symbols(clang_rt.memprof - ARCHS ${arch} - EXTRA memprof.syms.extra) - add_dependencies(memprof clang_rt.memprof-${arch}-symbols) - endif() -endforeach() + add_compiler_rt_runtime(clang_rt.memprof + STATIC + ARCHS ${MEMPROF_SUPPORTED_ARCH} + OBJECT_LIBS RTMemprof_preinit + RTMemprof + ${MEMPROF_COMMON_RUNTIME_OBJECT_LIBS} + CFLAGS ${MEMPROF_CFLAGS} + DEFS ${MEMPROF_COMMON_DEFINITIONS} + PARENT_TARGET memprof) + + add_compiler_rt_runtime(clang_rt.memprof_cxx + STATIC + ARCHS ${MEMPROF_SUPPORTED_ARCH} + OBJECT_LIBS RTMemprof_cxx + CFLAGS ${MEMPROF_CFLAGS} + DEFS ${MEMPROF_COMMON_DEFINITIONS} + PARENT_TARGET memprof) + add_compiler_rt_runtime(clang_rt.memprof-preinit + STATIC + ARCHS ${MEMPROF_SUPPORTED_ARCH} + OBJECT_LIBS RTMemprof_preinit + CFLAGS ${MEMPROF_CFLAGS} + DEFS ${MEMPROF_COMMON_DEFINITIONS} + PARENT_TARGET memprof) + + foreach(arch ${MEMPROF_SUPPORTED_ARCH}) + if (UNIX) + add_sanitizer_rt_version_list(clang_rt.memprof-dynamic-${arch} + LIBS clang_rt.memprof-${arch} 
clang_rt.memprof_cxx-${arch} + EXTRA memprof.syms.extra) + set(VERSION_SCRIPT_FLAG + -Wl,--version-script,${CMAKE_CURRENT_BINARY_DIR}/clang_rt.memprof-dynamic-${arch}.vers) + set_property(SOURCE + ${CMAKE_CURRENT_BINARY_DIR}/dummy.cpp + APPEND PROPERTY + OBJECT_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/clang_rt.memprof-dynamic-${arch}.vers) + else() + set(VERSION_SCRIPT_FLAG) + endif() + + set(MEMPROF_DYNAMIC_WEAK_INTERCEPTION) + + add_compiler_rt_runtime(clang_rt.memprof + SHARED + ARCHS ${arch} + OBJECT_LIBS ${MEMPROF_COMMON_RUNTIME_OBJECT_LIBS} + RTMemprof_dynamic + # The only purpose of RTMemprof_dynamic_version_script_dummy is to + # carry a dependency of the shared runtime on the version script. + # Replacing it with a straightforward + # add_dependencies(clang_rt.memprof-dynamic-${arch} clang_rt.memprof-dynamic-${arch}-version-list) + # generates an order-only dependency in ninja. + RTMemprof_dynamic_version_script_dummy + ${MEMPROF_DYNAMIC_WEAK_INTERCEPTION} + CFLAGS ${MEMPROF_DYNAMIC_CFLAGS} + LINK_FLAGS ${MEMPROF_DYNAMIC_LINK_FLAGS} + ${VERSION_SCRIPT_FLAG} + LINK_LIBS ${MEMPROF_DYNAMIC_LIBS} + DEFS ${MEMPROF_DYNAMIC_DEFINITIONS} + PARENT_TARGET memprof) + + if (SANITIZER_USE_SYMBOLS) + add_sanitizer_rt_symbols(clang_rt.memprof_cxx + ARCHS ${arch}) + add_dependencies(memprof clang_rt.memprof_cxx-${arch}-symbols) + add_sanitizer_rt_symbols(clang_rt.memprof + ARCHS ${arch} + EXTRA memprof.syms.extra) + add_dependencies(memprof clang_rt.memprof-${arch}-symbols) + endif() + endforeach() +endif() if(COMPILER_RT_INCLUDE_TESTS) add_subdirectory(tests) diff --git a/compiler-rt/lib/memprof/memprof_allocator.cpp b/compiler-rt/lib/memprof/memprof_allocator.cpp index 60f5c853f9d76..5b3e18e8a750c 100644 --- a/compiler-rt/lib/memprof/memprof_allocator.cpp +++ b/compiler-rt/lib/memprof/memprof_allocator.cpp @@ -90,7 +90,13 @@ static int GetCpuId(void) { // will seg fault as the address of __vdso_getcpu will be null. 
if (!memprof_inited) return -1; +#if SANITIZER_APPLE + // On Apple platforms, sched_getcpu is not available. + // TODO: better way to handle this? + return -100; +#else return sched_getcpu(); +#endif } // Compute the timestamp in ms. @@ -356,11 +362,15 @@ struct Allocator { InsertLiveBlocks(); if (flags()->print_text) { - if (!flags()->print_terse) - Printf("Recorded MIBs (incl. live on exit):\n"); - MIBMap.ForEach(PrintCallback, - reinterpret_cast<void *>(flags()->print_terse)); - StackDepotPrintAll(); + if (flags()->dump_binary_access_profile_only) + DumpBinaryAccesses(); + else { + if (!flags()->print_terse) + Printf("Recorded MIBs (incl. live on exit):\n"); + MIBMap.ForEach(PrintCallback, + reinterpret_cast<void *>(flags()->print_terse)); + StackDepotPrintAll(); + } } else { // Serialize the contents to a raw profile. Format documented in // memprof_rawprofile.h. @@ -369,7 +379,11 @@ struct Allocator { __sanitizer::ListOfModules List; List.init(); ArrayRef<LoadedModule> Modules(List.begin(), List.end()); - u64 BytesSerialized = SerializeToRawProfile(MIBMap, Modules, Buffer); + u64 BytesSerialized = 0; + if (flags()->dump_binary_access_profile_only) + BytesSerialized = SerializeBinaryAccesses(Modules, Buffer); + else + BytesSerialized = SerializeToRawProfile(MIBMap, Modules, Buffer); CHECK(Buffer && BytesSerialized && "could not serialize to buffer"); report_file.Write(Buffer, BytesSerialized); } @@ -766,6 +780,18 @@ uptr memprof_malloc_usable_size(const void *ptr) { return usable_size; } +uptr memprof_mz_size(const void *ptr) { + return instance.AllocationSize(reinterpret_cast<uptr>(ptr)); +} + +void memprof_mz_force_lock() SANITIZER_NO_THREAD_SAFETY_ANALYSIS { + instance.ForceLock(); +} + +void memprof_mz_force_unlock() SANITIZER_NO_THREAD_SAFETY_ANALYSIS { + instance.ForceUnlock(); +} + } // namespace __memprof // ---------------------- Interface ---------------- {{{1 diff --git a/compiler-rt/lib/memprof/memprof_allocator.h 
b/compiler-rt/lib/memprof/memprof_allocator.h index 6d898f06f7e42..11cf93fce7c08 100644 --- a/compiler-rt/lib/memprof/memprof_allocator.h +++ b/compiler-rt/lib/memprof/memprof_allocator.h @@ -20,13 +20,6 @@ #include "sanitizer_common/sanitizer_allocator.h" #include "sanitizer_common/sanitizer_list.h" -#if !defined(__x86_64__) -#error Unsupported platform -#endif -#if !SANITIZER_CAN_USE_ALLOCATOR64 -#error Only 64-bit allocator supported -#endif - namespace __memprof { enum AllocType { @@ -46,6 +39,7 @@ struct MemprofMapUnmapCallback { void OnUnmap(uptr p, uptr size) const; }; +#if SANITIZER_CAN_USE_ALLOCATOR64 constexpr uptr kAllocatorSpace = ~(uptr)0; constexpr uptr kAllocatorSize = 0x40000000000ULL; // 4T. typedef DefaultSizeClassMap SizeClassMap; @@ -64,6 +58,23 @@ template <typename AddressSpaceView> using PrimaryAllocatorASVT = SizeClassAllocator64<AP64<AddressSpaceView>>; using PrimaryAllocator = PrimaryAllocatorASVT<LocalAddressSpaceView>; +#else // Fallback to SizeClassAllocator32. 
+typedef CompactSizeClassMap SizeClassMap; +template <typename AddressSpaceViewTy> struct AP32 { + static const uptr kSpaceBeg = 0; + static const u64 kSpaceSize = SANITIZER_MMAP_RANGE_SIZE; + static const uptr kMetadataSize = 0; + typedef __memprof::SizeClassMap SizeClassMap; + static const uptr kRegionSizeLog = 20; + using AddressSpaceView = AddressSpaceViewTy; + typedef MemprofMapUnmapCallback MapUnmapCallback; + static const uptr kFlags = 0; +}; +template <typename AddressSpaceView> +using PrimaryAllocatorASVT = SizeClassAllocator32<AP32<AddressSpaceView>>; +using PrimaryAllocator = PrimaryAllocatorASVT<LocalAddressSpaceView>; +#endif + static const uptr kNumberOfSizeClasses = SizeClassMap::kNumClasses; template <typename AddressSpaceView> @@ -101,6 +112,10 @@ int memprof_posix_memalign(void **memptr, uptr alignment, uptr size, BufferedStackTrace *stack); uptr memprof_malloc_usable_size(const void *ptr); +uptr memprof_mz_size(const void *ptr); +void memprof_mz_force_lock(); +void memprof_mz_force_unlock(); + void PrintInternalAllocatorStats(); } // namespace __memprof diff --git a/compiler-rt/lib/memprof/memprof_flags.inc b/compiler-rt/lib/memprof/memprof_flags.inc index 1d8e77752caa5..d2c5ebe000d7a 100644 --- a/compiler-rt/lib/memprof/memprof_flags.inc +++ b/compiler-rt/lib/memprof/memprof_flags.inc @@ -42,3 +42,5 @@ MEMPROF_FLAG(bool, print_terse, false, "if print_text = true.") MEMPROF_FLAG(bool, dump_at_exit, true, "If set, dump profiles when the program terminates.") +MEMPROF_FLAG(bool, dump_binary_access_profile_only, false, + "Dump the binary access MemProf format only.") diff --git a/compiler-rt/lib/memprof/memprof_interceptors.cpp b/compiler-rt/lib/memprof/memprof_interceptors.cpp index f4d7fd46e6198..c87144900eace 100644 --- a/compiler-rt/lib/memprof/memprof_interceptors.cpp +++ b/compiler-rt/lib/memprof/memprof_interceptors.cpp @@ -18,7 +18,9 @@ #include "memprof_stack.h" #include "memprof_stats.h" #include "sanitizer_common/sanitizer_libc.h" +#if 
SANITIZER_POSIX #include "sanitizer_common/sanitizer_posix.h" +#endif namespace __memprof { diff --git a/compiler-rt/lib/memprof/memprof_interceptors.h b/compiler-rt/lib/memprof/memprof_interceptors.h index 53d685706b849..5793df2c0ed45 100644 --- a/compiler-rt/lib/memprof/memprof_interceptors.h +++ b/compiler-rt/lib/memprof/memprof_interceptors.h @@ -40,6 +40,7 @@ DECLARE_REAL(char *, strncpy, char *to, const char *from, SIZE_T size) DECLARE_REAL(SIZE_T, strnlen, const char *s, SIZE_T maxlen) DECLARE_REAL(char *, strstr, const char *s1, const char *s2) +#if !SANITIZER_APPLE #define MEMPROF_INTERCEPT_FUNC(name) \ do { \ if (!INTERCEPT_FUNCTION(name)) \ @@ -56,6 +57,10 @@ DECLARE_REAL(char *, strstr, const char *s1, const char *s2) VReport(1, "MemProfiler: failed to intercept '%s@@%s' or '%s'\n", #name, \ ver, #name); \ } while (0) +#else +// OS X interceptors don't need to be initialized with INTERCEPT_FUNCTION. +#define MEMPROF_INTERCEPT_FUNC(name) +#endif // SANITIZER_APPLE #define MEMPROF_INTERCEPTOR_ENTER(ctx, func) \ ctx = 0; \ diff --git a/compiler-rt/lib/memprof/memprof_linux.cpp b/compiler-rt/lib/memprof/memprof_linux.cpp index fbe5d250f840b..7d214b9f6dfc8 100644 --- a/compiler-rt/lib/memprof/memprof_linux.cpp +++ b/compiler-rt/lib/memprof/memprof_linux.cpp @@ -12,9 +12,7 @@ //===----------------------------------------------------------------------===// #include "sanitizer_common/sanitizer_platform.h" -#if !SANITIZER_LINUX -#error Unsupported OS -#endif +#if SANITIZER_LINUX #include "memprof_interceptors.h" #include "memprof_internal.h" @@ -65,3 +63,4 @@ uptr FindDynamicShadowStart() { void *MemprofDlSymNext(const char *sym) { return dlsym(RTLD_NEXT, sym); } } // namespace __memprof +#endif // SANITIZER_LINUX diff --git a/compiler-rt/lib/memprof/memprof_mac.cpp b/compiler-rt/lib/memprof/memprof_mac.cpp new file mode 100644 index 0000000000000..a931dd364b0bc --- /dev/null +++ b/compiler-rt/lib/memprof/memprof_mac.cpp @@ -0,0 +1,56 @@ +//===-- memprof_mac.cpp 
--------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of MemProfiler, a memory profiler. +// +// Mac-specific details. +//===----------------------------------------------------------------------===// + +#include "sanitizer_common/sanitizer_platform.h" +#if SANITIZER_APPLE + +#include "memprof_interceptors.h" +#include "memprof_internal.h" +#include "memprof_thread.h" +#include "sanitizer_common/sanitizer_flags.h" +#include "sanitizer_common/sanitizer_freebsd.h" +#include "sanitizer_common/sanitizer_libc.h" +#include "sanitizer_common/sanitizer_procmaps.h" + +#include <dlfcn.h> +#include <fcntl.h> +#include <limits.h> +#include <pthread.h> +#include <stdio.h> +#include <sys/mman.h> +#include <sys/resource.h> +#include <sys/syscall.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/ucontext.h> +#include <unistd.h> +#include <unwind.h> + +namespace __memprof { + +void InitializePlatformInterceptors() {} +void InitializePlatformExceptionHandlers() {} + +// No-op. Mac does not support static linkage anyway. 
+void *MemprofDoesNotSupportStaticLinkage() { return 0; } + +uptr FindDynamicShadowStart() { + uptr shadow_size_bytes = MemToShadowSize(kHighMemEnd); + return MapDynamicShadow(shadow_size_bytes, SHADOW_SCALE, + /*min_shadow_base_alignment*/ 0, kHighMemEnd); +} + +void *MemprofDlSymNext(const char *sym) { return dlsym(RTLD_NEXT, sym); } + +} // namespace __memprof +#endif diff --git a/compiler-rt/lib/memprof/memprof_malloc_linux.cpp b/compiler-rt/lib/memprof/memprof_malloc_linux.cpp index 2a028c7d0b489..5e154f9fd75b8 100644 --- a/compiler-rt/lib/memprof/memprof_malloc_linux.cpp +++ b/compiler-rt/lib/memprof/memprof_malloc_linux.cpp @@ -14,9 +14,7 @@ //===----------------------------------------------------------------------===// #include "sanitizer_common/sanitizer_platform.h" -#if !SANITIZER_LINUX -#error Unsupported OS -#endif +#if SANITIZER_LINUX #include "memprof_allocator.h" #include "memprof_interceptors.h" @@ -145,3 +143,4 @@ INTERCEPTOR(void, malloc_stats, void) { __memprof_print_accumulated_stats(); } namespace __memprof { void ReplaceSystemMalloc() {} } // namespace __memprof +#endif // SANITIZER_LINUX diff --git a/compiler-rt/lib/memprof/memprof_malloc_mac.cpp b/compiler-rt/lib/memprof/memprof_malloc_mac.cpp new file mode 100644 index 0000000000000..cc57b0b7bb0fd --- /dev/null +++ b/compiler-rt/lib/memprof/memprof_malloc_mac.cpp @@ -0,0 +1,72 @@ +//===-- memprof_malloc_mac.cpp --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of MemProfiler, a memory profiler. +// +// Mac-specific malloc interception. 
+//===----------------------------------------------------------------------===// + +#include "sanitizer_common/sanitizer_platform.h" +#if SANITIZER_APPLE + +#include "memprof_allocator.h" +#include "memprof_interceptors.h" +#include "memprof_internal.h" +#include "memprof_stack.h" +#include "sanitizer_common/sanitizer_allocator_checks.h" +#include "sanitizer_common/sanitizer_allocator_dlsym.h" +#include "sanitizer_common/sanitizer_errno.h" +#include "sanitizer_common/sanitizer_tls_get_addr.h" + +// ---------------------- Replacement functions ---------------- {{{1 +using namespace __memprof; + +struct DlsymAlloc : public DlSymAllocator<DlsymAlloc> { + static bool UseImpl() { return memprof_init_is_running; } +}; + +#define COMMON_MALLOC_ZONE_NAME "memprof" +#define COMMON_MALLOC_ENTER() ENSURE_MEMPROF_INITED() +#define COMMON_MALLOC_SANITIZER_INITIALIZED memprof_inited +#define COMMON_MALLOC_FORCE_LOCK() memprof_mz_force_lock() +#define COMMON_MALLOC_FORCE_UNLOCK() memprof_mz_force_unlock() +#define COMMON_MALLOC_MEMALIGN(alignment, size) \ + GET_STACK_TRACE_MALLOC; \ + void *p = memprof_memalign(alignment, size, &stack, FROM_MALLOC) +#define COMMON_MALLOC_MALLOC(size) \ + GET_STACK_TRACE_MALLOC; \ + void *p = memprof_malloc(size, &stack) +#define COMMON_MALLOC_REALLOC(ptr, size) \ + GET_STACK_TRACE_MALLOC; \ + void *p = memprof_realloc(ptr, size, &stack); +#define COMMON_MALLOC_CALLOC(count, size) \ + GET_STACK_TRACE_MALLOC; \ + void *p = memprof_calloc(count, size, &stack); +#define COMMON_MALLOC_POSIX_MEMALIGN(memptr, alignment, size) \ + GET_STACK_TRACE_MALLOC; \ + int res = memprof_posix_memalign(memptr, alignment, size, &stack); +#define COMMON_MALLOC_VALLOC(size) \ + GET_STACK_TRACE_MALLOC; \ + void *p = memprof_memalign(GetPageSizeCached(), size, &stack, FROM_MALLOC); +#define COMMON_MALLOC_FREE(ptr) \ + GET_STACK_TRACE_FREE; \ + memprof_free(ptr, &stack, FROM_MALLOC); +#define COMMON_MALLOC_SIZE(ptr) uptr size = memprof_mz_size(ptr); +#define 
COMMON_MALLOC_FILL_STATS(zone, stats) +#define COMMON_MALLOC_REPORT_UNKNOWN_REALLOC(ptr, zone_ptr, zone_name) +#define COMMON_MALLOC_NAMESPACE __memprof +#define COMMON_MALLOC_HAS_ZONE_ENUMERATOR 0 +#define COMMON_MALLOC_HAS_EXTRA_INTROSPECTION_INIT 1 + +#include "sanitizer_common/sanitizer_malloc_mac.inc" + +namespace { + +void mi_extra_init(sanitizer_malloc_introspection_t *mi) {} +} // namespace +#endif // SANITIZER_APPLE diff --git a/compiler-rt/lib/memprof/memprof_mapping.h b/compiler-rt/lib/memprof/memprof_mapping.h index 6da385ab3d6e2..024923a6b424c 100644 --- a/compiler-rt/lib/memprof/memprof_mapping.h +++ b/compiler-rt/lib/memprof/memprof_mapping.h @@ -29,8 +29,12 @@ extern uptr kHighMemEnd; // Initialized in __memprof_init. } // namespace __memprof // Size of memory block mapped to a single shadow location +#if SANITIZER_APPLE +#define MEM_GRANULARITY 8ULL +#define MEM_GRANULARITY_8_MAX_COUNTER 255U +#else #define MEM_GRANULARITY 64ULL - +#endif #define SHADOW_MASK ~(MEM_GRANULARITY - 1) #define MEM_TO_SHADOW(mem) \ @@ -129,7 +133,13 @@ inline bool AddrIsAlignedByGranularity(uptr a) { } inline void RecordAccess(uptr a) { - // If we use a different shadow size then the type below needs adjustment. +// If we use a different shadow size then the type below needs adjustment. 
+#if SANITIZER_APPLE + CHECK_EQ(SHADOW_ENTRY_SIZE, 1); + u8 *shadow_address = (u8 *)MEM_TO_SHADOW(a); + if (*shadow_address < MEM_GRANULARITY_8_MAX_COUNTER) + (*shadow_address)++; +#else CHECK_EQ(SHADOW_ENTRY_SIZE, 8); u64 *shadow_address = (u64 *)MEM_TO_SHADOW(a); (*shadow_address)++; diff --git a/compiler-rt/lib/memprof/memprof_new_delete.cpp b/compiler-rt/lib/memprof/memprof_new_delete.cpp index cae5de301367a..78b6d02ca33ed 100644 --- a/compiler-rt/lib/memprof/memprof_new_delete.cpp +++ b/compiler-rt/lib/memprof/memprof_new_delete.cpp @@ -43,6 +43,14 @@ enum class align_val_t : size_t {}; ReportOutOfMemory(size, &stack); \ return res; +// On OS X it's not enough to just provide our own 'operator new' and +// 'operator delete' implementations, because they're going to be in the +// runtime dylib, and the main executable will depend on both the runtime +// dylib and libstdc++, each of those'll have its implementation of new and +// delete. +// To make sure that C++ allocation/deallocation operators are overridden on +// OS X we need to intercept them using their mangled names. 
+#if !SANITIZER_APPLE CXX_OPERATOR_ATTRIBUTE void *operator new(size_t size) { OPERATOR_NEW_BODY(FROM_NEW, false /*nothrow*/); @@ -77,6 +85,20 @@ void *operator new[](size_t size, std::align_val_t align, std::nothrow_t const &) { OPERATOR_NEW_BODY_ALIGN(FROM_NEW_BR, true /*nothrow*/); } +#else // SANITIZER_APPLE +INTERCEPTOR(void *, _Znwm, size_t size) { + OPERATOR_NEW_BODY(FROM_NEW, false /*nothrow*/); +} +INTERCEPTOR(void *, _Znam, size_t size) { + OPERATOR_NEW_BODY(FROM_NEW_BR, false /*nothrow*/); +} +INTERCEPTOR(void *, _ZnwmRKSt9nothrow_t, size_t size, std::nothrow_t const &) { + OPERATOR_NEW_BODY(FROM_NEW, true /*nothrow*/); +} +INTERCEPTOR(void *, _ZnamRKSt9nothrow_t, size_t size, std::nothrow_t const &) { + OPERATOR_NEW_BODY(FROM_NEW_BR, true /*nothrow*/); +} +#endif // !SANITIZER_APPLE #define OPERATOR_DELETE_BODY(type) \ GET_STACK_TRACE_FREE; \ @@ -94,8 +116,9 @@ void *operator new[](size_t size, std::align_val_t align, GET_STACK_TRACE_FREE; \ memprof_delete(ptr, size, static_cast<uptr>(align), &stack, type); +#if !SANITIZER_APPLE CXX_OPERATOR_ATTRIBUTE -void operator delete(void *ptr)NOEXCEPT { OPERATOR_DELETE_BODY(FROM_NEW); } +void operator delete(void *ptr) NOEXCEPT { OPERATOR_DELETE_BODY(FROM_NEW); } CXX_OPERATOR_ATTRIBUTE void operator delete[](void *ptr) NOEXCEPT { OPERATOR_DELETE_BODY(FROM_NEW_BR); @@ -109,7 +132,7 @@ void operator delete[](void *ptr, std::nothrow_t const &) { OPERATOR_DELETE_BODY(FROM_NEW_BR); } CXX_OPERATOR_ATTRIBUTE -void operator delete(void *ptr, size_t size)NOEXCEPT { +void operator delete(void *ptr, size_t size) NOEXCEPT { OPERATOR_DELETE_BODY_SIZE(FROM_NEW); } CXX_OPERATOR_ATTRIBUTE @@ -117,7 +140,7 @@ void operator delete[](void *ptr, size_t size) NOEXCEPT { OPERATOR_DELETE_BODY_SIZE(FROM_NEW_BR); } CXX_OPERATOR_ATTRIBUTE -void operator delete(void *ptr, std::align_val_t align)NOEXCEPT { +void operator delete(void *ptr, std::align_val_t align) NOEXCEPT { OPERATOR_DELETE_BODY_ALIGN(FROM_NEW); } CXX_OPERATOR_ATTRIBUTE @@ 
-135,7 +158,7 @@ void operator delete[](void *ptr, std::align_val_t align, OPERATOR_DELETE_BODY_ALIGN(FROM_NEW_BR); } CXX_OPERATOR_ATTRIBUTE -void operator delete(void *ptr, size_t size, std::align_val_t align)NOEXCEPT { +void operator delete(void *ptr, size_t size, std::align_val_t align) NOEXCEPT { OPERATOR_DELETE_BODY_SIZE_ALIGN(FROM_NEW); } CXX_OPERATOR_ATTRIBUTE @@ -143,3 +166,13 @@ void operator delete[](void *ptr, size_t size, std::align_val_t align) NOEXCEPT { OPERATOR_DELETE_BODY_SIZE_ALIGN(FROM_NEW_BR); } +#else // SANITIZER_APPLE +INTERCEPTOR(void, _ZdlPv, void *ptr) { OPERATOR_DELETE_BODY(FROM_NEW); } +INTERCEPTOR(void, _ZdaPv, void *ptr) { OPERATOR_DELETE_BODY(FROM_NEW_BR); } +INTERCEPTOR(void, _ZdlPvRKSt9nothrow_t, void *ptr, std::nothrow_t const &) { + OPERATOR_DELETE_BODY(FROM_NEW); +} +INTERCEPTOR(void, _ZdaPvRKSt9nothrow_t, void *ptr, std::nothrow_t const &) { + OPERATOR_DELETE_BODY(FROM_NEW_BR); +} +#endif // !SANITIZER_APPLE diff --git a/compiler-rt/lib/memprof/memprof_rawprofile.cpp b/compiler-rt/lib/memprof/memprof_rawprofile.cpp index a897648584828..758eab8b403c6 100644 --- a/compiler-rt/lib/memprof/memprof_rawprofile.cpp +++ b/compiler-rt/lib/memprof/memprof_rawprofile.cpp @@ -1,4 +1,5 @@ #include <stdint.h> +#include <stdio.h> #include <stdlib.h> #include <string.h> @@ -286,4 +287,137 @@ u64 SerializeToRawProfile(MIBMapTy &MIBMap, ArrayRef<LoadedModule> Modules, return TotalSizeBytes; } +/* + The format of the binary access profile: + // header + BinaryAccessHeader header; + // segment info + SegmentEntry entry1; + SegmentEntry entry2; + ... + // memblock addresses + u64 MemBlockAddress1; + u64 MemBlockAddress2; + ... 
+  // end
+
+BinaryAccessHeader is defined in MemProfBinaryAccessData.inc
+PACKED(struct BinaryAccessHeader {
+  uint64_t Magic;
+  uint64_t Version;
+  uint64_t TotalSize;
+  uint64_t SegmentOffset;
+  uint64_t NumSegments;
+  uint64_t MemAddressOffset;
+  uint64_t NumMemBlockAddresses;
+});
+SegmentEntry is defined in MemProfData.inc
+  struct SegmentEntry {
+  uint64_t Start; // segment start address
+  uint64_t End; // segment end address
+  uint64_t Offset; // binary offset at runtime
+  uint64_t BuildIdSize;
+  uint8_t BuildId[MEMPROF_BUILDID_MAX_SIZE] = {0};
+#define MEMPROF_BUILDID_MAX_SIZE 32ULL
+*/
+
+using BinaryAccessHeader = ::llvm::memprof::BinaryAccessHeader;
+
+// Serializes the binary access profile of the given modules into a newly
+// allocated buffer using the layout described above: header, then segment
+// entries, then memblock addresses.
+//
+// Only modules whose full name contains ".app" are considered. One
+// SegmentEntry is emitted per segment of such a module, and every
+// MEM_GRANULARITY-aligned address of the segment for which GetShadowCount()
+// returns a non-zero value is appended to the address section.
+//
+// Modules:     the loaded modules to scan.
+// BufferStart: receives the buffer obtained from InternalAlloc(); this
+//              function does not release it.
+// Returns the number of bytes written to the buffer.
+u64 SerializeBinaryAccesses(ArrayRef<LoadedModule> Modules,
+                            char *&BufferStart) {
+  // Serialize the contents to a raw profile.
+  Vector<SegmentEntry> Entries;
+  Vector<u64> MemBlockAddresses;
+  for (const auto &Module : Modules) {
+    // TODO: is there a better way to filter the binaries we care?
+    if (strstr(Module.full_name(), ".app"))
+      for (const auto &Segment : Module.ranges()) {
+        // collect segment info
+        SegmentEntry Entry(Segment.beg, Segment.end, Module.base_address());
+        Entry.BuildIdSize = Module.uuid_size();
+        memcpy(Entry.BuildId, Module.uuid(), Module.uuid_size());
+        Entries.PushBack(Entry);
+        // collect memblock addresses whose access > 0 for each segment
+        for (uptr t = Entry.Start & SHADOW_MASK; t < Entry.End;
+             t += MEM_GRANULARITY)
+          if (GetShadowCount(t, MEM_GRANULARITY - 4) > 0)
+            MemBlockAddresses.PushBack(t);
+      }
+  }
+  // Allocate the memory for the entire buffer incl binaries segment info
+  // and memblock addresses
+  u64 NumSegmentsToRecord = Entries.Size();
+  u64 NumMemBlockAddressesToRecord = MemBlockAddresses.Size();
+  u64 NumSegmentsToRecordBytes =
+      RoundUpTo(sizeof(SegmentEntry) * NumSegmentsToRecord, 8);
+  u64 NumMemBlockAddressesToRecordBytes =
+      RoundUpTo(NumMemBlockAddressesToRecord * sizeof(u64), 8);
+  u64 NumHeaderBytes = RoundUpTo(sizeof(BinaryAccessHeader), 8);
+  u64 TotalBytes = NumHeaderBytes + NumSegmentsToRecordBytes +
+                   NumMemBlockAddressesToRecordBytes;
+  BufferStart = (char *)InternalAlloc(TotalBytes);
+  char *Buffer = BufferStart;
+
+  BinaryAccessHeader header{MEMPROF_BINARY_ACCESS_RAW_MAGIC_64,
+                            MEMPROF_BINARY_ACCESS_RAW_VERSION,
+                            TotalBytes,
+                            NumHeaderBytes,
+                            NumSegmentsToRecord,
+                            NumHeaderBytes + NumSegmentsToRecordBytes,
+                            NumMemBlockAddressesToRecord};
+  memcpy(Buffer, &header, sizeof(BinaryAccessHeader));
+  Buffer += NumHeaderBytes;
+  // The counts are 64-bit, so the loop indices are 64-bit as well.
+  for (u64 k = 0; k < NumSegmentsToRecord; k++) {
+    memcpy(Buffer, &Entries[k], sizeof(SegmentEntry));
+    Buffer += sizeof(SegmentEntry);
+  }
+  for (u64 k = 0; k < NumMemBlockAddressesToRecord; k++) {
+    // Addresses are stored as 64-bit values regardless of the target's
+    // pointer width, matching the sizeof(u64) used to size the buffer.
+    u64 Addr = MemBlockAddresses[k];
+    memcpy(Buffer, &Addr, sizeof(u64));
+    Buffer += sizeof(u64);
+  }
+  u64 BytesSerialized = Buffer - BufferStart;
+
+  // All counters are u64: print them with %llu (and explicit casts) instead
+  // of %d, which expects an int.
+  fprintf(
+      stderr,
+      "[MemProf] NumSegmentsToRecord: %llu, NumSegmentsToRecordBytes: %llu, "
+      "NumMemBlockAddressesToRecord: %llu, NumMemBlockAddressesToRecordBytes: "
+      "%llu, BytesSerialized: %llu, Buffer: %p\n",
+      (unsigned long long)NumSegmentsToRecord,
+      (unsigned long long)NumSegmentsToRecordBytes,
+      (unsigned long long)NumMemBlockAddressesToRecord,
+      (unsigned long long)NumMemBlockAddressesToRecordBytes,
+      (unsigned long long)BytesSerialized, (void *)Buffer);
+
+  return BytesSerialized;
+}
+
+// Prints a textual form of the binary access profile with Printf(): for every
+// segment of every module whose full name contains ".app", the build id, the
+// module name, the segment start, one "Shadow" line per MEM_GRANULARITY-aligned
+// address with a non-zero GetShadowCount(), the segment end and the offset.
+void DumpBinaryAccesses() {
+  __sanitizer::ListOfModules List;
+  List.init();
+  ArrayRef<LoadedModule> Modules(List.begin(), List.end());
+  for (const auto &Module : Modules) {
+    // TODO: is there a better way to filter the binaries we care?
+    if (strstr(Module.full_name(), ".app")) {
+      for (const auto &Segment : Module.ranges()) {
+        SegmentEntry Entry(Segment.beg, Segment.end, Module.base_address());
+        Printf("\n[MemProf] BuildId: ");
+        for (size_t I = 0; I < Module.uuid_size(); I++)
+          Printf("%02x", Module.uuid()[I]);
+        Printf("\n");
+        Printf("[MemProf] ExecutableName: %s\n", Module.full_name());
+        // SegmentEntry fields are 64-bit; use %llx rather than %zx so the
+        // specifier matches the argument width on every target.
+        Printf("[MemProf] Start: 0x%llx\n", (unsigned long long)Entry.Start);
+
+        InternalScopedString shadows;
+        for (auto t = Entry.Start & SHADOW_MASK; t < Entry.End;
+             t += MEM_GRANULARITY) {
+          // should not be 64, as it will include the next shadow memory
+          u64 c = GetShadowCount(t, MEM_GRANULARITY - 4);
+          if (c > 0)
+            shadows.AppendF("[MemProf] Shadow: 0x%llx %llu\n",
+                            (unsigned long long)t, (unsigned long long)c);
+        }
+        Printf("%s", shadows.data());
+        Printf("[MemProf] End: 0x%llx\n", (unsigned long long)Entry.End);
+        Printf("[MemProf] Offset: 0x%llx\n", (unsigned long long)Entry.Offset);
+      }
+    }
+  }
+}
 } // namespace __memprof
diff --git a/compiler-rt/lib/memprof/memprof_rawprofile.h b/compiler-rt/lib/memprof/memprof_rawprofile.h
index e2494175f165e..1dbbab85f7da8 100644
--- a/compiler-rt/lib/memprof/memprof_rawprofile.h
+++ b/compiler-rt/lib/memprof/memprof_rawprofile.h
@@ -10,6 +10,12 @@ namespace __memprof {
 // binary format. The format itself is documented memprof_rawprofile.cpp.
 u64 SerializeToRawProfile(MIBMapTy &BlockCache, ArrayRef<LoadedModule> Modules,
                           char *&Buffer);
+// Serialize the in-memory representation of the memprof profile for the binary
+// access to the raw binary format. The format itself is documented in
+// memprof_rawprofile.cpp.
+u64 SerializeBinaryAccesses(ArrayRef<LoadedModule> Modules, char *&Buffer);
+// Print out the binary access profile.
+void DumpBinaryAccesses(); } // namespace __memprof #endif // MEMPROF_RAWPROFILE_H_ diff --git a/compiler-rt/test/memprof/CMakeLists.txt b/compiler-rt/test/memprof/CMakeLists.txt index 4c50ae6b83719..566d6da9f6bf0 100644 --- a/compiler-rt/test/memprof/CMakeLists.txt +++ b/compiler-rt/test/memprof/CMakeLists.txt @@ -4,7 +4,7 @@ set(MEMPROF_TESTSUITES) set(MEMPROF_DYNAMIC_TESTSUITES) macro(get_bits_for_arch arch bits) - if (${arch} MATCHES "x86_64") + if (${arch} MATCHES "x86_64|arm64") set(${bits} 64) else() message(FATAL_ERROR "Unexpected target architecture: ${arch}") @@ -18,13 +18,22 @@ endif() set(MEMPROF_DYNAMIC_TEST_DEPS ${MEMPROF_TEST_DEPS}) set(MEMPROF_TEST_ARCH ${MEMPROF_SUPPORTED_ARCH}) +if(APPLE) + darwin_filter_host_archs(MEMPROF_SUPPORTED_ARCH MEMPROF_TEST_ARCH) +endif() foreach(arch ${MEMPROF_TEST_ARCH}) set(MEMPROF_TEST_TARGET_ARCH ${arch}) + set(MEMPROF_TEST_APPLE_PLATFORM "osx") + set(MEMPROF_TEST_MIN_DEPLOYMENT_TARGET_FLAG "${DARWIN_osx_MIN_VER_FLAG}") string(TOLOWER "-${arch}-${OS_NAME}" MEMPROF_TEST_CONFIG_SUFFIX) get_bits_for_arch(${arch} MEMPROF_TEST_BITS) get_test_cc_for_arch(${arch} MEMPROF_TEST_TARGET_CC MEMPROF_TEST_TARGET_CFLAGS) - set(MEMPROF_TEST_DYNAMIC False) + if(APPLE) + set(MEMPROF_TEST_DYNAMIC True) + else() + set(MEMPROF_TEST_DYNAMIC False) + endif() string(TOUPPER ${arch} ARCH_UPPER_CASE) set(CONFIG_NAME ${ARCH_UPPER_CASE}${OS_NAME}Config) configure_lit_site_cfg( @@ -33,14 +42,16 @@ foreach(arch ${MEMPROF_TEST_ARCH}) ) list(APPEND MEMPROF_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}) - string(TOLOWER "-${arch}-${OS_NAME}-dynamic" MEMPROF_TEST_CONFIG_SUFFIX) - set(MEMPROF_TEST_DYNAMIC True) - set(CONFIG_NAME ${ARCH_UPPER_CASE}${OS_NAME}DynamicConfig) - configure_lit_site_cfg( - ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in - ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}/lit.site.cfg.py) - list(APPEND MEMPROF_DYNAMIC_TESTSUITES - ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}) + if (COMPILER_RT_MEMPROF_HAS_STATIC_RUNTIME) + 
string(TOLOWER "-${arch}-${OS_NAME}-dynamic" MEMPROF_TEST_CONFIG_SUFFIX) + set(MEMPROF_TEST_DYNAMIC True) + set(CONFIG_NAME ${ARCH_UPPER_CASE}${OS_NAME}DynamicConfig) + configure_lit_site_cfg( + ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in + ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}/lit.site.cfg.py) + list(APPEND MEMPROF_DYNAMIC_TESTSUITES + ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}) + endif() endforeach() # Add unit tests. @@ -54,12 +65,60 @@ foreach(arch ${MEMPROF_TEST_ARCH}) list(APPEND MEMPROF_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/Unit/${CONFIG_NAME}) endforeach() +# iOS and iOS simulator test suites +# These are not added into "check-all", in order to run these tests, use +# "check-memprof-iossim-x86_64" and similar. They also require that an extra env +# variable to select which iOS device or simulator to use, e.g.: +# SANITIZER_IOSSIM_TEST_DEVICE_IDENTIFIER="iPhone 6" +if(APPLE) + set(MEMPROF_TEST_TARGET_CC ${COMPILER_RT_TEST_COMPILER}) + set(MEMPROF_TEST_DYNAMIC True) + set(MEMPROF_APPLE_PLATFORMS ${SANITIZER_COMMON_SUPPORTED_OS}) + + foreach(platform ${MEMPROF_APPLE_PLATFORMS}) + if ("${platform}" STREQUAL "osx") + # Skip macOS because it's handled by the code above that builds tests for the host machine. 
+ continue() + endif() + list_intersect( + MEMPROF_TEST_${platform}_ARCHS + MEMPROF_SUPPORTED_ARCH + DARWIN_${platform}_ARCHS + ) + foreach(arch ${MEMPROF_TEST_${platform}_ARCHS}) + get_test_cflags_for_apple_platform( + "${platform}" + "${arch}" + MEMPROF_TEST_TARGET_CFLAGS + ) + string(TOUPPER "${arch}" ARCH_UPPER_CASE) + get_capitalized_apple_platform("${platform}" PLATFORM_CAPITALIZED) + set(CONFIG_NAME "${PLATFORM_CAPITALIZED}${ARCH_UPPER_CASE}Config") + set(MEMPROF_TEST_CONFIG_SUFFIX "-${arch}-${platform}") + set(MEMPROF_TEST_APPLE_PLATFORM "${platform}") + set(MEMPROF_TEST_TARGET_ARCH "${arch}") + set(MEMPROF_TEST_MIN_DEPLOYMENT_TARGET_FLAG "${DARWIN_${platform}_MIN_VER_FLAG}") + get_bits_for_arch(${arch} MEMPROF_TEST_BITS) + configure_lit_site_cfg( + ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in + ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}/lit.site.cfg.py + ) + add_lit_testsuite(check-memprof-${platform}-${arch} "AddressSanitizer ${platform} ${arch} tests" + ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}/ + EXCLUDE_FROM_CHECK_ALL + DEPENDS ${MEMPROF_TEST_DEPS}) + endforeach() + endforeach() +endif() + add_lit_testsuite(check-memprof "Running the MemProfiler tests" ${MEMPROF_TESTSUITES} DEPENDS ${MEMPROF_TEST_DEPS}) -add_lit_testsuite(check-memprof-dynamic - "Running the MemProfiler tests with dynamic runtime" - ${MEMPROF_DYNAMIC_TESTSUITES} - ${exclude_from_check_all.g} - DEPENDS ${MEMPROF_DYNAMIC_TEST_DEPS}) +if (COMPILER_RT_MEMPROF_HAS_STATIC_RUNTIME) + add_lit_testsuite(check-memprof-dynamic + "Running the MemProfiler tests with dynamic runtime" + ${MEMPROF_DYNAMIC_TESTSUITES} + ${exclude_from_check_all.g} + DEPENDS ${MEMPROF_DYNAMIC_TEST_DEPS}) +endif() diff --git a/compiler-rt/test/memprof/lit.cfg.py b/compiler-rt/test/memprof/lit.cfg.py index 4057da0c65b51..0bedb669a8857 100644 --- a/compiler-rt/test/memprof/lit.cfg.py +++ b/compiler-rt/test/memprof/lit.cfg.py @@ -106,7 +106,7 @@ def build_invocation(compile_flags): config.substitutions.append(("%pie", 
"-pie")) # Only run the tests on supported OSs. -if config.host_os not in ["Linux"]: +if config.host_os not in ["Linux", "Darwin"]: config.unsupported = True if not config.parallelism_group: diff --git a/llvm/include/llvm/ProfileData/MemProfBinaryAccessData.inc.cpp b/llvm/include/llvm/ProfileData/MemProfBinaryAccessData.inc.cpp new file mode 100644 index 0000000000000..e1aac9e4a57d3 --- /dev/null +++ b/llvm/include/llvm/ProfileData/MemProfBinaryAccessData.inc.cpp @@ -0,0 +1,47 @@ +*= == --MemProfBinaryAccessData.inc - + MemProf profiling runtime macros - *-C++ - * -= == == == + = * | * | *Part of the LLVM Project, + under the Apache License v2 .0 with LLVM Exceptions.| + *See https : // llvm.org/LICENSE.txt for license information. + | *SPDX - License - + Identifier + : Apache - + 2.0 WITH LLVM - + exception | * +\*= + == + -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- == + = * / +/* + * This file defines the macros for memprof profiling data structures + * for binary access. + * + * This file has two identical copies. The primary copy lives in LLVM and + * the other one sits in compiler-rt/include/profile directory. To make changes + * in this file, first modify the primary copy and copy it over to compiler-rt. + * Testing of any change in this file can start only after the two copies are + * synced up. + * +\*===----------------------------------------------------------------------===*/ + +// A 64-bit magic number to uniquely identify the raw binary memprof binary +// access profile file. +#define MEMPROF_BINARY_ACCESS_RAW_MAGIC_64 \ + ((uint64_t)255 << 56 | (uint64_t)'f' << 48 | (uint64_t)'b' << 40 | \ + (uint64_t)'m' << 32 | (uint64_t)'b' << 24 | (uint64_t)'i' << 16 | \ + (uint64_t)'n' << 8 | (uint64_t)129) + +// The version number of the raw binary format. +#define MEMPROF_BINARY_ACCESS_RAW_VERSION 1ULL + + // A struct describing the header used for the raw binary memprof profile + // format. 
+ PACKED(struct BinaryAccessHeader { + uint64_t Magic; + uint64_t Version; + uint64_t TotalSize; + uint64_t SegmentOffset; + uint64_t NumSegments; + uint64_t MemAddressOffset; + uint64_t NumMemBlockAddresses; + }); diff --git a/llvm/include/llvm/ProfileData/MemProfData.inc b/llvm/include/llvm/ProfileData/MemProfData.inc index 3f785bd23fce3..f30e95bc51111 100644 --- a/llvm/include/llvm/ProfileData/MemProfData.inc +++ b/llvm/include/llvm/ProfileData/MemProfData.inc @@ -45,6 +45,8 @@ namespace llvm { namespace memprof { +#include "MemProfBinaryAccessData.inc" + // A struct describing the header used for the raw binary memprof profile format. PACKED(struct Header { uint64_t Magic; diff --git a/llvm/include/module.modulemap b/llvm/include/module.modulemap index b378023453241..e2aa58e94211c 100644 --- a/llvm/include/module.modulemap +++ b/llvm/include/module.modulemap @@ -315,6 +315,7 @@ module LLVM_ProfileData { textual header "llvm/ProfileData/InstrProfData.inc" textual header "llvm/ProfileData/MemProfData.inc" + textual header "llvm/ProfileData/MemProfBinaryAccessData.inc" textual header "llvm/ProfileData/MIBEntryDef.inc" } diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp index 885e06df6c390..deda32247ca44 100644 --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -10,6 +10,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" @@ -28,6 +29,7 @@ #include "llvm/Support/BalancedPartitioning.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Discriminator.h" +#include "llvm/Support/Endian.h" #include "llvm/Support/Errc.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Format.h" @@ -73,6 +75,9 @@ cl::SubCommand MergeSubcommand( "Takes several profiles and merge them together. 
See detailed " "documentation in " "https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-merge"); +cl::SubCommand + MemprofBinarySubcommand("memprof-binary", + "LLVM MemProf binary access profile data"); namespace { enum ProfileKinds { instr, sample, memory }; @@ -91,12 +96,11 @@ enum class ShowFormat { Text, Json, Yaml }; } // namespace // Common options. -cl::opt<std::string> OutputFilename("output", cl::value_desc("output"), - cl::init("-"), cl::desc("Output file"), - cl::sub(ShowSubcommand), - cl::sub(OrderSubcommand), - cl::sub(OverlapSubcommand), - cl::sub(MergeSubcommand)); +cl::opt<std::string> + OutputFilename("output", cl::value_desc("output"), cl::init("-"), + cl::desc("Output file"), cl::sub(ShowSubcommand), + cl::sub(OrderSubcommand), cl::sub(OverlapSubcommand), + cl::sub(MemprofBinarySubcommand), cl::sub(MergeSubcommand)); // NOTE: cl::alias must not have cl::sub(), since aliased option's cl::sub() // will be used. llvm::cl::alias::done() method asserts this condition. static cl::alias OutputFilenameA("o", cl::desc("Alias for --output"), @@ -635,7 +639,7 @@ class SymbolRemapper { return New.empty() ? 
Name : FunctionId(New); } }; -} +} // namespace struct WeightedFile { std::string Filename; @@ -885,13 +889,11 @@ getFuncName(const StringMap<InstrProfWriter::ProfilingData>::value_type &Val) { return Val.first(); } -static std::string -getFuncName(const SampleProfileMap::value_type &Val) { +static std::string getFuncName(const SampleProfileMap::value_type &Val) { return Val.second.getContext().toString(); } -template <typename T> -static void filterFunctions(T &ProfileMap) { +template <typename T> static void filterFunctions(T &ProfileMap) { bool hasFilter = !FuncNameFilter.empty(); bool hasNegativeFilter = !FuncNameNegativeFilter.empty(); if (!hasFilter && !hasNegativeFilter) @@ -1492,8 +1494,8 @@ remapSamples(const sampleprof::FunctionSamples &Samples, BodySample.second.getSamples()); for (const auto &Target : BodySample.second.getCallTargets()) { Result.addCalledTargetSamples(BodySample.first.LineOffset, - MaskedDiscriminator, - Remapper(Target.first), Target.second); + MaskedDiscriminator, Remapper(Target.first), + Target.second); } } for (const auto &CallsiteSamples : Samples.getCallsiteSamples()) { @@ -1510,12 +1512,8 @@ remapSamples(const sampleprof::FunctionSamples &Samples, } static sampleprof::SampleProfileFormat FormatMap[] = { - sampleprof::SPF_None, - sampleprof::SPF_Text, - sampleprof::SPF_None, - sampleprof::SPF_Ext_Binary, - sampleprof::SPF_GCC, - sampleprof::SPF_Binary}; + sampleprof::SPF_None, sampleprof::SPF_Text, sampleprof::SPF_None, + sampleprof::SPF_Ext_Binary, sampleprof::SPF_GCC, sampleprof::SPF_Binary}; static std::unique_ptr<MemoryBuffer> getInputFileBuf(const StringRef &InputFile) { @@ -3372,7 +3370,8 @@ static int show_main(StringRef ProgName) { exitWithErrorCode(EC, OutputFilename); if (ShowAllFunctions && !FuncNameFilter.empty()) - WithColor::warning() << "-function argument ignored: showing all functions\n"; + WithColor::warning() + << "-function argument ignored: showing all functions\n"; if (!DebugInfoFilename.empty()) return 
showDebugInfoCorrelation(DebugInfoFilename, SFormat, OS); @@ -3449,6 +3448,251 @@ static int order_main() { return 0; } +using SegmentEntry = ::llvm::memprof::SegmentEntry; +using BinaryAccessHeader = ::llvm::memprof::BinaryAccessHeader; + +/* + The format of the binary access profile: + // header + BinaryAccessHeader header; + // segment info + SegmentEntry entry1; + SegmentEntry entry2; + ... + // memblock addresses + u64 MemBlockAddress1; + u64 MemBlockAddress2; + ... + // end + +BinaryAccessHeader is defined in MemProfBinaryAccessData.inc +PACKED(struct BinaryAccessHeader { + uint64_t Magic; + uint64_t Version; + uint64_t TotalSize; + uint64_t SegmentOffset; + uint64_t NumSegments; + uint64_t MemAddressOffset; + uint64_t NumMemBlockAddresses; +}); +SegmentEntry is defined in MemProfData.inc + struct SegmentEntry { + uint64_t Start; // segment start address + uint64_t End; // segment end address + uint64_t Offset; // binary offset at runtime + uint64_t BuildIdSize; + uint8_t BuildId[MEMPROF_BUILDID_MAX_SIZE] = {0}; +#define MEMPROF_BUILDID_MAX_SIZE 32ULL +*/ + +static bool ShouldSwapBytes = false; +template <typename value_type> +inline value_type maybe_byte_swap(value_type value) { + if (ShouldSwapBytes) + llvm::sys::swapByteOrder(value); + return value; +} + +std::string getBuildIdString(const SegmentEntry &Entry) { + // If the build id is unset print a helpful string instead of all zeros. + if (Entry.BuildIdSize == 0) + return "<None>"; + + std::string Str; + raw_string_ostream OS(Str); + for (size_t I = 0; I < Entry.BuildIdSize; I++) { + OS << format_hex_no_prefix(Entry.BuildId[I], 2); + } + return OS.str(); +} + +// This function categorizes memory block addresses according to their +// corresponding binaries. Note that both segments and memory addresses are +// contiguous and synchronized within each binary in the buffers, although the +// arrangement of entries across different binaries may vary. 
Within each
+// binary, entries are pre-sorted in ascending order. The function identifies
+// and tracks the boundaries in the buffer for each binary using its UUID.
+// Subsequently, it outputs these sorted memory addresses as binary offsets into
+// separate files, each file being named after the binary's UUID.
+//
+// SegmentPtr/NumSegments:             the segment entries of the profile.
+// MemAddressPtr/NumMemBlockAddresses: the recorded memory block addresses.
+// OutputDirectory:                    directory receiving one file per binary;
+//                                     created if it does not exist.
+//
+// A binary without any recorded address gets an empty output file. An address
+// that does not fall into the current or any later binary makes the tool exit
+// with an error. Returns 0 on success.
+static int memprof_binary_show_covered_per_binary(
+    const SegmentEntry *SegmentPtr, uint64_t NumSegments,
+    const uint64_t *MemAddressPtr, uint64_t NumMemBlockAddresses,
+    const std::string &OutputDirectory) {
+  if (!llvm::sys::fs::is_directory(OutputDirectory))
+    if (auto EC = llvm::sys::fs::create_directories(OutputDirectory))
+      exitWithErrorCode(EC, "Failed to create the output directory: " +
+                                OutputDirectory);
+
+  struct BinaryInfo {
+    const std::string UUID; // Unique identifier for the binary
+    uint64_t Base;          // Runtime base address of the binary
+    uint64_t End; // Runtime end address of the last segment in the binary
+    uint64_t FirstAddrIdx = 0; // Index of the first address in MemAddressPtr[]
+    uint64_t LastAddrIdx = 0;  // Index of the last address in MemAddressPtr[]
+    // True once at least one address was attributed to this binary; the two
+    // indices above are only meaningful when this is set.
+    bool HasAddresses = false;
+    BinaryInfo(const std::string &UUID, uint64_t Base, uint64_t End)
+        : UUID(UUID), Base(Base), End(End) {}
+  };
+
+  // Segment entries are contiguous by binary (identified by UUID) and sorted by
+  // their start address in ascending order. This loop updates UUID and the
+  // base/end addresses for each binary.
+  SmallVector<BinaryInfo> Binaries;
+  for (uint64_t I = 0; I < NumSegments; ++I) {
+    const SegmentEntry &Entry = SegmentPtr[I];
+    std::string CurrUUID = getBuildIdString(Entry);
+    if (!Binaries.empty() && CurrUUID == Binaries.back().UUID) {
+      assert(maybe_byte_swap(Entry.Offset) == Binaries.back().Base &&
+             "Segment entries within the same binary should have the same "
+             "base address");
+      assert(maybe_byte_swap(Entry.Start) >= Binaries.back().End &&
+             "Segment entries within the same binary should not overlap and "
+             "must be sorted in ascending order.");
+      // Update the end address of the current binary.
+      Binaries.back().End = maybe_byte_swap(Entry.End);
+    } else {
+      // Create and add a new BinaryInfo instance.
+      Binaries.emplace_back(CurrUUID, maybe_byte_swap(Entry.Offset),
+                            maybe_byte_swap(Entry.End));
+    }
+  }
+
+  // Memory block addresses are also contiguous by binary and sorted in
+  // ascending order within each binary. This loop updates the first and last
+  // address indices for each binary.
+  uint64_t BinIdx = 0;
+  for (uint64_t AddrIdx = 0; AddrIdx < NumMemBlockAddresses; ++AddrIdx) {
+    uint64_t Addr = maybe_byte_swap(MemAddressPtr[AddrIdx]);
+
+    // Move forward to the binary containing this address. Binaries without
+    // any recorded address are skipped here, however many there are in a row.
+    while (BinIdx < Binaries.size() &&
+           (Addr < Binaries[BinIdx].Base || Addr >= Binaries[BinIdx].End))
+      ++BinIdx;
+    // The profile is external input, so report a malformed file even in
+    // builds where assertions are compiled out. This also covers a profile
+    // that has addresses but no segments.
+    if (BinIdx == Binaries.size())
+      exitWithError("[MemProf] A memory block address does not belong to any "
+                    "recorded binary.");
+
+    BinaryInfo &Info = Binaries[BinIdx];
+    if (!Info.HasAddresses) {
+      Info.FirstAddrIdx = AddrIdx;
+      Info.HasAddresses = true;
+    }
+    Info.LastAddrIdx = AddrIdx;
+  }
+
+  for (const BinaryInfo &Binary : Binaries) {
+    std::error_code EC;
+    std::string OutputFilenameWithUUID = OutputDirectory + "/" + Binary.UUID;
+    raw_fd_ostream OS(OutputFilenameWithUUID, EC, sys::fs::OF_TextWithCRLF);
+    if (EC)
+      exitWithErrorCode(EC, OutputFilenameWithUUID);
+
+    // Nothing was recorded for this binary: leave its file empty rather than
+    // reading MemAddressPtr[0], which belongs to another binary (or does not
+    // exist at all).
+    if (!Binary.HasAddresses)
+      continue;
+
+    for (uint64_t I = Binary.FirstAddrIdx; I <= Binary.LastAddrIdx; ++I) {
+      uint64_t Addr = maybe_byte_swap(MemAddressPtr[I]);
+      uint64_t BinaryOffset = Addr - Binary.Base;
+      OS << "0x" << llvm::utohexstr(BinaryOffset) << "\n";
+    }
+  }
+  return 0;
+}
+
+cl::opt<std::string> MemprofBinaryAccessFilename(
+    cl::Positional, cl::desc("<memprof-binary-access-profile-file>"),
+    cl::sub(MemprofBinarySubcommand));
+cl::opt<bool> ShowCoveredPerBinary(
+    "show-covered", cl::init(false),
+    cl::desc("Show runtime memory access addresses without runtime offset "
+             "per binary."),
+    cl::sub(MemprofBinarySubcommand));
+cl::opt<std::string> OutputDirectory(
+    "output-dir", cl::value_desc("output-dir"), cl::init("out"),
+    cl::desc("Output Directory for per binary memory access profiles"),
+    cl::sub(MemprofBinarySubcommand));
+
+static int memprof_binary_access_main() {
+  auto Buffer = getInputFileBuf(MemprofBinaryAccessFilename);
+  const char *Ptr = Buffer->getBufferStart();
+
+  // 1) check if the header is valid
+  if (Buffer->getBufferSize() < sizeof(BinaryAccessHeader))
+    exitWithError("[MemProf] The binary access profile header is corrupted.");
+  BinaryAccessHeader Header =
+      *reinterpret_cast<const BinaryAccessHeader *>(Ptr);
+  uint64_t SegmentOffset = maybe_byte_swap(Header.SegmentOffset);
+
uint64_t NumSegments = maybe_byte_swap(Header.NumSegments); + uint64_t MemAddressOffset = maybe_byte_swap(Header.MemAddressOffset); + uint64_t NumMemBlockAddresses = maybe_byte_swap(Header.NumMemBlockAddresses); + Ptr += sizeof(BinaryAccessHeader); + + // check if the host and target endianness match + ShouldSwapBytes = (Header.Magic != MEMPROF_BINARY_ACCESS_RAW_MAGIC_64); + if (Header.Magic != maybe_byte_swap(MEMPROF_BINARY_ACCESS_RAW_MAGIC_64)) + exitWithError("[MemProf] The binary access profile Magic is invalid."); + + // 2) check if segment entries are valid and print segment entries + if (Buffer->getBufferEnd() < Buffer->getBufferStart() + SegmentOffset + + NumSegments * sizeof(SegmentEntry)) { + exitWithError("[MemProf] The binary access profile segment is corrupted."); + } + + // 3) check if memblock addresses are valid and print memblock addresses + if (Buffer->getBufferEnd() < Buffer->getBufferStart() + MemAddressOffset + + sizeof(uint64_t) * NumMemBlockAddresses) + exitWithError("[MemProf] The binary access profile memblock is corrupted."); + + if (ShowCoveredPerBinary) { + return memprof_binary_show_covered_per_binary( + reinterpret_cast<const SegmentEntry *>(Buffer->getBufferStart() + + SegmentOffset), + NumSegments, + reinterpret_cast<const uint64_t *>(Buffer->getBufferStart() + + MemAddressOffset), + NumMemBlockAddresses, OutputDirectory); + } + + std::error_code EC; + raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_TextWithCRLF); + if (EC) + exitWithErrorCode(EC, OutputFilename); + + OS << "Header:\n"; + OS << "Version: " << maybe_byte_swap(Header.Version) << "\n"; + OS << "TotalSize: " << maybe_byte_swap(Header.TotalSize) << "\n"; + OS << "NumSegments: " << NumSegments << "\n"; + OS << "NumMemBlockAddresses: " << NumMemBlockAddresses << "\n"; + + Ptr = + reinterpret_cast<const char *>(Buffer->getBufferStart() + SegmentOffset); + while (NumSegments--) { + SegmentEntry Entry = *reinterpret_cast<const SegmentEntry *>(Ptr); + Ptr += 
sizeof(SegmentEntry); + OS << "BuildId: " << getBuildIdString(Entry) << "\n"; + OS << "Start: 0x" << llvm::utohexstr(maybe_byte_swap(Entry.Start)) << "\n"; + OS << "End: 0x" << llvm::utohexstr(maybe_byte_swap(Entry.End)) << "\n"; + OS << "Offset: 0x" << llvm::utohexstr(maybe_byte_swap(Entry.Offset)) + << "\n"; + } + + OS << "MemBlockAddresses:\n"; + Ptr = reinterpret_cast<const char *>(Buffer->getBufferStart() + + MemAddressOffset); + while (NumMemBlockAddresses--) { + uint64_t Addr = maybe_byte_swap(*reinterpret_cast<const uint64_t *>(Ptr)); + Ptr += sizeof(uint64_t); + OS << "0x" << llvm::utohexstr(Addr) << "\n"; + } + return 0; +} + int llvm_profdata_main(int argc, char **argvNonConst, const llvm::ToolContext &) { const char **argv = const_cast<const char **>(argvNonConst); @@ -3476,6 +3720,8 @@ int llvm_profdata_main(int argc, char **argvNonConst, if (MergeSubcommand) return merge_main(ProgName); + if (MemprofBinarySubcommand) + return memprof_binary_access_main(); errs() << ProgName << ": Unknown command. Run llvm-profdata --help for usage.\n"; return 1; _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits