================ @@ -0,0 +1,222 @@ +//===--- ProfileFuncRef.h - Sample profile function name ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the ProfileFuncRef class. It is used to represent function +// names in a sample profile, which can be in one of two forms - either a +// regular string, or a 64-bit hash code. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_PROFILEDATA_PROFILEFUNCREF_H +#define LLVM_PROFILEDATA_PROFILEFUNCREF_H + +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/MD5.h" +#include "llvm/Support/raw_ostream.h" +#include <cstdint> + +namespace llvm { +namespace sampleprof { + +/// This class represents a function name that is read from a sample profile. It +/// comes in two forms: a string or a hash code. For efficient storage, a +/// sample profile may store function names as 64-bit MD5 values, so when +/// reading the profile, this class can represent them without converting them to +/// strings first. +/// When representing a hash code, we use the LengthOrHashCode field to store it, and +/// Data is set to null. When representing a string, it is the same as StringRef, +/// and can be pointer-casted as one. +/// We disallow implicit cast to StringRef because there are too many places +/// where it could break code, such as when it is used in a StringMap. +class ProfileFuncRef { + + const char *Data = nullptr; + + /// Use uint64_t instead of size_t so that it can also hold an MD5 value. + uint64_t LengthOrHashCode = 0; + + /// Extension to memcmp to handle hash code representation. 
If both are hash + /// values, Lhs and Rhs are both null, so the function returns 0 (and the caller needs an extra + /// comparison using getIntValue). If only one is a hash code, it is considered + /// less than the StringRef one. Otherwise perform normal string comparison. + static int compareMemory(const char *Lhs, const char *Rhs, uint64_t Length) { + if (Lhs == Rhs) + return 0; + if (!Lhs) + return -1; + if (!Rhs) + return 1; + return ::memcmp(Lhs, Rhs, (size_t)Length); + } + +public: + ProfileFuncRef() = default; + + /// Constructor from a StringRef. Check if Str is a number, which is generated + /// by converting an MD5 sample profile to a format that does not support MD5, + /// and if so, convert the numerical string to a hash code first. We assume + /// that no function name (from a profile) can be a pure number. + explicit ProfileFuncRef(StringRef Str) + : Data(Str.data()), LengthOrHashCode(Str.size()) { + // Only need to check for base 10 digits, fail faster if otherwise. + if (Str.size() > 0 && isdigit(Str[0]) && + !Str.getAsInteger(10, LengthOrHashCode)) + Data = nullptr; + } + + /// Constructor from a hash code. + explicit ProfileFuncRef(uint64_t HashCode) + : Data(nullptr), LengthOrHashCode(HashCode) { + assert(HashCode != 0); + } + + /// Check for equality. Similar to StringRef::equals, but also covers + /// the case where one or both are hash codes. When both are hash codes, comparing their int values is + /// sufficient. A hash code ProfileFuncRef is considered not equal to a + /// StringRef ProfileFuncRef regardless of actual contents. + bool equals(const ProfileFuncRef &Other) const { + return LengthOrHashCode == Other.LengthOrHashCode && + compareMemory(Data, Other.Data, LengthOrHashCode) == 0; + } + + /// Total order comparison. If both ProfileFuncRef are StringRef, this is the + /// same as StringRef::compare. If only one of them is a StringRef, it is considered + /// greater than the hash code ProfileFuncRef. Otherwise this is the same + /// as comparing their int values. 
+ int compare(const ProfileFuncRef &Other) const { + auto Res = compareMemory( + Data, Other.Data, std::min(LengthOrHashCode, Other.LengthOrHashCode)); + if (Res != 0) + return Res; + if (LengthOrHashCode == Other.LengthOrHashCode) + return 0; + return LengthOrHashCode < Other.LengthOrHashCode ? -1 : 1; + } + + /// Convert to a string, usually for output purpose. + std::string str() const { + if (Data) + return std::string(Data, LengthOrHashCode); + if (LengthOrHashCode != 0) + return std::to_string(LengthOrHashCode); + return std::string(); + } + + /// Convert to StringRef, a backing buffer must be provided in case this + /// object represents a hash code, which will be converted to a string into + /// the buffer. If this object represents a StringRef, the buffer is not used. + StringRef stringRef(std::string &Buffer) const { + if (Data) + return StringRef(Data, LengthOrHashCode); + if (LengthOrHashCode != 0) { + Buffer = std::to_string(LengthOrHashCode); + return Buffer; + } + return StringRef(); + } ---------------- MatzeB wrote:
This API feels clunky. Would it be possible to just have a `name()` (potentially returning StringRef() if there is only a hash code) and a `std::optional<uint64_t> hashcode()` if necessary? This way you can also avoid the buffering logic that may or may not fit the caller's needs... https://github.com/llvm/llvm-project/pull/66164 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits