labath created this revision. labath added reviewers: clayborg, lemo, sas, davide. Herald added subscribers: arichardson, emaste. Herald added a reviewer: espindola.
The data structure is optimized for the case where the UUID size is <= 20 bytes (standard length emitted by the GNU linkers), but larger sizes are also possible. I've modified the string conversion function to support the new sizes as well. For standard UUIDs it maintains the traditional formatting (4-2-2-2-6). If a UUID is shorter, we just cut this sequence short, and for longer UUIDs it will just repeat the last 6-byte block as long as necessary. I've also modified ObjectFileELF to take advantage of the new UUIDs and avoid manually padding the UUID to 16 bytes. While there, I also made sure the computed UUID does not depend on host endianness. https://reviews.llvm.org/D48633 Files: include/lldb/Utility/UUID.h source/Interpreter/OptionValueUUID.cpp source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp source/Utility/UUID.cpp unittests/Utility/UUIDTest.cpp
Index: unittests/Utility/UUIDTest.cpp =================================================================== --- unittests/Utility/UUIDTest.cpp +++ unittests/Utility/UUIDTest.cpp @@ -71,3 +71,15 @@ 32u, u.SetFromStringRef("404142434445464748494a4b4c4d4e4f-50515253", 16)); EXPECT_EQ(UUID::fromData("@ABCDEFGHIJKLMNO", 16), u); } + +TEST(UUIDTest, StringConverion) { + EXPECT_EQ("40414243", UUID::fromData("@ABC", 4).GetAsString()); + EXPECT_EQ("40414243-4445-4647", + UUID::fromData("@ABCDEFG", 8).GetAsString()); + EXPECT_EQ("40414243-4445-4647-4849-4A4B", + UUID::fromData("@ABCDEFGHIJK", 12).GetAsString()); + EXPECT_EQ("40414243-4445-4647-4849-4A4B4C4D4E4F", + UUID::fromData("@ABCDEFGHIJKLMNO", 16).GetAsString()); + EXPECT_EQ("40414243-4445-4647-4849-4A4B4C4D4E4F-50515253", + UUID::fromData("@ABCDEFGHIJKLMNOPQRS", 20).GetAsString()); +} Index: source/Utility/UUID.cpp =================================================================== --- source/Utility/UUID.cpp +++ source/Utility/UUID.cpp @@ -13,43 +13,44 @@ // Project includes #include "lldb/Utility/Stream.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/Format.h" // C Includes #include <ctype.h> #include <stdio.h> #include <string.h> using namespace lldb_private; -UUID::UUID(llvm::ArrayRef<uint8_t> bytes) { - if (bytes.size() != 20 && bytes.size() != 16) - bytes = {}; - - m_num_uuid_bytes = bytes.size(); - std::memcpy(m_uuid, bytes.data(), bytes.size()); +// Whether to put a separator after count uuid bytes. +// For the first 16 bytes we follow the traditional UUID format. After that, we +// simply put a dash after every 6 bytes. +static inline bool separate(size_t count) { + if (count >= 10) + return (count - 10) % 6 == 0; + + switch(count) { + case 4: + case 6: + case 8: + return true; + default: + return false; + } } -std::string UUID::GetAsString(const char *separator) const { +std::string UUID::GetAsString(llvm::StringRef separator) const { std::string result; - char buf[256]; - if (!separator) - separator = "-"; - const uint8_t *u = GetBytes().data(); - if (sizeof(buf) > - (size_t)snprintf(buf, sizeof(buf), "%2.2X%2.2X%2.2X%2.2X%s%2.2X%2.2X%s%2." - "2X%2.2X%s%2.2X%2.2X%s%2.2X%2.2X%2.2X%" - "2.2X%2.2X%2.2X", - u[0], u[1], u[2], u[3], separator, u[4], u[5], separator, - u[6], u[7], separator, u[8], u[9], separator, u[10], - u[11], u[12], u[13], u[14], u[15])) { - result.append(buf); - if (m_num_uuid_bytes == 20) { - if (sizeof(buf) > (size_t)snprintf(buf, sizeof(buf), - "%s%2.2X%2.2X%2.2X%2.2X", separator, - u[16], u[17], u[18], u[19])) - result.append(buf); - } + llvm::raw_string_ostream os(result); + + for(auto B: llvm::enumerate(GetBytes())) { + if (separate(B.index())) + os << separator; + + os << llvm::format_hex_no_prefix(B.value(), 2, true); } + os.flush(); + return result; } @@ -64,25 +65,24 @@ return ch - '0'; } -llvm::StringRef UUID::DecodeUUIDBytesFromString(llvm::StringRef p, - ValueType &uuid_bytes, - uint32_t &bytes_decoded, - uint32_t num_uuid_bytes) { - ::memset(uuid_bytes, 0, sizeof(uuid_bytes)); - size_t uuid_byte_idx = 0; +llvm::StringRef +UUID::DecodeUUIDBytesFromString(llvm::StringRef p, + llvm::SmallVectorImpl<uint8_t> &uuid_bytes, + uint32_t num_uuid_bytes) { + uuid_bytes.clear(); while (!p.empty()) { if (isxdigit(p[0]) && isxdigit(p[1])) { int hi_nibble = xdigit_to_int(p[0]); int lo_nibble = xdigit_to_int(p[1]); // Translate the two hex nibble characters into a byte - uuid_bytes[uuid_byte_idx] = (hi_nibble << 4) + lo_nibble; + uuid_bytes.push_back((hi_nibble << 4) + lo_nibble); // Skip both hex digits p = p.drop_front(2); // Increment the byte that we are decoding within the UUID value and // break out if we are done - if (++uuid_byte_idx == num_uuid_bytes) + if (uuid_bytes.size() == num_uuid_bytes) break; } else if (p.front() == '-') { // Skip dashes @@ -92,11 +92,6 @@ break; } } - - // Clear trailing bytes to 0. - for (uint32_t i = uuid_byte_idx; i < sizeof(ValueType); i++) - uuid_bytes[i] = 0; - bytes_decoded = uuid_byte_idx; return p; } @@ -106,52 +101,17 @@ // Skip leading whitespace characters p = p.ltrim(); - ValueType bytes; - uint32_t bytes_decoded = 0; + llvm::SmallVector<uint8_t, 20> bytes; llvm::StringRef rest = - UUID::DecodeUUIDBytesFromString(p, bytes, bytes_decoded, num_uuid_bytes); + UUID::DecodeUUIDBytesFromString(p, bytes, num_uuid_bytes); // If we successfully decoded a UUID, return the amount of characters that // were consumed - if (bytes_decoded == num_uuid_bytes) { - *this = fromData(bytes, bytes_decoded); + if (bytes.size() == num_uuid_bytes) { + *this = fromData(bytes); return str.size() - rest.size(); } // Else return zero to indicate we were not able to parse a UUID value return 0; } - -bool lldb_private::operator==(const lldb_private::UUID &lhs, - const lldb_private::UUID &rhs) { - return lhs.GetBytes() == rhs.GetBytes(); -} - -bool lldb_private::operator!=(const lldb_private::UUID &lhs, - const lldb_private::UUID &rhs) { - return !(lhs == rhs); -} - -bool lldb_private::operator<(const lldb_private::UUID &lhs, - const lldb_private::UUID &rhs) { - if (lhs.GetBytes().size() != rhs.GetBytes().size()) - return lhs.GetBytes().size() < rhs.GetBytes().size(); - - return std::memcmp(lhs.GetBytes().data(), rhs.GetBytes().data(), - lhs.GetBytes().size()); -} - -bool lldb_private::operator<=(const lldb_private::UUID &lhs, - const lldb_private::UUID &rhs) { - return !(lhs > rhs); -} - -bool lldb_private::operator>(const lldb_private::UUID &lhs, - const lldb_private::UUID &rhs) { - return rhs < lhs; -} - -bool lldb_private::operator>=(const lldb_private::UUID &lhs, - const lldb_private::UUID &rhs) { - return !(lhs < rhs); -} Index: source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp =================================================================== --- source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp +++ source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp @@ -730,16 +730,17 @@ data.GetDataStart(), data.GetByteSize()); } } + using u32le = llvm::support::ulittle32_t; if (gnu_debuglink_crc) { // Use 4 bytes of crc from the .gnu_debuglink section. - uint32_t uuidt[4] = {gnu_debuglink_crc, 0, 0, 0}; - uuid = UUID::fromData(uuidt, sizeof(uuidt)); + u32le data(gnu_debuglink_crc); + uuid = UUID::fromData(&data, sizeof(data)); } else if (core_notes_crc) { // Use 8 bytes - first 4 bytes for *magic* prefix, mainly to make // it look different form .gnu_debuglink crc followed by 4 bytes // of note segments crc. - uint32_t uuidt[4] = {g_core_uuid_magic, core_notes_crc, 0, 0}; - uuid = UUID::fromData(uuidt, sizeof(uuidt)); + u32le data[] = {u32le(g_core_uuid_magic), u32le(core_notes_crc)}; + uuid = UUID::fromData(data, sizeof(data)); } } @@ -909,6 +910,7 @@ if (!ParseSectionHeaders() && GetType() != ObjectFile::eTypeCoreFile) return false; + using u32le = llvm::support::ulittle32_t; if (m_uuid.IsValid()) { // We have the full build id uuid. *uuid = m_uuid; @@ -925,17 +927,17 @@ // Use 8 bytes - first 4 bytes for *magic* prefix, mainly to make it look // different form .gnu_debuglink crc - followed by 4 bytes of note // segments crc. - uint32_t uuidt[4] = {g_core_uuid_magic, core_notes_crc, 0, 0}; - m_uuid = UUID::fromData(uuidt, sizeof(uuidt)); + u32le data[] = {u32le(g_core_uuid_magic), u32le(core_notes_crc)}; + m_uuid = UUID::fromData(data, sizeof(data)); } } else { if (!m_gnu_debuglink_crc) m_gnu_debuglink_crc = calc_gnu_debuglink_crc32(m_data.GetDataStart(), m_data.GetByteSize()); if (m_gnu_debuglink_crc) { // Use 4 bytes of crc from the .gnu_debuglink section. - uint32_t uuidt[4] = {m_gnu_debuglink_crc, 0, 0, 0}; - m_uuid = UUID::fromData(uuidt, sizeof(uuidt)); + u32le data(m_gnu_debuglink_crc); + m_uuid = UUID::fromData(&data, sizeof(data)); } } @@ -1273,18 +1275,16 @@ // Only bother processing this if we don't already have the uuid set. if (!uuid.IsValid()) { // 16 bytes is UUID|MD5, 20 bytes is SHA1. Other linkers may produce a - // build-id of a different - // length. Accept it as long as it's at least 4 bytes as it will be - // better than our own crc32. - if (note.n_descsz >= 4 && note.n_descsz <= 20) { - uint8_t uuidbuf[20]; - if (data.GetU8(&offset, &uuidbuf, note.n_descsz) == nullptr) { + // build-id of a different length. Accept it as long as it's at least + // 4 bytes as it will be better than our own crc32. + if (note.n_descsz >= 4) { + if(const uint8_t *buf = data.PeekData(offset, note.n_descsz)) { + // Save the build id as the UUID for the module. + uuid = UUID::fromData(buf, note.n_descsz); + } else { error.SetErrorString("failed to read GNU_BUILD_ID note payload"); return error; } - - // Save the build id as the UUID for the module. - uuid = UUID::fromData(uuidbuf, note.n_descsz); } } break; Index: source/Interpreter/OptionValueUUID.cpp =================================================================== --- source/Interpreter/OptionValueUUID.cpp +++ source/Interpreter/OptionValueUUID.cpp @@ -78,19 +78,16 @@ if (target) { const size_t num_modules = target->GetImages().GetSize(); if (num_modules > 0) { - UUID::ValueType uuid_bytes; - uint32_t num_bytes_decoded = 0; - UUID::DecodeUUIDBytesFromString(s, uuid_bytes, num_bytes_decoded); + llvm::SmallVector<uint8_t, 20> uuid_bytes; + UUID::DecodeUUIDBytesFromString(s, uuid_bytes); for (size_t i = 0; i < num_modules; ++i) { ModuleSP module_sp(target->GetImages().GetModuleAtIndex(i)); if (module_sp) { const UUID &module_uuid = module_sp->GetUUID(); if (module_uuid.IsValid()) { - llvm::ArrayRef<uint8_t> decoded_bytes(uuid_bytes, - num_bytes_decoded); llvm::ArrayRef<uint8_t> module_bytes = module_uuid.GetBytes(); - if (module_bytes.size() >= num_bytes_decoded && - module_bytes.take_front(num_bytes_decoded) == decoded_bytes) { + if (module_bytes.size() >= uuid_bytes.size() && + module_bytes.take_front(uuid_bytes.size()) == uuid_bytes) { matches.AppendString(module_uuid.GetAsString()); } } Index: include/lldb/Utility/UUID.h =================================================================== --- include/lldb/Utility/UUID.h +++ include/lldb/Utility/UUID.h @@ -27,12 +27,6 @@ class UUID { public: - // Most UUIDs are 16 bytes, but some Linux build-ids (SHA1) are 20. - typedef uint8_t ValueType[20]; - - //------------------------------------------------------------------ - // Constructors and Destructors - //------------------------------------------------------------------ UUID() = default; /// Creates a UUID from the data pointed to by the bytes argument. No special @@ -64,18 +58,16 @@ return UUID(bytes); } - void Clear() { m_num_uuid_bytes = 0; } + void Clear() { m_bytes.clear(); } void Dump(Stream *s) const; - llvm::ArrayRef<uint8_t> GetBytes() const { - return {m_uuid, m_num_uuid_bytes}; - } + llvm::ArrayRef<uint8_t> GetBytes() const { return m_bytes; } explicit operator bool() const { return IsValid(); } - bool IsValid() const { return m_num_uuid_bytes > 0; } + bool IsValid() const { return !m_bytes.empty(); } - std::string GetAsString(const char *separator = nullptr) const; + std::string GetAsString(llvm::StringRef separator = "-") const; size_t SetFromStringRef(llvm::StringRef str, uint32_t num_uuid_bytes = 16); @@ -99,24 +91,36 @@ /// The original string, with all decoded bytes removed. //------------------------------------------------------------------ static llvm::StringRef - DecodeUUIDBytesFromString(llvm::StringRef str, ValueType &uuid_bytes, - uint32_t &bytes_decoded, + DecodeUUIDBytesFromString(llvm::StringRef str, + llvm::SmallVectorImpl<uint8_t> &uuid_bytes, uint32_t num_uuid_bytes = 16); private: - UUID(llvm::ArrayRef<uint8_t> bytes); + UUID(llvm::ArrayRef<uint8_t> bytes) : m_bytes(bytes.begin(), bytes.end()) {} - uint32_t m_num_uuid_bytes = 0; // Should be 0, 16 or 20 - ValueType m_uuid; -}; - -bool operator==(const UUID &lhs, const UUID &rhs); -bool operator!=(const UUID &lhs, const UUID &rhs); -bool operator<(const UUID &lhs, const UUID &rhs); -bool operator<=(const UUID &lhs, const UUID &rhs); -bool operator>(const UUID &lhs, const UUID &rhs); -bool operator>=(const UUID &lhs, const UUID &rhs); + // GNU ld generates 20-byte build-ids. Size chosen to avoid heap allocations + // for this case. + llvm::SmallVector<uint8_t, 20> m_bytes; + friend bool operator==(const UUID &LHS, const UUID &RHS) { + return LHS.m_bytes == RHS.m_bytes; + } + friend bool operator!=(const UUID &LHS, const UUID &RHS) { + return !(LHS == RHS); + } + friend bool operator<(const UUID &LHS, const UUID &RHS) { + return LHS.m_bytes < RHS.m_bytes; + } + friend bool operator<=(const UUID &LHS, const UUID &RHS) { + return !(RHS < LHS); + } + friend bool operator>(const UUID &LHS, const UUID &RHS) { + return RHS < LHS; + } + friend bool operator>=(const UUID &LHS, const UUID &RHS) { + return !(LHS < RHS); + } +}; } // namespace lldb_private #endif // LLDB_UTILITY_UUID_H
_______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits