jasonmolenda updated this revision to Diff 196776. jasonmolenda added a comment.
Fixed one small bug in the jitted expressions. Increased the estimated average size of class names that lldb uses to pre-allocate the string pool buffer. Added a method to read the string pool out of the inferior, to reduce code duplication; it also reads the final class name so that the full size of the string pool is computed correctly. Repository: rLLDB LLDB CHANGES SINCE LAST ACTION https://reviews.llvm.org/D60957/new/ https://reviews.llvm.org/D60957 Files: AppleObjCRuntimeV2.cpp AppleObjCRuntimeV2.h
Index: AppleObjCRuntimeV2.h =================================================================== --- AppleObjCRuntimeV2.h +++ AppleObjCRuntimeV2.h @@ -297,7 +297,8 @@ UpdateISAToDescriptorMapDynamic(RemoteNXMapTable &hash_table); uint32_t ParseClassInfoArray(const lldb_private::DataExtractor &data, - uint32_t num_class_infos); + uint32_t num_class_infos, + lldb::DataBufferSP strpool_buffer_sp); DescriptorMapUpdateResult UpdateISAToDescriptorMapSharedCache(); @@ -314,6 +315,10 @@ friend class ClassDescriptorV2; + lldb::DataBufferSP ReadClassNameStringPool (lldb::addr_t string_pool_addr, + lldb_private::DataExtractor *class_data, + uint32_t num_class_infos); + std::unique_ptr<UtilityFunction> m_get_class_info_code; lldb::addr_t m_get_class_info_args; std::mutex m_get_class_info_args_mutex; Index: AppleObjCRuntimeV2.cpp =================================================================== --- AppleObjCRuntimeV2.cpp +++ AppleObjCRuntimeV2.cpp @@ -79,10 +79,11 @@ extern "C" { size_t strlen(const char *); - char *strncpy (char * s1, const char * s2, size_t n); + char *strcpy (char * dst, const char * src); int printf(const char * format, ...); } #define DEBUG_PRINTF(fmt, ...) 
if (should_log) printf(fmt, ## __VA_ARGS__) +#define UINT32_MAX 0xffffffff typedef struct _NXMapTable { void *prototype; @@ -103,17 +104,22 @@ { Class isa; uint32_t hash; + uint32_t stroffset; } __attribute__((__packed__)); uint32_t __lldb_apple_objc_v2_get_dynamic_class_info (void *gdb_objc_realized_classes_ptr, void *class_infos_ptr, uint32_t class_infos_byte_size, + void *string_pool_ptr, + uint32_t string_pool_byte_size, uint32_t should_log) { DEBUG_PRINTF ("gdb_objc_realized_classes_ptr = %p\n", gdb_objc_realized_classes_ptr); DEBUG_PRINTF ("class_infos_ptr = %p\n", class_infos_ptr); DEBUG_PRINTF ("class_infos_byte_size = %u\n", class_infos_byte_size); + DEBUG_PRINTF ("string_pool_ptr = %p\n", string_pool_ptr); + DEBUG_PRINTF ("string_pool_byte_size = %u\n", string_pool_byte_size); const NXMapTable *grc = (const NXMapTable *)gdb_objc_realized_classes_ptr; if (grc) { @@ -123,7 +129,8 @@ const size_t max_class_infos = class_infos_byte_size/sizeof(ClassInfo); ClassInfo *class_infos = (ClassInfo *)class_infos_ptr; BucketInfo *buckets = (BucketInfo *)grc->buckets; - + char *string_pool_base = (char *)string_pool_ptr; + uint32_t current_strpool_offset = 0; uint32_t idx = 0; for (unsigned i=0; i<=grc->num_buckets_minus_one; ++i) { @@ -131,12 +138,27 @@ { if (idx < max_class_infos) { - const char *s = buckets[i].name_ptr; + const char *name = buckets[i].name_ptr; + const char *s = name; uint32_t h = 5381; for (unsigned char c = *s; c; c = *++s) h = ((h << 5) + h) + c; class_infos[idx].hash = h; class_infos[idx].isa = buckets[i].isa; + class_infos[idx].stroffset = UINT32_MAX; // default to no strpool offset + if (string_pool_base && string_pool_byte_size != 0 && name && h != 0) + { + const int name_len = strlen (name); + const int remaining_strpool = string_pool_byte_size - current_strpool_offset; + + if (name_len > 0 && remaining_strpool > (name_len + 1)) + { + DEBUG_PRINTF ("[%u] name %s copied into stringpool offset %u\n", idx, name, current_strpool_offset); + 
strcpy (string_pool_base + current_strpool_offset, name); + class_infos[idx].stroffset = current_strpool_offset; + current_strpool_offset += name_len + 1; + } + } } ++idx; } @@ -145,6 +167,7 @@ { class_infos[idx].isa = NULL; class_infos[idx].hash = 0; + class_infos[idx].stroffset = UINT32_MAX; // no strpool offset } } return num_classes; @@ -164,13 +187,13 @@ { const char *class_getName(void *objc_class); size_t strlen(const char *); - char *strncpy (char * s1, const char * s2, size_t n); + char *strcpy (char * dst, const char * src); int printf(const char * format, ...); } #define DEBUG_PRINTF(fmt, ...) if (should_log) printf(fmt, ## __VA_ARGS__) +#define UINT32_MAX 0xffffffff - struct objc_classheader_t { int32_t clsOffset; int32_t hiOffset; @@ -212,18 +235,23 @@ { Class isa; uint32_t hash; + uint32_t stroffset; } __attribute__((__packed__)); uint32_t __lldb_apple_objc_v2_get_shared_cache_class_info (void *objc_opt_ro_ptr, void *class_infos_ptr, uint32_t class_infos_byte_size, + void *string_pool_ptr, + uint32_t string_pool_byte_size, uint32_t should_log) { uint32_t idx = 0; DEBUG_PRINTF ("objc_opt_ro_ptr = %p\n", objc_opt_ro_ptr); DEBUG_PRINTF ("class_infos_ptr = %p\n", class_infos_ptr); + DEBUG_PRINTF ("string_pool_ptr = %p\n", string_pool_ptr); DEBUG_PRINTF ("class_infos_byte_size = %u (%llu class infos)\n", class_infos_byte_size, (uint64_t)(class_infos_byte_size/sizeof(ClassInfo))); + DEBUG_PRINTF ("string_pool_byte_size = %u\n", string_pool_byte_size); if (objc_opt_ro_ptr) { const objc_opt_t *objc_opt = (objc_opt_t *)objc_opt_ro_ptr; @@ -254,6 +282,8 @@ const size_t max_class_infos = class_infos_byte_size/sizeof(ClassInfo); DEBUG_PRINTF("max_class_infos = %llu\n", (uint64_t)max_class_infos); ClassInfo *class_infos = (ClassInfo *)class_infos_ptr; + char *string_pool_base = (char *)string_pool_ptr; + uint32_t current_strpool_offset = 0; int32_t invalidEntryOffset = 0; // this is safe to do because the version field order is invariant if (objc_opt->version == 
12) @@ -289,8 +319,25 @@ const char *s = name; uint32_t h = 5381; for (unsigned char c = *s; c; c = *++s) + { + // See comment in ParseClassInfoArray + if (c == '.') { h = 0; break; } h = ((h << 5) + h) + c; + } class_infos[idx].hash = h; + class_infos[idx].stroffset = UINT32_MAX; // default to no strpool offset + if (string_pool_base && string_pool_byte_size != 0 && name && h != 0) + { + const int name_len = strlen (name); + const int remaining_strpool = string_pool_byte_size - current_strpool_offset; + if (name_len > 0 && remaining_strpool > (name_len + 1)) + { + DEBUG_PRINTF ("[%u] name %s copied into stringpool offset %u\n", idx, name, current_strpool_offset); + strcpy (string_pool_base + current_strpool_offset, name); + class_infos[idx].stroffset = current_strpool_offset; + current_strpool_offset += name_len + 1; + } + } } else { @@ -321,8 +368,25 @@ const char *s = name; uint32_t h = 5381; for (unsigned char c = *s; c; c = *++s) + { + // See comment in ParseClassInfoArray + if (c == '.') { h = 0; break; } h = ((h << 5) + h) + c; + } class_infos[idx].hash = h; + class_infos[idx].stroffset = UINT32_MAX; // default to no strpool offset + if (string_pool_base && string_pool_byte_size != 0 && name && h != 0) + { + const int name_len = strlen (name); + const int remaining_strpool = string_pool_byte_size - current_strpool_offset; + if (name_len > 0 && remaining_strpool > (name_len + 1)) + { + DEBUG_PRINTF ("[%u] name %s copied into stringpool offset %u\n", idx, name, current_strpool_offset); + strcpy (string_pool_base + current_strpool_offset, name); + class_infos[idx].stroffset = current_strpool_offset; + current_strpool_offset += name_len + 1; + } + } } ++idx; } @@ -1246,6 +1310,56 @@ return m_isa_hash_table_ptr; } +// Iterate through the ClassInfo tuples in the class_data array, +// find the entry with the highest stroffset value, add the length +// of the final class name string at stroffset, and read the entire +// block of memory in large read packets. 
+DataBufferSP +AppleObjCRuntimeV2::ReadClassNameStringPool (addr_t string_pool_addr, + DataExtractor *class_infos_data, + uint32_t num_class_infos) { + DataBufferSP strpool_buffer_sp; + + if (string_pool_addr && + string_pool_addr != LLDB_INVALID_ADDRESS + && class_infos_data) { + uint32_t max_offset_seen = 0; + const int address_size = class_infos_data->GetAddressByteSize(); + offset_t offset = 0; + + for (uint32_t i = 0; i < num_class_infos; ++i) { + offset += address_size + 4; /* isa + hash */ + uint32_t strpool_offset = class_infos_data->GetU32(&offset); + if (strpool_offset != UINT32_MAX & strpool_offset > max_offset_seen) + max_offset_seen = strpool_offset; + } + if (max_offset_seen != 0) { + Process *process = GetProcess(); + + // The highest string offset we find points to the final string in + // the buffer - but will not fetch the final string. Do one extra + // read to find the size of that final string so we have all of them + // in a single buffer. + + std::string last_class_name; + Status err; + size_t string_size = process->ReadCStringFromMemory( + string_pool_addr + max_offset_seen, last_class_name, err); + if (string_size > 0 && err.Success()) { + max_offset_seen += last_class_name.length() + 1; + } + + strpool_buffer_sp.reset(new DataBufferHeap (max_offset_seen, 0)); + if (process->ReadMemory (string_pool_addr, strpool_buffer_sp->GetBytes(), + strpool_buffer_sp->GetByteSize(), err) != + strpool_buffer_sp->GetByteSize()) { + strpool_buffer_sp.reset(); + } + } + } + return strpool_buffer_sp; +} + AppleObjCRuntimeV2::DescriptorMapUpdateResult AppleObjCRuntimeV2::UpdateISAToDescriptorMapDynamic( RemoteNXMapTable &hash_table) { @@ -1322,16 +1436,19 @@ return DescriptorMapUpdateResult::Fail(); // Next make the runner function for our implementation utility function. 
- Value value; - value.SetValueType(Value::eValueTypeScalar); - value.SetCompilerType(clang_void_pointer_type); - arguments.PushValue(value); - arguments.PushValue(value); + Value ptr_value; + Value uint32_value; + ptr_value.SetValueType(Value::eValueTypeScalar); + ptr_value.SetCompilerType(clang_void_pointer_type); + uint32_value.SetValueType(Value::eValueTypeScalar); + uint32_value.SetCompilerType(clang_uint32_t_type); - value.SetValueType(Value::eValueTypeScalar); - value.SetCompilerType(clang_uint32_t_type); - arguments.PushValue(value); - arguments.PushValue(value); + arguments.PushValue(ptr_value); // void *gdb_objc_realized_classes_ptr + arguments.PushValue(ptr_value); // void *class_infos_ptr + arguments.PushValue(uint32_value); // uint32_t class_infos_byte_size + arguments.PushValue(ptr_value); // void *string_pool_ptr + arguments.PushValue(uint32_value); // uint32_t string_pool_byte_size + arguments.PushValue(uint32_value); // uint32_t should_log get_class_info_function = m_get_class_info_code->MakeFunctionCaller( clang_uint32_t_type, arguments, thread_sp, error); @@ -1358,11 +1475,21 @@ diagnostics.Clear(); - const uint32_t class_info_byte_size = addr_size + 4; + const uint32_t class_info_byte_size = addr_size + + 4 /* hash */ + + 4 /* stroffset */; const uint32_t class_infos_byte_size = num_classes * class_info_byte_size; lldb::addr_t class_infos_addr = process->AllocateMemory( class_infos_byte_size, ePermissionsReadable | ePermissionsWritable, err); + // Assume classes names will be less than 48 chars on average. If + // we run out of space, entries will have an offset of UINT32_MAX + // and lldb will read the class names out of memory individually. + // Actual average is around 23 characters per class name in 2019. 
+ const uint32_t string_pool_byte_size = num_classes * 48; + addr_t string_pool_addr = process->AllocateMemory( + string_pool_byte_size, ePermissionsReadable | ePermissionsWritable, err); + if (class_infos_addr == LLDB_INVALID_ADDRESS) { if (log) log->Printf("unable to allocate %" PRIu32 @@ -1371,19 +1498,30 @@ return DescriptorMapUpdateResult::Fail(); } + if (string_pool_addr == LLDB_INVALID_ADDRESS) { + if (log) + log->Printf("unable to allocate %" PRIu32 + " bytes in process for shared cache string_pool read, will use slow method", + string_pool_byte_size); + // NB: representing invalid address as 0 for simplicity of checking in jitted expr + string_pool_addr = 0; + } + std::lock_guard<std::mutex> guard(m_get_class_info_args_mutex); // Fill in our function argument values arguments.GetValueAtIndex(0)->GetScalar() = hash_table.GetTableLoadAddress(); arguments.GetValueAtIndex(1)->GetScalar() = class_infos_addr; arguments.GetValueAtIndex(2)->GetScalar() = class_infos_byte_size; + arguments.GetValueAtIndex(3)->GetScalar() = string_pool_addr; + arguments.GetValueAtIndex(4)->GetScalar() = string_pool_byte_size; // Only dump the runtime classes from the expression evaluation if the log is // verbose: Log *type_log = GetLogIfAllCategoriesSet(LIBLLDB_LOG_TYPES); bool dump_log = type_log && type_log->GetVerbose(); - arguments.GetValueAtIndex(3)->GetScalar() = dump_log ? 1 : 0; + arguments.GetValueAtIndex(5)->GetScalar() = dump_log ? 
1 : 0; bool success = false; @@ -1427,7 +1565,11 @@ DataExtractor class_infos_data(buffer.GetBytes(), buffer.GetByteSize(), process->GetByteOrder(), addr_size); - ParseClassInfoArray(class_infos_data, num_class_infos); + + DataBufferSP strpool_buffer_sp = ReadClassNameStringPool (string_pool_addr, + &class_infos_data, num_class_infos); + + ParseClassInfoArray(class_infos_data, num_class_infos, strpool_buffer_sp); } } success = true; @@ -1446,25 +1588,27 @@ // Deallocate the memory we allocated for the ClassInfo array process->DeallocateMemory(class_infos_addr); + process->DeallocateMemory(string_pool_addr); return DescriptorMapUpdateResult(success, num_class_infos); } uint32_t AppleObjCRuntimeV2::ParseClassInfoArray(const DataExtractor &data, - uint32_t num_class_infos) { + uint32_t num_class_infos, + DataBufferSP strpool_buffer_sp) { // Parses an array of "num_class_infos" packed ClassInfo structures: // // struct ClassInfo // { // Class isa; // uint32_t hash; + // uint32_t stroffset; // } __attribute__((__packed__)); Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_TYPES)); bool should_log = log && log->GetVerbose(); uint32_t num_parsed = 0; - // Iterate through all ClassInfo structures lldb::offset_t offset = 0; for (uint32_t i = 0; i < num_class_infos; ++i) { @@ -1483,18 +1627,54 @@ log->Printf("AppleObjCRuntimeV2 found cached isa=0x%" PRIx64 ", ignoring this class info", isa); - offset += 4; + offset += 4; // hash + offset += 4; // stroffset } else { // Read the 32 bit hash for the class name const uint32_t name_hash = data.GetU32(&offset); - ClassDescriptorSP descriptor_sp(new ClassDescriptorV2(*this, isa, NULL)); - AddClass(isa, descriptor_sp, name_hash); + const uint32_t strpool_offset = data.GetU32(&offset); + + bool name_read_from_strpool = false; + const char *name = nullptr; + if (strpool_buffer_sp.get() + && strpool_offset != UINT32_MAX + && strpool_buffer_sp->GetByteSize() > strpool_offset) { + name = (const char*) strpool_buffer_sp->GetBytes() + 
strpool_offset; + name_read_from_strpool = true; + } + ClassDescriptorSP descriptor_sp(new ClassDescriptorV2(*this, isa, name)); + + // The code in g_get_shared_cache_class_info_body sets the value of the hash + // to 0 to signal a mangled symbol. We use class_getName() in that code to + // find the class name, but this returns a demangled name for Swift symbols. + // For those symbols, recompute the hash here by extracing their name from the + // runtime (this is slow and cannot be done generally for the 45000+ symbols + // of the shared cache. + + if (name_hash) { + AddClass(isa, descriptor_sp, name_hash); + } else { + if (name == nullptr) { + name = descriptor_sp->GetClassName().AsCString(nullptr); + } + AddClass(isa, descriptor_sp, name); + } num_parsed++; - if (should_log) - log->Printf("AppleObjCRuntimeV2 added isa=0x%" PRIx64 - ", hash=0x%8.8x, name=%s", - isa, name_hash, - descriptor_sp->GetClassName().AsCString("<unknown>")); + if (should_log) { + if (name == nullptr) + name = descriptor_sp->GetClassName().AsCString(nullptr); + char name_from_strpool_buf[80]; + if (name_read_from_strpool) { + snprintf (name_from_strpool_buf, sizeof (name_from_strpool_buf), + "read from strpool offset %u", strpool_offset); + } else { + strcpy (name_from_strpool_buf, "read directly from memory, not via strpool"); + } + log->Printf("AppleObjCRuntimeV2 added [%d] isa=0x%" PRIx64 + ", hash=0x%8.8x, name=%s %s", + i, isa, name_hash, + name, name_from_strpool_buf); + } } } if (should_log) @@ -1579,18 +1759,19 @@ return DescriptorMapUpdateResult::Fail(); // Next make the function caller for our implementation utility function. 
- Value value; - value.SetValueType(Value::eValueTypeScalar); - // value.SetContext (Value::eContextTypeClangType, clang_void_pointer_type); - value.SetCompilerType(clang_void_pointer_type); - arguments.PushValue(value); - arguments.PushValue(value); + Value ptr_value; + Value uint32_value; + ptr_value.SetValueType(Value::eValueTypeScalar); + ptr_value.SetCompilerType(clang_void_pointer_type); + uint32_value.SetValueType(Value::eValueTypeScalar); + uint32_value.SetCompilerType(clang_uint32_t_type); - value.SetValueType(Value::eValueTypeScalar); - // value.SetContext (Value::eContextTypeClangType, clang_uint32_t_type); - value.SetCompilerType(clang_uint32_t_type); - arguments.PushValue(value); - arguments.PushValue(value); + arguments.PushValue(ptr_value); // void *gdb_objc_realized_classes_ptr + arguments.PushValue(ptr_value); // void *class_infos_ptr + arguments.PushValue(uint32_value); // uint32_t class_infos_byte_size + arguments.PushValue(ptr_value); // void *string_pool_ptr + arguments.PushValue(uint32_value); // uint32_t string_pool_byte_size + arguments.PushValue(uint32_value); // uint32_t should_log get_shared_cache_class_info_function = m_get_shared_cache_class_info_code->MakeFunctionCaller( @@ -1609,11 +1790,21 @@ diagnostics.Clear(); - const uint32_t class_info_byte_size = addr_size + 4; + const uint32_t class_info_byte_size = addr_size + + 4 /* hash */ + + 4 /* stroffset */; const uint32_t class_infos_byte_size = num_classes * class_info_byte_size; lldb::addr_t class_infos_addr = process->AllocateMemory( class_infos_byte_size, ePermissionsReadable | ePermissionsWritable, err); + // Assume classes names will be less than 48 chars on average. If + // we run out of space, entries will have an offset of UINT32_MAX + // and lldb will read the class names out of memory individually. + // Actual average is around 23 characters per class name in 2019. 
+ const uint32_t string_pool_byte_size = num_classes * 48; + addr_t string_pool_addr = process->AllocateMemory( + string_pool_byte_size, ePermissionsReadable | ePermissionsWritable, err); + if (class_infos_addr == LLDB_INVALID_ADDRESS) { if (log) log->Printf("unable to allocate %" PRIu32 @@ -1622,18 +1813,29 @@ return DescriptorMapUpdateResult::Fail(); } + if (string_pool_addr == LLDB_INVALID_ADDRESS) { + if (log) + log->Printf("unable to allocate %" PRIu32 + " bytes in process for shared cache string_pool read, will use slow method", + string_pool_byte_size); + // NB: representing invalid address as 0 for simplicity of checking in jitted expr + string_pool_addr = 0; + } + std::lock_guard<std::mutex> guard(m_get_shared_cache_class_info_args_mutex); // Fill in our function argument values arguments.GetValueAtIndex(0)->GetScalar() = objc_opt_ptr; arguments.GetValueAtIndex(1)->GetScalar() = class_infos_addr; arguments.GetValueAtIndex(2)->GetScalar() = class_infos_byte_size; + arguments.GetValueAtIndex(3)->GetScalar() = string_pool_addr; + arguments.GetValueAtIndex(4)->GetScalar() = string_pool_byte_size; // Only dump the runtime classes from the expression evaluation if the log is // verbose: Log *type_log = GetLogIfAllCategoriesSet(LIBLLDB_LOG_TYPES); bool dump_log = type_log && type_log->GetVerbose(); - arguments.GetValueAtIndex(3)->GetScalar() = dump_log ? 1 : 0; + arguments.GetValueAtIndex(5)->GetScalar() = dump_log ? 
1 : 0; bool success = false; @@ -1691,7 +1893,10 @@ buffer.GetByteSize(), process->GetByteOrder(), addr_size); - ParseClassInfoArray(class_infos_data, num_class_infos); + DataBufferSP strpool_buffer_sp = ReadClassNameStringPool (string_pool_addr, + &class_infos_data, num_class_infos); + + ParseClassInfoArray(class_infos_data, num_class_infos, strpool_buffer_sp); } } else { success = true; @@ -1711,6 +1916,7 @@ // Deallocate the memory we allocated for the ClassInfo array process->DeallocateMemory(class_infos_addr); + process->DeallocateMemory(string_pool_addr); return DescriptorMapUpdateResult(success, num_class_infos); } @@ -2428,12 +2634,12 @@ ObjCISA isa, ObjCISA &ret_isa) { Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_TYPES)); + if ((isa & ~m_objc_debug_isa_class_mask) == 0) + return false; + if (log) log->Printf("AOCRT::NPI Evalulate(isa = 0x%" PRIx64 ")", (uint64_t)isa); - if ((isa & ~m_objc_debug_isa_class_mask) == 0) - return false; - // If all of the indexed ISA variables are set, then its possible that this // ISA is indexed, and we should first try to get its value using the index. // Note, we check these variables first as the ObjC runtime will set at least
_______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits