teemperor created this revision. teemperor added reviewers: dvlahovski, zturner.
It's possible to hit an unaligned memory read when reading `source_length`, because the `data` array is only aligned to 2 bytes (it's actually a UTF-16 array). This patch memcpy's `source_length` into a local variable to avoid the following error: MinidumpTypes.cpp:49:23: runtime error: load of misaligned address 0x7f0f4792692a for type 'const uint32_t' (aka 'const unsigned int'), which requires 4 byte alignment https://reviews.llvm.org/D42348 Files: source/Plugins/Process/minidump/MinidumpTypes.cpp Index: source/Plugins/Process/minidump/MinidumpTypes.cpp =================================================================== --- source/Plugins/Process/minidump/MinidumpTypes.cpp +++ source/Plugins/Process/minidump/MinidumpTypes.cpp @@ -44,19 +44,24 @@ lldb_private::minidump::parseMinidumpString(llvm::ArrayRef<uint8_t> &data) { std::string result; - const uint32_t *source_length; - Status error = consumeObject(data, source_length); - if (error.Fail() || *source_length > data.size() || *source_length % 2 != 0) + const uint32_t *source_length_ptr; + Status error = consumeObject(data, source_length_ptr); + + // Copy non-aligned source_length data into aligned memory. 
+ uint32_t source_length; + std::memcpy(&source_length, source_length_ptr, sizeof(source_length)); + + if (error.Fail() || source_length > data.size() || source_length % 2 != 0) return llvm::None; auto source_start = reinterpret_cast<const llvm::UTF16 *>(data.data()); // source_length is the length of the string in bytes // we need the length of the string in UTF-16 characters/code points (16 bits // per char) // that's why it's divided by 2 - const auto source_end = source_start + (*source_length) / 2; + const auto source_end = source_start + source_length / 2; // resize to worst case length - result.resize(UNI_MAX_UTF8_BYTES_PER_CODE_POINT * (*source_length) / 2); + result.resize(UNI_MAX_UTF8_BYTES_PER_CODE_POINT * source_length / 2); auto result_start = reinterpret_cast<llvm::UTF8 *>(&result[0]); const auto result_end = result_start + result.size(); llvm::ConvertUTF16toUTF8(&source_start, source_end, &result_start, result_end,
Index: source/Plugins/Process/minidump/MinidumpTypes.cpp =================================================================== --- source/Plugins/Process/minidump/MinidumpTypes.cpp +++ source/Plugins/Process/minidump/MinidumpTypes.cpp @@ -44,19 +44,24 @@ lldb_private::minidump::parseMinidumpString(llvm::ArrayRef<uint8_t> &data) { std::string result; - const uint32_t *source_length; - Status error = consumeObject(data, source_length); - if (error.Fail() || *source_length > data.size() || *source_length % 2 != 0) + const uint32_t *source_length_ptr; + Status error = consumeObject(data, source_length_ptr); + + // Copy non-aligned source_length data into aligned memory. + uint32_t source_length; + std::memcpy(&source_length, source_length_ptr, sizeof(source_length)); + + if (error.Fail() || source_length > data.size() || source_length % 2 != 0) return llvm::None; auto source_start = reinterpret_cast<const llvm::UTF16 *>(data.data()); // source_length is the length of the string in bytes // we need the length of the string in UTF-16 characters/code points (16 bits // per char) // that's why it's divided by 2 - const auto source_end = source_start + (*source_length) / 2; + const auto source_end = source_start + source_length / 2; // resize to worst case length - result.resize(UNI_MAX_UTF8_BYTES_PER_CODE_POINT * (*source_length) / 2); + result.resize(UNI_MAX_UTF8_BYTES_PER_CODE_POINT * source_length / 2); auto result_start = reinterpret_cast<llvm::UTF8 *>(&result[0]); const auto result_end = result_start + result.size(); llvm::ConvertUTF16toUTF8(&source_start, source_end, &result_start, result_end,
_______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits