This is an automated email from the ASF dual-hosted git repository. michaelsmith pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 3428469547179828bb520c8f3a859e4fd25ca5fe Author: Zoltan Borok-Nagy <[email protected]> AuthorDate: Wed Nov 22 12:56:04 2023 +0100 IMPALA-12571: Fix errors related to small strings found by Clang sanitizer builds TSAN and ASAN builds found errors in the Small String Optimization patch. TSAN error: When multiple scanner threads create template tuples they invoke Tuple::DeepCopy() concurrently which invoke StringValue::Smallify(), hence we end up having a data race on the StringValue object. This CR changes Tuple::DeepCopy() to only modify the copied tuple and leave the source tuple untouched. ASAN error: This was a bug in a backend test code. StringValues referred to temporary std::string objects. Testing: * verified the solution in TSAN build * verified the solution in ASAN build Change-Id: I51d48b79bde0a5ae0103a780f9c09f3003ed157e Reviewed-on: http://gerrit.cloudera.org:8080/20723 Reviewed-by: Michael Smith <[email protected]> Tested-by: Michael Smith <[email protected]> --- be/src/runtime/string-value-test.cc | 18 +++++++++++------- be/src/runtime/string-value.cc | 3 ++- be/src/runtime/tuple.cc | 2 +- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/be/src/runtime/string-value-test.cc b/be/src/runtime/string-value-test.cc index d05e1ce52..0f9e83e36 100644 --- a/be/src/runtime/string-value-test.cc +++ b/be/src/runtime/string-value-test.cc @@ -160,16 +160,20 @@ void TestConvertToUInt64Impl(StringValue* svs) { TEST(StringValueTest, TestConvertToUInt64) { // Test converting StringValues to uint64_t which utilizes up to first 8 bytes. const int NUM_STRINGS = 7; + string strings[NUM_STRINGS]; + strings[0] = ""; + strings[1] = "\1"; + strings[2] = "\1\2"; + strings[3] = "\1\2\3"; + strings[4] = "\1\2\3\4\5\6\7\7"; + strings[5] = "\1\2\3\4\5\6\7\7\7"; + strings[6] = "\1\2\3\4\5\6\7\7\7\7\7\7\7\7\7"; // Must be in lexical order StringValue svs[NUM_STRINGS]; - svs[0] = FromStdString(""); - svs[1] = FromStdString("\1"); - svs[2] = FromStdString("\1\2"); - svs[3] = FromStdString("\1\2\3"); - svs[4] = FromStdString("\1\2\3\4\5\6\7\7"); - svs[5] = FromStdString("\1\2\3\4\5\6\7\7\7"); - svs[6] = FromStdString("\1\2\3\4\5\6\7\7\7\7\7\7\7\7\7"); + for (int i = 0; i < NUM_STRINGS; ++i) { + svs[i] = FromStdString(strings[i]); + } TestConvertToUInt64Impl(svs); for (int i = 0; i < NUM_STRINGS; ++i) { diff --git a/be/src/runtime/string-value.cc b/be/src/runtime/string-value.cc index 834f1475f..c6a367f5c 100644 --- a/be/src/runtime/string-value.cc +++ b/be/src/runtime/string-value.cc @@ -36,9 +36,10 @@ uint64_t StringValue::ToUInt64() const { unsigned char bytes[8]; *((uint64_t*)bytes) = 0; uint32_t len = Len(); + const char* ptr = Ptr(); int chars_to_copy = (len < 8) ? len : 8; for (int i = 0; i < chars_to_copy; i++) { - bytes[i] = static_cast<unsigned char>(Ptr()[i]); + bytes[i] = static_cast<unsigned char>(ptr[i]); } return static_cast<uint64_t>(bytes[0]) << 56 | static_cast<uint64_t>(bytes[1]) << 48 | static_cast<uint64_t>(bytes[2]) << 40 | static_cast<uint64_t>(bytes[3]) << 32 diff --git a/be/src/runtime/tuple.cc b/be/src/runtime/tuple.cc index e28848e1a..ff79c852d 100644 --- a/be/src/runtime/tuple.cc +++ b/be/src/runtime/tuple.cc @@ -107,8 +107,8 @@ Tuple* Tuple::DeepCopy(const TupleDescriptor& desc, MemPool* pool) { // overhead. void Tuple::DeepCopy(Tuple* dst, const TupleDescriptor& desc, MemPool* pool) { if (desc.HasVarlenSlots()) { - SmallifyStrings(desc); memcpy(dst, this, desc.byte_size()); + dst->SmallifyStrings(desc); dst->DeepCopyVarlenData(desc, pool); } else { memcpy(dst, this, desc.byte_size());
