This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 3428469547179828bb520c8f3a859e4fd25ca5fe
Author: Zoltan Borok-Nagy <[email protected]>
AuthorDate: Wed Nov 22 12:56:04 2023 +0100

    IMPALA-12571: Fix errors related to small strings found by Clang sanitizer 
builds
    
    TSAN and ASAN builds found errors in the Small String Optimization
    patch.
    
    TSAN error:
    
    When multiple scanner threads create template tuples they
    invoke Tuple::DeepCopy() concurrently which invoke
    StringValue::Smallify(), hence we end up having a data race
    on the StringValue object. This CR changes Tuple::DeepCopy()
    to only modify the copied tuple and leave the source tuple untouched.
    
    ASAN error:
    
    This was a bug in a backend test code. StringValues referred to
    temporary std::string objects.
    
    Testing:
     * verified the solution in TSAN build
     * verified the solution in ASAN build
    
    Change-Id: I51d48b79bde0a5ae0103a780f9c09f3003ed157e
    Reviewed-on: http://gerrit.cloudera.org:8080/20723
    Reviewed-by: Michael Smith <[email protected]>
    Tested-by: Michael Smith <[email protected]>
---
 be/src/runtime/string-value-test.cc | 18 +++++++++++-------
 be/src/runtime/string-value.cc      |  3 ++-
 be/src/runtime/tuple.cc             |  2 +-
 3 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/be/src/runtime/string-value-test.cc 
b/be/src/runtime/string-value-test.cc
index d05e1ce52..0f9e83e36 100644
--- a/be/src/runtime/string-value-test.cc
+++ b/be/src/runtime/string-value-test.cc
@@ -160,16 +160,20 @@ void TestConvertToUInt64Impl(StringValue* svs) {
 TEST(StringValueTest, TestConvertToUInt64) {
   // Test converting StringValues to uint64_t which utilizes up to first 8 
bytes.
   const int NUM_STRINGS = 7;
+  string strings[NUM_STRINGS];
+  strings[0] = "";
+  strings[1] = "\1";
+  strings[2] = "\1\2";
+  strings[3] = "\1\2\3";
+  strings[4] = "\1\2\3\4\5\6\7\7";
+  strings[5] = "\1\2\3\4\5\6\7\7\7";
+  strings[6] = "\1\2\3\4\5\6\7\7\7\7\7\7\7\7\7";
 
   // Must be in lexical order
   StringValue svs[NUM_STRINGS];
-  svs[0] = FromStdString("");
-  svs[1] = FromStdString("\1");
-  svs[2] = FromStdString("\1\2");
-  svs[3] = FromStdString("\1\2\3");
-  svs[4] = FromStdString("\1\2\3\4\5\6\7\7");
-  svs[5] = FromStdString("\1\2\3\4\5\6\7\7\7");
-  svs[6] = FromStdString("\1\2\3\4\5\6\7\7\7\7\7\7\7\7\7");
+  for (int i = 0; i < NUM_STRINGS; ++i) {
+    svs[i] = FromStdString(strings[i]);
+  }
 
   TestConvertToUInt64Impl(svs);
   for (int i = 0; i < NUM_STRINGS; ++i) {
diff --git a/be/src/runtime/string-value.cc b/be/src/runtime/string-value.cc
index 834f1475f..c6a367f5c 100644
--- a/be/src/runtime/string-value.cc
+++ b/be/src/runtime/string-value.cc
@@ -36,9 +36,10 @@ uint64_t StringValue::ToUInt64() const {
   unsigned char bytes[8];
   *((uint64_t*)bytes) = 0;
   uint32_t len = Len();
+  const char* ptr = Ptr();
   int chars_to_copy = (len < 8) ? len : 8;
   for (int i = 0; i < chars_to_copy; i++) {
-    bytes[i] = static_cast<unsigned char>(Ptr()[i]);
+    bytes[i] = static_cast<unsigned char>(ptr[i]);
   }
   return static_cast<uint64_t>(bytes[0]) << 56 | 
static_cast<uint64_t>(bytes[1]) << 48
       | static_cast<uint64_t>(bytes[2]) << 40 | 
static_cast<uint64_t>(bytes[3]) << 32
diff --git a/be/src/runtime/tuple.cc b/be/src/runtime/tuple.cc
index e28848e1a..ff79c852d 100644
--- a/be/src/runtime/tuple.cc
+++ b/be/src/runtime/tuple.cc
@@ -107,8 +107,8 @@ Tuple* Tuple::DeepCopy(const TupleDescriptor& desc, 
MemPool* pool) {
 // overhead.
 void Tuple::DeepCopy(Tuple* dst, const TupleDescriptor& desc, MemPool* pool) {
   if (desc.HasVarlenSlots()) {
-    SmallifyStrings(desc);
     memcpy(dst, this, desc.byte_size());
+    dst->SmallifyStrings(desc);
     dst->DeepCopyVarlenData(desc, pool);
   } else {
     memcpy(dst, this, desc.byte_size());

Reply via email to