This is an automated email from the ASF dual-hosted git repository.

dbecker pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 75797e8ec146d6118fabff27397e2e8dbcd2ebdd
Author: Daniel Becker <[email protected]>
AuthorDate: Tue Oct 3 14:06:55 2023 +0200

    IMPALA-12478: Invalid length in StringValue::LeastSmallerString()
    
    Before this change, in StringValue::LeastSmallerString() we did not
    check whether '*this' was an empty string and used 'len - 1' as the size
    of a new std::string, which could lead to an exception. This change adds
    a check for 'len == 0' and we return an empty string in that case.
    
    Also, the function name should actually be LargestSmallerString(), as we
    are interested in the largest string that is smaller than '*this', cf.
    StringValue::LeastLargerString(). This patch renames the function.
    
    Testing:
     - Expanded the relevant unit test in string-value-test.cc with the test
       of an empty string.
    
    Change-Id: I0888f89070a1efeb9efefd1c3ca96dfa873942fd
    Reviewed-on: http://gerrit.cloudera.org:8080/20579
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 be/src/runtime/string-value-test.cc | 17 ++++++++++-------
 be/src/runtime/string-value.cc      |  3 ++-
 be/src/runtime/string-value.h       |  7 ++++---
 be/src/util/min-max-filter-ir.cc    |  2 +-
 4 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/be/src/runtime/string-value-test.cc 
b/be/src/runtime/string-value-test.cc
index 84d21e202..a5cf10f90 100644
--- a/be/src/runtime/string-value-test.cc
+++ b/be/src/runtime/string-value-test.cc
@@ -131,20 +131,23 @@ TEST(StringValueTest, TestConvertToUInt64) {
   EXPECT_EQ(StringValue("\1\2\3\4\5\6\7\7\7").ToUInt64(), 0x102030405060707);
 }
 
-// Test finding the least smaller strings.
-TEST(StringValueTest, TestLeastSmallerString) {
+// Test finding the largest smaller strings.
+TEST(StringValueTest, TestLargestSmallerString) {
   string oneKbNullStr(1024, 0x00);
   string a1023NullStr(1023, 0x00);
-  EXPECT_EQ(StringValue(oneKbNullStr).LeastSmallerString(), a1023NullStr);
+  EXPECT_EQ(StringValue(oneKbNullStr).LargestSmallerString(), a1023NullStr);
 
   EXPECT_EQ(
-      StringValue(string("\x12\xef", 2)).LeastSmallerString(), 
string("\x12\xee"));
+      StringValue(string("\x12\xef", 2)).LargestSmallerString(), 
string("\x12\xee"));
   EXPECT_EQ(
-      StringValue(string("\x12\x00", 2)).LeastSmallerString(), string("\x12"));
+      StringValue(string("\x12\x00", 2)).LargestSmallerString(), 
string("\x12"));
 
-  // "0x00" is the smallest string.
+  // "0x00" is the smallest non-empty string.
   string oneNullStr("\00", 1);
-  EXPECT_EQ(StringValue(oneNullStr).LeastSmallerString(), "");
+  EXPECT_EQ(StringValue(oneNullStr).LargestSmallerString(), "");
+
+  // The empty string is the absolute smallest string.
+  EXPECT_EQ(StringValue("").LargestSmallerString(), "");
 }
 
 // Test finding the least larger strings.
diff --git a/be/src/runtime/string-value.cc b/be/src/runtime/string-value.cc
index b701feea9..1519d42eb 100644
--- a/be/src/runtime/string-value.cc
+++ b/be/src/runtime/string-value.cc
@@ -45,7 +45,8 @@ uint64_t StringValue::ToUInt64() const {
       | static_cast<uint64_t>(bytes[6]) << 8 | static_cast<uint64_t>(bytes[7]);
 }
 
-string StringValue::LeastSmallerString() const {
+string StringValue::LargestSmallerString() const {
+  if (len == 0) return "";
   if (len == 1 && ptr[0] == 0x00) return "";
 
   int i = len - 1;
diff --git a/be/src/runtime/string-value.h b/be/src/runtime/string-value.h
index cc20261a2..725c80dd7 100644
--- a/be/src/runtime/string-value.h
+++ b/be/src/runtime/string-value.h
@@ -125,9 +125,10 @@ struct __attribute__((__packed__)) StringValue {
   /// Returns number of characters in a char array (ignores trailing spaces)
   inline static int64_t UnpaddedCharLength(const char* cptr, int64_t len);
 
-  // Return the least smaller string 'result' such that 'result' < 'this'.
-  // The smallest string is "\x00". If no such string exists, return an empty 
string.
-  std::string LeastSmallerString() const;
+  // Return the largest smaller string 'result' such that 'result' < 'this'. 
If no such
+  // string exists, return an empty string. The smallest non-empty string is 
"\x00" and
+  // the absolute smallest string is the empty string.
+  std::string LargestSmallerString() const;
 
   // Return the least larger string 'result' such that 'this' < 'result'.
   std::string LeastLargerString() const;
diff --git a/be/src/util/min-max-filter-ir.cc b/be/src/util/min-max-filter-ir.cc
index dd1188d67..1ecdf1c17 100644
--- a/be/src/util/min-max-filter-ir.cc
+++ b/be/src/util/min-max-filter-ir.cc
@@ -136,7 +136,7 @@ void StringMinMaxFilter::InsertForLE(const void* val) {
 void StringMinMaxFilter::InsertForLT(const void* val) {
   if (LIKELY(val)) {
     std::string result =
-        reinterpret_cast<const StringValue*>(val)->LeastSmallerString();
+        reinterpret_cast<const StringValue*>(val)->LargestSmallerString();
     if (result.size() > 0) {
       UpdateMax(StringValue(result));
     } else {

Reply via email to