This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new bb464c56979 [fix](ub) undefined behavior in FixedContainer (#39191) (#41088)
bb464c56979 is described below

commit bb464c56979ce3390c48aa04e0342272dfea66de
Author: Jerry Hu <mrh...@gmail.com>
AuthorDate: Sun Sep 22 13:39:48 2024 +0800

    [fix](ub) undefined behavior in FixedContainer (#39191) (#41088)
---
 be/src/exprs/hybrid_set.h                          | 35 ++++++++++++++++++++++
 be/src/vec/functions/in.h                          |  2 +-
 .../data/nereids_syntax_p0/inpredicate.out         |  9 ++++++
 .../suites/nereids_syntax_p0/inpredicate.groovy    | 16 ++++++++++
 4 files changed, 61 insertions(+), 1 deletion(-)

diff --git a/be/src/exprs/hybrid_set.h b/be/src/exprs/hybrid_set.h
index 5bb5b9d69c4..d1a1eb44c2a 100644
--- a/be/src/exprs/hybrid_set.h
+++ b/be/src/exprs/hybrid_set.h
@@ -17,7 +17,13 @@
 
 #pragma once
 
+#include <glog/logging.h>
+
+#include <type_traits>
+
+#include "common/exception.h"
 #include "common/object_pool.h"
+#include "common/status.h"
 #include "exprs/runtime_filter.h"
 #include "runtime/decimalv2_value.h"
 #include "runtime/define_primitive_type.h"
@@ -60,8 +66,16 @@ public:
         }
     }
 
+    void check_size() {
+        if (N != _size) {
+            throw doris::Exception(ErrorCode::INTERNAL_ERROR,
+                                   "invalid size of FixedContainer<{}>: {}", N, _size);
+        }
+    }
+
     // Use '|' instead of '||' has better performance by test.
     ALWAYS_INLINE bool find(const T& value) const {
+        DCHECK_EQ(N, _size);
         if constexpr (N == 0) {
             return false;
         }
@@ -144,6 +158,12 @@ private:
     size_t _size;
 };
 
+template <typename T>
+struct IsFixedContainer : std::false_type {};
+
+template <typename T, size_t N>
+struct IsFixedContainer<FixedContainer<T, N>> : std::true_type {};
+
 /**
  * Dynamic Container uses phmap::flat_hash_set.
  * @tparam T Element Type
@@ -351,6 +371,11 @@ public:
         if constexpr (is_nullable) {
             null_map_data = null_map->data();
         }
+
+        if constexpr (IsFixedContainer<ContainerType>::value) {
+            _set.check_size();
+        }
+
         auto* __restrict result_data = results.data();
         for (size_t i = 0; i < rows; ++i) {
             if constexpr (!is_nullable && !is_negative) {
@@ -466,6 +491,11 @@ public:
         if constexpr (is_nullable) {
             null_map_data = null_map->data();
         }
+
+        if constexpr (IsFixedContainer<ContainerType>::value) {
+            _set.check_size();
+        }
+
         auto* __restrict result_data = results.data();
         for (size_t i = 0; i < rows; ++i) {
             const auto& string_data = col.get_data_at(i).to_string();
@@ -596,6 +626,11 @@ public:
         if constexpr (is_nullable) {
             null_map_data = null_map->data();
         }
+
+        if constexpr (IsFixedContainer<ContainerType>::value) {
+            _set.check_size();
+        }
+
         auto* __restrict result_data = results.data();
         for (size_t i = 0; i < rows; ++i) {
             uint32_t len = offset[i] - offset[i - 1];
diff --git a/be/src/vec/functions/in.h b/be/src/vec/functions/in.h
index de6e72d0747..aa7e1b03085 100644
--- a/be/src/vec/functions/in.h
+++ b/be/src/vec/functions/in.h
@@ -120,7 +120,7 @@ public:
                    context->get_arg_type(0)->type == PrimitiveType::TYPE_VARCHAR ||
                    context->get_arg_type(0)->type == PrimitiveType::TYPE_STRING) {
             // the StringValue's memory is held by FunctionContext, so we can use StringValueSet here directly
-            state->hybrid_set.reset(create_string_value_set((size_t)(context->get_num_args() - 1)));
+            state->hybrid_set.reset(create_string_value_set(get_size_with_out_null(context)));
         } else {
             state->hybrid_set.reset(
                     create_set(context->get_arg_type(0)->type, get_size_with_out_null(context)));
diff --git a/regression-test/data/nereids_syntax_p0/inpredicate.out b/regression-test/data/nereids_syntax_p0/inpredicate.out
index cee03178b5c..ac6219c69ce 100644
--- a/regression-test/data/nereids_syntax_p0/inpredicate.out
+++ b/regression-test/data/nereids_syntax_p0/inpredicate.out
@@ -31,3 +31,12 @@
 29     Supplier#000000029      VVSymB3fbwaN    ARGENTINA4      ARGENTINA       AMERICA 11-773-203-7342
 9      Supplier#000000009      ,gJ6K2MKveYxQT  IRAN     6      IRAN    MIDDLE EAST     20-338-906-3675
 
+-- !in_predicate_11 --
+15     Supplier#000000015      DF35PepL5saAK   INDIA    0      INDIA   ASIA    18-687-542-7601
+
+-- !in_predicate_12 --
+
+-- !in_predicate_13 --
+
+-- !in_predicate_14 --
+
diff --git a/regression-test/suites/nereids_syntax_p0/inpredicate.groovy b/regression-test/suites/nereids_syntax_p0/inpredicate.groovy
index 3cdf096519c..bf4ec9787f9 100644
--- a/regression-test/suites/nereids_syntax_p0/inpredicate.groovy
+++ b/regression-test/suites/nereids_syntax_p0/inpredicate.groovy
@@ -61,5 +61,21 @@ suite("inpredicate") {
     order_qt_in_predicate_10 """
         SELECT * FROM supplier WHERE s_suppkey not in (15);
     """
+
+    order_qt_in_predicate_11 """
+        SELECT * FROM supplier WHERE s_suppkey in (15, null);
+    """
+
+    order_qt_in_predicate_12 """
+        SELECT * FROM supplier WHERE s_suppkey not in (15, null);
+    """
+
+    order_qt_in_predicate_13 """
+        SELECT * FROM supplier WHERE s_nation in ('PERU', 'ETHIOPIA', null);
+    """
+
+    order_qt_in_predicate_14 """
+        SELECT * FROM supplier WHERE s_nation not in ('PERU', 'ETHIOPIA', null);
+    """
 }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to