This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 64efb7695 IMPALA-11779: Fix crash in TopNNode due to slots in null type
64efb7695 is described below

commit 64efb7695c57281a55f1b2570d6467a72260281a
Author: stiga-huang <[email protected]>
AuthorDate: Sat Dec 10 17:10:38 2022 +0800

    IMPALA-11779: Fix crash in TopNNode due to slots in null type
    
    BE can't codegen or evaluate exprs in NULL type. So when FE transfers
    exprs to BE (via thrift), it will convert exprs in NULL type into
    NullLiteral with Boolean type, e.g. see code in Expr#treeToThrift().
    The type doesn't matter since ScalarExprEvaluator::GetValue() in BE
    returns nullptr for null values of all types, and nullptr is treated as
    null value.
    
    Most of the exprs in BE are generated from thrift TExprs transferred
    from FE, which guarantees they are not NULL type exprs. However, in
    TopNPlanNode::Init(), we create SlotRefs directly based on the sort
    tuple descriptor. If there are NULL type slots in the tuple descriptor,
    we get SlotRefs in NULL type, which will crash codegen or evaluation (if
    codegen is disabled) on them.
    
    This patch adds a type-safe create method for SlotRef which uses
    TYPE_BOOLEAN for TYPE_NULL. BE codes that create SlotRef directly from
    SlotDescriptors are replaced by calling this create method, which
    guarantees no TYPE_NULL exprs are used in the corresponding evaluators.
    
    Tests:
     - Added new tests in partitioned-top-n.test
     - Ran exhaustive tests
    
    Change-Id: I6aaf80c5129eaf788c70c8f041021eaf73087f94
    Reviewed-on: http://gerrit.cloudera.org:8080/19336
    Reviewed-by: Zoltan Borok-Nagy <[email protected]>
    Tested-by: Quanlong Huang <[email protected]>
---
 be/src/exec/grouping-aggregator.cc                 |  6 ++--
 be/src/exec/topn-node.cc                           |  3 +-
 be/src/exprs/slot-ref.cc                           | 10 ++++++
 be/src/exprs/slot-ref.h                            |  4 +++
 .../queries/QueryTest/partitioned-top-n.test       | 40 ++++++++++++++++++++++
 5 files changed, 57 insertions(+), 6 deletions(-)

diff --git a/be/src/exec/grouping-aggregator.cc 
b/be/src/exec/grouping-aggregator.cc
index a1a794a5b..75425d599 100644
--- a/be/src/exec/grouping-aggregator.cc
+++ b/be/src/exec/grouping-aggregator.cc
@@ -103,10 +103,8 @@ Status GroupingAggregatorConfig::Init(
   for (int i = 0; i < grouping_exprs_.size(); ++i) {
     SlotDescriptor* desc = intermediate_tuple_desc_->slots()[i];
     DCHECK(desc->type().type == TYPE_NULL || desc->type() == 
grouping_exprs_[i]->type());
-    // Hack to avoid TYPE_NULL SlotRefs.
-    SlotRef* build_expr = state->obj_pool()->Add(desc->type().type != 
TYPE_NULL ?
-            new SlotRef(desc) :
-            new SlotRef(desc, ColumnType(TYPE_BOOLEAN)));
+    // Use SlotRef::TypeSafeCreate() to avoid TYPE_NULL SlotRefs.
+    SlotRef* build_expr = 
state->obj_pool()->Add(SlotRef::TypeSafeCreate(desc));
     build_exprs_.push_back(build_expr);
     // Not an entry point because all hash table callers support codegen.
     RETURN_IF_ERROR(
diff --git a/be/src/exec/topn-node.cc b/be/src/exec/topn-node.cc
index 72ceb885f..620b9e229 100644
--- a/be/src/exec/topn-node.cc
+++ b/be/src/exec/topn-node.cc
@@ -95,8 +95,7 @@ Status TopNPlanNode::Init(const TPlanNode& tnode, 
FragmentState* state) {
 
     // Construct SlotRefs that simply copy the output tuple to itself.
     for (const SlotDescriptor* slot_desc : output_tuple_desc_->slots()) {
-      SlotRef* slot_ref =
-          state->obj_pool()->Add(new SlotRef(slot_desc, slot_desc->type()));
+      SlotRef* slot_ref = 
state->obj_pool()->Add(SlotRef::TypeSafeCreate(slot_desc));
       noop_tuple_exprs_.push_back(slot_ref);
       RETURN_IF_ERROR(slot_ref->Init(*row_descriptor_, true, state));
     }
diff --git a/be/src/exprs/slot-ref.cc b/be/src/exprs/slot-ref.cc
index 32d4e254c..d883f68db 100644
--- a/be/src/exprs/slot-ref.cc
+++ b/be/src/exprs/slot-ref.cc
@@ -72,6 +72,16 @@ SlotRef::SlotRef(const ColumnType& type, int offset, const 
bool nullable /* = fa
     null_indicator_offset_(0, nullable ? offset : -1),
     slot_id_(-1) {}
 
+SlotRef* SlotRef::TypeSafeCreate(const SlotDescriptor* desc) {
+  if (desc->type().type == TYPE_NULL) {
+    // ScalarExprEvaluator requires a non-null type for the expr. It returns 
nullptr for
+    // null values of all types. So replacing TYPE_NULL to an arbitrary type 
is ok.
+    // Here we use TYPE_BOOLEAN for consistency with other places.
+    return new SlotRef(desc, ColumnType(TYPE_BOOLEAN));
+  }
+  return new SlotRef(desc);
+}
+
 Status SlotRef::Init(
     const RowDescriptor& row_desc, bool is_entry_point, FragmentState* state) {
   DCHECK(type_.IsStructType() || children_.size() == 0);
diff --git a/be/src/exprs/slot-ref.h b/be/src/exprs/slot-ref.h
index dfb299825..c0b1bf31a 100644
--- a/be/src/exprs/slot-ref.h
+++ b/be/src/exprs/slot-ref.h
@@ -45,6 +45,10 @@ class SlotRef : public ScalarExpr {
   /// does not generate the appropriate exprs).
   SlotRef(const SlotDescriptor* desc, const ColumnType& type);
 
+  /// Create a SlotRef based on the given SlotDescriptor 'desc' and make sure 
the type is
+  /// not TYPE_NULL (if so, replaced it with TYPE_BOOLEAN).
+  static SlotRef* TypeSafeCreate(const SlotDescriptor* desc);
+
   /// Used for testing.  GetValue will return tuple + offset interpreted as 
'type'
   SlotRef(const ColumnType& type, int offset, const bool nullable = false);
 
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/partitioned-top-n.test 
b/testdata/workloads/functional-query/queries/QueryTest/partitioned-top-n.test
index 2c29b7389..7cfff846f 100644
--- 
a/testdata/workloads/functional-query/queries/QueryTest/partitioned-top-n.test
+++ 
b/testdata/workloads/functional-query/queries/QueryTest/partitioned-top-n.test
@@ -80,3 +80,43 @@ NULL,0,1
 ---- TYPES
 TINYINT, INT, BIGINT
 ====
+---- QUERY
+# IMPALA-11779: test null slots in the sort tuple
+with v1 as (
+  select '0' as a1, '' as b1 from alltypestiny
+), v2 as (
+  select '' as a2, null as b2
+), v3 as (
+  select b1 as b
+  from v1 left join v2 on a1 = a2
+)
+select 1 from (
+  select row_number() over (partition by b order by b) rnk
+  from v3
+) v
+where rnk = 1
+---- RESULTS
+1
+---- TYPES
+TINYINT
+====
+---- QUERY
+# IMPALA-11779: test null slots in the sort tuple
+with v1 as (
+  select '0' as a1, '' as b1 from alltypes
+), v2 as (
+  select '' as a2, null as b2
+), v3 as (
+  select b1 as b
+  from v1 left join v2 on a1 = a2
+)
+select count(*) from (
+  select row_number() over (partition by b order by b) rnk
+  from v3
+) v
+where rnk < 10
+---- RESULTS
+9
+---- TYPES
+BIGINT
+====

Reply via email to