This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch branch-3.4.2
in repository https://gitbox.apache.org/repos/asf/impala.git

commit d7209700b984a35327e0c3ab75fb5aa351f722fc
Author: Csaba Ringhofer <[email protected]>
AuthorDate: Mon Mar 30 20:15:09 2020 +0200

    IMPALA-9572: Fix DCHECK in nested Parquet scanning
    
    The issue occurred when there were skipped pages and a column
    inside a collection was scanned, but its position was not needed.
    The repetition level still needs to be read in this case, as the
    skipped ranges are set in top level rows, so collection items
    need to know which top level row do they belong to.
    
    A DCHECK in StrideWriter's constructor was hit, otherwise the
    code ran correctly in release mode. The DCHECK is moved to
    functions where the condition would actually cause problems.
    
    Testing:
    - added and ran a regression test
    
    Change-Id: I5e8ef514ead71f732c73f910af7fd1aecd37bb81
    Reviewed-on: http://gerrit.cloudera.org:8080/15598
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 be/src/exec/parquet/parquet-column-readers.cc                    | 2 +-
 be/src/util/mem-util.h                                           | 9 ++++++++-
 .../queries/QueryTest/nested-types-parquet-page-index.test       | 9 +++++++++
 3 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/be/src/exec/parquet/parquet-column-readers.cc 
b/be/src/exec/parquet/parquet-column-readers.cc
index 0a4a753d3..f5b46bc8b 100644
--- a/be/src/exec/parquet/parquet-column-readers.cc
+++ b/be/src/exec/parquet/parquet-column-readers.cc
@@ -1320,7 +1320,7 @@ int 
BaseScalarColumnReader::FillPositionsInCandidateRange(int rows_remaining,
     int rep_level = rep_levels_.CacheGetNext();
     if (rep_level == 0) ++row_count;
     ++val_count;
-    if (pos_slot_desc_ != nullptr) {
+    if (pos_writer.IsValid()) {
       if (rep_level <= max_rep_level() - 1) pos_current_value_ = 0;
       *pos_writer.Advance() = pos_current_value_++;
     }
diff --git a/be/src/util/mem-util.h b/be/src/util/mem-util.h
index a7125a984..16e808574 100644
--- a/be/src/util/mem-util.h
+++ b/be/src/util/mem-util.h
@@ -31,17 +31,21 @@ namespace impala {
 template <typename T>
 struct StrideWriter {
   // The next element to write to. May not be aligned to the natural alignment 
of T.
+  // Can be null for convenience, but the StrideWriter cannot be used in that 
case.
   T* current;
 
   // The stride in bytes between subsequent values.
   int64_t stride;
 
   explicit StrideWriter(T* current, int64_t stride) : current(current), 
stride(stride) {
-    DCHECK(current != nullptr);
   }
 
+  /// No other functions can be called if false.
+  ALWAYS_INLINE bool IsValid() const { return current != nullptr; }
+
   /// Set the next element to 'val' and advance 'current' to the next element.
   ALWAYS_INLINE void SetNext(T& val) {
+    DCHECK(IsValid());
     // memcpy() is necessary because 'current' may not be aligned.
     memcpy(current, &val, sizeof(T));
     SkipNext(1);
@@ -49,6 +53,7 @@ struct StrideWriter {
 
   /// Return a pointer to the current element and advance 'current' to the 
next element.
   ALWAYS_INLINE T* Advance() {
+    DCHECK(IsValid());
     T* curr = current;
     SkipNext(1);
     return curr;
@@ -56,11 +61,13 @@ struct StrideWriter {
 
   /// Set the next 'count' elements to 'val', advancing 'current' by 'count' 
values.
   void SetNext(T& val, int64_t count) {
+    DCHECK(IsValid());
     for (int64_t i = 0; i < count; ++i) SetNext(val);
   }
 
   /// Advance 'current' by 'count' values.
   ALWAYS_INLINE void SkipNext(int64_t count) {
+    DCHECK(IsValid());
     DCHECK_GE(count, 0);
     current = reinterpret_cast<T*>(reinterpret_cast<uint8_t*>(current) + 
stride * count);
   }
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/nested-types-parquet-page-index.test
 
b/testdata/workloads/functional-query/queries/QueryTest/nested-types-parquet-page-index.test
index 8c85b575d..8a372f782 100644
--- 
a/testdata/workloads/functional-query/queries/QueryTest/nested-types-parquet-page-index.test
+++ 
b/testdata/workloads/functional-query/queries/QueryTest/nested-types-parquet-page-index.test
@@ -702,3 +702,12 @@ BIGINT, DECIMAL
 ---- RUNTIME_PROFILE
 aggregation(SUM, NumStatsFilteredPages): 6
 ====
+---- QUERY
+# Regression test for IMPALA-9572.
+select count(l_partkey) from tpch_nested_parquet.customer.c_orders.o_lineitems
+where l_partkey < 10
+---- RESULTS
+263
+---- TYPES
+BIGINT
+====

Reply via email to