This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new e9e1e2894bf [performance](variant) support topn 2phase read for 
variant column (#28318)
e9e1e2894bf is described below

commit e9e1e2894bfa729613d4e3c48a543dbb54b72452
Author: lihangyu <15605149...@163.com>
AuthorDate: Mon Dec 25 11:50:41 2023 +0800

    [performance](variant) support topn 2phase read for variant column (#28318)
    
     [performance](variant) support topn 2phase read for variant column
---
 .../rowset/segment_v2/hierarchical_data_reader.h   |  2 +-
 be/src/olap/rowset/segment_v2/segment.cpp          | 67 +++++++++++++--
 be/src/olap/rowset/segment_v2/segment.h            | 14 +++-
 be/src/olap/rowset/segment_v2/segment_iterator.cpp |  3 +-
 be/src/olap/rowset/segment_v2/segment_iterator.h   |  4 +-
 be/src/olap/tablet_schema.cpp                      | 14 ++++
 be/src/olap/tablet_schema.h                        |  6 ++
 be/src/runtime/descriptors.cpp                     |  4 +
 be/src/service/internal_service.cpp                | 94 ++++++++++++++--------
 be/src/vec/columns/column_object.cpp               |  9 ++-
 be/src/vec/exec/scan/new_olap_scanner.cpp          | 30 ++-----
 be/src/vec/exec/scan/new_olap_scanner.h            |  1 -
 be/src/vec/json/path_in_data.cpp                   | 11 +++
 be/src/vec/json/path_in_data.h                     |  1 +
 .../java/org/apache/doris/analysis/SelectStmt.java |  3 +-
 gensrc/proto/descriptors.proto                     |  1 +
 regression-test/data/variant_p0/load.out           | 22 ++---
 regression-test/data/variant_p0/sql/gh_data.out    | 68 +++++++++++-----
 regression-test/suites/variant_p0/load.groovy      |  8 +-
 regression-test/suites/variant_p0/sql/gh_data.sql  |  9 ++-
 20 files changed, 261 insertions(+), 110 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h 
b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h
index 58da2a43435..99f926f44cf 100644
--- a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h
+++ b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h
@@ -229,7 +229,7 @@ public:
 private:
     Status extract_to(vectorized::MutableColumnPtr& dst, size_t nrows);
 
-    const TabletColumn& _col;
+    TabletColumn _col;
     // may shared among different column iterators
     std::unique_ptr<StreamReader> _root_reader;
 };
diff --git a/be/src/olap/rowset/segment_v2/segment.cpp 
b/be/src/olap/rowset/segment_v2/segment.cpp
index e3d1ce0c599..de2593447c5 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -61,10 +61,12 @@
 #include "util/slice.h" // Slice
 #include "vec/columns/column.h"
 #include "vec/common/string_ref.h"
+#include "vec/core/field.h"
 #include "vec/data_types/data_type.h"
 #include "vec/data_types/data_type_factory.hpp"
 #include "vec/data_types/data_type_nullable.h"
 #include "vec/data_types/data_type_object.h"
+#include "vec/json/path_in_data.h"
 #include "vec/olap/vgeneric_iterators.h"
 
 namespace doris {
@@ -332,17 +334,18 @@ Status Segment::_load_index_impl() {
 
 // Return the storage datatype of related column to field.
 // Return nullptr meaning no such storage infomation for this column
-vectorized::DataTypePtr Segment::get_data_type_of(const Field& field, bool 
ignore_children) const {
+vectorized::DataTypePtr Segment::get_data_type_of(vectorized::PathInData path, 
bool is_nullable,
+                                                  bool ignore_children) const {
     // Path has higher priority
-    if (!field.path().empty()) {
-        auto node = _sub_column_tree.find_leaf(field.path());
+    if (!path.empty()) {
+        auto node = _sub_column_tree.find_leaf(path);
         if (node) {
             if (ignore_children || node->children.empty()) {
                 return node->data.file_column_type;
             }
         }
         // it contains children or column missing in storage, so treat it as 
variant
-        return field.is_nullable()
+        return is_nullable
                        ? 
vectorized::make_nullable(std::make_shared<vectorized::DataTypeObject>())
                        : std::make_shared<vectorized::DataTypeObject>();
     }
@@ -686,7 +689,8 @@ Status Segment::read_key_by_rowid(uint32_t row_id, 
std::string* key) {
 
 bool Segment::same_with_storage_type(int32_t cid, const Schema& schema,
                                      bool ignore_children) const {
-    auto file_column_type = get_data_type_of(*schema.column(cid), 
ignore_children);
+    auto file_column_type = get_data_type_of(schema.column(cid)->path(),
+                                             
schema.column(cid)->is_nullable(), ignore_children);
     auto expected_type = Schema::get_data_type_ptr(*schema.column(cid));
 #ifndef NDEBUG
     if (file_column_type && !file_column_type->equals(*expected_type)) {
@@ -700,5 +704,58 @@ bool Segment::same_with_storage_type(int32_t cid, const 
Schema& schema,
     return same;
 }
 
+Status Segment::seek_and_read_by_rowid(const TabletSchema& schema, 
SlotDescriptor* slot,
+                                       uint32_t row_id, 
vectorized::MutableColumnPtr& result,
+                                       OlapReaderStatistics& stats,
+                                       std::unique_ptr<ColumnIterator>& 
iterator_hint) {
+    StorageReadOptions storage_read_opt;
+    storage_read_opt.io_ctx.reader_type = ReaderType::READER_QUERY;
+    segment_v2::ColumnIteratorOptions opt {
+            .use_page_cache = !config::disable_storage_page_cache,
+            .file_reader = file_reader().get(),
+            .stats = &stats,
+            .io_ctx = io::IOContext {.reader_type = ReaderType::READER_QUERY},
+    };
+    std::vector<segment_v2::rowid_t> single_row_loc {row_id};
+    if (!slot->column_paths().empty()) {
+        vectorized::PathInData 
path(schema.column_by_uid(slot->col_unique_id()).name_lower_case(),
+                                    slot->column_paths());
+        auto storage_type = get_data_type_of(path, slot->is_nullable(), false);
+        vectorized::MutableColumnPtr file_storage_column = 
storage_type->create_column();
+        DCHECK(storage_type != nullptr);
+        TabletColumn column = TabletColumn::create_materialized_variant_column(
+                schema.column_by_uid(slot->col_unique_id()).name_lower_case(), 
slot->column_paths(),
+                slot->col_unique_id());
+        if (iterator_hint == nullptr) {
+            RETURN_IF_ERROR(new_column_iterator(column, &iterator_hint, 
&storage_read_opt));
+            RETURN_IF_ERROR(iterator_hint->init(opt));
+        }
+        RETURN_IF_ERROR(
+                iterator_hint->read_by_rowids(single_row_loc.data(), 1, 
file_storage_column));
+        // iterator_hint.reset(nullptr);
+        // Get its inner field, for JSONB case
+        vectorized::Field field = remove_nullable(storage_type)->get_default();
+        file_storage_column->get(0, field);
+        result->insert(field);
+    } else {
+        int index = (slot->col_unique_id() >= 0) ? 
schema.field_index(slot->col_unique_id())
+                                                 : 
schema.field_index(slot->col_name());
+        if (index < 0) {
+            std::stringstream ss;
+            ss << "field name is invalid. field=" << slot->col_name()
+               << ", field_name_to_index=" << schema.get_all_field_names();
+            return Status::InternalError(ss.str());
+        }
+        storage_read_opt.io_ctx.reader_type = ReaderType::READER_QUERY;
+        if (iterator_hint == nullptr) {
+            RETURN_IF_ERROR(
+                    new_column_iterator(schema.column(index), &iterator_hint, 
&storage_read_opt));
+            RETURN_IF_ERROR(iterator_hint->init(opt));
+        }
+        RETURN_IF_ERROR(iterator_hint->read_by_rowids(single_row_loc.data(), 
1, result));
+    }
+    return Status::OK();
+}
+
 } // namespace segment_v2
 } // namespace doris
diff --git a/be/src/olap/rowset/segment_v2/segment.h 
b/be/src/olap/rowset/segment_v2/segment.h
index 42b7fc83ac0..47ccab71be6 100644
--- a/be/src/olap/rowset/segment_v2/segment.h
+++ b/be/src/olap/rowset/segment_v2/segment.h
@@ -39,11 +39,14 @@
 #include "olap/rowset/segment_v2/page_handle.h"
 #include "olap/schema.h"
 #include "olap/tablet_schema.h"
+#include "runtime/descriptors.h"
 #include "util/once.h"
 #include "util/slice.h"
+#include "vec/columns/column.h"
 #include "vec/columns/subcolumn_tree.h"
 #include "vec/data_types/data_type.h"
 #include "vec/data_types/data_type_nullable.h"
+#include "vec/json/path_in_data.h"
 
 namespace doris {
 namespace vectorized {
@@ -123,6 +126,10 @@ public:
 
     Status read_key_by_rowid(uint32_t row_id, std::string* key);
 
+    Status seek_and_read_by_rowid(const TabletSchema& schema, SlotDescriptor* 
slot, uint32_t row_id,
+                                  vectorized::MutableColumnPtr& result, 
OlapReaderStatistics& stats,
+                                  std::unique_ptr<ColumnIterator>& 
iterator_hint);
+
     Status load_index();
 
     Status load_pk_index_and_bf();
@@ -146,7 +153,8 @@ public:
     // ignore_chidren set to false will treat field as variant
     // when it contains children with field paths.
     // nullptr will returned if storage type does not contains such column
-    std::shared_ptr<const vectorized::IDataType> get_data_type_of(const Field& 
filed,
+    std::shared_ptr<const vectorized::IDataType> 
get_data_type_of(vectorized::PathInData path,
+                                                                  bool 
is_nullable,
                                                                   bool 
ignore_children) const;
 
     // Check is schema read type equals storage column type
@@ -157,8 +165,8 @@ public:
     bool can_apply_predicate_safely(int cid, Predicate* pred, const Schema& 
schema,
                                     ReaderType read_type) const {
         const Field* col = schema.column(cid);
-        vectorized::DataTypePtr storage_column_type =
-                get_data_type_of(*col, read_type != ReaderType::READER_QUERY);
+        vectorized::DataTypePtr storage_column_type = get_data_type_of(
+                col->path(), col->is_nullable(), read_type != 
ReaderType::READER_QUERY);
         if (storage_column_type == nullptr) {
             // Default column iterator
             return true;
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp 
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 483e0e502df..f85ed5abce5 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -342,7 +342,8 @@ Status SegmentIterator::_init_impl(const 
StorageReadOptions& opts) {
         const Field* col = _schema->column(i);
         if (col) {
             auto storage_type = _segment->get_data_type_of(
-                    *col, _opts.io_ctx.reader_type != 
ReaderType::READER_QUERY);
+                    col->path(), col->is_nullable(),
+                    _opts.io_ctx.reader_type != ReaderType::READER_QUERY);
             if (storage_type == nullptr) {
                 storage_type = 
vectorized::DataTypeFactory::instance().create_data_type(*col);
             }
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h 
b/be/src/olap/rowset/segment_v2/segment_iterator.h
index 3fca034a18f..c953e0b9bcc 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.h
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.h
@@ -251,8 +251,8 @@ private:
             if (block_cid >= block->columns()) {
                 continue;
             }
-            vectorized::DataTypePtr storage_type =
-                    _segment->get_data_type_of(*_schema->column(cid), false);
+            vectorized::DataTypePtr storage_type = _segment->get_data_type_of(
+                    _schema->column(cid)->path(), 
_schema->column(cid)->is_nullable(), false);
             if (storage_type && 
!storage_type->equals(*block->get_by_position(block_cid).type)) {
                 // Do additional cast
                 vectorized::MutableColumnPtr tmp = 
storage_type->create_column();
diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp
index 63682d8c712..78e7e938caa 100644
--- a/be/src/olap/tablet_schema.cpp
+++ b/be/src/olap/tablet_schema.cpp
@@ -554,6 +554,20 @@ void TabletColumn::init_from_pb(const ColumnPB& column) {
     }
 }
 
+TabletColumn TabletColumn::create_materialized_variant_column(const 
std::string& root,
+                                                              const 
std::vector<std::string>& paths,
+                                                              int32_t 
parent_unique_id) {
+    TabletColumn subcol;
+    subcol.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
+    subcol.set_is_nullable(true);
+    subcol.set_unique_id(-1);
+    subcol.set_parent_unique_id(parent_unique_id);
+    vectorized::PathInData path(root, paths);
+    subcol.set_path_info(path);
+    subcol.set_name(path.get_path());
+    return subcol;
+}
+
 void TabletColumn::to_schema_pb(ColumnPB* column) const {
     column->set_unique_id(_unique_id);
     column->set_name(_col_name);
diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h
index 3f565680800..21970b5cbac 100644
--- a/be/src/olap/tablet_schema.h
+++ b/be/src/olap/tablet_schema.h
@@ -36,6 +36,7 @@
 #include "gutil/stringprintf.h"
 #include "olap/olap_common.h"
 #include "runtime/define_primitive_type.h"
+#include "runtime/descriptors.h"
 #include "util/string_util.h"
 #include "vec/aggregate_functions/aggregate_function.h"
 #include "vec/common/string_utils/string_utils.h"
@@ -91,6 +92,11 @@ public:
                _type == FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE ||
                _type == FieldType::OLAP_FIELD_TYPE_AGG_STATE;
     }
+    // Such columns do not exist in frontend schema info, so we need to
+    // add them into tablet_schema for later column indexing.
+    static TabletColumn create_materialized_variant_column(const std::string& 
root,
+                                                           const 
std::vector<std::string>& paths,
+                                                           int32_t 
parent_unique_id);
     bool has_default_value() const { return _has_default_value; }
     std::string default_value() const { return _default_value; }
     size_t length() const { return _length; }
diff --git a/be/src/runtime/descriptors.cpp b/be/src/runtime/descriptors.cpp
index 721950abbee..09113f85eea 100644
--- a/be/src/runtime/descriptors.cpp
+++ b/be/src/runtime/descriptors.cpp
@@ -85,6 +85,7 @@ SlotDescriptor::SlotDescriptor(const PSlotDescriptor& pdesc)
           _is_materialized(pdesc.is_materialized()),
           _is_key(pdesc.is_key()),
           _need_materialize(true),
+          _column_paths(pdesc.column_paths().begin(), 
pdesc.column_paths().end()),
           _is_auto_increment(pdesc.is_auto_increment()) {}
 
 void SlotDescriptor::to_protobuf(PSlotDescriptor* pslot) const {
@@ -103,6 +104,9 @@ void SlotDescriptor::to_protobuf(PSlotDescriptor* pslot) 
const {
     pslot->set_is_key(_is_key);
     pslot->set_is_auto_increment(_is_auto_increment);
     pslot->set_col_type(_col_type);
+    for (const std::string& path : _column_paths) {
+        pslot->add_column_paths(path);
+    }
 }
 
 vectorized::MutableColumnPtr SlotDescriptor::get_empty_mutable_column() const {
diff --git a/be/src/service/internal_service.cpp 
b/be/src/service/internal_service.cpp
index 8d352ba1d53..bca48737bf5 100644
--- a/be/src/service/internal_service.cpp
+++ b/be/src/service/internal_service.cpp
@@ -25,6 +25,7 @@
 #include <butil/errno.h>
 #include <butil/iobuf.h>
 #include <fcntl.h>
+#include <fmt/core.h>
 #include <gen_cpp/MasterService_types.h>
 #include <gen_cpp/PaloInternalService_types.h>
 #include <gen_cpp/PlanNodes_types.h>
@@ -46,6 +47,7 @@
 #include <set>
 #include <sstream>
 #include <string>
+#include <unordered_map>
 #include <utility>
 #include <vector>
 
@@ -73,6 +75,7 @@
 #include "olap/rowset/segment_v2/common.h"
 #include "olap/rowset/segment_v2/inverted_index_desc.h"
 #include "olap/rowset/segment_v2/segment.h"
+#include "olap/rowset/segment_v2/segment_iterator.h"
 #include "olap/segment_loader.h"
 #include "olap/storage_engine.h"
 #include "olap/tablet.h"
@@ -1711,6 +1714,38 @@ auto scope_timer_run(Func fn, int64_t* cost) -> 
decltype(fn()) {
     return res;
 }
 
+struct IteratorKey {
+    int64_t tablet_id;
+    RowsetId rowset_id;
+    uint64_t segment_id;
+    int slot_id;
+
+    // equality comparison used by std::unordered_map (via std::equal_to)
+    bool operator==(const IteratorKey& rhs) const {
+        return tablet_id == rhs.tablet_id && rowset_id == rhs.rowset_id &&
+               segment_id == rhs.segment_id && slot_id == rhs.slot_id;
+    }
+};
+
+struct HashOfIteratorKey {
+    size_t operator()(const IteratorKey& key) const {
+        size_t seed = 0;
+        seed = HashUtil::hash64(&key.tablet_id, sizeof(key.tablet_id), seed);
+        seed = HashUtil::hash64(&key.rowset_id.hi, sizeof(key.rowset_id.hi), 
seed);
+        seed = HashUtil::hash64(&key.rowset_id.mi, sizeof(key.rowset_id.mi), 
seed);
+        seed = HashUtil::hash64(&key.rowset_id.lo, sizeof(key.rowset_id.lo), 
seed);
+        seed = HashUtil::hash64(&key.segment_id, sizeof(key.segment_id), seed);
+        seed = HashUtil::hash64(&key.slot_id, sizeof(key.slot_id), seed);
+        return seed;
+    }
+};
+
+struct IteratorItem {
+    std::unique_ptr<ColumnIterator> iterator;
+    // for holding the reference of segment to avoid use after release
+    SegmentSharedPtr segment;
+};
+
 Status PInternalServiceImpl::_multi_get(const PMultiGetRequest& request,
                                         PMultiGetResponse* response) {
     OlapReaderStatistics stats;
@@ -1735,6 +1770,7 @@ Status PInternalServiceImpl::_multi_get(const 
PMultiGetRequest& request,
         full_read_schema.append_column(TabletColumn(column_pb));
     }
 
+    std::unordered_map<IteratorKey, IteratorItem, HashOfIteratorKey> 
iterator_map;
     // read row by row
     for (size_t i = 0; i < request.row_locs_size(); ++i) {
         const auto& row_loc = request.row_locs(i);
@@ -1773,8 +1809,8 @@ Status PInternalServiceImpl::_multi_get(const 
PMultiGetRequest& request,
                 },
                 &acquire_segments_ms));
         // find segment
-        auto it = std::find_if(segment_cache.get_segments().begin(),
-                               segment_cache.get_segments().end(),
+        auto it = std::find_if(segment_cache.get_segments().cbegin(),
+                               segment_cache.get_segments().cend(),
                                [&row_loc](const segment_v2::SegmentSharedPtr& 
seg) {
                                    return seg->id() == row_loc.segment_id();
                                });
@@ -1802,37 +1838,28 @@ Status PInternalServiceImpl::_multi_get(const 
PMultiGetRequest& request,
         if (result_block.is_empty_column()) {
             result_block = vectorized::Block(desc.slots(), 
request.row_locs().size());
         }
+        VLOG_DEBUG << "Read row location "
+                   << fmt::format("{}, {}, {}, {}", row_location.tablet_id,
+                                  
row_location.row_location.rowset_id.to_string(),
+                                  row_location.row_location.segment_id,
+                                  row_location.row_location.row_id);
         for (int x = 0; x < desc.slots().size(); ++x) {
-            int index = -1;
-            if (desc.slots()[x]->col_unique_id() >= 0) {
-                // light sc enabled
-                index = 
full_read_schema.field_index(desc.slots()[x]->col_unique_id());
-            } else {
-                index = 
full_read_schema.field_index(desc.slots()[x]->col_name());
-            }
-            if (index < 0) {
-                std::stringstream ss;
-                ss << "field name is invalid. field=" << 
desc.slots()[x]->col_name()
-                   << ", field_name_to_index=" << 
full_read_schema.get_all_field_names();
-                return Status::InternalError(ss.str());
-            }
-            std::unique_ptr<segment_v2::ColumnIterator> column_iterator;
+            auto row_id = 
static_cast<segment_v2::rowid_t>(row_loc.ordinal_id());
             vectorized::MutableColumnPtr column =
                     result_block.get_by_position(x).column->assume_mutable();
-            StorageReadOptions storage_read_opt;
-            storage_read_opt.io_ctx.reader_type = ReaderType::READER_QUERY;
-            
RETURN_IF_ERROR(segment->new_column_iterator(full_read_schema.column(index),
-                                                         &column_iterator, 
&storage_read_opt));
-            segment_v2::ColumnIteratorOptions opt {
-                    .use_page_cache = !config::disable_storage_page_cache,
-                    .file_reader = segment->file_reader().get(),
-                    .stats = &stats,
-                    .io_ctx = io::IOContext {.reader_type = 
ReaderType::READER_QUERY},
-            };
-            static_cast<void>(column_iterator->init(opt));
-            std::vector<segment_v2::rowid_t> single_row_loc {
-                    static_cast<segment_v2::rowid_t>(row_loc.ordinal_id())};
-            
RETURN_IF_ERROR(column_iterator->read_by_rowids(single_row_loc.data(), 1, 
column));
+            IteratorKey iterator_key {.tablet_id = tablet->tablet_id(),
+                                      .rowset_id = rowset_id,
+                                      .segment_id = row_loc.segment_id(),
+                                      .slot_id = desc.slots()[x]->id()};
+            IteratorItem& iterator_item = iterator_map[iterator_key];
+            if (iterator_item.segment == nullptr) {
+                // hold the reference
+                iterator_map[iterator_key].segment = segment;
+            }
+            segment = iterator_item.segment;
+            RETURN_IF_ERROR(segment->seek_and_read_by_rowid(full_read_schema, 
desc.slots()[x],
+                                                            row_id, column, 
stats,
+                                                            
iterator_item.iterator));
         }
     }
     // serialize block if not empty
@@ -1852,11 +1879,13 @@ Status PInternalServiceImpl::_multi_get(const 
PMultiGetRequest& request,
                          "hit_cached_pages:{}, total_pages_read:{}, 
compressed_bytes_read:{}, "
                          "io_latency:{}ns, "
                          "uncompressed_bytes_read:{},"
+                         "bytes_read:{},"
                          "acquire_tablet_ms:{}, acquire_rowsets_ms:{}, 
acquire_segments_ms:{}, "
                          "lookup_row_data_ms:{}",
                          stats.cached_pages_num, stats.total_pages_num, 
stats.compressed_bytes_read,
-                         stats.io_ns, stats.uncompressed_bytes_read, 
acquire_tablet_ms,
-                         acquire_rowsets_ms, acquire_segments_ms, 
lookup_row_data_ms);
+                         stats.io_ns, stats.uncompressed_bytes_read, 
stats.bytes_read,
+                         acquire_tablet_ms, acquire_rowsets_ms, 
acquire_segments_ms,
+                         lookup_row_data_ms);
     return Status::OK();
 }
 
@@ -1865,6 +1894,7 @@ void 
PInternalServiceImpl::multiget_data(google::protobuf::RpcController* contro
                                          PMultiGetResponse* response,
                                          google::protobuf::Closure* done) {
     bool ret = _light_work_pool.try_offer([request, response, done, this]() {
+        signal::set_signal_task_id(request->query_id());
         // multi get data by rowid
         MonotonicStopWatch watch;
         watch.start();
diff --git a/be/src/vec/columns/column_object.cpp 
b/be/src/vec/columns/column_object.cpp
index f44fc62ed98..782a2967f03 100644
--- a/be/src/vec/columns/column_object.cpp
+++ b/be/src/vec/columns/column_object.cpp
@@ -645,11 +645,12 @@ void ColumnObject::for_each_subcolumn(ColumnCallback 
callback) {
 }
 
 void ColumnObject::insert_from(const IColumn& src, size_t n) {
-    const auto& src_v = assert_cast<const ColumnObject&>(src);
+    const auto* src_v = check_and_get_column<ColumnObject>(src);
     // optimize when src and this column are scalar variant, since try_insert 
is inefficiency
-    if (src_v.is_scalar_variant() && is_scalar_variant() &&
-        src_v.get_root_type()->equals(*get_root_type()) && 
src_v.is_finalized() && is_finalized()) {
-        
assert_cast<ColumnNullable&>(*get_root()).insert_from(*src_v.get_root(), n);
+    if (src_v != nullptr && src_v->is_scalar_variant() && is_scalar_variant() 
&&
+        src_v->get_root_type()->equals(*get_root_type()) && 
src_v->is_finalized() &&
+        is_finalized()) {
+        
assert_cast<ColumnNullable&>(*get_root()).insert_from(*src_v->get_root(), n);
         ++num_rows;
         return;
     }
diff --git a/be/src/vec/exec/scan/new_olap_scanner.cpp 
b/be/src/vec/exec/scan/new_olap_scanner.cpp
index c6e401f3f54..5566abec3ab 100644
--- a/be/src/vec/exec/scan/new_olap_scanner.cpp
+++ b/be/src/vec/exec/scan/new_olap_scanner.cpp
@@ -58,6 +58,7 @@
 #include "vec/exec/scan/new_olap_scan_node.h"
 #include "vec/exec/scan/vscan_node.h"
 #include "vec/exprs/vexpr_context.h"
+#include "vec/json/path_in_data.h"
 #include "vec/olap/block_reader.h"
 
 namespace doris::vectorized {
@@ -411,16 +412,6 @@ Status NewOlapScanner::_init_tablet_reader_params(
     return Status::OK();
 }
 
-vectorized::PathInData NewOlapScanner::_build_path(SlotDescriptor* slot,
-                                                   const std::string& 
root_name) {
-    PathInDataBuilder path_builder;
-    path_builder.append(root_name, false);
-    for (const std::string& path : slot->column_paths()) {
-        path_builder.append(path, false);
-    }
-    return path_builder.build();
-}
-
 Status NewOlapScanner::_init_variant_columns() {
     auto& tablet_schema = _tablet_reader_params.tablet_schema;
     // Parent column has path info to distinction from each other
@@ -434,16 +425,10 @@ Status NewOlapScanner::_init_variant_columns() {
         if (slot->type().is_variant_type()) {
             // Such columns are not exist in frontend schema info, so we need 
to
             // add them into tablet_schema for later column indexing.
-            TabletColumn subcol;
-            subcol.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
-            subcol.set_is_nullable(true);
-            subcol.set_unique_id(-1);
-            subcol.set_parent_unique_id(slot->col_unique_id());
-            PathInData path = _build_path(
-                    slot, 
tablet_schema->column_by_uid(slot->col_unique_id()).name_lower_case());
-            subcol.set_path_info(path);
-            subcol.set_name(path.get_path());
-            if (tablet_schema->field_index(path) < 0) {
+            TabletColumn subcol = 
TabletColumn::create_materialized_variant_column(
+                    
tablet_schema->column_by_uid(slot->col_unique_id()).name_lower_case(),
+                    slot->column_paths(), slot->col_unique_id());
+            if (tablet_schema->field_index(subcol.path_info()) < 0) {
                 tablet_schema->append_column(subcol, 
TabletSchema::ColumnType::VARIANT);
             }
         }
@@ -465,8 +450,9 @@ Status NewOlapScanner::_init_return_columns() {
         int32_t index = 0;
         auto& tablet_schema = _tablet_reader_params.tablet_schema;
         if (slot->type().is_variant_type()) {
-            index = tablet_schema->field_index(_build_path(
-                    slot, 
tablet_schema->column_by_uid(slot->col_unique_id()).name_lower_case()));
+            index = tablet_schema->field_index(PathInData(
+                    
tablet_schema->column_by_uid(slot->col_unique_id()).name_lower_case(),
+                    slot->column_paths()));
         } else {
             index = slot->col_unique_id() >= 0 ? 
tablet_schema->field_index(slot->col_unique_id())
                                                : 
tablet_schema->field_index(slot->col_name());
diff --git a/be/src/vec/exec/scan/new_olap_scanner.h 
b/be/src/vec/exec/scan/new_olap_scanner.h
index ae094b84f03..9eab71f3c34 100644
--- a/be/src/vec/exec/scan/new_olap_scanner.h
+++ b/be/src/vec/exec/scan/new_olap_scanner.h
@@ -93,7 +93,6 @@ private:
                                       const std::vector<FunctionFilter>& 
function_filters);
 
     [[nodiscard]] Status _init_return_columns();
-    vectorized::PathInData _build_path(SlotDescriptor* slot, const 
std::string& root_name);
     [[nodiscard]] Status _init_variant_columns();
 
     std::vector<OlapScanRange*> _key_ranges;
diff --git a/be/src/vec/json/path_in_data.cpp b/be/src/vec/json/path_in_data.cpp
index 1c02febd446..ae91b444994 100644
--- a/be/src/vec/json/path_in_data.cpp
+++ b/be/src/vec/json/path_in_data.cpp
@@ -46,11 +46,22 @@ PathInData::PathInData(const PathInData& other) : 
path(other.path) {
     build_parts(other.get_parts());
 }
 
+PathInData::PathInData(const std::string& root, const 
std::vector<std::string>& paths) {
+    PathInDataBuilder path_builder;
+    path_builder.append(root, false);
+    for (const std::string& path : paths) {
+        path_builder.append(path, false);
+    }
+    build_path(path_builder.get_parts());
+    build_parts(path_builder.get_parts());
+}
+
 PathInData::PathInData(const std::vector<std::string>& paths) {
     PathInDataBuilder path_builder;
     for (size_t i = 0; i < paths.size(); ++i) {
         path_builder.append(paths[i], false);
     }
+    build_path(path_builder.get_parts());
     build_parts(path_builder.get_parts());
 }
 
diff --git a/be/src/vec/json/path_in_data.h b/be/src/vec/json/path_in_data.h
index 6531a8bfc6a..267ab1fab3f 100644
--- a/be/src/vec/json/path_in_data.h
+++ b/be/src/vec/json/path_in_data.h
@@ -61,6 +61,7 @@ public:
     explicit PathInData(std::string_view path_);
     explicit PathInData(const Parts& parts_);
     explicit PathInData(const std::vector<std::string>& paths);
+    explicit PathInData(const std::string& root, const 
std::vector<std::string>& paths);
     PathInData(const PathInData& other);
     PathInData& operator=(const PathInData& other);
     static UInt128 get_parts_hash(const Parts& parts_);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java
index 893d4c93239..dc3c2f52d81 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java
@@ -829,8 +829,7 @@ public class SelectStmt extends QueryStmt {
             LOG.debug("only support duplicate key or MOW model");
             return false;
         }
-        if (!olapTable.getEnableLightSchemaChange() || 
!Strings.isNullOrEmpty(olapTable.getStoragePolicy())
-                    || olapTable.hasVariantColumns()) {
+        if (!olapTable.getEnableLightSchemaChange()) {
             return false;
         }
         if (getOrderByElements() != null) {
diff --git a/gensrc/proto/descriptors.proto b/gensrc/proto/descriptors.proto
index 270199cc020..8762a78c0f4 100644
--- a/gensrc/proto/descriptors.proto
+++ b/gensrc/proto/descriptors.proto
@@ -38,6 +38,7 @@ message PSlotDescriptor {
     optional bool is_key = 12;
     optional bool is_auto_increment = 13;
     optional int32 col_type = 14 [default = 0];
+    repeated string column_paths = 15;
 };
 
 message PTupleDescriptor {
diff --git a/regression-test/data/variant_p0/load.out 
b/regression-test/data/variant_p0/load.out
index e48aafe2e0d..7def94d2fad 100644
--- a/regression-test/data/variant_p0/load.out
+++ b/regression-test/data/variant_p0/load.out
@@ -143,7 +143,7 @@
 [123]
 
 -- !sql_25 --
-50000  55000.00000000013       6150000
+50000  55000.000000002256      6150000
 
 -- !sql_26 --
 5000
@@ -227,16 +227,16 @@
 \N     \N      \N
 
 -- !sql_36_2 --
-7702   
{"payload":{"commits":[{"sha":"348743fdce27d3f3c97e366381b1b7b371fc4510","author":{"email":"9b7a0973fc99779f7e1822eb7336ff5d28bd2...@users.noreply.github.com","name":"Łukasz
 Magiera"},"message":"Create 
README.md","distinct":true,"url":"https://api.github.com/repos/magik6k/BitBuffer/commits/348743fdce27d3f3c97e366381b1b7b371fc4510"}],"before":"4e150694dacd35e7d5cda4e9f6a2aedb1d35db36","head":"348743fdce27d3f3c97e366381b1b7b371fc4510","size":1,"push_id":536752118,"ref":"refs/heads/mas
 [...]
-7701   {"payload":{"pages":[{"page_name":"Android Development 
Basics","title":"Android Development 
Basics","summary":null,"action":"edited","sha":"e4e947a4f29b1a06f560ac1e62bd3bf183e434b6","html_url":"https://github.com/wllmtrng/wllmtrng.github.io/wiki/Android-Development-Basics"}]},"created_at":"2015-01-01T00:59:58Z","id":"2489395760","public":1,"actor":{"gravatar_id":"","url":"https://api.github.com/users/wllmtrng","id":1335855,"login":"wllmtrng","avatar_url":"https://avatars.githubuserc
 [...]
-7700   
{"payload":{"forkee":{"svn_url":"https://github.com/WangXYZ/TBC","pushed_at":"2014-12-24T18:26:11Z","issues_url":"https://api.github.com/repos/WangXYZ/TBC/issues{/number}","events_url":"https://api.github.com/repos/WangXYZ/TBC/events","labels_url":"https://api.github.com/repos/WangXYZ/TBC/labels{/name}","releases_url":"https://api.github.com/repos/WangXYZ/TBC/releases{/id}","keys_url":"https://api.github.com/repos/WangXYZ/TBC/keys{/key_id}","stargazers_url":"https://api.github.com/r
 [...]
-7699   {"payload":{"description":"Game using the MS Kinect that scans the user 
and creates a 3D version of 
them.","ref":"master","ref_type":"branch","pusher_type":"user","master_branch":"master"},"created_at":"2015-01-01T00:59:57Z","id":"2489395752","public":1,"actor":{"gravatar_id":"","url":"https://api.github.com/users/chrisjimenez","id":3580593,"login":"chrisjimenez","avatar_url":"https://avatars.githubusercontent.com/u/3580593?"},"repo":{"url":"https://api.github.com/repos/chrisjimenez
 [...]
-7698   
{"payload":{"issue":{"url":"https://api.github.com/repos/antonioortegajr/beerfind.me/issues/12","created_at":"2015-01-01T00:59:56Z","body":"The
 bee that are being searched for currently should also have logos and 
descriptions from the 
API.","events_url":"https://api.github.com/repos/antonioortegajr/beerfind.me/issues/12/events","labels_url":"https://api.github.com/repos/antonioortegajr/beerfind.me/issues/12/labels{/name}","locked":0,"state":"open","comments_url":"https://api.github.
 [...]
-7697   
{"payload":{"action":"closed","pull_request":{"review_comments_url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/pulls/534/comments","url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/pulls/534","body":"","patch_url":"https://github.com/XLabs/Xamarin-Forms-Labs/pull/534.patch","head":{"ref":"master","user":{"starred_url":"https://api.github.com/users/bokmadsen/starred{/owner}{/repo}","url":"https://api.github.com/users/bokmadsen","repos_url":"https://api.github.c
 [...]
-7696   
{"payload":{"commits":[{"sha":"c356eac8fa4409a1aa794ab07244250a862da03b","author":{"email":"de8898f6c55e335aa0a2b937fae65fb756ee0...@gmail.com","name":"Zaryafaraj"},"message":"reserve
 modal 
styles","distinct":true,"url":"https://api.github.com/repos/Fathalian/Guild/commits/c356eac8fa4409a1aa794ab07244250a862da03b"},{"sha":"9a773fc648910c7a2499401f44a6e5f71eb30460","author":{"email":"de8898f6c55e335aa0a2b937fae65fb756ee0...@gmail.com","name":"Zaryafaraj"},"message":"Merge
 branch 'mas [...]
-7695   
{"payload":{"commits":[{"sha":"17cf9ea07662d74b2d5bac1cf976f5853a63920d","author":{"email":"af59d1d6805404937849f05dafd5a911888fd...@gmail.com","name":"Kevin
 Hofmaenner"},"message":"minor UI 
tweak","distinct":true,"url":"https://api.github.com/repos/kevinhofmaenner/blackjack/commits/17cf9ea07662d74b2d5bac1cf976f5853a63920d"}],"before":"6b4f5af0a2a70bbe00cd88281823b436d506ff2d","head":"17cf9ea07662d74b2d5bac1cf976f5853a63920d","size":1,"push_id":536752112,"ref":"refs/heads/master","d
 [...]
-7694   
{"payload":{"commits":[{"sha":"aa8ec0de017c8003758776739facc819e33ac7c9","author":{"email":"e0e04a2320844b42511db0376599e166ab5bd...@gmail.com","name":"Runhang
 Li"},"message":"finish all hamms 
test","distinct":true,"url":"https://api.github.com/repos/marklrh/ocaml-cohttp-test/commits/aa8ec0de017c8003758776739facc819e33ac7c9"}],"before":"2bb795fc30fc15ab85bcc10f894bfcfa118d69bc","head":"aa8ec0de017c8003758776739facc819e33ac7c9","size":1,"push_id":536752109,"ref":"refs/heads/master",";
 [...]
-7693   
{"payload":{"commits":[{"sha":"0b27989723feb4b183d5f87813fef146b670b1d1","author":{"email":"7c5a0c567b5584a13fde407456875318a5bec...@gmail.com","name":"Raphaël
 Benitte"},"message":"Add time clock widget + Improve stylus 
theming","distinct":true,"url":"https://api.github.com/repos/plouc/mozaik/commits/0b27989723feb4b183d5f87813fef146b670b1d1"}],"before":"7ddf17eb74fff5adad6e2feb72bcb627d4644800","head":"0b27989723feb4b183d5f87813fef146b670b1d1","size":1,"push_id":536752104,"ref":"ref
 [...]
+7702   
{"url":"https://api.github.com/repos/magik6k/BitBuffer","id":28677864,"name":"magik6k/BitBuffer"}
+7701   
{"url":"https://api.github.com/repos/wllmtrng/wllmtrng.github.io","id":18089434,"name":"wllmtrng/wllmtrng.github.io"}
+7700   
{"url":"https://api.github.com/repos/ACID-Scripts/TBC","id":23724137,"name":"ACID-Scripts/TBC"}
+7699   
{"url":"https://api.github.com/repos/chrisjimenez/IpsePuppet","id":28678128,"name":"chrisjimenez/IpsePuppet"}
+7698   
{"url":"https://api.github.com/repos/antonioortegajr/beerfind.me","id":28573267,"name":"antonioortegajr/beerfind.me"}
+7697   
{"url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs","id":20463939,"name":"XLabs/Xamarin-Forms-Labs"}
+7696   
{"url":"https://api.github.com/repos/Fathalian/Guild","id":26995510,"name":"Fathalian/Guild"}
+7695   
{"url":"https://api.github.com/repos/kevinhofmaenner/blackjack","id":28652857,"name":"kevinhofmaenner/blackjack"}
+7694   
{"url":"https://api.github.com/repos/marklrh/ocaml-cohttp-test","id":27470715,"name":"marklrh/ocaml-cohttp-test"}
+7693   
{"url":"https://api.github.com/repos/plouc/mozaik","id":28498113,"name":"plouc/mozaik"}
 
 -- !sql_36_3 --
 2      {"updated_value":10}
diff --git a/regression-test/data/variant_p0/sql/gh_data.out 
b/regression-test/data/variant_p0/sql/gh_data.out
index 7fc5e10dbe1..4c34edabe48 100644
--- a/regression-test/data/variant_p0/sql/gh_data.out
+++ b/regression-test/data/variant_p0/sql/gh_data.out
@@ -3,31 +3,37 @@
 0
 
 -- !gh_data_2 --
-5000
+0
 
 -- !gh_data_3 --
+0
+
+-- !gh_data_4 --
+5000
+
+-- !gh_data_5 --
 leonardomso/33-js-concepts     3
 ytdl-org/youtube-dl    3
 Bogdanp/neko   2
 bminossi/AllVideoPocsFromHackerOne     2
 disclose/diodata       2
 
--- !gh_data_4 --
+-- !gh_data_6 --
 14690758274
 
--- !gh_data_5 --
+-- !gh_data_7 --
 73453762334584
 
--- !gh_data_6 --
+-- !gh_data_8 --
 457806339
 
--- !gh_data_7 --
+-- !gh_data_9 --
 0
 
--- !gh_data_8 --
+-- !gh_data_10 --
 19829
 
--- !gh_data_9 --
+-- !gh_data_11 --
 49390617
 64890096
 10696700
@@ -39,19 +45,19 @@ disclose/diodata    2
 42386044
 73801003
 
--- !gh_data_10 --
-27     
{"payload":{"action":"started"},"created_at":"2021-01-02T16:37:27Z","id":"14690746717","public":1,"actor":{"gravatar_id":"","display_login":"sergdudnik","url":"https://api.github.com/users/sergdudnik","id":16341546,"login":"sergdudnik","avatar_url":"https://avatars.githubusercontent.com/u/16341546?"},"repo":{"url":"https://api.github.com/repos/leonardomso/33-js-concepts","id":147350463,"name":"leonardomso/33-js-concepts"},"type":"WatchEvent"}
-36     
{"payload":{"action":"started"},"created_at":"2021-01-02T16:37:27Z","id":"14690746732","public":1,"actor":{"gravatar_id":"","display_login":"juliusHuelsmann","url":"https://api.github.com/users/juliusHuelsmann","id":9212314,"login":"juliusHuelsmann","avatar_url":"https://avatars.githubusercontent.com/u/9212314?"},"repo":{"url":"https://api.github.com/repos/odeke-em/drive","id":26109545,"name":"odeke-em/drive"},"type":"WatchEvent"}
-46     
{"payload":{"action":"started"},"created_at":"2021-01-02T16:37:27Z","id":"14690746749","org":{"gravatar_id":"","url":"https://api.github.com/orgs/GO-LiFE","id":38434522,"login":"GO-LiFE","avatar_url":"https://avatars.githubusercontent.com/u/38434522?"},"public":1,"actor":{"gravatar_id":"","display_login":"okbean","url":"https://api.github.com/users/okbean","id":75969386,"login":"okbean","avatar_url":"https://avatars.githubusercontent.com/u/75969386?"},"repo":{"url":"https://api.github
 [...]
-56     
{"payload":{"action":"started"},"created_at":"2021-01-02T16:37:28Z","id":"14690746773","public":1,"actor":{"gravatar_id":"","display_login":"PWDream","url":"https://api.github.com/users/PWDream","id":4903755,"login":"PWDream","avatar_url":"https://avatars.githubusercontent.com/u/4903755?"},"repo":{"url":"https://api.github.com/repos/MrXujiang/h5-Dooring","id":289417971,"name":"MrXujiang/h5-Dooring"},"type":"WatchEvent"}
-86     
{"payload":{"action":"started"},"created_at":"2021-01-02T16:37:29Z","id":"14690746843","public":1,"actor":{"gravatar_id":"","display_login":"Gui-Yom","url":"https://api.github.com/users/Gui-Yom","id":25181283,"login":"Gui-Yom","avatar_url":"https://avatars.githubusercontent.com/u/25181283?"},"repo":{"url":"https://api.github.com/repos/redsaph/cleartext","id":106453399,"name":"redsaph/cleartext"},"type":"WatchEvent"}
-98     
{"payload":{"action":"started"},"created_at":"2021-01-02T16:37:29Z","id":"14690746866","org":{"gravatar_id":"","url":"https://api.github.com/orgs/sherlock-project","id":48293496,"login":"sherlock-project","avatar_url":"https://avatars.githubusercontent.com/u/48293496?"},"public":1,"actor":{"gravatar_id":"","display_login":"humaidk2","url":"https://api.github.com/users/humaidk2","id":12982026,"login":"humaidk2","avatar_url":"https://avatars.githubusercontent.com/u/12982026?"},"repo":{";
 [...]
-101    
{"payload":{"action":"started"},"created_at":"2021-01-02T16:37:29Z","id":"14690746870","public":1,"actor":{"gravatar_id":"","display_login":"hasantezcan","url":"https://api.github.com/users/hasantezcan","id":32804505,"login":"hasantezcan","avatar_url":"https://avatars.githubusercontent.com/u/32804505?"},"repo":{"url":"https://api.github.com/repos/okandavut/react-spotify-nowplaying","id":326215605,"name":"okandavut/react-spotify-nowplaying"},"type":"WatchEvent"}
-112    
{"payload":{"action":"started"},"created_at":"2021-01-02T16:37:30Z","id":"14690746899","public":1,"actor":{"gravatar_id":"","display_login":"nicholas-robertson","url":"https://api.github.com/users/nicholas-robertson","id":17681331,"login":"nicholas-robertson","avatar_url":"https://avatars.githubusercontent.com/u/17681331?"},"repo":{"url":"https://api.github.com/repos/sentriz/gonic","id":178435468,"name":"sentriz/gonic"},"type":"WatchEvent"}
-122    
{"payload":{"action":"started"},"created_at":"2021-01-02T16:37:30Z","id":"14690746914","org":{"gravatar_id":"","url":"https://api.github.com/orgs/netlify-labs","id":47546088,"login":"netlify-labs","avatar_url":"https://avatars.githubusercontent.com/u/47546088?"},"public":1,"actor":{"gravatar_id":"","display_login":"javaniecampbell","url":"https://api.github.com/users/javaniecampbell","id":1676496,"login":"javaniecampbell","avatar_url":"https://avatars.githubusercontent.com/u/1676496?
 [...]
-169    
{"payload":{"action":"started"},"created_at":"2021-01-02T16:37:32Z","id":"14690747028","org":{"gravatar_id":"","url":"https://api.github.com/orgs/microsoft","id":6154722,"login":"microsoft","avatar_url":"https://avatars.githubusercontent.com/u/6154722?"},"public":1,"actor":{"gravatar_id":"","display_login":"Yxnt","url":"https://api.github.com/users/Yxnt","id":10323352,"login":"Yxnt","avatar_url":"https://avatars.githubusercontent.com/u/10323352?"},"repo":{"url":"https://api.github.co
 [...]
+-- !gh_data_12 --
+27     
{"url":"https://api.github.com/repos/leonardomso/33-js-concepts","id":147350463,"name":"leonardomso/33-js-concepts"}
+36     
{"url":"https://api.github.com/repos/odeke-em/drive","id":26109545,"name":"odeke-em/drive"}
+46     
{"url":"https://api.github.com/repos/GO-LiFE/GoFIT_SDK_Android","id":141905736,"name":"GO-LiFE/GoFIT_SDK_Android"}
+56     
{"url":"https://api.github.com/repos/MrXujiang/h5-Dooring","id":289417971,"name":"MrXujiang/h5-Dooring"}
+86     
{"url":"https://api.github.com/repos/redsaph/cleartext","id":106453399,"name":"redsaph/cleartext"}
+98     
{"url":"https://api.github.com/repos/sherlock-project/sherlock","id":162998479,"name":"sherlock-project/sherlock"}
+101    
{"url":"https://api.github.com/repos/okandavut/react-spotify-nowplaying","id":326215605,"name":"okandavut/react-spotify-nowplaying"}
+112    
{"url":"https://api.github.com/repos/sentriz/gonic","id":178435468,"name":"sentriz/gonic"}
+122    
{"url":"https://api.github.com/repos/netlify-labs/react-netlify-identity-widget","id":182606378,"name":"netlify-labs/react-netlify-identity-widget"}
+169    
{"url":"https://api.github.com/repos/microsoft/BotBuilder-Samples","id":68730444,"name":"microsoft/BotBuilder-Samples"}
 
--- !gh_data_11 --
+-- !gh_data_13 --
 2051941        1
 10696700       1
 32271952       2
@@ -63,3 +69,27 @@ disclose/diodata     2
 64890096       1
 73801003       1
 
+-- !gh_data_14 --
+27     14690746717     WatchEvent      leonardomso/33-js-concepts
+36     14690746732     WatchEvent      odeke-em/drive
+46     14690746749     WatchEvent      GO-LiFE/GoFIT_SDK_Android
+56     14690746773     WatchEvent      MrXujiang/h5-Dooring
+86     14690746843     WatchEvent      redsaph/cleartext
+98     14690746866     WatchEvent      sherlock-project/sherlock
+101    14690746870     WatchEvent      okandavut/react-spotify-nowplaying
+112    14690746899     WatchEvent      sentriz/gonic
+122    14690746914     WatchEvent      
netlify-labs/react-netlify-identity-widget
+169    14690747028     WatchEvent      microsoft/BotBuilder-Samples
+
+-- !gh_data_15 --
+user
+user
+user
+user
+user
+user
+user
+user
+user
+user
+
diff --git a/regression-test/suites/variant_p0/load.groovy 
b/regression-test/suites/variant_p0/load.groovy
index 0501d3ce52f..b23ae19d12f 100644
--- a/regression-test/suites/variant_p0/load.groovy
+++ b/regression-test/suites/variant_p0/load.groovy
@@ -287,7 +287,7 @@ suite("regression_test_variant", "variant_type"){
         // 12. streamload remote file
         table_name = "logdata"
         create_table.call(table_name, "4")
-        sql "set enable_two_phase_read_opt = false;"
+        // sql "set enable_two_phase_read_opt = false;"
         // no sparse columns
         set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "1")
         load_json_data.call(table_name, """${getS3Url() + 
'/load/logdata.json'}""")
@@ -340,7 +340,7 @@ suite("regression_test_variant", "variant_type"){
         qt_sql_36_1 "select cast(v:a as int), cast(v:b as int), cast(v:c as 
int) from ${table_name} order by k limit 10"
         sql "DELETE FROM ${table_name} WHERE k=1"
         sql "select * from ${table_name}"
-        qt_sql_36_2 "select * from ${table_name} where k > 3 order by k desc 
limit 10"
+        qt_sql_36_2 """select k, json_extract(cast(v as text), "\$.repo") from 
${table_name} where k > 3 order by k desc limit 10"""
         sql "insert into ${table_name} select * from ${table_name}"
         sql """UPDATE ${table_name} set v = '{"updated_value" : 10}' where k = 
2"""
         qt_sql_36_3 """select * from ${table_name} where k = 2"""
@@ -386,13 +386,13 @@ suite("regression_test_variant", "variant_type"){
         sql """insert into ${table_name} values (2, "abe", '{"c" : 1}')"""
         sql """insert into ${table_name} values (3, "abd", '{"d" : 1}')"""
         sql "delete from ${table_name} where k in (select k from variant_mow 
where k in (1, 2))"
-        qt_sql_38 "select * from ${table_name} order by k"
+        qt_sql_38 "select * from ${table_name} order by k limit 10"
 
         // read text from sparse col
         set_be_config.call("variant_ratio_of_defaults_as_sparse_column", 
"0.95")
         sql """insert into  sparse_columns select 0, '{"a": 1123, "b" : [123, 
{"xx" : 1}], "c" : {"c" : 456, "d" : null, "e" : 7.111}, "zzz" : null, "oooo" : 
{"akakaka" : null, "xxxx" : {"xxx" : 123}}}'  as json_str
             union  all select 0, '{"a" : 1234, "xxxx" : "kaana", "ddd" : 
{"aaa" : 123, "mxmxm" : [456, "789"]}}' as json_str from numbers("number" = 
"4096") limit 4096 ;"""
-        qt_sql_31 """select cast(v:xxxx as string) from sparse_columns where 
cast(v:xxxx as string) != 'null' limit 1;"""
+        qt_sql_31 """select cast(v:xxxx as string) from sparse_columns where 
cast(v:xxxx as string) != 'null' order by k limit 1;"""
         sql "truncate table sparse_columns"
         set_be_config.call("variant_ratio_of_defaults_as_sparse_column", 
"0.95")
     } finally {
diff --git a/regression-test/suites/variant_p0/sql/gh_data.sql 
b/regression-test/suites/variant_p0/sql/gh_data.sql
index 72a3adb01cf..2fc0ccac14a 100644
--- a/regression-test/suites/variant_p0/sql/gh_data.sql
+++ b/regression-test/suites/variant_p0/sql/gh_data.sql
@@ -1,4 +1,6 @@
 set exec_mem_limit=8G;
+set enable_two_phase_read_opt = true;
+set topn_opt_limit_threshold = 1024; 
 SELECT count() from ghdata;
 SELECT cast(v:repo.name as string), count() AS stars FROM ghdata WHERE 
cast(v:type as string) = 'WatchEvent' GROUP BY cast(v:repo.name as string)  
ORDER BY stars DESC, cast(v:repo.name as string) LIMIT 5;
 SELECT max(cast(cast(v:`id` as string) as bigint)) FROM ghdata;
@@ -6,8 +8,9 @@ SELECT sum(cast(cast(v:`id` as string) as bigint)) FROM ghdata;
 SELECT sum(cast(v:payload.member.id as bigint)) FROM ghdata;
 SELECT sum(cast(v:payload.pull_request.milestone.creator.site_admin as 
bigint)) FROM ghdata;
 SELECT sum(length(v:payload.pull_request.base.repo.html_url)) FROM ghdata;
--- SELECT v:payload.commits.author.name FROM ghdata ORDER BY k LIMIT 10;
 SELECT v:payload.member.id FROM ghdata where cast(v:payload.member.id as 
string) is not null  ORDER BY k LIMIT 10;
 -- select k, v:payload.commits.author.name AS name, e FROM ghdata as t lateral 
view  explode(cast(v:payload.commits.author.name as array<string>)) tm1 as e  
order by k limit 5;
-select k, v from ghdata WHERE cast(v:type as string) = 'WatchEvent'  order by 
k limit 10;
-SELECT cast(v:payload.member.id as bigint), count() FROM ghdata where 
cast(v:payload.member.id as bigint) is not null group by 
cast(v:payload.member.id as bigint) order by 1, 2 desc LIMIT 10;
\ No newline at end of file
+select k, json_extract(v, '$.repo') from ghdata WHERE cast(v:type as string) = 
'WatchEvent'  order by k limit 10;
+SELECT cast(v:payload.member.id as bigint), count() FROM ghdata where 
cast(v:payload.member.id as bigint) is not null group by 
cast(v:payload.member.id as bigint) order by 1, 2 desc LIMIT 10;
+select k, cast(v:`id` as string), cast(v:type as string), cast(v:repo.name as 
string) from ghdata WHERE cast(v:type as string) = 'WatchEvent'  order by k 
limit 10;
+SELECT cast(v:payload.pusher_type as text) FROM ghdata where 
cast(v:payload.pusher_type as text) is not null ORDER BY k LIMIT 10;
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to