This is an automated email from the ASF dual-hosted git repository. eldenmoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new e9e1e2894bf [performance](variant) support topn 2phase read for variant column (#28318) e9e1e2894bf is described below commit e9e1e2894bfa729613d4e3c48a543dbb54b72452 Author: lihangyu <15605149...@163.com> AuthorDate: Mon Dec 25 11:50:41 2023 +0800 [performance](variant) support topn 2phase read for variant column (#28318) [performance](variant) support topn 2phase read for variant column --- .../rowset/segment_v2/hierarchical_data_reader.h | 2 +- be/src/olap/rowset/segment_v2/segment.cpp | 67 +++++++++++++-- be/src/olap/rowset/segment_v2/segment.h | 14 +++- be/src/olap/rowset/segment_v2/segment_iterator.cpp | 3 +- be/src/olap/rowset/segment_v2/segment_iterator.h | 4 +- be/src/olap/tablet_schema.cpp | 14 ++++ be/src/olap/tablet_schema.h | 6 ++ be/src/runtime/descriptors.cpp | 4 + be/src/service/internal_service.cpp | 94 ++++++++++++++-------- be/src/vec/columns/column_object.cpp | 9 ++- be/src/vec/exec/scan/new_olap_scanner.cpp | 30 ++----- be/src/vec/exec/scan/new_olap_scanner.h | 1 - be/src/vec/json/path_in_data.cpp | 11 +++ be/src/vec/json/path_in_data.h | 1 + .../java/org/apache/doris/analysis/SelectStmt.java | 3 +- gensrc/proto/descriptors.proto | 1 + regression-test/data/variant_p0/load.out | 22 ++--- regression-test/data/variant_p0/sql/gh_data.out | 68 +++++++++++----- regression-test/suites/variant_p0/load.groovy | 8 +- regression-test/suites/variant_p0/sql/gh_data.sql | 9 ++- 20 files changed, 261 insertions(+), 110 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h index 58da2a43435..99f926f44cf 100644 --- a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h +++ b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h @@ -229,7 +229,7 @@ public: private: Status extract_to(vectorized::MutableColumnPtr& dst, size_t nrows); - const TabletColumn& _col; + TabletColumn _col; // may shared among different column iterators std::unique_ptr<StreamReader> _root_reader; }; diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp index e3d1ce0c599..de2593447c5 100644 --- a/be/src/olap/rowset/segment_v2/segment.cpp +++ b/be/src/olap/rowset/segment_v2/segment.cpp @@ -61,10 +61,12 @@ #include "util/slice.h" // Slice #include "vec/columns/column.h" #include "vec/common/string_ref.h" +#include "vec/core/field.h" #include "vec/data_types/data_type.h" #include "vec/data_types/data_type_factory.hpp" #include "vec/data_types/data_type_nullable.h" #include "vec/data_types/data_type_object.h" +#include "vec/json/path_in_data.h" #include "vec/olap/vgeneric_iterators.h" namespace doris { @@ -332,17 +334,18 @@ Status Segment::_load_index_impl() { // Return the storage datatype of related column to field. // Return nullptr meaning no such storage infomation for this column -vectorized::DataTypePtr Segment::get_data_type_of(const Field& field, bool ignore_children) const { +vectorized::DataTypePtr Segment::get_data_type_of(vectorized::PathInData path, bool is_nullable, + bool ignore_children) const { // Path has higher priority - if (!field.path().empty()) { - auto node = _sub_column_tree.find_leaf(field.path()); + if (!path.empty()) { + auto node = _sub_column_tree.find_leaf(path); if (node) { if (ignore_children || node->children.empty()) { return node->data.file_column_type; } } // it contains children or column missing in storage, so treat it as variant - return field.is_nullable() + return is_nullable ? vectorized::make_nullable(std::make_shared<vectorized::DataTypeObject>()) : std::make_shared<vectorized::DataTypeObject>(); } @@ -686,7 +689,8 @@ Status Segment::read_key_by_rowid(uint32_t row_id, std::string* key) { bool Segment::same_with_storage_type(int32_t cid, const Schema& schema, bool ignore_children) const { - auto file_column_type = get_data_type_of(*schema.column(cid), ignore_children); + auto file_column_type = get_data_type_of(schema.column(cid)->path(), + schema.column(cid)->is_nullable(), ignore_children); auto expected_type = Schema::get_data_type_ptr(*schema.column(cid)); #ifndef NDEBUG if (file_column_type && !file_column_type->equals(*expected_type)) { @@ -700,5 +704,58 @@ bool Segment::same_with_storage_type(int32_t cid, const Schema& schema, return same; } +Status Segment::seek_and_read_by_rowid(const TabletSchema& schema, SlotDescriptor* slot, + uint32_t row_id, vectorized::MutableColumnPtr& result, + OlapReaderStatistics& stats, + std::unique_ptr<ColumnIterator>& iterator_hint) { + StorageReadOptions storage_read_opt; + storage_read_opt.io_ctx.reader_type = ReaderType::READER_QUERY; + segment_v2::ColumnIteratorOptions opt { + .use_page_cache = !config::disable_storage_page_cache, + .file_reader = file_reader().get(), + .stats = &stats, + .io_ctx = io::IOContext {.reader_type = ReaderType::READER_QUERY}, + }; + std::vector<segment_v2::rowid_t> single_row_loc {row_id}; + if (!slot->column_paths().empty()) { + vectorized::PathInData path(schema.column_by_uid(slot->col_unique_id()).name_lower_case(), + slot->column_paths()); + auto storage_type = get_data_type_of(path, slot->is_nullable(), false); + vectorized::MutableColumnPtr file_storage_column = storage_type->create_column(); + DCHECK(storage_type != nullptr); + TabletColumn column = TabletColumn::create_materialized_variant_column( + schema.column_by_uid(slot->col_unique_id()).name_lower_case(), slot->column_paths(), + slot->col_unique_id()); + if (iterator_hint == nullptr) { + RETURN_IF_ERROR(new_column_iterator(column, &iterator_hint, &storage_read_opt)); + RETURN_IF_ERROR(iterator_hint->init(opt)); + } + RETURN_IF_ERROR( + iterator_hint->read_by_rowids(single_row_loc.data(), 1, file_storage_column)); + // iterator_hint.reset(nullptr); + // Get it's inner field, for JSONB case + vectorized::Field field = remove_nullable(storage_type)->get_default(); + file_storage_column->get(0, field); + result->insert(field); + } else { + int index = (slot->col_unique_id() >= 0) ? schema.field_index(slot->col_unique_id()) + : schema.field_index(slot->col_name()); + if (index < 0) { + std::stringstream ss; + ss << "field name is invalid. field=" << slot->col_name() + << ", field_name_to_index=" << schema.get_all_field_names(); + return Status::InternalError(ss.str()); + } + storage_read_opt.io_ctx.reader_type = ReaderType::READER_QUERY; + if (iterator_hint == nullptr) { + RETURN_IF_ERROR( + new_column_iterator(schema.column(index), &iterator_hint, &storage_read_opt)); + RETURN_IF_ERROR(iterator_hint->init(opt)); + } + RETURN_IF_ERROR(iterator_hint->read_by_rowids(single_row_loc.data(), 1, result)); + } + return Status::OK(); +} + } // namespace segment_v2 } // namespace doris diff --git a/be/src/olap/rowset/segment_v2/segment.h b/be/src/olap/rowset/segment_v2/segment.h index 42b7fc83ac0..47ccab71be6 100644 --- a/be/src/olap/rowset/segment_v2/segment.h +++ b/be/src/olap/rowset/segment_v2/segment.h @@ -39,11 +39,14 @@ #include "olap/rowset/segment_v2/page_handle.h" #include "olap/schema.h" #include "olap/tablet_schema.h" +#include "runtime/descriptors.h" #include "util/once.h" #include "util/slice.h" +#include "vec/columns/column.h" #include "vec/columns/subcolumn_tree.h" #include "vec/data_types/data_type.h" #include "vec/data_types/data_type_nullable.h" +#include "vec/json/path_in_data.h" namespace doris { namespace vectorized { @@ -123,6 +126,10 @@ public: Status read_key_by_rowid(uint32_t row_id, std::string* key); + Status seek_and_read_by_rowid(const TabletSchema& schema, SlotDescriptor* slot, uint32_t row_id, + vectorized::MutableColumnPtr& result, OlapReaderStatistics& stats, + std::unique_ptr<ColumnIterator>& iterator_hint); + Status load_index(); Status load_pk_index_and_bf(); @@ -146,7 +153,8 @@ public: // ignore_chidren set to false will treat field as variant // when it contains children with field paths. // nullptr will returned if storage type does not contains such column - std::shared_ptr<const vectorized::IDataType> get_data_type_of(const Field& filed, + std::shared_ptr<const vectorized::IDataType> get_data_type_of(vectorized::PathInData path, + bool is_nullable, bool ignore_children) const; // Check is schema read type equals storage column type @@ -157,8 +165,8 @@ public: bool can_apply_predicate_safely(int cid, Predicate* pred, const Schema& schema, ReaderType read_type) const { const Field* col = schema.column(cid); - vectorized::DataTypePtr storage_column_type = - get_data_type_of(*col, read_type != ReaderType::READER_QUERY); + vectorized::DataTypePtr storage_column_type = get_data_type_of( + col->path(), col->is_nullable(), read_type != ReaderType::READER_QUERY); if (storage_column_type == nullptr) { // Default column iterator return true; diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 483e0e502df..f85ed5abce5 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -342,7 +342,8 @@ Status SegmentIterator::_init_impl(const StorageReadOptions& opts) { const Field* col = _schema->column(i); if (col) { auto storage_type = _segment->get_data_type_of( - *col, _opts.io_ctx.reader_type != ReaderType::READER_QUERY); + col->path(), col->is_nullable(), + _opts.io_ctx.reader_type != ReaderType::READER_QUERY); if (storage_type == nullptr) { storage_type = vectorized::DataTypeFactory::instance().create_data_type(*col); } diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h index 3fca034a18f..c953e0b9bcc 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.h +++ b/be/src/olap/rowset/segment_v2/segment_iterator.h @@ -251,8 +251,8 @@ private: if (block_cid >= block->columns()) { continue; } - vectorized::DataTypePtr storage_type = - _segment->get_data_type_of(*_schema->column(cid), false); + vectorized::DataTypePtr storage_type = _segment->get_data_type_of( + _schema->column(cid)->path(), _schema->column(cid)->is_nullable(), false); if (storage_type && !storage_type->equals(*block->get_by_position(block_cid).type)) { // Do additional cast vectorized::MutableColumnPtr tmp = storage_type->create_column(); diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp index 63682d8c712..78e7e938caa 100644 --- a/be/src/olap/tablet_schema.cpp +++ b/be/src/olap/tablet_schema.cpp @@ -554,6 +554,20 @@ void TabletColumn::init_from_pb(const ColumnPB& column) { } } +TabletColumn TabletColumn::create_materialized_variant_column(const std::string& root, + const std::vector<std::string>& paths, + int32_t parent_unique_id) { + TabletColumn subcol; + subcol.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT); + subcol.set_is_nullable(true); + subcol.set_unique_id(-1); + subcol.set_parent_unique_id(parent_unique_id); + vectorized::PathInData path(root, paths); + subcol.set_path_info(path); + subcol.set_name(path.get_path()); + return subcol; +} + void TabletColumn::to_schema_pb(ColumnPB* column) const { column->set_unique_id(_unique_id); column->set_name(_col_name); diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h index 3f565680800..21970b5cbac 100644 --- a/be/src/olap/tablet_schema.h +++ b/be/src/olap/tablet_schema.h @@ -36,6 +36,7 @@ #include "gutil/stringprintf.h" #include "olap/olap_common.h" #include "runtime/define_primitive_type.h" +#include "runtime/descriptors.h" #include "util/string_util.h" #include "vec/aggregate_functions/aggregate_function.h" #include "vec/common/string_utils/string_utils.h" @@ -91,6 +92,11 @@ public: _type == FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE || _type == FieldType::OLAP_FIELD_TYPE_AGG_STATE; } + // Such columns are not exist in frontend schema info, so we need to + // add them into tablet_schema for later column indexing. + static TabletColumn create_materialized_variant_column(const std::string& root, + const std::vector<std::string>& paths, + int32_t parent_unique_id); bool has_default_value() const { return _has_default_value; } std::string default_value() const { return _default_value; } size_t length() const { return _length; } diff --git a/be/src/runtime/descriptors.cpp b/be/src/runtime/descriptors.cpp index 721950abbee..09113f85eea 100644 --- a/be/src/runtime/descriptors.cpp +++ b/be/src/runtime/descriptors.cpp @@ -85,6 +85,7 @@ SlotDescriptor::SlotDescriptor(const PSlotDescriptor& pdesc) _is_materialized(pdesc.is_materialized()), _is_key(pdesc.is_key()), _need_materialize(true), + _column_paths(pdesc.column_paths().begin(), pdesc.column_paths().end()), _is_auto_increment(pdesc.is_auto_increment()) {} void SlotDescriptor::to_protobuf(PSlotDescriptor* pslot) const { @@ -103,6 +104,9 @@ void SlotDescriptor::to_protobuf(PSlotDescriptor* pslot) const { pslot->set_is_key(_is_key); pslot->set_is_auto_increment(_is_auto_increment); pslot->set_col_type(_col_type); + for (const std::string& path : _column_paths) { + pslot->add_column_paths(path); + } } vectorized::MutableColumnPtr SlotDescriptor::get_empty_mutable_column() const { diff --git a/be/src/service/internal_service.cpp b/be/src/service/internal_service.cpp index 8d352ba1d53..bca48737bf5 100644 --- a/be/src/service/internal_service.cpp +++ b/be/src/service/internal_service.cpp @@ -25,6 +25,7 @@ #include <butil/errno.h> #include <butil/iobuf.h> #include <fcntl.h> +#include <fmt/core.h> #include <gen_cpp/MasterService_types.h> #include <gen_cpp/PaloInternalService_types.h> #include <gen_cpp/PlanNodes_types.h> @@ -46,6 +47,7 @@ #include <set> #include <sstream> #include <string> +#include <unordered_map> #include <utility> #include <vector> @@ -73,6 +75,7 @@ #include "olap/rowset/segment_v2/common.h" #include "olap/rowset/segment_v2/inverted_index_desc.h" #include "olap/rowset/segment_v2/segment.h" +#include "olap/rowset/segment_v2/segment_iterator.h" #include "olap/segment_loader.h" #include "olap/storage_engine.h" #include "olap/tablet.h" @@ -1711,6 +1714,38 @@ auto scope_timer_run(Func fn, int64_t* cost) -> decltype(fn()) { return res; } +struct IteratorKey { + int64_t tablet_id; + RowsetId rowset_id; + uint64_t segment_id; + int slot_id; + + // unordered map std::equal_to + bool operator==(const IteratorKey& rhs) const { + return tablet_id == rhs.tablet_id && rowset_id == rhs.rowset_id && + segment_id == rhs.segment_id && slot_id == rhs.slot_id; + } +}; + +struct HashOfIteratorKey { + size_t operator()(const IteratorKey& key) const { + size_t seed = 0; + seed = HashUtil::hash64(&key.tablet_id, sizeof(key.tablet_id), seed); + seed = HashUtil::hash64(&key.rowset_id.hi, sizeof(key.rowset_id.hi), seed); + seed = HashUtil::hash64(&key.rowset_id.mi, sizeof(key.rowset_id.mi), seed); + seed = HashUtil::hash64(&key.rowset_id.lo, sizeof(key.rowset_id.lo), seed); + seed = HashUtil::hash64(&key.segment_id, sizeof(key.segment_id), seed); + seed = HashUtil::hash64(&key.slot_id, sizeof(key.slot_id), seed); + return seed; + } +}; + +struct IteratorItem { + std::unique_ptr<ColumnIterator> iterator; + // for holding the reference of segment to avoid use after release + SegmentSharedPtr segment; +}; + Status PInternalServiceImpl::_multi_get(const PMultiGetRequest& request, PMultiGetResponse* response) { OlapReaderStatistics stats; @@ -1735,6 +1770,7 @@ Status PInternalServiceImpl::_multi_get(const PMultiGetRequest& request, full_read_schema.append_column(TabletColumn(column_pb)); } + std::unordered_map<IteratorKey, IteratorItem, HashOfIteratorKey> iterator_map; // read row by row for (size_t i = 0; i < request.row_locs_size(); ++i) { const auto& row_loc = request.row_locs(i); @@ -1773,8 +1809,8 @@ Status PInternalServiceImpl::_multi_get(const PMultiGetRequest& request, }, &acquire_segments_ms)); // find segment - auto it = std::find_if(segment_cache.get_segments().begin(), - segment_cache.get_segments().end(), + auto it = std::find_if(segment_cache.get_segments().cbegin(), + segment_cache.get_segments().cend(), [&row_loc](const segment_v2::SegmentSharedPtr& seg) { return seg->id() == row_loc.segment_id(); }); @@ -1802,37 +1838,28 @@ Status PInternalServiceImpl::_multi_get(const PMultiGetRequest& request, if (result_block.is_empty_column()) { result_block = vectorized::Block(desc.slots(), request.row_locs().size()); } + VLOG_DEBUG << "Read row location " + << fmt::format("{}, {}, {}, {}", row_location.tablet_id, + row_location.row_location.rowset_id.to_string(), + row_location.row_location.segment_id, + row_location.row_location.row_id); for (int x = 0; x < desc.slots().size(); ++x) { - int index = -1; - if (desc.slots()[x]->col_unique_id() >= 0) { - // light sc enabled - index = full_read_schema.field_index(desc.slots()[x]->col_unique_id()); - } else { - index = full_read_schema.field_index(desc.slots()[x]->col_name()); - } - if (index < 0) { - std::stringstream ss; - ss << "field name is invalid. field=" << desc.slots()[x]->col_name() - << ", field_name_to_index=" << full_read_schema.get_all_field_names(); - return Status::InternalError(ss.str()); - } - std::unique_ptr<segment_v2::ColumnIterator> column_iterator; + auto row_id = static_cast<segment_v2::rowid_t>(row_loc.ordinal_id()); vectorized::MutableColumnPtr column = result_block.get_by_position(x).column->assume_mutable(); - StorageReadOptions storage_read_opt; - storage_read_opt.io_ctx.reader_type = ReaderType::READER_QUERY; - RETURN_IF_ERROR(segment->new_column_iterator(full_read_schema.column(index), - &column_iterator, &storage_read_opt)); - segment_v2::ColumnIteratorOptions opt { - .use_page_cache = !config::disable_storage_page_cache, - .file_reader = segment->file_reader().get(), - .stats = &stats, - .io_ctx = io::IOContext {.reader_type = ReaderType::READER_QUERY}, - }; - static_cast<void>(column_iterator->init(opt)); - std::vector<segment_v2::rowid_t> single_row_loc { - static_cast<segment_v2::rowid_t>(row_loc.ordinal_id())}; - RETURN_IF_ERROR(column_iterator->read_by_rowids(single_row_loc.data(), 1, column)); + IteratorKey iterator_key {.tablet_id = tablet->tablet_id(), + .rowset_id = rowset_id, + .segment_id = row_loc.segment_id(), + .slot_id = desc.slots()[x]->id()}; + IteratorItem& iterator_item = iterator_map[iterator_key]; + if (iterator_item.segment == nullptr) { + // hold the reference + iterator_map[iterator_key].segment = segment; + } + segment = iterator_item.segment; + RETURN_IF_ERROR(segment->seek_and_read_by_rowid(full_read_schema, desc.slots()[x], + row_id, column, stats, + iterator_item.iterator)); } } // serialize block if not empty @@ -1852,11 +1879,13 @@ Status PInternalServiceImpl::_multi_get(const PMultiGetRequest& request, "hit_cached_pages:{}, total_pages_read:{}, compressed_bytes_read:{}, " "io_latency:{}ns, " "uncompressed_bytes_read:{}," + "bytes_read:{}," "acquire_tablet_ms:{}, acquire_rowsets_ms:{}, acquire_segments_ms:{}, " "lookup_row_data_ms:{}", stats.cached_pages_num, stats.total_pages_num, stats.compressed_bytes_read, - stats.io_ns, stats.uncompressed_bytes_read, acquire_tablet_ms, - acquire_rowsets_ms, acquire_segments_ms, lookup_row_data_ms); + stats.io_ns, stats.uncompressed_bytes_read, stats.bytes_read, + acquire_tablet_ms, acquire_rowsets_ms, acquire_segments_ms, + lookup_row_data_ms); return Status::OK(); } @@ -1865,6 +1894,7 @@ void PInternalServiceImpl::multiget_data(google::protobuf::RpcController* contro PMultiGetResponse* response, google::protobuf::Closure* done) { bool ret = _light_work_pool.try_offer([request, response, done, this]() { + signal::set_signal_task_id(request->query_id()); // multi get data by rowid MonotonicStopWatch watch; watch.start(); diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp index f44fc62ed98..782a2967f03 100644 --- a/be/src/vec/columns/column_object.cpp +++ b/be/src/vec/columns/column_object.cpp @@ -645,11 +645,12 @@ void ColumnObject::for_each_subcolumn(ColumnCallback callback) { } void ColumnObject::insert_from(const IColumn& src, size_t n) { - const auto& src_v = assert_cast<const ColumnObject&>(src); + const auto* src_v = check_and_get_column<ColumnObject>(src); // optimize when src and this column are scalar variant, since try_insert is inefficiency - if (src_v.is_scalar_variant() && is_scalar_variant() && - src_v.get_root_type()->equals(*get_root_type()) && src_v.is_finalized() && is_finalized()) { - assert_cast<ColumnNullable&>(*get_root()).insert_from(*src_v.get_root(), n); + if (src_v != nullptr && src_v->is_scalar_variant() && is_scalar_variant() && + src_v->get_root_type()->equals(*get_root_type()) && src_v->is_finalized() && + is_finalized()) { + assert_cast<ColumnNullable&>(*get_root()).insert_from(*src_v->get_root(), n); ++num_rows; return; } diff --git a/be/src/vec/exec/scan/new_olap_scanner.cpp b/be/src/vec/exec/scan/new_olap_scanner.cpp index c6e401f3f54..5566abec3ab 100644 --- a/be/src/vec/exec/scan/new_olap_scanner.cpp +++ b/be/src/vec/exec/scan/new_olap_scanner.cpp @@ -58,6 +58,7 @@ #include "vec/exec/scan/new_olap_scan_node.h" #include "vec/exec/scan/vscan_node.h" #include "vec/exprs/vexpr_context.h" +#include "vec/json/path_in_data.h" #include "vec/olap/block_reader.h" namespace doris::vectorized { @@ -411,16 +412,6 @@ Status NewOlapScanner::_init_tablet_reader_params( return Status::OK(); } -vectorized::PathInData NewOlapScanner::_build_path(SlotDescriptor* slot, - const std::string& root_name) { - PathInDataBuilder path_builder; - path_builder.append(root_name, false); - for (const std::string& path : slot->column_paths()) { - path_builder.append(path, false); - } - return path_builder.build(); -} - Status NewOlapScanner::_init_variant_columns() { auto& tablet_schema = _tablet_reader_params.tablet_schema; // Parent column has path info to distinction from each other @@ -434,16 +425,10 @@ Status NewOlapScanner::_init_variant_columns() { if (slot->type().is_variant_type()) { // Such columns are not exist in frontend schema info, so we need to // add them into tablet_schema for later column indexing. - TabletColumn subcol; - subcol.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT); - subcol.set_is_nullable(true); - subcol.set_unique_id(-1); - subcol.set_parent_unique_id(slot->col_unique_id()); - PathInData path = _build_path( - slot, tablet_schema->column_by_uid(slot->col_unique_id()).name_lower_case()); - subcol.set_path_info(path); - subcol.set_name(path.get_path()); - if (tablet_schema->field_index(path) < 0) { + TabletColumn subcol = TabletColumn::create_materialized_variant_column( + tablet_schema->column_by_uid(slot->col_unique_id()).name_lower_case(), + slot->column_paths(), slot->col_unique_id()); + if (tablet_schema->field_index(subcol.path_info()) < 0) { tablet_schema->append_column(subcol, TabletSchema::ColumnType::VARIANT); } } @@ -465,8 +450,9 @@ Status NewOlapScanner::_init_return_columns() { int32_t index = 0; auto& tablet_schema = _tablet_reader_params.tablet_schema; if (slot->type().is_variant_type()) { - index = tablet_schema->field_index(_build_path( - slot, tablet_schema->column_by_uid(slot->col_unique_id()).name_lower_case())); + index = tablet_schema->field_index(PathInData( + tablet_schema->column_by_uid(slot->col_unique_id()).name_lower_case(), + slot->column_paths())); } else { index = slot->col_unique_id() >= 0 ? tablet_schema->field_index(slot->col_unique_id()) : tablet_schema->field_index(slot->col_name()); diff --git a/be/src/vec/exec/scan/new_olap_scanner.h b/be/src/vec/exec/scan/new_olap_scanner.h index ae094b84f03..9eab71f3c34 100644 --- a/be/src/vec/exec/scan/new_olap_scanner.h +++ b/be/src/vec/exec/scan/new_olap_scanner.h @@ -93,7 +93,6 @@ private: const std::vector<FunctionFilter>& function_filters); [[nodiscard]] Status _init_return_columns(); - vectorized::PathInData _build_path(SlotDescriptor* slot, const std::string& root_name); [[nodiscard]] Status _init_variant_columns(); std::vector<OlapScanRange*> _key_ranges; diff --git a/be/src/vec/json/path_in_data.cpp b/be/src/vec/json/path_in_data.cpp index 1c02febd446..ae91b444994 100644 --- a/be/src/vec/json/path_in_data.cpp +++ b/be/src/vec/json/path_in_data.cpp @@ -46,11 +46,22 @@ PathInData::PathInData(const PathInData& other) : path(other.path) { build_parts(other.get_parts()); } +PathInData::PathInData(const std::string& root, const std::vector<std::string>& paths) { + PathInDataBuilder path_builder; + path_builder.append(root, false); + for (const std::string& path : paths) { + path_builder.append(path, false); + } + build_path(path_builder.get_parts()); + build_parts(path_builder.get_parts()); +} + PathInData::PathInData(const std::vector<std::string>& paths) { PathInDataBuilder path_builder; for (size_t i = 0; i < paths.size(); ++i) { path_builder.append(paths[i], false); } + build_path(path_builder.get_parts()); build_parts(path_builder.get_parts()); } diff --git a/be/src/vec/json/path_in_data.h b/be/src/vec/json/path_in_data.h index 6531a8bfc6a..267ab1fab3f 100644 --- a/be/src/vec/json/path_in_data.h +++ b/be/src/vec/json/path_in_data.h @@ -61,6 +61,7 @@ public: explicit PathInData(std::string_view path_); explicit PathInData(const Parts& parts_); explicit PathInData(const std::vector<std::string>& paths); + explicit PathInData(const std::string& root, const std::vector<std::string>& paths); PathInData(const PathInData& other); PathInData& operator=(const PathInData& other); static UInt128 get_parts_hash(const Parts& parts_); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java index 893d4c93239..dc3c2f52d81 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java @@ -829,8 +829,7 @@ public class SelectStmt extends QueryStmt { LOG.debug("only support duplicate key or MOW model"); return false; } - if (!olapTable.getEnableLightSchemaChange() || !Strings.isNullOrEmpty(olapTable.getStoragePolicy()) - || olapTable.hasVariantColumns()) { + if (!olapTable.getEnableLightSchemaChange()) { return false; } if (getOrderByElements() != null) { diff --git a/gensrc/proto/descriptors.proto b/gensrc/proto/descriptors.proto index 270199cc020..8762a78c0f4 100644 --- a/gensrc/proto/descriptors.proto +++ b/gensrc/proto/descriptors.proto @@ -38,6 +38,7 @@ message PSlotDescriptor { optional bool is_key = 12; optional bool is_auto_increment = 13; optional int32 col_type = 14 [default = 0]; + repeated string column_paths = 15; }; message PTupleDescriptor { diff --git a/regression-test/data/variant_p0/load.out b/regression-test/data/variant_p0/load.out index e48aafe2e0d..7def94d2fad 100644 --- a/regression-test/data/variant_p0/load.out +++ b/regression-test/data/variant_p0/load.out @@ -143,7 +143,7 @@ [123] -- !sql_25 -- -50000 55000.00000000013 6150000 +50000 55000.000000002256 6150000 -- !sql_26 -- 5000 @@ -227,16 +227,16 @@ \N \N \N -- !sql_36_2 -- -7702 {"payload":{"commits":[{"sha":"348743fdce27d3f3c97e366381b1b7b371fc4510","author":{"email":"9b7a0973fc99779f7e1822eb7336ff5d28bd2...@users.noreply.github.com","name":"Łukasz Magiera"},"message":"Create README.md","distinct":true,"url":"https://api.github.com/repos/magik6k/BitBuffer/commits/348743fdce27d3f3c97e366381b1b7b371fc4510"}],"before":"4e150694dacd35e7d5cda4e9f6a2aedb1d35db36","head":"348743fdce27d3f3c97e366381b1b7b371fc4510","size":1,"push_id":536752118,"ref":"refs/heads/mas [...] -7701 {"payload":{"pages":[{"page_name":"Android Development Basics","title":"Android Development Basics","summary":null,"action":"edited","sha":"e4e947a4f29b1a06f560ac1e62bd3bf183e434b6","html_url":"https://github.com/wllmtrng/wllmtrng.github.io/wiki/Android-Development-Basics"}]},"created_at":"2015-01-01T00:59:58Z","id":"2489395760","public":1,"actor":{"gravatar_id":"","url":"https://api.github.com/users/wllmtrng","id":1335855,"login":"wllmtrng","avatar_url":"https://avatars.githubuserc [...] -7700 {"payload":{"forkee":{"svn_url":"https://github.com/WangXYZ/TBC","pushed_at":"2014-12-24T18:26:11Z","issues_url":"https://api.github.com/repos/WangXYZ/TBC/issues{/number}","events_url":"https://api.github.com/repos/WangXYZ/TBC/events","labels_url":"https://api.github.com/repos/WangXYZ/TBC/labels{/name}","releases_url":"https://api.github.com/repos/WangXYZ/TBC/releases{/id}","keys_url":"https://api.github.com/repos/WangXYZ/TBC/keys{/key_id}","stargazers_url":"https://api.github.com/r [...] -7699 {"payload":{"description":"Game using the MS Kinect that scans the user and creates a 3D version of them.","ref":"master","ref_type":"branch","pusher_type":"user","master_branch":"master"},"created_at":"2015-01-01T00:59:57Z","id":"2489395752","public":1,"actor":{"gravatar_id":"","url":"https://api.github.com/users/chrisjimenez","id":3580593,"login":"chrisjimenez","avatar_url":"https://avatars.githubusercontent.com/u/3580593?"},"repo":{"url":"https://api.github.com/repos/chrisjimenez [...] -7698 {"payload":{"issue":{"url":"https://api.github.com/repos/antonioortegajr/beerfind.me/issues/12","created_at":"2015-01-01T00:59:56Z","body":"The bee that are being searched for currently should also have logos and descriptions from the API.","events_url":"https://api.github.com/repos/antonioortegajr/beerfind.me/issues/12/events","labels_url":"https://api.github.com/repos/antonioortegajr/beerfind.me/issues/12/labels{/name}","locked":0,"state":"open","comments_url":"https://api.github. [...] -7697 {"payload":{"action":"closed","pull_request":{"review_comments_url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/pulls/534/comments","url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs/pulls/534","body":"","patch_url":"https://github.com/XLabs/Xamarin-Forms-Labs/pull/534.patch","head":{"ref":"master","user":{"starred_url":"https://api.github.com/users/bokmadsen/starred{/owner}{/repo}","url":"https://api.github.com/users/bokmadsen","repos_url":"https://api.github.c [...] -7696 {"payload":{"commits":[{"sha":"c356eac8fa4409a1aa794ab07244250a862da03b","author":{"email":"de8898f6c55e335aa0a2b937fae65fb756ee0...@gmail.com","name":"Zaryafaraj"},"message":"reserve modal styles","distinct":true,"url":"https://api.github.com/repos/Fathalian/Guild/commits/c356eac8fa4409a1aa794ab07244250a862da03b"},{"sha":"9a773fc648910c7a2499401f44a6e5f71eb30460","author":{"email":"de8898f6c55e335aa0a2b937fae65fb756ee0...@gmail.com","name":"Zaryafaraj"},"message":"Merge branch 'mas [...] -7695 {"payload":{"commits":[{"sha":"17cf9ea07662d74b2d5bac1cf976f5853a63920d","author":{"email":"af59d1d6805404937849f05dafd5a911888fd...@gmail.com","name":"Kevin Hofmaenner"},"message":"minor UI tweak","distinct":true,"url":"https://api.github.com/repos/kevinhofmaenner/blackjack/commits/17cf9ea07662d74b2d5bac1cf976f5853a63920d"}],"before":"6b4f5af0a2a70bbe00cd88281823b436d506ff2d","head":"17cf9ea07662d74b2d5bac1cf976f5853a63920d","size":1,"push_id":536752112,"ref":"refs/heads/master","d [...] -7694 {"payload":{"commits":[{"sha":"aa8ec0de017c8003758776739facc819e33ac7c9","author":{"email":"e0e04a2320844b42511db0376599e166ab5bd...@gmail.com","name":"Runhang Li"},"message":"finish all hamms test","distinct":true,"url":"https://api.github.com/repos/marklrh/ocaml-cohttp-test/commits/aa8ec0de017c8003758776739facc819e33ac7c9"}],"before":"2bb795fc30fc15ab85bcc10f894bfcfa118d69bc","head":"aa8ec0de017c8003758776739facc819e33ac7c9","size":1,"push_id":536752109,"ref":"refs/heads/master"," [...] -7693 {"payload":{"commits":[{"sha":"0b27989723feb4b183d5f87813fef146b670b1d1","author":{"email":"7c5a0c567b5584a13fde407456875318a5bec...@gmail.com","name":"Raphaël Benitte"},"message":"Add time clock widget + Improve stylus theming","distinct":true,"url":"https://api.github.com/repos/plouc/mozaik/commits/0b27989723feb4b183d5f87813fef146b670b1d1"}],"before":"7ddf17eb74fff5adad6e2feb72bcb627d4644800","head":"0b27989723feb4b183d5f87813fef146b670b1d1","size":1,"push_id":536752104,"ref":"ref [...] +7702 {"url":"https://api.github.com/repos/magik6k/BitBuffer","id":28677864,"name":"magik6k/BitBuffer"} +7701 {"url":"https://api.github.com/repos/wllmtrng/wllmtrng.github.io","id":18089434,"name":"wllmtrng/wllmtrng.github.io"} +7700 {"url":"https://api.github.com/repos/ACID-Scripts/TBC","id":23724137,"name":"ACID-Scripts/TBC"} +7699 {"url":"https://api.github.com/repos/chrisjimenez/IpsePuppet","id":28678128,"name":"chrisjimenez/IpsePuppet"} +7698 {"url":"https://api.github.com/repos/antonioortegajr/beerfind.me","id":28573267,"name":"antonioortegajr/beerfind.me"} +7697 {"url":"https://api.github.com/repos/XLabs/Xamarin-Forms-Labs","id":20463939,"name":"XLabs/Xamarin-Forms-Labs"} +7696 {"url":"https://api.github.com/repos/Fathalian/Guild","id":26995510,"name":"Fathalian/Guild"} +7695 {"url":"https://api.github.com/repos/kevinhofmaenner/blackjack","id":28652857,"name":"kevinhofmaenner/blackjack"} +7694 {"url":"https://api.github.com/repos/marklrh/ocaml-cohttp-test","id":27470715,"name":"marklrh/ocaml-cohttp-test"} +7693 {"url":"https://api.github.com/repos/plouc/mozaik","id":28498113,"name":"plouc/mozaik"} -- !sql_36_3 -- 2 {"updated_value":10} diff --git a/regression-test/data/variant_p0/sql/gh_data.out b/regression-test/data/variant_p0/sql/gh_data.out index 7fc5e10dbe1..4c34edabe48 100644 --- a/regression-test/data/variant_p0/sql/gh_data.out +++ b/regression-test/data/variant_p0/sql/gh_data.out @@ -3,31 +3,37 @@ 0 -- !gh_data_2 -- -5000 +0 -- !gh_data_3 -- +0 + +-- !gh_data_4 -- +5000 + +-- !gh_data_5 -- leonardomso/33-js-concepts 3 ytdl-org/youtube-dl 3 Bogdanp/neko 2 bminossi/AllVideoPocsFromHackerOne 2 disclose/diodata 2 --- !gh_data_4 -- +-- !gh_data_6 -- 14690758274 --- !gh_data_5 -- +-- !gh_data_7 -- 73453762334584 --- !gh_data_6 -- +-- !gh_data_8 -- 457806339 --- !gh_data_7 -- +-- !gh_data_9 -- 0 --- !gh_data_8 -- +-- !gh_data_10 -- 19829 --- !gh_data_9 -- +-- !gh_data_11 -- 49390617 64890096 10696700 @@ -39,19 +45,19 @@ disclose/diodata 2 42386044 73801003 --- !gh_data_10 -- -27 {"payload":{"action":"started"},"created_at":"2021-01-02T16:37:27Z","id":"14690746717","public":1,"actor":{"gravatar_id":"","display_login":"sergdudnik","url":"https://api.github.com/users/sergdudnik","id":16341546,"login":"sergdudnik","avatar_url":"https://avatars.githubusercontent.com/u/16341546?"},"repo":{"url":"https://api.github.com/repos/leonardomso/33-js-concepts","id":147350463,"name":"leonardomso/33-js-concepts"},"type":"WatchEvent"} -36 {"payload":{"action":"started"},"created_at":"2021-01-02T16:37:27Z","id":"14690746732","public":1,"actor":{"gravatar_id":"","display_login":"juliusHuelsmann","url":"https://api.github.com/users/juliusHuelsmann","id":9212314,"login":"juliusHuelsmann","avatar_url":"https://avatars.githubusercontent.com/u/9212314?"},"repo":{"url":"https://api.github.com/repos/odeke-em/drive","id":26109545,"name":"odeke-em/drive"},"type":"WatchEvent"} -46 {"payload":{"action":"started"},"created_at":"2021-01-02T16:37:27Z","id":"14690746749","org":{"gravatar_id":"","url":"https://api.github.com/orgs/GO-LiFE","id":38434522,"login":"GO-LiFE","avatar_url":"https://avatars.githubusercontent.com/u/38434522?"},"public":1,"actor":{"gravatar_id":"","display_login":"okbean","url":"https://api.github.com/users/okbean","id":75969386,"login":"okbean","avatar_url":"https://avatars.githubusercontent.com/u/75969386?"},"repo":{"url":"https://api.github [...] -56 {"payload":{"action":"started"},"created_at":"2021-01-02T16:37:28Z","id":"14690746773","public":1,"actor":{"gravatar_id":"","display_login":"PWDream","url":"https://api.github.com/users/PWDream","id":4903755,"login":"PWDream","avatar_url":"https://avatars.githubusercontent.com/u/4903755?"},"repo":{"url":"https://api.github.com/repos/MrXujiang/h5-Dooring","id":289417971,"name":"MrXujiang/h5-Dooring"},"type":"WatchEvent"} -86 {"payload":{"action":"started"},"created_at":"2021-01-02T16:37:29Z","id":"14690746843","public":1,"actor":{"gravatar_id":"","display_login":"Gui-Yom","url":"https://api.github.com/users/Gui-Yom","id":25181283,"login":"Gui-Yom","avatar_url":"https://avatars.githubusercontent.com/u/25181283?"},"repo":{"url":"https://api.github.com/repos/redsaph/cleartext","id":106453399,"name":"redsaph/cleartext"},"type":"WatchEvent"} -98 {"payload":{"action":"started"},"created_at":"2021-01-02T16:37:29Z","id":"14690746866","org":{"gravatar_id":"","url":"https://api.github.com/orgs/sherlock-project","id":48293496,"login":"sherlock-project","avatar_url":"https://avatars.githubusercontent.com/u/48293496?"},"public":1,"actor":{"gravatar_id":"","display_login":"humaidk2","url":"https://api.github.com/users/humaidk2","id":12982026,"login":"humaidk2","avatar_url":"https://avatars.githubusercontent.com/u/12982026?"},"repo":{" [...] -101 {"payload":{"action":"started"},"created_at":"2021-01-02T16:37:29Z","id":"14690746870","public":1,"actor":{"gravatar_id":"","display_login":"hasantezcan","url":"https://api.github.com/users/hasantezcan","id":32804505,"login":"hasantezcan","avatar_url":"https://avatars.githubusercontent.com/u/32804505?"},"repo":{"url":"https://api.github.com/repos/okandavut/react-spotify-nowplaying","id":326215605,"name":"okandavut/react-spotify-nowplaying"},"type":"WatchEvent"} -112 {"payload":{"action":"started"},"created_at":"2021-01-02T16:37:30Z","id":"14690746899","public":1,"actor":{"gravatar_id":"","display_login":"nicholas-robertson","url":"https://api.github.com/users/nicholas-robertson","id":17681331,"login":"nicholas-robertson","avatar_url":"https://avatars.githubusercontent.com/u/17681331?"},"repo":{"url":"https://api.github.com/repos/sentriz/gonic","id":178435468,"name":"sentriz/gonic"},"type":"WatchEvent"} -122 {"payload":{"action":"started"},"created_at":"2021-01-02T16:37:30Z","id":"14690746914","org":{"gravatar_id":"","url":"https://api.github.com/orgs/netlify-labs","id":47546088,"login":"netlify-labs","avatar_url":"https://avatars.githubusercontent.com/u/47546088?"},"public":1,"actor":{"gravatar_id":"","display_login":"javaniecampbell","url":"https://api.github.com/users/javaniecampbell","id":1676496,"login":"javaniecampbell","avatar_url":"https://avatars.githubusercontent.com/u/1676496? [...] -169 {"payload":{"action":"started"},"created_at":"2021-01-02T16:37:32Z","id":"14690747028","org":{"gravatar_id":"","url":"https://api.github.com/orgs/microsoft","id":6154722,"login":"microsoft","avatar_url":"https://avatars.githubusercontent.com/u/6154722?"},"public":1,"actor":{"gravatar_id":"","display_login":"Yxnt","url":"https://api.github.com/users/Yxnt","id":10323352,"login":"Yxnt","avatar_url":"https://avatars.githubusercontent.com/u/10323352?"},"repo":{"url":"https://api.github.co [...] +-- !gh_data_12 -- +27 {"url":"https://api.github.com/repos/leonardomso/33-js-concepts","id":147350463,"name":"leonardomso/33-js-concepts"} +36 {"url":"https://api.github.com/repos/odeke-em/drive","id":26109545,"name":"odeke-em/drive"} +46 {"url":"https://api.github.com/repos/GO-LiFE/GoFIT_SDK_Android","id":141905736,"name":"GO-LiFE/GoFIT_SDK_Android"} +56 {"url":"https://api.github.com/repos/MrXujiang/h5-Dooring","id":289417971,"name":"MrXujiang/h5-Dooring"} +86 {"url":"https://api.github.com/repos/redsaph/cleartext","id":106453399,"name":"redsaph/cleartext"} +98 {"url":"https://api.github.com/repos/sherlock-project/sherlock","id":162998479,"name":"sherlock-project/sherlock"} +101 {"url":"https://api.github.com/repos/okandavut/react-spotify-nowplaying","id":326215605,"name":"okandavut/react-spotify-nowplaying"} +112 {"url":"https://api.github.com/repos/sentriz/gonic","id":178435468,"name":"sentriz/gonic"} +122 {"url":"https://api.github.com/repos/netlify-labs/react-netlify-identity-widget","id":182606378,"name":"netlify-labs/react-netlify-identity-widget"} +169 {"url":"https://api.github.com/repos/microsoft/BotBuilder-Samples","id":68730444,"name":"microsoft/BotBuilder-Samples"} --- !gh_data_11 -- +-- !gh_data_13 -- 2051941 1 10696700 1 32271952 2 @@ -63,3 +69,27 @@ disclose/diodata 2 64890096 1 73801003 1 +-- !gh_data_14 -- +27 14690746717 WatchEvent leonardomso/33-js-concepts +36 14690746732 WatchEvent odeke-em/drive +46 14690746749 WatchEvent GO-LiFE/GoFIT_SDK_Android +56 14690746773 WatchEvent MrXujiang/h5-Dooring +86 14690746843 WatchEvent redsaph/cleartext +98 14690746866 WatchEvent sherlock-project/sherlock +101 14690746870 WatchEvent okandavut/react-spotify-nowplaying +112 14690746899 WatchEvent sentriz/gonic +122 14690746914 WatchEvent netlify-labs/react-netlify-identity-widget +169 14690747028 WatchEvent microsoft/BotBuilder-Samples + +-- !gh_data_15 -- +user +user +user +user +user +user +user +user +user +user + diff --git a/regression-test/suites/variant_p0/load.groovy b/regression-test/suites/variant_p0/load.groovy index 0501d3ce52f..b23ae19d12f 100644 --- a/regression-test/suites/variant_p0/load.groovy +++ b/regression-test/suites/variant_p0/load.groovy @@ -287,7 +287,7 @@ suite("regression_test_variant", "variant_type"){ // 12. streamload remote file table_name = "logdata" create_table.call(table_name, "4") - sql "set enable_two_phase_read_opt = false;" + // sql "set enable_two_phase_read_opt = false;" // no sparse columns set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "1") load_json_data.call(table_name, """${getS3Url() + '/load/logdata.json'}""") @@ -340,7 +340,7 @@ suite("regression_test_variant", "variant_type"){ qt_sql_36_1 "select cast(v:a as int), cast(v:b as int), cast(v:c as int) from ${table_name} order by k limit 10" sql "DELETE FROM ${table_name} WHERE k=1" sql "select * from ${table_name}" - qt_sql_36_2 "select * from ${table_name} where k > 3 order by k desc limit 10" + qt_sql_36_2 """select k, json_extract(cast(v as text), "\$.repo") from ${table_name} where k > 3 order by k desc limit 10""" sql "insert into ${table_name} select * from ${table_name}" sql """UPDATE ${table_name} set v = '{"updated_value" : 10}' where k = 2""" qt_sql_36_3 """select * from ${table_name} where k = 2""" @@ -386,13 +386,13 @@ suite("regression_test_variant", "variant_type"){ sql """insert into ${table_name} values (2, "abe", '{"c" : 1}')""" sql """insert into ${table_name} values (3, "abd", '{"d" : 1}')""" sql "delete from ${table_name} where k in (select k from variant_mow where k in (1, 2))" - qt_sql_38 "select * from ${table_name} order by k" + qt_sql_38 "select * from ${table_name} order by k limit 10" // read text from sparse col set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "0.95") sql """insert into sparse_columns select 0, '{"a": 1123, "b" : [123, {"xx" : 1}], "c" : {"c" : 456, "d" : null, "e" : 7.111}, "zzz" : null, "oooo" : {"akakaka" : null, "xxxx" : {"xxx" : 123}}}' as json_str union all select 0, '{"a" : 1234, "xxxx" : "kaana", "ddd" : {"aaa" : 123, "mxmxm" : [456, "789"]}}' as json_str from numbers("number" = "4096") limit 4096 ;""" - qt_sql_31 """select cast(v:xxxx as string) from sparse_columns where cast(v:xxxx as string) != 'null' limit 1;""" + qt_sql_31 """select cast(v:xxxx as string) from sparse_columns where cast(v:xxxx as string) != 'null' order by k limit 1;""" sql "truncate table sparse_columns" set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "0.95") } finally { diff --git a/regression-test/suites/variant_p0/sql/gh_data.sql b/regression-test/suites/variant_p0/sql/gh_data.sql index 72a3adb01cf..2fc0ccac14a 100644 --- a/regression-test/suites/variant_p0/sql/gh_data.sql +++ b/regression-test/suites/variant_p0/sql/gh_data.sql @@ -1,4 +1,6 @@ set exec_mem_limit=8G; +set enable_two_phase_read_opt = true; +set topn_opt_limit_threshold = 1024; SELECT count() from ghdata; SELECT cast(v:repo.name as string), count() AS stars FROM ghdata WHERE cast(v:type as string) = 'WatchEvent' GROUP BY cast(v:repo.name as string) ORDER BY stars DESC, cast(v:repo.name as string) LIMIT 5; SELECT max(cast(cast(v:`id` as string) as bigint)) FROM ghdata; @@ -6,8 +8,9 @@ SELECT sum(cast(cast(v:`id` as string) as bigint)) FROM ghdata; SELECT sum(cast(v:payload.member.id as bigint)) FROM ghdata; SELECT sum(cast(v:payload.pull_request.milestone.creator.site_admin as bigint)) FROM ghdata; SELECT sum(length(v:payload.pull_request.base.repo.html_url)) FROM ghdata; --- SELECT v:payload.commits.author.name FROM ghdata ORDER BY k LIMIT 10; SELECT v:payload.member.id FROM ghdata where cast(v:payload.member.id as string) is not null ORDER BY k LIMIT 10; -- select k, v:payload.commits.author.name AS name, e FROM ghdata as t lateral view explode(cast(v:payload.commits.author.name as array<string>)) tm1 as e order by k limit 5; -select k, v from ghdata WHERE cast(v:type as string) = 'WatchEvent' order by k limit 10; -SELECT cast(v:payload.member.id as bigint), count() FROM ghdata where cast(v:payload.member.id as bigint) is not null group by cast(v:payload.member.id as bigint) order by 1, 2 desc LIMIT 10; \ No newline at end of file +select k, json_extract(v, '$.repo') from ghdata WHERE cast(v:type as string) = 'WatchEvent' order by k limit 10; +SELECT cast(v:payload.member.id as bigint), count() FROM ghdata where cast(v:payload.member.id as bigint) is not null group by cast(v:payload.member.id as bigint) order by 1, 2 desc LIMIT 10; +select k, cast(v:`id` as string), cast(v:type as string), cast(v:repo.name as string) from ghdata WHERE cast(v:type as string) = 'WatchEvent' order by k limit 10; +SELECT cast(v:payload.pusher_type as text) FROM ghdata where cast(v:payload.pusher_type as text) is not null ORDER BY k LIMIT 10; \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org