Copilot commented on code in PR #63765: URL: https://github.com/apache/doris/pull/63765#discussion_r3314811677
########## be/src/exprs/function/variant_inverted_index_search.cpp: ########## @@ -0,0 +1,720 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "exprs/function/variant_inverted_index_search.h" + +#include <CLucene/config/repl_wchar.h> +#include <fmt/format.h> +#include <glog/logging.h> + +#include <limits> +#include <memory> +#include <utility> + +#include "common/config.h" +#include "common/exception.h" +#include "common/logging.h" +#include "exprs/function/function_search.h" +#include "exprs/vexpr_context.h" +#include "runtime/runtime_state.h" +#include "storage/index/index_file_reader.h" +#include "storage/index/inverted/analyzer/analyzer.h" +#include "storage/index/inverted/inverted_index_compound_reader.h" +#include "storage/index/inverted/inverted_index_parser.h" +#include "storage/index/inverted/inverted_index_searcher.h" +#include "storage/index/inverted/query_v2/bit_set_query/bit_set_scorer.h" +#include "storage/index/inverted/query_v2/doc_set.h" +#include "storage/index/inverted/query_v2/scorer.h" +#include "storage/index/inverted/query_v2/term_query/term_query.h" +#include "storage/index/inverted/query_v2/weight.h" +#include "storage/index/inverted/util/string_helper.h" +#include "storage/segment/segment.h" +#include "storage/segment/variant/nested_group_path.h" +#include "storage/segment/variant/nested_group_provider.h" +#include "storage/segment/variant/variant_column_reader.h" +#include "storage/utils.h" +#include "util/debug_points.h" +#include "util/time.h" + +namespace doris { + +namespace query_v2 = segment_v2::inverted_index::query_v2; + +namespace { + +void add_search_binding_diagnostic(const std::shared_ptr<IndexQueryContext>& context, + const std::string& diagnostic) { + VLOG_DEBUG << diagnostic; + if (context != nullptr && context->stats != nullptr) { + context->stats->inverted_index_stats.add_binding_diagnostic(diagnostic); + } +} + +} // namespace + +FieldReaderResolver::FieldReaderResolver( + const std::unordered_map<std::string, IndexFieldNameAndTypePair>& data_type_with_names, + const std::unordered_map<std::string, IndexIterator*>& iterators, + std::shared_ptr<IndexQueryContext> context, + const std::vector<TSearchFieldBinding>& field_bindings) + : _data_type_with_names(data_type_with_names), + _iterators(iterators), + _context(std::move(context)), + _field_bindings(field_bindings) { + for (const auto& binding : _field_bindings) { + if (binding.__isset.is_variant_subcolumn && binding.is_variant_subcolumn) { + _variant_subcolumn_fields.insert(binding.field_name); + } + _field_binding_map[binding.field_name] = &binding; + } +} + +Status FieldReaderResolver::resolve(const std::string& field_name, + InvertedIndexQueryType query_type, + FieldReaderBinding* binding) { + DCHECK(binding != nullptr); + + const bool is_variant_sub = is_variant_subcolumn(field_name); + + auto data_it = _data_type_with_names.find(field_name); + if (data_it == _data_type_with_names.end()) { + if (is_variant_sub) { + add_search_binding_diagnostic( + _context, + fmt::format("[VariantSearchBinding] phase=field_resolve result=no_metadata " + "logical_field={} query_type={} reason=field_not_found", + field_name, query_type_to_string(query_type))); + *binding = FieldReaderBinding(); + return Status::OK(); + } + return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>( + "field '{}' not found in inverted index metadata", field_name); + } + + const auto& stored_field_name = data_it->second.first; + const auto binding_key = binding_key_for(stored_field_name, query_type); + + auto cache_it = _cache.find(binding_key); + if (cache_it != _cache.end()) { + *binding = cache_it->second; + return Status::OK(); + } + + auto iterator_it = _iterators.find(field_name); + if (iterator_it == _iterators.end() || iterator_it->second == nullptr) { + if (is_variant_sub) { + add_search_binding_diagnostic( + _context, + fmt::format("[VariantSearchBinding] phase=field_resolve result=no_iterator " + "logical_field={} stored_field={} query_type={} " + "reason=iterator_not_found", + field_name, stored_field_name, query_type_to_string(query_type))); + *binding = FieldReaderBinding(); + return Status::OK(); + } + return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>( + "iterator not found for field '{}'", field_name); + } + + auto* inverted_iterator = dynamic_cast<InvertedIndexIterator*>(iterator_it->second); + if (inverted_iterator == nullptr) { + return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>( + "iterator for field '{}' is not InvertedIndexIterator", field_name); + } + + InvertedIndexQueryType effective_query_type = query_type; + const auto& column_type = data_it->second.second; + const bool is_text_field = + column_type != nullptr && is_string_type(column_type->get_storage_field_type()); + auto fb_it = _field_binding_map.find(field_name); + std::string analyzer_key; + if (is_text_field && is_variant_sub && fb_it != _field_binding_map.end() && + fb_it->second->__isset.index_properties && !fb_it->second->index_properties.empty()) { Review Comment: This guard prevents analyzer-key binding for nested Variant string subcolumns whose `column_type` is an `ARRAY` wrapper (for example `array<nullable<string>>`). The previous resolver used FE-provided `index_properties` for all Variant subcolumns; with this check, analyzer-based Variant bindings can fall back to an empty analyzer key and pick the wrong reader when multiple inherited indexes exist. Consider unwrapping nullable/array types before the text check, or applying the FE `index_properties` path for Variant subcolumns regardless of the outer storage type. ########## be/src/exprs/vsearch.cpp: ########## @@ -252,6 +303,11 @@ Status VSearchExpr::evaluate_inverted_index(VExprContext* context, uint32_t segm if (bundle.iterators.empty() && !is_nested_query) { Review Comment: When every referenced field is an unmaterialized Variant subcolumn, `collect_search_inputs` leaves `bundle.iterators` empty and this branch returns an empty result before `FunctionSearch` can build the new UNKNOWN query/null bitmap for the missing fields. That makes predicates on missing Variant paths evaluate as no matches instead of using the all-rows null bitmap semantics added in this PR. Let Variant field bindings fall through to `FunctionSearch`, or synthesize the same UNKNOWN result here when the search parameters contain Variant subcolumn bindings. ########## be/test/exprs/function/function_search_test.cpp: ########## @@ -1630,6 +1736,363 @@ TEST_F(FunctionSearchTest, TestBuildLeafQueryPhrase) { EXPECT_NE(phrase_query, nullptr); } +TEST_F(FunctionSearchTest, TestBuildLeafQueryVariantMissingFieldReturnsUnknown) { + TSearchClause clause; + clause.clause_type = "TERM"; + clause.field_name = "var.items.missing"; + clause.value = "value"; + clause.__isset.field_name = true; + clause.__isset.value = true; + + auto context = std::make_shared<IndexQueryContext>(); + + std::unordered_map<std::string, IndexFieldNameAndTypePair> data_type_with_names; + std::unordered_map<std::string, IndexIterator*> iterators; + + TSearchFieldBinding field_binding; + field_binding.field_name = "var.items.missing"; + field_binding.is_variant_subcolumn = true; + field_binding.__isset.is_variant_subcolumn = true; + + FieldReaderResolver resolver(data_type_with_names, iterators, context, {field_binding}); + bool mapper_called = false; + resolver.set_leaf_query_mapper([&](const std::string& logical_field, + inverted_index::query_v2::QueryPtr* query) -> Status { + mapper_called = true; + EXPECT_EQ("var.items.missing", logical_field); + EXPECT_NE(nullptr, query); + EXPECT_NE(nullptr, *query); + return Status::OK(); + }); + + inverted_index::query_v2::QueryPtr out; + std::string out_binding_key; + Status st = function_search->build_leaf_query(clause, context, resolver, &out, &out_binding_key, + "OR", 0, 5); + ASSERT_TRUE(st.ok()); + ASSERT_NE(out, nullptr); + EXPECT_TRUE(mapper_called); + EXPECT_TRUE(out_binding_key.empty()); + + auto weight = out->weight(false); + ASSERT_NE(weight, nullptr); + inverted_index::query_v2::QueryExecutionContext exec_ctx; + exec_ctx.segment_num_rows = 5; + auto scorer = weight->scorer(exec_ctx); + ASSERT_NE(scorer, nullptr); + EXPECT_EQ(inverted_index::query_v2::TERMINATED, scorer->doc()); + ASSERT_TRUE(scorer->has_null_bitmap()); + const auto* null_bitmap = scorer->get_null_bitmap(); + ASSERT_NE(null_bitmap, nullptr); + EXPECT_EQ(5u, null_bitmap->cardinality()); +} + +TEST_F(FunctionSearchTest, TestFieldReaderResolverVariantSubcolumnWithMissingIterator) { + auto context = std::make_shared<IndexQueryContext>(); + + std::unordered_map<std::string, IndexFieldNameAndTypePair> data_type_with_names; + data_type_with_names.emplace( + "var.items.level", + IndexFieldNameAndTypePair {"1.var.items.level", std::make_shared<DataTypeInt32>()}); + std::unordered_map<std::string, IndexIterator*> iterators; + + TSearchFieldBinding field_binding; + field_binding.field_name = "var.items.level"; + field_binding.is_variant_subcolumn = true; + field_binding.__isset.is_variant_subcolumn = true; + + FieldReaderResolver resolver(data_type_with_names, iterators, context, {field_binding}); + FieldReaderBinding binding; + auto status = + resolver.resolve("var.items.level", InvertedIndexQueryType::EQUAL_QUERY, &binding); + + ASSERT_TRUE(status.ok()); + EXPECT_FALSE(binding.is_bound()); + EXPECT_TRUE(resolver.binding_cache().empty()); +} + +TEST_F(FunctionSearchTest, TestFieldReaderResolverVariantSubcolumnWithReaderSelectionError) { + auto context = std::make_shared<IndexQueryContext>(); + + std::unordered_map<std::string, IndexFieldNameAndTypePair> data_type_with_names; + data_type_with_names.emplace( + "var.items.level", + IndexFieldNameAndTypePair {"1.var.items.level", std::make_shared<DataTypeInt32>()}); + + segment_v2::InvertedIndexIterator iterator; + std::unordered_map<std::string, IndexIterator*> iterators; + iterators["var.items.level"] = &iterator; + + TSearchFieldBinding field_binding; + field_binding.field_name = "var.items.level"; + field_binding.is_variant_subcolumn = true; + field_binding.__isset.is_variant_subcolumn = true; + + FieldReaderResolver resolver(data_type_with_names, iterators, context, {field_binding}); + FieldReaderBinding binding; + auto status = + resolver.resolve("var.items.level", InvertedIndexQueryType::EQUAL_QUERY, &binding); + + EXPECT_FALSE(status.ok()); + EXPECT_EQ(ErrorCode::INVERTED_INDEX_NO_TERMS, status.code()); +} + +TEST_F(FunctionSearchTest, + TestFieldReaderResolverVariantAnalyzerUpgradeWithMissingIndexFileReader) { + auto context = std::make_shared<IndexQueryContext>(); + + std::map<std::string, std::string> properties; + properties[INVERTED_INDEX_PARSER_KEY] = INVERTED_INDEX_PARSER_STANDARD; + auto index_meta = make_test_inverted_index(11, properties); + auto reader = std::make_shared<DummyInvertedIndexReader>( + &index_meta, nullptr, segment_v2::InvertedIndexReaderType::FULLTEXT); + + segment_v2::InvertedIndexIterator iterator; + iterator.add_reader(segment_v2::InvertedIndexReaderType::FULLTEXT, reader); + + std::unordered_map<std::string, IndexFieldNameAndTypePair> data_type_with_names; + data_type_with_names.emplace( + "var.items.msg", + IndexFieldNameAndTypePair {"1.var.items.msg", std::make_shared<DataTypeString>()}); + std::unordered_map<std::string, IndexIterator*> iterators; + iterators["var.items.msg"] = &iterator; + + TSearchFieldBinding field_binding; + field_binding.field_name = "var.items.msg"; + field_binding.is_variant_subcolumn = true; + field_binding.index_properties = properties; + field_binding.__isset.is_variant_subcolumn = true; + field_binding.__isset.index_properties = true; + + FieldReaderResolver resolver(data_type_with_names, iterators, context, {field_binding}); + FieldReaderBinding binding; + auto status = resolver.resolve("var.items.msg", InvertedIndexQueryType::EQUAL_QUERY, &binding); + + EXPECT_FALSE(status.ok()); + EXPECT_EQ(ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND, status.code()); +} + +TEST_F(FunctionSearchTest, TestFieldReaderResolverVariantBkdDirectReader) { + auto context = std::make_shared<IndexQueryContext>(); + + auto index_meta = make_test_inverted_index(12); + auto index_file_reader = std::make_shared<segment_v2::IndexFileReader>( + nullptr, "/tmp/variant_direct_idx", InvertedIndexStorageFormatPB::V2); + auto reader = std::make_shared<DummyInvertedIndexReader>( + &index_meta, index_file_reader, segment_v2::InvertedIndexReaderType::BKD); + + segment_v2::InvertedIndexIterator iterator; + iterator.add_reader(segment_v2::InvertedIndexReaderType::BKD, reader); + + std::unordered_map<std::string, IndexFieldNameAndTypePair> data_type_with_names; + data_type_with_names.emplace( + "var.items.level", + IndexFieldNameAndTypePair {"1.var.items.level", std::make_shared<DataTypeInt32>()}); + std::unordered_map<std::string, IndexIterator*> iterators; + iterators["var.items.level"] = &iterator; + + TSearchFieldBinding field_binding; + field_binding.field_name = "var.items.level"; + field_binding.is_variant_subcolumn = true; + field_binding.__isset.is_variant_subcolumn = true; + + FieldReaderResolver resolver(data_type_with_names, iterators, context, {field_binding}); + FieldReaderBinding binding; + auto status = + resolver.resolve("var.items.level", InvertedIndexQueryType::EQUAL_QUERY, &binding); + + ASSERT_TRUE(status.ok()) << status.to_string(); + EXPECT_TRUE(binding.use_direct_index_reader()); + EXPECT_EQ(reader, binding.inverted_reader); + EXPECT_EQ("var.items.level", binding.logical_field_name); + EXPECT_EQ("1.var.items.level", binding.stored_field_name); + EXPECT_EQ(InvertedIndexQueryType::EQUAL_QUERY, binding.query_type); + + const auto& cache = resolver.binding_cache(); + ASSERT_EQ(1u, cache.size()); + EXPECT_TRUE(cache.begin()->second.use_direct_index_reader()); +} + +TEST_F(FunctionSearchTest, TestBuildLeafQueryDirectUnknownClauseUsesLeafMapper) { + TSearchClause clause; + clause.clause_type = "PHRASE"; + clause.field_name = "var.items.active"; + clause.value = "true"; + clause.__isset.field_name = true; + clause.__isset.value = true; + + auto context = std::make_shared<IndexQueryContext>(); + + std::unordered_map<std::string, IndexFieldNameAndTypePair> data_type_with_names; + auto bool_type = + std::make_shared<DataTypeArray>(make_nullable(std::make_shared<DataTypeBool>())); + data_type_with_names.emplace("var.items.active", + IndexFieldNameAndTypePair {"1.var.items.active", bool_type}); + + RecordingIndexIterator iterator; + std::unordered_map<std::string, IndexIterator*> iterators; + iterators["var.items.active"] = &iterator; + + FieldReaderResolver resolver(data_type_with_names, iterators, context); + + FieldReaderBinding binding; + binding.logical_field_name = "var.items.active"; + binding.stored_field_name = "1.var.items.active"; + binding.stored_field_wstr = L"1.var.items.active"; + binding.column_type = bool_type; + binding.query_type = InvertedIndexQueryType::MATCH_PHRASE_QUERY; + binding.state = SearchFieldBindingState::BOUND; + TabletIndex index_meta; + binding.inverted_reader = std::make_shared<DummyInvertedIndexReader>(&index_meta); + + std::string key = resolver.binding_key_for("1.var.items.active", + InvertedIndexQueryType::MATCH_PHRASE_QUERY); + binding.binding_key = key; + resolver._cache[key] = binding; + + bool mapper_called = false; + resolver.set_leaf_query_mapper([&](const std::string& logical_field, + inverted_index::query_v2::QueryPtr* query) -> Status { + mapper_called = true; + EXPECT_EQ("var.items.active", logical_field); + EXPECT_NE(nullptr, query); + EXPECT_NE(nullptr, *query); + return Status::OK(); + }); + + inverted_index::query_v2::QueryPtr out; + std::string out_binding_key; + Status st = function_search->build_leaf_query(clause, context, resolver, &out, &out_binding_key, + "OR", 0, 4); + ASSERT_TRUE(st.ok()); + ASSERT_NE(out, nullptr); + EXPECT_TRUE(mapper_called); + EXPECT_EQ(key, out_binding_key); + EXPECT_TRUE(iterator.last_column_name.empty()); + + auto weight = out->weight(false); + ASSERT_NE(weight, nullptr); + inverted_index::query_v2::QueryExecutionContext exec_ctx; + exec_ctx.segment_num_rows = 4; + auto scorer = weight->scorer(exec_ctx); + ASSERT_NE(scorer, nullptr); + EXPECT_EQ(inverted_index::query_v2::TERMINATED, scorer->doc()); + ASSERT_TRUE(scorer->has_null_bitmap()); + const auto* null_bitmap = scorer->get_null_bitmap(); + ASSERT_NE(null_bitmap, nullptr); + EXPECT_EQ(4u, null_bitmap->cardinality()); +} + +TEST_F(FunctionSearchTest, TestBuildLeafQueryVariantBoolUsesDirectIndexReader) { + TSearchClause clause; + clause.clause_type = "TERM"; + clause.field_name = "var.items.active"; + clause.value = "true"; + clause.__isset.field_name = true; + clause.__isset.value = true; + + auto context = std::make_shared<IndexQueryContext>(); + + std::unordered_map<std::string, IndexFieldNameAndTypePair> data_type_with_names; + auto bool_type = + std::make_shared<DataTypeArray>(make_nullable(std::make_shared<DataTypeBool>())); + data_type_with_names.emplace("var.items.active", + IndexFieldNameAndTypePair {"1.var.items.active", bool_type}); + + RecordingIndexIterator iterator; + std::unordered_map<std::string, IndexIterator*> iterators; + iterators["var.items.active"] = &iterator; + + FieldReaderResolver resolver(data_type_with_names, iterators, context); + + FieldReaderBinding binding; + binding.logical_field_name = "var.items.active"; + binding.stored_field_name = "1.var.items.active"; + binding.stored_field_wstr = L"1.var.items.active"; + binding.column_type = bool_type; + binding.query_type = InvertedIndexQueryType::MATCH_ANY_QUERY; + binding.state = SearchFieldBindingState::BOUND; + TabletIndex index_meta; + binding.inverted_reader = std::make_shared<DummyInvertedIndexReader>(&index_meta); + + std::string key = + resolver.binding_key_for("1.var.items.active", InvertedIndexQueryType::MATCH_ANY_QUERY); + binding.binding_key = key; + resolver._cache[key] = binding; + + inverted_index::query_v2::QueryPtr out; + std::string out_binding_key; + Status st = function_search->build_leaf_query(clause, context, resolver, &out, &out_binding_key, + "OR", 0, 10); + ASSERT_TRUE(st.ok()); + ASSERT_NE(out, nullptr); + EXPECT_EQ(key, out_binding_key); Review Comment: This test also accesses `FieldReaderResolver::binding_key_for` and `_cache`, which are private members in the new resolver header. As written, the unit test cannot compile; use a public/test-only helper or construct the binding through `resolve()` instead. ########## be/test/exprs/function/function_search_test.cpp: ########## @@ -1630,6 +1736,363 @@ TEST_F(FunctionSearchTest, TestBuildLeafQueryPhrase) { EXPECT_NE(phrase_query, nullptr); } +TEST_F(FunctionSearchTest, TestBuildLeafQueryVariantMissingFieldReturnsUnknown) { + TSearchClause clause; + clause.clause_type = "TERM"; + clause.field_name = "var.items.missing"; + clause.value = "value"; + clause.__isset.field_name = true; + clause.__isset.value = true; + + auto context = std::make_shared<IndexQueryContext>(); + + std::unordered_map<std::string, IndexFieldNameAndTypePair> data_type_with_names; + std::unordered_map<std::string, IndexIterator*> iterators; + + TSearchFieldBinding field_binding; + field_binding.field_name = "var.items.missing"; + field_binding.is_variant_subcolumn = true; + field_binding.__isset.is_variant_subcolumn = true; + + FieldReaderResolver resolver(data_type_with_names, iterators, context, {field_binding}); + bool mapper_called = false; + resolver.set_leaf_query_mapper([&](const std::string& logical_field, + inverted_index::query_v2::QueryPtr* query) -> Status { + mapper_called = true; + EXPECT_EQ("var.items.missing", logical_field); + EXPECT_NE(nullptr, query); + EXPECT_NE(nullptr, *query); + return Status::OK(); + }); + + inverted_index::query_v2::QueryPtr out; + std::string out_binding_key; + Status st = function_search->build_leaf_query(clause, context, resolver, &out, &out_binding_key, + "OR", 0, 5); + ASSERT_TRUE(st.ok()); + ASSERT_NE(out, nullptr); + EXPECT_TRUE(mapper_called); + EXPECT_TRUE(out_binding_key.empty()); + + auto weight = out->weight(false); + ASSERT_NE(weight, nullptr); + inverted_index::query_v2::QueryExecutionContext exec_ctx; + exec_ctx.segment_num_rows = 5; + auto scorer = weight->scorer(exec_ctx); + ASSERT_NE(scorer, nullptr); + EXPECT_EQ(inverted_index::query_v2::TERMINATED, scorer->doc()); + ASSERT_TRUE(scorer->has_null_bitmap()); + const auto* null_bitmap = scorer->get_null_bitmap(); + ASSERT_NE(null_bitmap, nullptr); + EXPECT_EQ(5u, null_bitmap->cardinality()); +} + +TEST_F(FunctionSearchTest, TestFieldReaderResolverVariantSubcolumnWithMissingIterator) { + auto context = std::make_shared<IndexQueryContext>(); + + std::unordered_map<std::string, IndexFieldNameAndTypePair> data_type_with_names; + data_type_with_names.emplace( + "var.items.level", + IndexFieldNameAndTypePair {"1.var.items.level", std::make_shared<DataTypeInt32>()}); + std::unordered_map<std::string, IndexIterator*> iterators; + + TSearchFieldBinding field_binding; + field_binding.field_name = "var.items.level"; + field_binding.is_variant_subcolumn = true; + field_binding.__isset.is_variant_subcolumn = true; + + FieldReaderResolver resolver(data_type_with_names, iterators, context, {field_binding}); + FieldReaderBinding binding; + auto status = + resolver.resolve("var.items.level", InvertedIndexQueryType::EQUAL_QUERY, &binding); + + ASSERT_TRUE(status.ok()); + EXPECT_FALSE(binding.is_bound()); + EXPECT_TRUE(resolver.binding_cache().empty()); +} + +TEST_F(FunctionSearchTest, TestFieldReaderResolverVariantSubcolumnWithReaderSelectionError) { + auto context = std::make_shared<IndexQueryContext>(); + + std::unordered_map<std::string, IndexFieldNameAndTypePair> data_type_with_names; + data_type_with_names.emplace( + "var.items.level", + IndexFieldNameAndTypePair {"1.var.items.level", std::make_shared<DataTypeInt32>()}); + + segment_v2::InvertedIndexIterator iterator; + std::unordered_map<std::string, IndexIterator*> iterators; + iterators["var.items.level"] = &iterator; + + TSearchFieldBinding field_binding; + field_binding.field_name = "var.items.level"; + field_binding.is_variant_subcolumn = true; + field_binding.__isset.is_variant_subcolumn = true; + + FieldReaderResolver resolver(data_type_with_names, iterators, context, {field_binding}); + FieldReaderBinding binding; + auto status = + resolver.resolve("var.items.level", InvertedIndexQueryType::EQUAL_QUERY, &binding); + + EXPECT_FALSE(status.ok()); + EXPECT_EQ(ErrorCode::INVERTED_INDEX_NO_TERMS, status.code()); +} + +TEST_F(FunctionSearchTest, + TestFieldReaderResolverVariantAnalyzerUpgradeWithMissingIndexFileReader) { + auto context = std::make_shared<IndexQueryContext>(); + + std::map<std::string, std::string> properties; + properties[INVERTED_INDEX_PARSER_KEY] = INVERTED_INDEX_PARSER_STANDARD; + auto index_meta = make_test_inverted_index(11, properties); + auto reader = std::make_shared<DummyInvertedIndexReader>( + &index_meta, nullptr, segment_v2::InvertedIndexReaderType::FULLTEXT); + + segment_v2::InvertedIndexIterator iterator; + iterator.add_reader(segment_v2::InvertedIndexReaderType::FULLTEXT, reader); + + std::unordered_map<std::string, IndexFieldNameAndTypePair> data_type_with_names; + data_type_with_names.emplace( + "var.items.msg", + IndexFieldNameAndTypePair {"1.var.items.msg", std::make_shared<DataTypeString>()}); + std::unordered_map<std::string, IndexIterator*> iterators; + iterators["var.items.msg"] = &iterator; + + TSearchFieldBinding field_binding; + field_binding.field_name = "var.items.msg"; + field_binding.is_variant_subcolumn = true; + field_binding.index_properties = properties; + field_binding.__isset.is_variant_subcolumn = true; + field_binding.__isset.index_properties = true; + + FieldReaderResolver resolver(data_type_with_names, iterators, context, {field_binding}); + FieldReaderBinding binding; + auto status = resolver.resolve("var.items.msg", InvertedIndexQueryType::EQUAL_QUERY, &binding); + + EXPECT_FALSE(status.ok()); + EXPECT_EQ(ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND, status.code()); +} + +TEST_F(FunctionSearchTest, TestFieldReaderResolverVariantBkdDirectReader) { + auto context = std::make_shared<IndexQueryContext>(); + + auto index_meta = make_test_inverted_index(12); + auto index_file_reader = std::make_shared<segment_v2::IndexFileReader>( + nullptr, "/tmp/variant_direct_idx", InvertedIndexStorageFormatPB::V2); + auto reader = std::make_shared<DummyInvertedIndexReader>( + &index_meta, index_file_reader, segment_v2::InvertedIndexReaderType::BKD); + + segment_v2::InvertedIndexIterator iterator; + iterator.add_reader(segment_v2::InvertedIndexReaderType::BKD, reader); + + std::unordered_map<std::string, IndexFieldNameAndTypePair> data_type_with_names; + data_type_with_names.emplace( + "var.items.level", + IndexFieldNameAndTypePair {"1.var.items.level", std::make_shared<DataTypeInt32>()}); + std::unordered_map<std::string, IndexIterator*> iterators; + iterators["var.items.level"] = &iterator; + + TSearchFieldBinding field_binding; + field_binding.field_name = "var.items.level"; + field_binding.is_variant_subcolumn = true; + field_binding.__isset.is_variant_subcolumn = true; + + FieldReaderResolver resolver(data_type_with_names, iterators, context, {field_binding}); + FieldReaderBinding binding; + auto status = + resolver.resolve("var.items.level", InvertedIndexQueryType::EQUAL_QUERY, &binding); + + ASSERT_TRUE(status.ok()) << status.to_string(); + EXPECT_TRUE(binding.use_direct_index_reader()); + EXPECT_EQ(reader, binding.inverted_reader); + EXPECT_EQ("var.items.level", binding.logical_field_name); + EXPECT_EQ("1.var.items.level", binding.stored_field_name); + EXPECT_EQ(InvertedIndexQueryType::EQUAL_QUERY, binding.query_type); + + const auto& cache = resolver.binding_cache(); + ASSERT_EQ(1u, cache.size()); + EXPECT_TRUE(cache.begin()->second.use_direct_index_reader()); +} + +TEST_F(FunctionSearchTest, TestBuildLeafQueryDirectUnknownClauseUsesLeafMapper) { + TSearchClause clause; + clause.clause_type = "PHRASE"; + clause.field_name = "var.items.active"; + clause.value = "true"; + clause.__isset.field_name = true; + clause.__isset.value = true; + + auto context = std::make_shared<IndexQueryContext>(); + + std::unordered_map<std::string, IndexFieldNameAndTypePair> data_type_with_names; + auto bool_type = + std::make_shared<DataTypeArray>(make_nullable(std::make_shared<DataTypeBool>())); + data_type_with_names.emplace("var.items.active", + IndexFieldNameAndTypePair {"1.var.items.active", bool_type}); + + RecordingIndexIterator iterator; + std::unordered_map<std::string, IndexIterator*> iterators; + iterators["var.items.active"] = &iterator; + + FieldReaderResolver resolver(data_type_with_names, iterators, context); + + FieldReaderBinding binding; + binding.logical_field_name = "var.items.active"; + binding.stored_field_name = "1.var.items.active"; + binding.stored_field_wstr = L"1.var.items.active"; + binding.column_type = bool_type; + binding.query_type = InvertedIndexQueryType::MATCH_PHRASE_QUERY; + binding.state = SearchFieldBindingState::BOUND; + TabletIndex index_meta; + binding.inverted_reader = std::make_shared<DummyInvertedIndexReader>(&index_meta); + + std::string key = resolver.binding_key_for("1.var.items.active", + InvertedIndexQueryType::MATCH_PHRASE_QUERY); + binding.binding_key = key; + resolver._cache[key] = binding; + + bool mapper_called = false; + resolver.set_leaf_query_mapper([&](const std::string& logical_field, + inverted_index::query_v2::QueryPtr* query) -> Status { + mapper_called = true; + EXPECT_EQ("var.items.active", logical_field); + EXPECT_NE(nullptr, query); + EXPECT_NE(nullptr, *query); + return Status::OK(); + }); + + inverted_index::query_v2::QueryPtr out; + std::string out_binding_key; + Status st = function_search->build_leaf_query(clause, context, resolver, &out, &out_binding_key, + "OR", 0, 4); + ASSERT_TRUE(st.ok()); + ASSERT_NE(out, nullptr); + EXPECT_TRUE(mapper_called); + EXPECT_EQ(key, out_binding_key); + EXPECT_TRUE(iterator.last_column_name.empty()); + + auto weight = out->weight(false); + ASSERT_NE(weight, nullptr); + inverted_index::query_v2::QueryExecutionContext exec_ctx; + exec_ctx.segment_num_rows = 4; + auto scorer = weight->scorer(exec_ctx); + ASSERT_NE(scorer, nullptr); + EXPECT_EQ(inverted_index::query_v2::TERMINATED, scorer->doc()); + ASSERT_TRUE(scorer->has_null_bitmap()); + const auto* null_bitmap = scorer->get_null_bitmap(); + ASSERT_NE(null_bitmap, nullptr); + EXPECT_EQ(4u, null_bitmap->cardinality()); +} + +TEST_F(FunctionSearchTest, TestBuildLeafQueryVariantBoolUsesDirectIndexReader) { + TSearchClause clause; + clause.clause_type = "TERM"; + clause.field_name = "var.items.active"; + clause.value = "true"; + clause.__isset.field_name = true; + clause.__isset.value = true; + + auto context = std::make_shared<IndexQueryContext>(); + + std::unordered_map<std::string, IndexFieldNameAndTypePair> data_type_with_names; + auto bool_type = + std::make_shared<DataTypeArray>(make_nullable(std::make_shared<DataTypeBool>())); + data_type_with_names.emplace("var.items.active", + IndexFieldNameAndTypePair {"1.var.items.active", bool_type}); + + RecordingIndexIterator iterator; + std::unordered_map<std::string, IndexIterator*> iterators; + iterators["var.items.active"] = &iterator; + + FieldReaderResolver resolver(data_type_with_names, iterators, context); + + FieldReaderBinding binding; + binding.logical_field_name = "var.items.active"; + binding.stored_field_name = "1.var.items.active"; + binding.stored_field_wstr = L"1.var.items.active"; + binding.column_type = bool_type; + binding.query_type = InvertedIndexQueryType::MATCH_ANY_QUERY; + binding.state = SearchFieldBindingState::BOUND; + TabletIndex index_meta; + binding.inverted_reader = std::make_shared<DummyInvertedIndexReader>(&index_meta); + + std::string key = + resolver.binding_key_for("1.var.items.active", InvertedIndexQueryType::MATCH_ANY_QUERY); + binding.binding_key = key; + resolver._cache[key] = binding; + + inverted_index::query_v2::QueryPtr out; + std::string out_binding_key; + Status st = function_search->build_leaf_query(clause, context, resolver, &out, &out_binding_key, + "OR", 0, 10); + ASSERT_TRUE(st.ok()); + ASSERT_NE(out, nullptr); + EXPECT_EQ(key, out_binding_key); + EXPECT_EQ("1.var.items.active", iterator.last_column_name); + EXPECT_EQ(FieldType::OLAP_FIELD_TYPE_BOOL, iterator.last_column_storage_type); + EXPECT_EQ(InvertedIndexQueryType::EQUAL_QUERY, iterator.last_query_type); + EXPECT_EQ(TYPE_BOOLEAN, iterator.last_query_value_type); + EXPECT_TRUE(iterator.last_bool_value); + + auto weight = out->weight(false); + ASSERT_NE(weight, nullptr); + inverted_index::query_v2::QueryExecutionContext exec_ctx; + exec_ctx.segment_num_rows = 10; + auto scorer = weight->scorer(exec_ctx, out_binding_key); + ASSERT_NE(scorer, nullptr); + EXPECT_EQ(3u, scorer->doc()); +} + +TEST_F(FunctionSearchTest, TestBuildLeafQueryVariantNestedIntUsesDirectIndexReader) { + TSearchClause clause; + clause.clause_type = "TERM"; + clause.field_name = "var.items.flags.level"; + clause.value = "3"; + clause.__isset.field_name = true; + clause.__isset.value = true; + + auto context = std::make_shared<IndexQueryContext>(); + + std::unordered_map<std::string, IndexFieldNameAndTypePair> data_type_with_names; + auto int_type = std::make_shared<DataTypeArray>(make_nullable( + std::make_shared<DataTypeArray>(make_nullable(std::make_shared<DataTypeInt32>())))); + data_type_with_names.emplace("var.items.flags.level", + IndexFieldNameAndTypePair {"1.var.items.flags.level", int_type}); + + RecordingIndexIterator iterator; + std::unordered_map<std::string, IndexIterator*> iterators; + iterators["var.items.flags.level"] = &iterator; + + FieldReaderResolver resolver(data_type_with_names, iterators, context); + + FieldReaderBinding binding; + binding.logical_field_name = "var.items.flags.level"; + binding.stored_field_name = "1.var.items.flags.level"; + binding.stored_field_wstr = L"1.var.items.flags.level"; + binding.column_type = int_type; + binding.query_type = InvertedIndexQueryType::MATCH_ANY_QUERY; + binding.state = SearchFieldBindingState::BOUND; + TabletIndex index_meta; + binding.inverted_reader = std::make_shared<DummyInvertedIndexReader>(&index_meta); + + std::string key = resolver.binding_key_for("1.var.items.flags.level", + InvertedIndexQueryType::MATCH_ANY_QUERY); + binding.binding_key = key; + resolver._cache[key] = binding; + + inverted_index::query_v2::QueryPtr out; + std::string out_binding_key; + Status st = function_search->build_leaf_query(clause, context, resolver, &out, &out_binding_key, + "OR", 0, 10); + ASSERT_TRUE(st.ok()); + ASSERT_NE(out, nullptr); + EXPECT_EQ(key, out_binding_key); Review Comment: This repeats the private-member access to `binding_key_for` and `_cache`, so the test target will fail to compile. Please replace it with a public/test helper or a resolver setup that populates the cache through `resolve()`. ########## be/test/exprs/function/function_search_test.cpp: ########## @@ -1630,6 +1736,363 @@ TEST_F(FunctionSearchTest, TestBuildLeafQueryPhrase) { EXPECT_NE(phrase_query, nullptr); } +TEST_F(FunctionSearchTest, TestBuildLeafQueryVariantMissingFieldReturnsUnknown) { + TSearchClause clause; + clause.clause_type = "TERM"; + clause.field_name = "var.items.missing"; + clause.value = "value"; + clause.__isset.field_name = true; + clause.__isset.value = true; + + auto context = std::make_shared<IndexQueryContext>(); + + std::unordered_map<std::string, IndexFieldNameAndTypePair> data_type_with_names; + std::unordered_map<std::string, IndexIterator*> iterators; + + TSearchFieldBinding field_binding; + field_binding.field_name = "var.items.missing"; + field_binding.is_variant_subcolumn = true; + field_binding.__isset.is_variant_subcolumn = true; + + FieldReaderResolver resolver(data_type_with_names, iterators, context, {field_binding}); + bool mapper_called = false; + resolver.set_leaf_query_mapper([&](const std::string& logical_field, + inverted_index::query_v2::QueryPtr* query) -> Status { + mapper_called = true; + EXPECT_EQ("var.items.missing", logical_field); + EXPECT_NE(nullptr, query); + EXPECT_NE(nullptr, *query); + return Status::OK(); + }); + + inverted_index::query_v2::QueryPtr out; + std::string out_binding_key; + Status st = function_search->build_leaf_query(clause, context, resolver, &out, &out_binding_key, + "OR", 0, 5); + ASSERT_TRUE(st.ok()); + ASSERT_NE(out, nullptr); + EXPECT_TRUE(mapper_called); + EXPECT_TRUE(out_binding_key.empty()); + + auto weight = out->weight(false); + ASSERT_NE(weight, nullptr); + inverted_index::query_v2::QueryExecutionContext exec_ctx; + exec_ctx.segment_num_rows = 5; + auto scorer = weight->scorer(exec_ctx); + ASSERT_NE(scorer, nullptr); + EXPECT_EQ(inverted_index::query_v2::TERMINATED, scorer->doc()); + ASSERT_TRUE(scorer->has_null_bitmap()); + const auto* null_bitmap = scorer->get_null_bitmap(); + ASSERT_NE(null_bitmap, nullptr); + EXPECT_EQ(5u, null_bitmap->cardinality()); +} + +TEST_F(FunctionSearchTest, TestFieldReaderResolverVariantSubcolumnWithMissingIterator) { + auto context = std::make_shared<IndexQueryContext>(); + + std::unordered_map<std::string, IndexFieldNameAndTypePair> data_type_with_names; + data_type_with_names.emplace( + "var.items.level", + IndexFieldNameAndTypePair {"1.var.items.level", std::make_shared<DataTypeInt32>()}); + std::unordered_map<std::string, IndexIterator*> iterators; + + TSearchFieldBinding field_binding; + field_binding.field_name = "var.items.level"; + field_binding.is_variant_subcolumn = true; + field_binding.__isset.is_variant_subcolumn = true; + + FieldReaderResolver resolver(data_type_with_names, iterators, context, {field_binding}); + FieldReaderBinding binding; + auto status = + resolver.resolve("var.items.level", InvertedIndexQueryType::EQUAL_QUERY, &binding); + + ASSERT_TRUE(status.ok()); + EXPECT_FALSE(binding.is_bound()); + EXPECT_TRUE(resolver.binding_cache().empty()); +} + +TEST_F(FunctionSearchTest, TestFieldReaderResolverVariantSubcolumnWithReaderSelectionError) { + auto context = std::make_shared<IndexQueryContext>(); + + std::unordered_map<std::string, IndexFieldNameAndTypePair> data_type_with_names; + data_type_with_names.emplace( + "var.items.level", + IndexFieldNameAndTypePair {"1.var.items.level", std::make_shared<DataTypeInt32>()}); + + segment_v2::InvertedIndexIterator iterator; + std::unordered_map<std::string, IndexIterator*> iterators; + iterators["var.items.level"] = &iterator; + + TSearchFieldBinding field_binding; + field_binding.field_name = "var.items.level"; + field_binding.is_variant_subcolumn = true; + field_binding.__isset.is_variant_subcolumn = true; + + FieldReaderResolver resolver(data_type_with_names, iterators, context, {field_binding}); + FieldReaderBinding binding; + auto status = + resolver.resolve("var.items.level", InvertedIndexQueryType::EQUAL_QUERY, &binding); + + EXPECT_FALSE(status.ok()); + EXPECT_EQ(ErrorCode::INVERTED_INDEX_NO_TERMS, status.code()); +} + +TEST_F(FunctionSearchTest, + TestFieldReaderResolverVariantAnalyzerUpgradeWithMissingIndexFileReader) { + auto context = std::make_shared<IndexQueryContext>(); + + std::map<std::string, std::string> properties; + properties[INVERTED_INDEX_PARSER_KEY] = INVERTED_INDEX_PARSER_STANDARD; + auto index_meta = make_test_inverted_index(11, properties); + auto reader = std::make_shared<DummyInvertedIndexReader>( + &index_meta, nullptr, segment_v2::InvertedIndexReaderType::FULLTEXT); + + segment_v2::InvertedIndexIterator iterator; + iterator.add_reader(segment_v2::InvertedIndexReaderType::FULLTEXT, reader); + + std::unordered_map<std::string, IndexFieldNameAndTypePair> data_type_with_names; + data_type_with_names.emplace( + "var.items.msg", + IndexFieldNameAndTypePair {"1.var.items.msg", std::make_shared<DataTypeString>()}); + std::unordered_map<std::string, IndexIterator*> iterators; + iterators["var.items.msg"] = &iterator; + + TSearchFieldBinding field_binding; + field_binding.field_name = "var.items.msg"; + field_binding.is_variant_subcolumn = true; + field_binding.index_properties = properties; + field_binding.__isset.is_variant_subcolumn = true; + field_binding.__isset.index_properties = true; + + FieldReaderResolver resolver(data_type_with_names, iterators, context, {field_binding}); + FieldReaderBinding binding; + auto status = resolver.resolve("var.items.msg", InvertedIndexQueryType::EQUAL_QUERY, &binding); + + EXPECT_FALSE(status.ok()); + EXPECT_EQ(ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND, status.code()); +} + +TEST_F(FunctionSearchTest, TestFieldReaderResolverVariantBkdDirectReader) { + auto context = std::make_shared<IndexQueryContext>(); + + auto index_meta = make_test_inverted_index(12); + auto index_file_reader = std::make_shared<segment_v2::IndexFileReader>( + nullptr, "/tmp/variant_direct_idx", InvertedIndexStorageFormatPB::V2); + auto reader = std::make_shared<DummyInvertedIndexReader>( + &index_meta, index_file_reader, segment_v2::InvertedIndexReaderType::BKD); + + segment_v2::InvertedIndexIterator iterator; + iterator.add_reader(segment_v2::InvertedIndexReaderType::BKD, reader); + + std::unordered_map<std::string, IndexFieldNameAndTypePair> data_type_with_names; + data_type_with_names.emplace( + "var.items.level", + IndexFieldNameAndTypePair {"1.var.items.level", std::make_shared<DataTypeInt32>()}); + std::unordered_map<std::string, IndexIterator*> iterators; + iterators["var.items.level"] = &iterator; + + TSearchFieldBinding field_binding; + field_binding.field_name = "var.items.level"; + field_binding.is_variant_subcolumn = true; + field_binding.__isset.is_variant_subcolumn = true; + + FieldReaderResolver resolver(data_type_with_names, iterators, context, {field_binding}); + FieldReaderBinding binding; + auto status = + resolver.resolve("var.items.level", InvertedIndexQueryType::EQUAL_QUERY, &binding); + + ASSERT_TRUE(status.ok()) << status.to_string(); + EXPECT_TRUE(binding.use_direct_index_reader()); + EXPECT_EQ(reader, binding.inverted_reader); + EXPECT_EQ("var.items.level", binding.logical_field_name); + EXPECT_EQ("1.var.items.level", binding.stored_field_name); + EXPECT_EQ(InvertedIndexQueryType::EQUAL_QUERY, binding.query_type); + + const auto& cache = resolver.binding_cache(); + ASSERT_EQ(1u, cache.size()); + EXPECT_TRUE(cache.begin()->second.use_direct_index_reader()); +} + +TEST_F(FunctionSearchTest, TestBuildLeafQueryDirectUnknownClauseUsesLeafMapper) { + TSearchClause clause; + clause.clause_type = "PHRASE"; + clause.field_name = "var.items.active"; + clause.value = "true"; + clause.__isset.field_name = true; + clause.__isset.value = true; + + auto context = std::make_shared<IndexQueryContext>(); + + std::unordered_map<std::string, IndexFieldNameAndTypePair> data_type_with_names; + auto bool_type = + std::make_shared<DataTypeArray>(make_nullable(std::make_shared<DataTypeBool>())); + data_type_with_names.emplace("var.items.active", + IndexFieldNameAndTypePair {"1.var.items.active", bool_type}); + + RecordingIndexIterator iterator; + std::unordered_map<std::string, IndexIterator*> iterators; + iterators["var.items.active"] = &iterator; + + FieldReaderResolver resolver(data_type_with_names, iterators, context); + + FieldReaderBinding binding; + binding.logical_field_name = "var.items.active"; + binding.stored_field_name = "1.var.items.active"; + binding.stored_field_wstr = L"1.var.items.active"; + binding.column_type = bool_type; + binding.query_type = InvertedIndexQueryType::MATCH_PHRASE_QUERY; + binding.state = SearchFieldBindingState::BOUND; + TabletIndex index_meta; + binding.inverted_reader = std::make_shared<DummyInvertedIndexReader>(&index_meta); + + std::string key = resolver.binding_key_for("1.var.items.active", + InvertedIndexQueryType::MATCH_PHRASE_QUERY); + binding.binding_key = key; + resolver._cache[key] = binding; Review Comment: These test setup lines call `binding_key_for` and mutate `_cache`, but both members are declared private in `FieldReaderResolver`, so this test will not compile. Please expose a test helper/public insertion API, make the test drive `resolve()` with a suitable iterator, or add a friend fixture instead of accessing private members directly. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
