This is an automated email from the ASF dual-hosted git repository.

airborne pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new 08e7d920dbc branch-2.1: [fix](index build) Correct inverted index behavior after dynamically adding a column #48389 (#48546)
08e7d920dbc is described below

commit 08e7d920dbc640f7ff2083ea16057aea7b176a7f
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Wed Mar 5 09:26:54 2025 +0800

    branch-2.1: [fix](index build) Correct inverted index behavior after dynamically adding a column #48389 (#48546)

    Cherry-picked from #48389

    ---------

    Co-authored-by: airborne12 <jiang...@selectdb.com>
---
 be/src/olap/task/index_builder.cpp                 |   11 +-
 be/src/olap/task/index_builder.h                   |   18 +-
 be/test/olap/index_builder_test.cpp                | 2401 ++++++++++++++++++++
 .../test_index_change_on_new_column.out            |  Bin 245 -> 611 bytes
 .../test_index_change_on_new_column.groovy         |   90 +-
 5 files changed, 2505 insertions(+), 15 deletions(-)

diff --git a/be/src/olap/task/index_builder.cpp b/be/src/olap/task/index_builder.cpp
index 94feffcc059..a4646a90d04 100644
--- a/be/src/olap/task/index_builder.cpp
+++ b/be/src/olap/task/index_builder.cpp
@@ -389,14 +389,13 @@ Status IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta
 }
 }
+ // DO NOT forget inverted_index_file_writer for the segment, otherwise, original inverted index will be deleted.
+ _inverted_index_file_writers.emplace(seg_ptr->id(), + std::move(inverted_index_file_writer)); if (return_columns.empty()) { // no columns to read - break; + continue; } - - _inverted_index_file_writers.emplace(seg_ptr->id(), - std::move(inverted_index_file_writer)); - // create iterator for each segment StorageReadOptions read_options; OlapReaderStatistics stats; @@ -749,10 +748,12 @@ Status IndexBuilder::modify_rowsets(const Merger::Statistics* stats) { RETURN_IF_ERROR(_tablet->modify_rowsets(_output_rowsets, _input_rowsets, true)); } +#ifndef BE_TEST { std::shared_lock rlock(_tablet->get_header_lock()); _tablet->save_meta(); } +#endif return Status::OK(); } diff --git a/be/src/olap/task/index_builder.h b/be/src/olap/task/index_builder.h index 70fdd6cba2f..2e7924b119e 100644 --- a/be/src/olap/task/index_builder.h +++ b/be/src/olap/task/index_builder.h @@ -43,16 +43,16 @@ public: IndexBuilder(const TabletSharedPtr& tablet, const std::vector<TColumn>& columns, const std::vector<doris::TOlapTableIndex>& alter_inverted_indexes, bool is_drop_op = false); - ~IndexBuilder(); + virtual ~IndexBuilder(); - Status init(); - Status do_build_inverted_index(); - Status update_inverted_index_info(); - Status handle_inverted_index_data(); - Status handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta, - std::vector<segment_v2::SegmentSharedPtr>& segments); - Status modify_rowsets(const Merger::Statistics* stats = nullptr); - void gc_output_rowset(); + virtual Status init(); + virtual Status do_build_inverted_index(); + virtual Status update_inverted_index_info(); + virtual Status handle_inverted_index_data(); + virtual Status handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta, + std::vector<segment_v2::SegmentSharedPtr>& segments); + virtual Status modify_rowsets(const Merger::Statistics* stats = nullptr); + virtual void gc_output_rowset(); private: Status _write_inverted_index_data(TabletSchemaSPtr tablet_schema, int32_t segment_idx, diff --git 
a/be/test/olap/index_builder_test.cpp b/be/test/olap/index_builder_test.cpp new file mode 100644 index 00000000000..d29f09a25c7 --- /dev/null +++ b/be/test/olap/index_builder_test.cpp @@ -0,0 +1,2401 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "olap/task/index_builder.h" + +#include <gmock/gmock.h> +#include <gtest/gtest.h> + +#include "olap/olap_common.h" +#include "olap/rowset/beta_rowset.h" +#include "olap/rowset/rowset_factory.h" +#include "olap/rowset/rowset_writer_context.h" +#include "olap/storage_engine.h" +#include "olap/tablet_fwd.h" +#include "olap/tablet_schema.h" + +namespace doris { +using namespace testing; + +class IndexBuilderTest : public ::testing::Test { +protected: + void SetUp() override { + char buffer[MAX_PATH_LEN]; + ASSERT_NE(getcwd(buffer, MAX_PATH_LEN), nullptr); + _current_dir = std::string(buffer); + _absolute_dir = _current_dir + "/" + std::string(dest_dir); + ASSERT_TRUE(io::global_local_filesystem()->delete_directory(_absolute_dir).ok()); + ASSERT_TRUE(io::global_local_filesystem()->create_directory(_absolute_dir).ok()); + + std::vector<StorePath> paths; + paths.emplace_back(config::storage_root_path, -1); + + // tmp dir + EXPECT_TRUE(io::global_local_filesystem()->delete_directory(tmp_dir).ok()); + EXPECT_TRUE(io::global_local_filesystem()->create_directory(tmp_dir).ok()); + paths.emplace_back(std::string(tmp_dir), 1024000000); + auto tmp_file_dirs = std::make_unique<segment_v2::TmpFileDirs>(paths); + EXPECT_TRUE(tmp_file_dirs->init().ok()); + ExecEnv::GetInstance()->set_tmp_file_dir(std::move(tmp_file_dirs)); + + doris::EngineOptions options; + options.store_paths = paths; + + _engine = std::make_unique<StorageEngine>(options); + _data_dir = std::make_unique<DataDir>(_absolute_dir); + ASSERT_TRUE(_data_dir->update_capacity().ok()); + ExecEnv::GetInstance()->set_storage_engine(_engine.get()); + + _tablet_meta = create_tablet_meta(); + + // Create tablet meta + // auto* tablet_schema = _tablet_meta->mutable_tablet_schema(); + _tablet_schema = std::make_shared<TabletSchema>(); + create_tablet_schema(_tablet_schema, KeysType::DUP_KEYS); + // Initialize tablet + _tablet = std::make_shared<Tablet>(*_engine, _tablet_meta, _data_dir.get()); + 
ASSERT_TRUE(_tablet->init().ok()); + } + + void TearDown() override { + ASSERT_TRUE(io::global_local_filesystem()->delete_directory(_absolute_dir).ok()); + ExecEnv::GetInstance()->set_storage_engine(nullptr); + _tablet.reset(); + } + + void create_tablet_schema(TabletSchemaSPtr tablet_schema, KeysType keystype, + int num_value_col = 1) { + // Set basic properties of TabletSchema directly + tablet_schema->_keys_type = keystype; + tablet_schema->_inverted_index_storage_format = InvertedIndexStorageFormatPB::V2; + + // Create the first key column + TabletColumn column_1; + column_1.set_type(FieldType::OLAP_FIELD_TYPE_INT); + column_1.set_unique_id(1); + column_1.set_name("k1"); + column_1.set_is_key(true); + tablet_schema->append_column(column_1); + + // Create the second key column + TabletColumn column_2; + column_2.set_type(FieldType::OLAP_FIELD_TYPE_INT); + column_2.set_unique_id(2); + column_2.set_name("k2"); + column_2.set_is_key(false); + tablet_schema->append_column(column_2); + } + + TabletMetaSharedPtr create_tablet_meta() { + TabletMetaPB tablet_meta_pb; + tablet_meta_pb.set_table_id(1); + tablet_meta_pb.set_tablet_id(15673); + tablet_meta_pb.set_schema_hash(567997577); + tablet_meta_pb.set_shard_id(0); + tablet_meta_pb.set_creation_time(1575351212); + + TabletMetaSharedPtr tablet_meta(new TabletMeta()); + tablet_meta->init_from_pb(tablet_meta_pb); + return tablet_meta; + } + + // Helper to create rowset meta + void init_rs_meta(RowsetMetaSharedPtr& rs_meta, TabletSchemaSPtr tablet_schema, int64_t start, + int64_t end) { + RowsetMetaPB rowset_meta_pb; + rowset_meta_pb.set_rowset_id(540081); + rowset_meta_pb.set_tablet_id(15673); + rowset_meta_pb.set_tablet_schema_hash(567997577); + rowset_meta_pb.set_rowset_type(RowsetTypePB::BETA_ROWSET); + rowset_meta_pb.set_rowset_state(RowsetStatePB::VISIBLE); + rowset_meta_pb.set_start_version(start); + rowset_meta_pb.set_end_version(end); + rowset_meta_pb.set_num_rows(3929); + 
rowset_meta_pb.set_total_disk_size(84699); + rowset_meta_pb.set_data_disk_size(84464); + rowset_meta_pb.set_index_disk_size(235); + rowset_meta_pb.set_num_segments(2); + + rs_meta->init_from_pb(rowset_meta_pb); + rs_meta->set_tablet_schema(tablet_schema); + } + + std::unique_ptr<StorageEngine> _engine; + TabletSharedPtr _tablet; + TabletMetaSharedPtr _tablet_meta; + TabletSchemaSPtr _tablet_schema; + std::vector<TColumn> _columns; + std::vector<doris::TOlapTableIndex> _alter_indexes; + std::unique_ptr<DataDir> _data_dir = nullptr; + std::string _current_dir; + std::string _absolute_dir; + constexpr static uint32_t MAX_PATH_LEN = 1024; + constexpr static std::string_view dest_dir = "./ut_dir/index_builder_test"; + constexpr static std::string_view tmp_dir = "./ut_dir/index_builder_test"; + + std::string local_segment_path(const std::string& tablet_path, const std::string& rowset_id, + int segment_id) { + return tablet_path + "/" + rowset_id + "_" + std::to_string(segment_id); + } +}; + +TEST_F(IndexBuilderTest, BasicBuildTest) { + // 1. Prepare test data + TOlapTableIndex index; + index.index_id = 1; + index.columns.emplace_back("col1"); + _alter_indexes.push_back(index); + + // 2. Create IndexBuilder + IndexBuilder builder(_tablet, _columns, _alter_indexes, false); + + // 3. Verify initialization + auto status = builder.init(); + EXPECT_TRUE(status.ok()); + EXPECT_EQ(builder._alter_index_ids.size(), 1); +} + +TEST_F(IndexBuilderTest, DropIndexTest) { + // 0. prepare tablet path + auto tablet_path = _absolute_dir + "/" + std::to_string(15676); + _tablet->_tablet_path = tablet_path; + ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); + ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); + + // 1. Prepare data for writing + RowsetSharedPtr rowset; + const int num_rows = 1000; + + // 2. 
First add an initial index to the schema (for k1 column) + TabletIndex initial_index; + initial_index._index_id = 1; + initial_index._index_name = "k1_index"; + initial_index._index_type = IndexType::INVERTED; + initial_index._col_unique_ids.push_back(1); // unique_id for k1 + _tablet_schema->append_index(std::move(initial_index)); + + // 3. Create a rowset writer context + RowsetWriterContext writer_context; + writer_context.rowset_id.init(15676); + writer_context.tablet_id = 15676; + writer_context.tablet_schema_hash = 567997577; + writer_context.partition_id = 10; + writer_context.rowset_type = BETA_ROWSET; + writer_context.rowset_dir = tablet_path; + writer_context.rowset_state = VISIBLE; + writer_context.tablet_schema = _tablet_schema; + writer_context.version.first = 10; + writer_context.version.second = 10; + + ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.rowset_dir).ok()); + + // 4. Create a rowset writer + std::unique_ptr<RowsetWriter> rowset_writer; + auto status = RowsetFactory::create_rowset_writer(writer_context, false, &rowset_writer); + ASSERT_TRUE(status.ok()) << status.to_string(); + + // 5. Write data to the rowset + { + vectorized::Block block = _tablet_schema->create_block(); + auto columns = block.mutate_columns(); + + // Add data for k1 and k2 columns + for (int i = 0; i < num_rows; ++i) { + // k1 column (int) + int32_t k1 = i * 10; + columns[0]->insert_data((const char*)&k1, sizeof(k1)); + + // k2 column (int) + int32_t k2 = i % 100; + columns[1]->insert_data((const char*)&k2, sizeof(k2)); + } + + // Add the block to the rowset + Status s = rowset_writer->add_block(&block); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Flush the writer + s = rowset_writer->flush(); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Build the rowset + ASSERT_TRUE(rowset_writer->build(rowset).ok()); + + // Add the rowset to the tablet + ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); + } + + // 6. 
Verify index exists before dropping + EXPECT_TRUE(_tablet_schema->has_inverted_index()); + EXPECT_TRUE(_tablet_schema->has_inverted_index_with_index_id(1, "")); + + // 7. Prepare index for dropping + TOlapTableIndex drop_index; + drop_index.index_id = 1; + drop_index.columns.emplace_back("k1"); + _alter_indexes.push_back(drop_index); + + // 8. Create IndexBuilder with drop operation + IndexBuilder builder(_tablet, _columns, _alter_indexes, true); + + // 9. Initialize and verify + status = builder.init(); + EXPECT_TRUE(status.ok()) << status.to_string(); + EXPECT_EQ(builder._alter_index_ids.size(), 1); + + // 10. Execute drop operation + status = builder.do_build_inverted_index(); + EXPECT_TRUE(status.ok()) << status.to_string(); + + // 11. Verify the index has been removed + // check old tablet path and new tablet path + bool exists = false; + EXPECT_TRUE(io::global_local_filesystem()->exists(tablet_path, &exists).ok()); + EXPECT_TRUE(exists); + + // Check files in old and new directories + std::vector<io::FileInfo> files; + bool dir_exists = false; + EXPECT_TRUE(io::global_local_filesystem()->list(tablet_path, true, &files, &dir_exists).ok()); + EXPECT_TRUE(dir_exists); + int new_idx_file_count = 0; + int new_dat_file_count = 0; + int old_idx_file_count = 0; + int old_dat_file_count = 0; + for (const auto& file : files) { + std::string filename = file.file_name; + if (filename.find("15676_0.idx") != std::string::npos) { + old_idx_file_count++; + } + if (filename.find("15676_0.dat") != std::string::npos) { + old_dat_file_count++; + } + if (filename.find("020000000000000100000000000000000000000000000000_0.idx") != + std::string::npos) { + new_idx_file_count++; + } + if (filename.find("020000000000000100000000000000000000000000000000_0.dat") != + std::string::npos) { + new_dat_file_count++; + } + } + // The index should have been removed + EXPECT_EQ(old_idx_file_count, 1) << "Tablet path should have 1 .idx file before drop"; + EXPECT_EQ(old_dat_file_count, 1) << 
"Tablet path should have 1 .dat file before drop"; + EXPECT_EQ(new_idx_file_count, 0) << "Tablet path should have no .idx file after drop"; + EXPECT_EQ(new_dat_file_count, 1) << "Tablet path should have 1 .dat file after drop"; + + //auto tablet_schema = _tablet->tablet_schema(); + //EXPECT_FALSE(tablet_schema->has_inverted_index_with_index_id(1)); +} + +TEST_F(IndexBuilderTest, BuildIndexAfterWritingDataTest) { + // 0. prepare tablet path + auto tablet_path = _absolute_dir + "/" + std::to_string(14673); + _tablet->_tablet_path = tablet_path; + ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); + ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); + + // 1. Prepare data for writing + RowsetSharedPtr rowset; + const int num_rows = 1000; + + // 2. Create a rowset writer context + RowsetWriterContext writer_context; + writer_context.rowset_id.init(15673); + writer_context.tablet_id = 15673; + writer_context.tablet_schema_hash = 567997577; + writer_context.partition_id = 10; + writer_context.rowset_type = BETA_ROWSET; + writer_context.rowset_dir = _absolute_dir + "/" + std::to_string(15673); + writer_context.rowset_state = VISIBLE; + writer_context.tablet_schema = _tablet_schema; + writer_context.version.first = 10; + writer_context.version.second = 10; + + ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.rowset_dir).ok()); + + // 3. Create a rowset writer + std::unique_ptr<RowsetWriter> rowset_writer; + auto status = RowsetFactory::create_rowset_writer(writer_context, false, &rowset_writer); + ASSERT_TRUE(status.ok()) << status.to_string(); + + // 4. 
Write data to the rowset + { + vectorized::Block block = _tablet_schema->create_block(); + auto columns = block.mutate_columns(); + + // Add data for k1 and k2 columns according to the schema + for (int i = 0; i < num_rows; ++i) { + // k1 column (int) + int32_t k1 = i * 10; + columns[0]->insert_data((const char*)&k1, sizeof(k1)); + + // k2 column (int) + int32_t k2 = i % 100; + columns[1]->insert_data((const char*)&k2, sizeof(k2)); + } + + // Add the block to the rowset + Status s = rowset_writer->add_block(&block); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Flush the writer + s = rowset_writer->flush(); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Build the rowset + ASSERT_TRUE(rowset_writer->build(rowset).ok()); + + // Add the rowset to the tablet + ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); + } + + // 5. Prepare index for building + TOlapTableIndex index1; + index1.index_id = 1; + index1.columns.emplace_back("k1"); + index1.index_name = "k1_index"; + index1.index_type = TIndexType::INVERTED; + _alter_indexes.push_back(index1); + + TOlapTableIndex index2; + index2.index_id = 2; + index2.columns.emplace_back("k2"); + index2.index_name = "k2_index"; + index2.index_type = TIndexType::INVERTED; + _alter_indexes.push_back(index2); + + // 6. Create IndexBuilder + IndexBuilder builder(_tablet, _columns, _alter_indexes, false); + + // 7. Initialize and verify + status = builder.init(); + EXPECT_TRUE(status.ok()) << status.to_string(); + EXPECT_EQ(builder._alter_index_ids.size(), 2); + + // 8. 
Build index + status = builder.do_build_inverted_index(); + EXPECT_TRUE(status.ok()) << status.to_string(); + + // check old tablet path and new tablet path + auto old_tablet_path = _absolute_dir + "/" + std::to_string(15673); + auto new_tablet_path = _absolute_dir + "/" + std::to_string(14673); + bool old_exists = false; + bool new_exists = false; + EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path, &old_exists).ok()); + EXPECT_TRUE(old_exists); + EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path, &new_exists).ok()); + EXPECT_TRUE(new_exists); + + // Check files in old and new directories + std::vector<io::FileInfo> old_files; + bool old_dir_exists = false; + EXPECT_TRUE(io::global_local_filesystem() + ->list(old_tablet_path, true, &old_files, &old_dir_exists) + .ok()); + EXPECT_TRUE(old_dir_exists); + int idx_file_count = 0; + int dat_file_count = 0; + for (const auto& file : old_files) { + std::string filename = file.file_name; + if (filename.find(".idx") != std::string::npos) { + idx_file_count++; + } + if (filename.find(".dat") != std::string::npos) { + dat_file_count++; + } + } + EXPECT_EQ(idx_file_count, 0) << "Old directory should contain exactly 0 .idx file"; + EXPECT_EQ(dat_file_count, 1) << "Old directory should contain exactly 1 .dat file"; + + std::vector<io::FileInfo> new_files; + bool new_dir_exists = false; + EXPECT_TRUE(io::global_local_filesystem() + ->list(new_tablet_path, true, &new_files, &new_dir_exists) + .ok()); + EXPECT_TRUE(new_dir_exists); + int new_idx_file_count = 0; + int new_dat_file_count = 0; + for (const auto& file : new_files) { + std::string filename = file.file_name; + if (filename.find(".idx") != std::string::npos) { + new_idx_file_count++; + } + if (filename.find(".dat") != std::string::npos) { + new_dat_file_count++; + } + } + EXPECT_EQ(new_idx_file_count, 1) << "New directory should contain exactly 1 .idx files"; + EXPECT_EQ(new_dat_file_count, 1) << "New directory should contain exactly 1 
.dat file"; + + // 9. Verify the result (indexes should be built successfully) + //auto tablet_schema = _tablet->tablet_schema(); + //EXPECT_TRUE(tablet_schema->has_inverted_index_with_index_id(1)); + //EXPECT_TRUE(tablet_schema->has_inverted_index_with_index_id(2)); +} + +TEST_F(IndexBuilderTest, AddIndexWhenOneExistsTest) { + // 0. prepare tablet path + auto tablet_path = _absolute_dir + "/" + std::to_string(14675); + _tablet->_tablet_path = tablet_path; + ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); + ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); + + // 1. Prepare data for writing + RowsetSharedPtr rowset; + const int num_rows = 1000; + + // 2. First add an initial index to the schema (for k1 column) + TabletIndex initial_index; + initial_index._index_id = 1; + initial_index._index_name = "k1_index"; + initial_index._index_type = IndexType::INVERTED; + initial_index._col_unique_ids.push_back(1); // unique_id for k1 + _tablet_schema->append_index(std::move(initial_index)); + + // 3. Create rowset writer context + RowsetWriterContext writer_context; + writer_context.rowset_id.init(15675); + writer_context.tablet_id = 15675; + writer_context.tablet_schema_hash = 567997577; + writer_context.partition_id = 10; + writer_context.rowset_type = BETA_ROWSET; + writer_context.rowset_dir = _absolute_dir + "/" + std::to_string(15675); + writer_context.rowset_state = VISIBLE; + writer_context.tablet_schema = _tablet_schema; + writer_context.version.first = 10; + writer_context.version.second = 10; + + ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.rowset_dir).ok()); + + // 4. Create rowset writer + std::unique_ptr<RowsetWriter> rowset_writer; + auto status = RowsetFactory::create_rowset_writer(writer_context, false, &rowset_writer); + ASSERT_TRUE(status.ok()) << status.to_string(); + + // 5. 
Write data to rowset + { + vectorized::Block block = _tablet_schema->create_block(); + auto columns = block.mutate_columns(); + + // Add data for k1 and k2 columns + for (int i = 0; i < num_rows; ++i) { + // k1 column (int) + int32_t k1 = i * 10; + columns[0]->insert_data((const char*)&k1, sizeof(k1)); + + // k2 column (int) + int32_t k2 = i % 100; + columns[1]->insert_data((const char*)&k2, sizeof(k2)); + } + + // Add block to rowset + Status s = rowset_writer->add_block(&block); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Flush writer + s = rowset_writer->flush(); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Build rowset + ASSERT_TRUE(rowset_writer->build(rowset).ok()); + + // Add rowset to tablet + ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); + } + + // 6. Prepare new index information (only add for k2 column) + TOlapTableIndex new_index; + new_index.index_id = 2; // New index ID is 2 + new_index.columns.emplace_back("k2"); + new_index.index_name = "k2_index"; + new_index.index_type = TIndexType::INVERTED; + _alter_indexes.push_back(new_index); + + // 7. Create IndexBuilder + IndexBuilder builder(_tablet, _columns, _alter_indexes, false); + + // 8. Initialize and verify + status = builder.init(); + EXPECT_TRUE(status.ok()) << status.to_string(); + EXPECT_EQ(builder._alter_index_ids.size(), 1); // Only one new index needs to be built + + // 9. 
Build index + status = builder.do_build_inverted_index(); + EXPECT_TRUE(status.ok()) << status.to_string(); + + // check old tablet path and new tablet path + auto old_tablet_path = _absolute_dir + "/" + std::to_string(15675); + auto new_tablet_path = _absolute_dir + "/" + std::to_string(14675); + bool old_exists = false; + bool new_exists = false; + EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path, &old_exists).ok()); + EXPECT_TRUE(old_exists); + EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path, &new_exists).ok()); + EXPECT_TRUE(new_exists); + + // Check files in old and new directories + std::vector<io::FileInfo> old_files; + bool old_dir_exists = false; + EXPECT_TRUE(io::global_local_filesystem() + ->list(old_tablet_path, true, &old_files, &old_dir_exists) + .ok()); + EXPECT_TRUE(old_dir_exists); + int idx_file_count = 0; + int dat_file_count = 0; + for (const auto& file : old_files) { + std::string filename = file.file_name; + if (filename.find(".idx") != std::string::npos) { + idx_file_count++; + } + if (filename.find(".dat") != std::string::npos) { + dat_file_count++; + } + } + EXPECT_EQ(idx_file_count, 1) << "Old directory should contain exactly 1 .idx file"; + EXPECT_EQ(dat_file_count, 1) << "Old directory should contain exactly 1 .dat file"; + + std::vector<io::FileInfo> new_files; + bool new_dir_exists = false; + EXPECT_TRUE(io::global_local_filesystem() + ->list(new_tablet_path, true, &new_files, &new_dir_exists) + .ok()); + EXPECT_TRUE(new_dir_exists); + int new_idx_file_count = 0; + int new_dat_file_count = 0; + for (const auto& file : new_files) { + std::string filename = file.file_name; + if (filename.find(".idx") != std::string::npos) { + new_idx_file_count++; + } + if (filename.find(".dat") != std::string::npos) { + new_dat_file_count++; + } + } + EXPECT_EQ(new_idx_file_count, 1) << "New directory should contain exactly 1 .idx files"; + EXPECT_EQ(new_dat_file_count, 1) << "New directory should contain exactly 1 
.dat file"; + + // 10. Verify results (both indexes should exist) + // Verify initial index (k1) still exists + //EXPECT_TRUE(_tablet_schema->has_inverted_index_with_index_id(1)); + // Verify newly added index (k2) is successfully built + //EXPECT_TRUE(_tablet_schema->has_inverted_index_with_index_id(2)); +} + +TEST_F(IndexBuilderTest, AddIndexWhenOneExistsTestV1) { + // 1. Create new schema using V1 format + auto v1_schema = std::make_shared<TabletSchema>(); + create_tablet_schema(v1_schema, KeysType::DUP_KEYS); + + // 2. Modify to V1 format + v1_schema->_inverted_index_storage_format = InvertedIndexStorageFormatPB::V1; + + // 3. First add an initial index to the schema (for k1 column) + TabletIndex initial_index; + initial_index._index_id = 1; + initial_index._index_name = "k1_index"; + initial_index._index_type = IndexType::INVERTED; + initial_index._col_unique_ids.push_back(1); // unique_id for k1 + v1_schema->append_index(std::move(initial_index)); + + // 4. Update schema in tablet + TabletMetaPB tablet_meta_pb; + _tablet_meta->to_meta_pb(&tablet_meta_pb); + + TabletSchemaPB v1_schema_pb; + v1_schema->to_schema_pb(&v1_schema_pb); + tablet_meta_pb.mutable_schema()->CopyFrom(v1_schema_pb); + + _tablet_meta->init_from_pb(tablet_meta_pb); + + // Reinitialize tablet to use new schema + _tablet = std::make_shared<Tablet>(*_engine, _tablet_meta, _data_dir.get()); + ASSERT_TRUE(_tablet->init().ok()); + auto tablet_path = _absolute_dir + "/" + std::to_string(14674); + _tablet->_tablet_path = tablet_path; + ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); + ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); + + // 5. Prepare data + RowsetSharedPtr rowset; + const int num_rows = 1000; + + // 6. 
Create rowset writer context + RowsetWriterContext writer_context; + writer_context.rowset_id.init(15674); + writer_context.tablet_id = 15674; + writer_context.tablet_schema_hash = 567997577; + writer_context.partition_id = 10; + writer_context.rowset_type = BETA_ROWSET; + writer_context.rowset_dir = _absolute_dir + "/" + std::to_string(15674); + writer_context.rowset_state = VISIBLE; + writer_context.tablet_schema = v1_schema; + writer_context.version.first = 10; + writer_context.version.second = 10; + + ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.rowset_dir).ok()); + + // 7. Create rowset writer + std::unique_ptr<RowsetWriter> rowset_writer; + auto status = RowsetFactory::create_rowset_writer(writer_context, false, &rowset_writer); + ASSERT_TRUE(status.ok()) << status.to_string(); + + // 8. Write data to rowset + { + vectorized::Block block = v1_schema->create_block(); + auto columns = block.mutate_columns(); + + // Add data for k1 and k2 columns + for (int i = 0; i < num_rows; ++i) { + // k1 column (int) + int32_t k1 = i * 10; + columns[0]->insert_data((const char*)&k1, sizeof(k1)); + + // k2 column (int) + int32_t k2 = i % 100; + columns[1]->insert_data((const char*)&k2, sizeof(k2)); + } + + // Add block to rowset + Status s = rowset_writer->add_block(&block); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Flush writer + s = rowset_writer->flush(); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Build rowset + ASSERT_TRUE(rowset_writer->build(rowset).ok()); + + // Add rowset to tablet + ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); + } + + // 9. Clear existing index list, prepare new index + _alter_indexes.clear(); + + // 10. Prepare new index information (only add for k2 column) + TOlapTableIndex new_index; + new_index.index_id = 2; // New index ID is 2 + new_index.columns.emplace_back("k2"); + new_index.index_name = "k2_index"; + new_index.index_type = TIndexType::INVERTED; + _alter_indexes.push_back(new_index); + + // 11. 
Create IndexBuilder + IndexBuilder builder(_tablet, _columns, _alter_indexes, false); + + // 12. Initialize and verify + status = builder.init(); + EXPECT_TRUE(status.ok()) << status.to_string(); + EXPECT_EQ(builder._alter_index_ids.size(), 1); // Only one new index needs to be built + + // 13. Build index + status = builder.do_build_inverted_index(); + EXPECT_TRUE(status.ok()) << status.to_string(); + + // check old tablet path and new tablet path + auto old_tablet_path = _absolute_dir + "/" + std::to_string(15674); + auto new_tablet_path = _absolute_dir + "/" + std::to_string(14674); + bool old_exists = false; + bool new_exists = false; + EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path, &old_exists).ok()); + EXPECT_TRUE(old_exists); + EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path, &new_exists).ok()); + EXPECT_TRUE(new_exists); + + // Check files in old and new directories + std::vector<io::FileInfo> old_files; + bool old_dir_exists = false; + EXPECT_TRUE(io::global_local_filesystem() + ->list(old_tablet_path, true, &old_files, &old_dir_exists) + .ok()); + EXPECT_TRUE(old_dir_exists); + int idx_file_count = 0; + int dat_file_count = 0; + for (const auto& file : old_files) { + std::string filename = file.file_name; + if (filename.find(".idx") != std::string::npos) { + idx_file_count++; + } + if (filename.find(".dat") != std::string::npos) { + dat_file_count++; + } + } + EXPECT_EQ(idx_file_count, 1) << "Old directory should contain exactly 1 .idx file"; + EXPECT_EQ(dat_file_count, 1) << "Old directory should contain exactly 1 .dat file"; + + std::vector<io::FileInfo> new_files; + bool new_dir_exists = false; + EXPECT_TRUE(io::global_local_filesystem() + ->list(new_tablet_path, true, &new_files, &new_dir_exists) + .ok()); + EXPECT_TRUE(new_dir_exists); + int new_idx_file_count = 0; + int new_dat_file_count = 0; + for (const auto& file : new_files) { + std::string filename = file.file_name; + if (filename.find(".idx") != 
std::string::npos) { + new_idx_file_count++; + } + if (filename.find(".dat") != std::string::npos) { + new_dat_file_count++; + } + } + EXPECT_EQ(new_idx_file_count, 2) << "New directory should contain exactly 2 .idx files"; + EXPECT_EQ(new_dat_file_count, 1) << "New directory should contain exactly 1 .dat file"; + // 14. Verify results (both indexes should exist) + // Verify initial index (k1) still exists + //EXPECT_TRUE(v1_schema->has_inverted_index_with_index_id(1)); + // Verify newly added index (k2) is successfully built + //EXPECT_TRUE(_tablet->tablet_schema()->has_inverted_index_with_index_id(2)); + + // 15. Confirm storage format is still V1 + //EXPECT_EQ(v1_schema->_inverted_index_storage_format, InvertedIndexStorageFormatPB::V1); +} + +TEST_F(IndexBuilderTest, MultiSegmentBuildIndexTest) { + // 0. prepare tablet path + auto tablet_path = _absolute_dir + "/" + std::to_string(14677); + _tablet->_tablet_path = tablet_path; + ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); + ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); + + // 1. Prepare data for writing + RowsetSharedPtr rowset; + const int rows_per_segment = 500; + const int num_segments = 3; + + // 2. 
Create a rowset writer context with segment size set to trigger multiple segments + RowsetWriterContext writer_context; + writer_context.rowset_id.init(15677); + writer_context.tablet_id = 15677; + writer_context.tablet_schema_hash = 567997577; + writer_context.partition_id = 10; + writer_context.rowset_type = BETA_ROWSET; + writer_context.rowset_dir = _absolute_dir + "/" + std::to_string(15677); + writer_context.rowset_state = VISIBLE; + writer_context.tablet_schema = _tablet_schema; + writer_context.version.first = 10; + writer_context.version.second = 10; + // Set small segment size to ensure we create multiple segments + writer_context.max_rows_per_segment = rows_per_segment; + + ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.rowset_dir).ok()); + + // 3. Create a rowset writer + std::unique_ptr<RowsetWriter> rowset_writer; + auto status = RowsetFactory::create_rowset_writer(writer_context, false, &rowset_writer); + ASSERT_TRUE(status.ok()) << status.to_string(); + + // 4. Write data to the rowset in multiple batches to ensure we get multiple segments + for (int segment = 0; segment < num_segments; segment++) { + vectorized::Block block = _tablet_schema->create_block(); + auto columns = block.mutate_columns(); + + // Add data for k1 and k2 columns + for (int i = 0; i < rows_per_segment; ++i) { + // k1 column (int) - make values different across segments + int32_t k1 = (segment * rows_per_segment + i) * 10; + columns[0]->insert_data((const char*)&k1, sizeof(k1)); + + // k2 column (int) + int32_t k2 = (segment * rows_per_segment + i) % 100; + columns[1]->insert_data((const char*)&k2, sizeof(k2)); + } + + // Add the block to the rowset + Status s = rowset_writer->add_block(&block); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Flush to ensure we create a new segment + s = rowset_writer->flush(); + ASSERT_TRUE(s.ok()) << s.to_string(); + } + + // 5. 
Build the rowset + ASSERT_TRUE(rowset_writer->build(rowset).ok()); + + // Verify we have the expected number of segments + ASSERT_EQ(rowset->num_segments(), num_segments) + << "Rowset should have " << num_segments << " segments but has " + << rowset->num_segments(); + + // 6. Add the rowset to the tablet + ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); + + // 7. Prepare indexes for building + TOlapTableIndex index1; + index1.index_id = 1; + index1.columns.emplace_back("k1"); + index1.index_name = "k1_index"; + index1.index_type = TIndexType::INVERTED; + _alter_indexes.push_back(index1); + + TOlapTableIndex index2; + index2.index_id = 2; + index2.columns.emplace_back("k2"); + index2.index_name = "k2_index"; + index2.index_type = TIndexType::INVERTED; + _alter_indexes.push_back(index2); + + // 8. Create IndexBuilder + IndexBuilder builder(_tablet, _columns, _alter_indexes, false); + + // 9. Initialize and verify + status = builder.init(); + EXPECT_TRUE(status.ok()) << status.to_string(); + EXPECT_EQ(builder._alter_index_ids.size(), 2); + + // 10. Build indexes + status = builder.do_build_inverted_index(); + EXPECT_TRUE(status.ok()) << status.to_string(); + + // 11. Check paths and files + auto old_tablet_path = _absolute_dir + "/" + std::to_string(15677); + auto new_tablet_path = _absolute_dir + "/" + std::to_string(14677); + bool old_exists = false; + bool new_exists = false; + EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path, &old_exists).ok()); + EXPECT_TRUE(old_exists); + EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path, &new_exists).ok()); + EXPECT_TRUE(new_exists); + + // 12. 
Check files in old and new directories + std::vector<io::FileInfo> old_files; + bool old_dir_exists = false; + EXPECT_TRUE(io::global_local_filesystem() + ->list(old_tablet_path, true, &old_files, &old_dir_exists) + .ok()); + EXPECT_TRUE(old_dir_exists); + int old_idx_file_count = 0; + int old_dat_file_count = 0; + for (const auto& file : old_files) { + std::string filename = file.file_name; + if (filename.find(".idx") != std::string::npos) { + old_idx_file_count++; + } + if (filename.find(".dat") != std::string::npos) { + old_dat_file_count++; + } + } + EXPECT_EQ(old_idx_file_count, 0) << "Old directory should contain exactly 0 .idx files"; + EXPECT_EQ(old_dat_file_count, num_segments) + << "Old directory should contain exactly " << num_segments << " .dat files"; + + std::vector<io::FileInfo> new_files; + bool new_dir_exists = false; + EXPECT_TRUE(io::global_local_filesystem() + ->list(new_tablet_path, true, &new_files, &new_dir_exists) + .ok()); + EXPECT_TRUE(new_dir_exists); + int new_idx_file_count = 0; + int new_dat_file_count = 0; + for (const auto& file : new_files) { + std::string filename = file.file_name; + if (filename.find(".idx") != std::string::npos) { + new_idx_file_count++; + } + if (filename.find(".dat") != std::string::npos) { + new_dat_file_count++; + } + } + EXPECT_EQ(new_idx_file_count, num_segments) + << "New directory should contain exactly " << num_segments << " .idx files"; + EXPECT_EQ(new_dat_file_count, num_segments) + << "New directory should contain exactly " << num_segments << " .dat files"; +} + +TEST_F(IndexBuilderTest, NonExistentColumnIndexTest) { + // 0. prepare tablet path + auto tablet_path = _absolute_dir + "/" + std::to_string(14678); + _tablet->_tablet_path = tablet_path; + ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); + ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); + + // 1. 
Prepare data for writing + RowsetSharedPtr rowset; + const int num_rows = 1000; + + // 2. Create a rowset writer context + RowsetWriterContext writer_context; + writer_context.rowset_id.init(15678); + writer_context.tablet_id = 15678; + writer_context.tablet_schema_hash = 567997577; + writer_context.partition_id = 10; + writer_context.rowset_type = BETA_ROWSET; + writer_context.rowset_dir = _absolute_dir + "/" + std::to_string(15678); + writer_context.rowset_state = VISIBLE; + writer_context.tablet_schema = _tablet_schema; + writer_context.version.first = 10; + writer_context.version.second = 10; + + ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.rowset_dir).ok()); + + // 3. Create a rowset writer + std::unique_ptr<RowsetWriter> rowset_writer; + auto status = RowsetFactory::create_rowset_writer(writer_context, false, &rowset_writer); + ASSERT_TRUE(status.ok()) << status.to_string(); + + // 4. Write data to the rowset + { + vectorized::Block block = _tablet_schema->create_block(); + auto columns = block.mutate_columns(); + + // Add data for k1 and k2 columns + for (int i = 0; i < num_rows; ++i) { + // k1 column (int) + int32_t k1 = i * 10; + columns[0]->insert_data((const char*)&k1, sizeof(k1)); + + // k2 column (int) + int32_t k2 = i % 100; + columns[1]->insert_data((const char*)&k2, sizeof(k2)); + } + + // Add the block to the rowset + Status s = rowset_writer->add_block(&block); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Flush the writer + s = rowset_writer->flush(); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Build the rowset + ASSERT_TRUE(rowset_writer->build(rowset).ok()); + + // Add the rowset to the tablet + ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); + } + + // 5. 
Prepare indexes for building - including one for a non-existent column + _alter_indexes.clear(); + + // Index for non-existent column "k3" + TOlapTableIndex index2; + index2.index_id = 2; + index2.columns.emplace_back("k3"); // This column doesn't exist in the schema + index2.index_name = "k3_index"; + index2.index_type = TIndexType::INVERTED; + _alter_indexes.push_back(index2); + + // 6. Create IndexBuilder + IndexBuilder builder(_tablet, _columns, _alter_indexes, false); + + // 7. Initialize and verify + status = builder.init(); + // The init should succeed, as we'll skip non-existent columns later + EXPECT_TRUE(status.ok()) << status.to_string(); + + // 8. Build indexes - should only build for existing columns + status = builder.do_build_inverted_index(); + EXPECT_TRUE(status.ok()) << status.to_string(); + + // 9. Check paths and files + auto old_tablet_path = _absolute_dir + "/" + std::to_string(15678); + auto new_tablet_path = _absolute_dir + "/" + std::to_string(14678); + bool old_exists = false; + bool new_exists = false; + EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path, &old_exists).ok()); + EXPECT_TRUE(old_exists); + EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path, &new_exists).ok()); + EXPECT_TRUE(new_exists); + + // 10. 
Check files in old and new directories + std::vector<io::FileInfo> old_files; + bool old_dir_exists = false; + EXPECT_TRUE(io::global_local_filesystem() + ->list(old_tablet_path, true, &old_files, &old_dir_exists) + .ok()); + EXPECT_TRUE(old_dir_exists); + int old_idx_file_count = 0; + int old_dat_file_count = 0; + for (const auto& file : old_files) { + std::string filename = file.file_name; + if (filename.find(".idx") != std::string::npos) { + old_idx_file_count++; + } + if (filename.find(".dat") != std::string::npos) { + old_dat_file_count++; + } + } + EXPECT_EQ(old_idx_file_count, 0) << "Old directory should contain exactly 0 .idx files"; + EXPECT_EQ(old_dat_file_count, 1) << "Old directory should contain exactly 1 .dat file"; + + std::vector<io::FileInfo> new_files; + bool new_dir_exists = false; + EXPECT_TRUE(io::global_local_filesystem() + ->list(new_tablet_path, true, &new_files, &new_dir_exists) + .ok()); + EXPECT_TRUE(new_dir_exists); + int new_idx_file_count = 0; + int new_dat_file_count = 0; + for (const auto& file : new_files) { + std::string filename = file.file_name; + if (filename.find(".idx") != std::string::npos) { + new_idx_file_count++; + } + if (filename.find(".dat") != std::string::npos) { + new_dat_file_count++; + } + } + // No index file is expected: the only requested index targets the non-existent column k3 + EXPECT_EQ(new_idx_file_count, 0) + << "New directory should contain exactly 0 .idx file for the existing column"; + EXPECT_EQ(new_dat_file_count, 1) << "New directory should contain exactly 1 .dat file"; + + // 11. Verify logs in the output to confirm k3 index was skipped + // This would require examining the log output which isn't easily done in unit tests, + // but the file count verification above should be sufficient to confirm behavior +} + +TEST_F(IndexBuilderTest, AddNonExistentColumnIndexWhenOneExistsTest) { + // 0. 
prepare tablet path + auto tablet_path = _absolute_dir + "/" + std::to_string(14679); + _tablet->_tablet_path = tablet_path; + ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); + ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); + + // 1. Prepare data for writing + RowsetSharedPtr rowset; + const int num_rows = 1000; + + // 2. First add an initial index to the schema (for k1 column) + TabletIndex initial_index; + initial_index._index_id = 1; + initial_index._index_name = "k1_index"; + initial_index._index_type = IndexType::INVERTED; + initial_index._col_unique_ids.push_back(1); // unique_id for k1 + _tablet_schema->append_index(std::move(initial_index)); + + // 3. Create a rowset writer context + RowsetWriterContext writer_context; + writer_context.rowset_id.init(15679); + writer_context.tablet_id = 15679; + writer_context.tablet_schema_hash = 567997577; + writer_context.partition_id = 10; + writer_context.rowset_type = BETA_ROWSET; + writer_context.rowset_dir = _absolute_dir + "/" + std::to_string(15679); + writer_context.rowset_state = VISIBLE; + writer_context.tablet_schema = _tablet_schema; + writer_context.version.first = 10; + writer_context.version.second = 10; + + ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.rowset_dir).ok()); + + // 4. Create a rowset writer + std::unique_ptr<RowsetWriter> rowset_writer; + auto status = RowsetFactory::create_rowset_writer(writer_context, false, &rowset_writer); + ASSERT_TRUE(status.ok()) << status.to_string(); + + // 5. 
Write data to the rowset + { + vectorized::Block block = _tablet_schema->create_block(); + auto columns = block.mutate_columns(); + + // Add data for k1 and k2 columns + for (int i = 0; i < num_rows; ++i) { + // k1 column (int) + int32_t k1 = i * 10; + columns[0]->insert_data((const char*)&k1, sizeof(k1)); + + // k2 column (int) + int32_t k2 = i % 100; + columns[1]->insert_data((const char*)&k2, sizeof(k2)); + } + + // Add the block to the rowset + Status s = rowset_writer->add_block(&block); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Flush the writer + s = rowset_writer->flush(); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Build the rowset + ASSERT_TRUE(rowset_writer->build(rowset).ok()); + + // Add the rowset to the tablet + ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); + } + + // 6. Prepare indexes for building - only an index on the non-existent column k3 + _alter_indexes.clear(); + + // Index for non-existent column "k3" + TOlapTableIndex index2; + index2.index_id = 3; + index2.columns.emplace_back("k3"); // This column doesn't exist in the schema + index2.index_name = "k3_index"; + index2.index_type = TIndexType::INVERTED; + _alter_indexes.push_back(index2); + + // 7. Create IndexBuilder + IndexBuilder builder(_tablet, _columns, _alter_indexes, false); + + // 8. Initialize and verify + status = builder.init(); + EXPECT_TRUE(status.ok()) << status.to_string(); + EXPECT_EQ(builder._alter_index_ids.size(), 1); // Exactly one alter index (k3_index) was requested + + // 9. Build indexes - should only build for existing columns + status = builder.do_build_inverted_index(); + EXPECT_TRUE(status.ok()) << status.to_string(); + + // 10. 
Check paths and files + auto old_tablet_path = _absolute_dir + "/" + std::to_string(15679); + auto new_tablet_path = _absolute_dir + "/" + std::to_string(14679); + bool old_exists = false; + bool new_exists = false; + EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path, &old_exists).ok()); + EXPECT_TRUE(old_exists); + EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path, &new_exists).ok()); + EXPECT_TRUE(new_exists); + + // 11. Check files in old and new directories + std::vector<io::FileInfo> old_files; + bool old_dir_exists = false; + EXPECT_TRUE(io::global_local_filesystem() + ->list(old_tablet_path, true, &old_files, &old_dir_exists) + .ok()); + EXPECT_TRUE(old_dir_exists); + int old_idx_file_count = 0; + int old_dat_file_count = 0; + for (const auto& file : old_files) { + std::string filename = file.file_name; + if (filename.find(".idx") != std::string::npos) { + old_idx_file_count++; + } + if (filename.find(".dat") != std::string::npos) { + old_dat_file_count++; + } + } + EXPECT_EQ(old_idx_file_count, 1) + << "Old directory should contain exactly 1 .idx file for the original k1 index"; + EXPECT_EQ(old_dat_file_count, 1) << "Old directory should contain exactly 1 .dat file"; + + std::vector<io::FileInfo> new_files; + bool new_dir_exists = false; + EXPECT_TRUE(io::global_local_filesystem() + ->list(new_tablet_path, true, &new_files, &new_dir_exists) + .ok()); + EXPECT_TRUE(new_dir_exists); + int new_idx_file_count = 0; + int new_dat_file_count = 0; + for (const auto& file : new_files) { + std::string filename = file.file_name; + if (filename.find(".idx") != std::string::npos) { + new_idx_file_count++; + } + if (filename.find(".dat") != std::string::npos) { + new_dat_file_count++; + } + } + // Should have 1 index file: the original k1 index is kept and the k3 index is skipped + EXPECT_EQ(new_idx_file_count, 1) + << "New directory should contain exactly 1 .idx files (for k1 and k2, not k3)"; + EXPECT_EQ(new_dat_file_count, 1) << "New 
directory should contain exactly 1 .dat file"; + + // 12. Verify the tablet schema - would need to examine tablet_schema here + // k1 and k2 indexes should exist, k3 index should not + // Note: In production code, additional verification of schema would be done here +} + +TEST_F(IndexBuilderTest, AddNonExistentColumnIndexWhenOneExistsTestV1) { + // 1. Create new schema using V1 format + auto v1_schema = std::make_shared<TabletSchema>(); + create_tablet_schema(v1_schema, KeysType::DUP_KEYS); + + // 2. Modify to V1 format + v1_schema->_inverted_index_storage_format = InvertedIndexStorageFormatPB::V1; + + // 3. First add an initial index to the schema (for k1 column) + TabletIndex initial_index; + initial_index._index_id = 1; + initial_index._index_name = "k1_index"; + initial_index._index_type = IndexType::INVERTED; + initial_index._col_unique_ids.push_back(1); // unique_id for k1 + v1_schema->append_index(std::move(initial_index)); + + // 4. Update schema in tablet + TabletMetaPB tablet_meta_pb; + _tablet_meta->to_meta_pb(&tablet_meta_pb); + + TabletSchemaPB v1_schema_pb; + v1_schema->to_schema_pb(&v1_schema_pb); + tablet_meta_pb.mutable_schema()->CopyFrom(v1_schema_pb); + + _tablet_meta->init_from_pb(tablet_meta_pb); + + // 5. Reinitialize tablet to use new schema + _tablet = std::make_shared<Tablet>(*_engine, _tablet_meta, _data_dir.get()); + ASSERT_TRUE(_tablet->init().ok()); + auto tablet_path = _absolute_dir + "/" + std::to_string(14680); + _tablet->_tablet_path = tablet_path; + ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); + ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); + + // 6. Prepare data for writing + RowsetSharedPtr rowset; + const int num_rows = 1000; + + // 7. 
Create rowset writer context + RowsetWriterContext writer_context; + writer_context.rowset_id.init(15680); + writer_context.tablet_id = 15680; + writer_context.tablet_schema_hash = 567997577; + writer_context.partition_id = 10; + writer_context.rowset_type = BETA_ROWSET; + writer_context.rowset_dir = _absolute_dir + "/" + std::to_string(15680); + writer_context.rowset_state = VISIBLE; + writer_context.tablet_schema = v1_schema; + writer_context.version.first = 10; + writer_context.version.second = 10; + + ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.rowset_dir).ok()); + + // 8. Create rowset writer + std::unique_ptr<RowsetWriter> rowset_writer; + auto status = RowsetFactory::create_rowset_writer(writer_context, false, &rowset_writer); + ASSERT_TRUE(status.ok()) << status.to_string(); + + // 9. Write data to rowset + { + vectorized::Block block = v1_schema->create_block(); + auto columns = block.mutate_columns(); + + // Add data for k1 and k2 columns + for (int i = 0; i < num_rows; ++i) { + // k1 column (int) + int32_t k1 = i * 10; + columns[0]->insert_data((const char*)&k1, sizeof(k1)); + + // k2 column (int) + int32_t k2 = i % 100; + columns[1]->insert_data((const char*)&k2, sizeof(k2)); + } + + // Add block to rowset + Status s = rowset_writer->add_block(&block); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Flush writer + s = rowset_writer->flush(); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Build rowset + ASSERT_TRUE(rowset_writer->build(rowset).ok()); + + // Add rowset to tablet + ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); + } + + // 10. 
Prepare indexes for building - only an index on the non-existent column k3 + _alter_indexes.clear(); + + // Index for non-existent column "k3" + TOlapTableIndex index2; + index2.index_id = 3; + index2.columns.emplace_back("k3"); // This column doesn't exist in the schema + index2.index_name = "k3_index"; + index2.index_type = TIndexType::INVERTED; + _alter_indexes.push_back(index2); + + // Add column information for the non-existent column + _columns.clear(); // Clear previous column info + TColumn non_existent_column; + non_existent_column.column_name = "k3"; + non_existent_column.column_type.type = TPrimitiveType::INT; + _columns.push_back(non_existent_column); + + // 11. Create IndexBuilder + IndexBuilder builder(_tablet, _columns, _alter_indexes, false); + + // 12. Initialize and verify + status = builder.init(); + EXPECT_TRUE(status.ok()) << status.to_string(); + EXPECT_EQ(builder._alter_index_ids.size(), 1); + // 13. Build indexes - should only build for existing columns + status = builder.do_build_inverted_index(); + EXPECT_TRUE(status.ok()) << status.to_string(); + + // 14. Check paths and files + auto old_tablet_path = _absolute_dir + "/" + std::to_string(15680); + auto new_tablet_path = _absolute_dir + "/" + std::to_string(14680); + bool old_exists = false; + bool new_exists = false; + EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path, &old_exists).ok()); + EXPECT_TRUE(old_exists); + EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path, &new_exists).ok()); + EXPECT_TRUE(new_exists); + + // 15. 
Check files in old and new directories + std::vector<io::FileInfo> old_files; + bool old_dir_exists = false; + EXPECT_TRUE(io::global_local_filesystem() + ->list(old_tablet_path, true, &old_files, &old_dir_exists) + .ok()); + EXPECT_TRUE(old_dir_exists); + int old_idx_file_count = 0; + int old_dat_file_count = 0; + for (const auto& file : old_files) { + std::string filename = file.file_name; + if (filename.find(".idx") != std::string::npos) { + old_idx_file_count++; + } + if (filename.find(".dat") != std::string::npos) { + old_dat_file_count++; + } + } + EXPECT_EQ(old_idx_file_count, 1) + << "Old directory should contain exactly 1 .idx file for the original k1 index"; + EXPECT_EQ(old_dat_file_count, 1) << "Old directory should contain exactly 1 .dat file"; + + std::vector<io::FileInfo> new_files; + bool new_dir_exists = false; + EXPECT_TRUE(io::global_local_filesystem() + ->list(new_tablet_path, true, &new_files, &new_dir_exists) + .ok()); + EXPECT_TRUE(new_dir_exists); + int new_idx_file_count = 0; + int new_dat_file_count = 0; + for (const auto& file : new_files) { + std::string filename = file.file_name; + if (filename.find(".idx") != std::string::npos) { + new_idx_file_count++; + } + if (filename.find(".dat") != std::string::npos) { + new_dat_file_count++; + } + } + // Should have 1 index file: the original k1 index is kept and the k3 index is skipped + EXPECT_EQ(new_idx_file_count, 1) + << "New directory should contain exactly 1 .idx files (for k1 and k2, not k3)"; + EXPECT_EQ(new_dat_file_count, 1) << "New directory should contain exactly 1 .dat file"; + + // 16. Confirm storage format is still V1 + EXPECT_EQ(v1_schema->_inverted_index_storage_format, InvertedIndexStorageFormatPB::V1); +} + +TEST_F(IndexBuilderTest, NonNullIndexDataTest) { + // 0. 
prepare tablet path + auto tablet_path = _absolute_dir + "/" + std::to_string(14681); + _tablet->_tablet_path = tablet_path; + ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); + ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); + + // 1. Prepare data for writing + RowsetSharedPtr rowset; + const int num_rows = 1000; + + // 2. Create a rowset writer context + RowsetWriterContext writer_context; + writer_context.rowset_id.init(15681); + writer_context.tablet_id = 15681; + writer_context.tablet_schema_hash = 567997577; + writer_context.partition_id = 10; + writer_context.rowset_type = BETA_ROWSET; + writer_context.rowset_dir = _absolute_dir + "/" + std::to_string(15681); + writer_context.rowset_state = VISIBLE; + writer_context.tablet_schema = _tablet_schema; + writer_context.version.first = 10; + writer_context.version.second = 10; + + ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.rowset_dir).ok()); + + // 3. Create a rowset writer with non-null values + std::unique_ptr<RowsetWriter> rowset_writer; + auto status = RowsetFactory::create_rowset_writer(writer_context, false, &rowset_writer); + ASSERT_TRUE(status.ok()) << status.to_string(); + + // 4. 
Write non-null data to the rowset + { + vectorized::Block block = _tablet_schema->create_block(); + auto columns = block.mutate_columns(); + + // Add data for k1 and k2 columns with no null values + for (int i = 0; i < num_rows; ++i) { + // k1 column (int) + int32_t k1 = i * 10; + columns[0]->insert_data((const char*)&k1, sizeof(k1)); + + // k2 column (int) + int32_t k2 = i % 100; + columns[1]->insert_data((const char*)&k2, sizeof(k2)); + } + + // Add the block to the rowset + Status s = rowset_writer->add_block(&block); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Flush the writer + s = rowset_writer->flush(); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Build the rowset + ASSERT_TRUE(rowset_writer->build(rowset).ok()); + + // Add the rowset to the tablet + ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); + } + + // 5. Prepare indexes for building - only use non-nullable fields + TOlapTableIndex index1; + index1.index_id = 1; + index1.columns.emplace_back("k1"); + index1.index_name = "k1_index"; + index1.index_type = TIndexType::INVERTED; + _alter_indexes.push_back(index1); + + // 6. Force columns to be treated as non-null by modifying schema + TabletSchemaSPtr non_null_schema = std::make_shared<TabletSchema>(); + create_tablet_schema(non_null_schema, KeysType::DUP_KEYS); + // Set the second column to be non-nullable explicitly + TabletColumn& k2_column = non_null_schema->mutable_column(1); + k2_column.set_is_nullable(false); + + // 7. Create IndexBuilder with the modified schema + IndexBuilder builder(_tablet, _columns, _alter_indexes, false); + + // 8. Initialize and verify + status = builder.init(); + EXPECT_TRUE(status.ok()) << status.to_string(); + EXPECT_EQ(builder._alter_index_ids.size(), 1); + + // 9. Build index - should trigger _add_data rather than _add_nullable + status = builder.do_build_inverted_index(); + EXPECT_TRUE(status.ok()) << status.to_string(); + + // 10. 
Verify results + auto old_tablet_path = _absolute_dir + "/" + std::to_string(15681); + auto new_tablet_path = _absolute_dir + "/" + std::to_string(14681); + bool old_exists = false; + bool new_exists = false; + EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path, &old_exists).ok()); + EXPECT_TRUE(old_exists); + EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path, &new_exists).ok()); + EXPECT_TRUE(new_exists); + + // 11. Check files in old and new directories + std::vector<io::FileInfo> old_files; + bool old_dir_exists = false; + EXPECT_TRUE(io::global_local_filesystem() + ->list(old_tablet_path, true, &old_files, &old_dir_exists) + .ok()); + EXPECT_TRUE(old_dir_exists); + + std::vector<io::FileInfo> new_files; + bool new_dir_exists = false; + EXPECT_TRUE(io::global_local_filesystem() + ->list(new_tablet_path, true, &new_files, &new_dir_exists) + .ok()); + EXPECT_TRUE(new_dir_exists); + int new_idx_file_count = 0; + for (const auto& file : new_files) { + std::string filename = file.file_name; + if (filename.find(".idx") != std::string::npos) { + new_idx_file_count++; + } + } + EXPECT_EQ(new_idx_file_count, 1) << "Should have created 1 index file"; +} + +TEST_F(IndexBuilderTest, NonExistentColumnUniqueIdTest) { + // 0. prepare tablet path + auto tablet_path = _absolute_dir + "/" + std::to_string(14682); + _tablet->_tablet_path = tablet_path; + ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); + ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); + + // 1. Prepare data for writing + RowsetSharedPtr rowset; + const int num_rows = 1000; + + // 2. 
Create a rowset writer context + RowsetWriterContext writer_context; + writer_context.rowset_id.init(15682); + writer_context.tablet_id = 15682; + writer_context.tablet_schema_hash = 567997577; + writer_context.partition_id = 10; + writer_context.rowset_type = BETA_ROWSET; + writer_context.rowset_dir = _absolute_dir + "/" + std::to_string(15682); + writer_context.rowset_state = VISIBLE; + writer_context.tablet_schema = _tablet_schema; + writer_context.version.first = 10; + writer_context.version.second = 10; + + ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.rowset_dir).ok()); + + // 3. Create a rowset writer + std::unique_ptr<RowsetWriter> rowset_writer; + auto status = RowsetFactory::create_rowset_writer(writer_context, false, &rowset_writer); + ASSERT_TRUE(status.ok()) << status.to_string(); + + // 4. Write data to the rowset + { + vectorized::Block block = _tablet_schema->create_block(); + auto columns = block.mutate_columns(); + + // Add data for k1 and k2 columns + for (int i = 0; i < num_rows; ++i) { + // k1 column (int) + int32_t k1 = i * 10; + columns[0]->insert_data((const char*)&k1, sizeof(k1)); + + // k2 column (int) + int32_t k2 = i % 100; + columns[1]->insert_data((const char*)&k2, sizeof(k2)); + } + + // Add the block to the rowset + Status s = rowset_writer->add_block(&block); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Flush the writer + s = rowset_writer->flush(); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Build the rowset + ASSERT_TRUE(rowset_writer->build(rowset).ok()); + + // Add the rowset to the tablet + ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); + } + + // 5. First add an initial index to the schema (for k1 column) + TabletIndex initial_index; + initial_index._index_id = 1; + initial_index._index_name = "k1_index"; + initial_index._index_type = IndexType::INVERTED; + initial_index._col_unique_ids.push_back(1); // unique_id for k1 + _tablet_schema->append_index(std::move(initial_index)); + + // 6. 
Prepare indexes for building - specifying column by unique_id that doesn't exist + _alter_indexes.clear(); + + // Use drop operation to test column_unique_ids path + TOlapTableIndex drop_index; + drop_index.index_id = 1; + drop_index.columns.emplace_back("non_existent_column"); + drop_index.column_unique_ids.push_back(999); // This unique ID doesn't exist + _alter_indexes.push_back(drop_index); + + // 7. Create IndexBuilder with drop operation + IndexBuilder builder(_tablet, _columns, _alter_indexes, true); + + // 8. Initialize and verify + status = builder.init(); + EXPECT_TRUE(status.ok()) << status.to_string(); + EXPECT_EQ(builder._alter_index_ids.size(), 1); + + // 9. Execute drop operation - should handle non-existent column gracefully + status = builder.do_build_inverted_index(); + EXPECT_TRUE(status.ok()) << status.to_string(); + + // 10. Verify paths exists - operations should complete without errors + auto old_tablet_path = _absolute_dir + "/" + std::to_string(15682); + auto new_tablet_path = _absolute_dir + "/" + std::to_string(14682); + bool old_exists = false; + bool new_exists = false; + EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path, &old_exists).ok()); + EXPECT_TRUE(old_exists); + EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path, &new_exists).ok()); + EXPECT_TRUE(new_exists); +} + +TEST_F(IndexBuilderTest, DropIndexV1FormatTest) { + // 1. Create new schema using V1 format + auto v1_schema = std::make_shared<TabletSchema>(); + create_tablet_schema(v1_schema, KeysType::DUP_KEYS); + + // 2. Modify to V1 format + v1_schema->_inverted_index_storage_format = InvertedIndexStorageFormatPB::V1; + + // 3. 
Add an initial index to the schema (for k1 column) + TabletIndex initial_index; + initial_index._index_id = 1; + initial_index._index_name = "k1_index"; + initial_index._index_type = IndexType::INVERTED; + initial_index._col_unique_ids.push_back(1); // unique_id for k1 + v1_schema->append_index(std::move(initial_index)); + + // 4. Update schema in tablet + TabletMetaPB tablet_meta_pb; + _tablet_meta->to_meta_pb(&tablet_meta_pb); + + TabletSchemaPB v1_schema_pb; + v1_schema->to_schema_pb(&v1_schema_pb); + tablet_meta_pb.mutable_schema()->CopyFrom(v1_schema_pb); + + _tablet_meta->init_from_pb(tablet_meta_pb); + + // 5. Reinitialize tablet to use new schema + _tablet = std::make_shared<Tablet>(*_engine, _tablet_meta, _data_dir.get()); + ASSERT_TRUE(_tablet->init().ok()); + auto tablet_path = _absolute_dir + "/" + std::to_string(15683); + _tablet->_tablet_path = tablet_path; + ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); + ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); + + // 6. Prepare data for writing + RowsetSharedPtr rowset; + const int num_rows = 1000; + + // 7. Create a rowset writer context + RowsetWriterContext writer_context; + writer_context.rowset_id.init(15683); + writer_context.tablet_id = 15683; + writer_context.tablet_schema_hash = 567997577; + writer_context.partition_id = 10; + writer_context.rowset_type = BETA_ROWSET; + writer_context.rowset_dir = tablet_path; + writer_context.rowset_state = VISIBLE; + writer_context.tablet_schema = v1_schema; + writer_context.version.first = 10; + writer_context.version.second = 10; + + ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.rowset_dir).ok()); + + // 8. Create a rowset writer + std::unique_ptr<RowsetWriter> rowset_writer; + auto status = RowsetFactory::create_rowset_writer(writer_context, false, &rowset_writer); + ASSERT_TRUE(status.ok()) << status.to_string(); + + // 9. 
Write data to the rowset + { + vectorized::Block block = v1_schema->create_block(); + auto columns = block.mutate_columns(); + + // Add data for k1 and k2 columns + for (int i = 0; i < num_rows; ++i) { + // k1 column (int) + int32_t k1 = i * 10; + columns[0]->insert_data((const char*)&k1, sizeof(k1)); + + // k2 column (int) + int32_t k2 = i % 100; + columns[1]->insert_data((const char*)&k2, sizeof(k2)); + } + + // Add the block to the rowset + Status s = rowset_writer->add_block(&block); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Flush the writer + s = rowset_writer->flush(); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Build the rowset + ASSERT_TRUE(rowset_writer->build(rowset).ok()); + + // Add the rowset to the tablet + ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); + } + + // 10. Prepare to drop the k1 index + _alter_indexes.clear(); + TOlapTableIndex drop_index; + drop_index.index_id = 1; + drop_index.columns.emplace_back("k1"); + drop_index.index_name = "k1_index"; + drop_index.index_type = TIndexType::INVERTED; + _alter_indexes.push_back(drop_index); + + // 11. Create IndexBuilder with drop operation + IndexBuilder builder(_tablet, _columns, _alter_indexes, true); + + // 12. Initialize and verify + status = builder.init(); + EXPECT_TRUE(status.ok()) << status.to_string(); + EXPECT_EQ(builder._alter_index_ids.size(), 1); + + // 13. Execute drop operation + status = builder.do_build_inverted_index(); + EXPECT_TRUE(status.ok()) << status.to_string(); + + // 14. Verify paths exists + bool exists = false; + EXPECT_TRUE(io::global_local_filesystem()->exists(tablet_path, &exists).ok()); + EXPECT_TRUE(exists); + + // 15. 
Verify the index has been removed + std::vector<io::FileInfo> files; + bool dir_exists = false; + EXPECT_TRUE(io::global_local_filesystem()->list(tablet_path, true, &files, &dir_exists).ok()); + EXPECT_TRUE(dir_exists); + + // Verify no index files in the new directory + int new_idx_file_count = 0; + int new_dat_file_count = 0; + int old_idx_file_count = 0; + int old_dat_file_count = 0; + for (const auto& file : files) { + std::string filename = file.file_name; + if (filename.find("15683_0_1.idx") != std::string::npos) { + old_idx_file_count++; + } + if (filename.find("15683_0.dat") != std::string::npos) { + old_dat_file_count++; + } + if (filename.find("020000000000000100000000000000000000000000000000_0_1.idx") != + std::string::npos) { + new_idx_file_count++; + } + if (filename.find("020000000000000100000000000000000000000000000000_0.dat") != + std::string::npos) { + new_dat_file_count++; + } + } + // The index should have been removed + EXPECT_EQ(old_idx_file_count, 1) << "Tablet path should have 1 .idx file before drop"; + EXPECT_EQ(old_dat_file_count, 1) << "Tablet path should have 1 .dat file before drop"; + EXPECT_EQ(new_idx_file_count, 0) << "Tablet path should have no .idx file after drop"; + EXPECT_EQ(new_dat_file_count, 1) << "Tablet path should have 1 .dat file after drop"; +} + +TEST_F(IndexBuilderTest, ResourceCleanupTest) { + // 0. prepare tablet path + auto tablet_path = _absolute_dir + "/" + std::to_string(15684); + _tablet->_tablet_path = tablet_path; + ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); + ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); + + // 1. Prepare data for writing + RowsetSharedPtr rowset; + const int num_rows = 1000; + + // 2. 
Create a rowset writer context + RowsetWriterContext writer_context; + writer_context.rowset_id.init(15684); + writer_context.tablet_id = 15684; + writer_context.tablet_schema_hash = 567997577; + writer_context.partition_id = 10; + writer_context.rowset_type = BETA_ROWSET; + writer_context.rowset_dir = tablet_path; + writer_context.rowset_state = VISIBLE; + writer_context.tablet_schema = _tablet_schema; + writer_context.version.first = 10; + writer_context.version.second = 10; + + ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.rowset_dir).ok()); + + // 3. Create a rowset writer + std::unique_ptr<RowsetWriter> rowset_writer; + auto status = RowsetFactory::create_rowset_writer(writer_context, false, &rowset_writer); + ASSERT_TRUE(status.ok()) << status.to_string(); + + // 4. Write data to the rowset + { + vectorized::Block block = _tablet_schema->create_block(); + auto columns = block.mutate_columns(); + + // Add data for k1 and k2 columns + for (int i = 0; i < num_rows; ++i) { + // k1 column (int) + int32_t k1 = i * 10; + columns[0]->insert_data((const char*)&k1, sizeof(k1)); + + // k2 column (int) + int32_t k2 = i % 100; + columns[1]->insert_data((const char*)&k2, sizeof(k2)); + } + + // Add the block to the rowset + Status s = rowset_writer->add_block(&block); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Flush the writer + s = rowset_writer->flush(); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Build the rowset + ASSERT_TRUE(rowset_writer->build(rowset).ok()); + + // Add the rowset to the tablet + ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); + } + + // 5. 
Prepare indexes for building + TOlapTableIndex index1; + index1.index_id = 1; + index1.columns.emplace_back("k1"); + index1.index_name = "k1_index"; + index1.index_type = TIndexType::INVERTED; + _alter_indexes.push_back(index1); + + // Create a custom IndexBuilder with a spy function to test resource cleanup + class TestIndexBuilder : public IndexBuilder { + public: + TestIndexBuilder(TabletSharedPtr tablet, const std::vector<TColumn>& columns, + const std::vector<doris::TOlapTableIndex>& alter_inverted_indexes, + bool is_drop_op) + : IndexBuilder(tablet, columns, alter_inverted_indexes, is_drop_op) {} + + ~TestIndexBuilder() override = default; + // Override update_inverted_index_info to inject failure + Status update_inverted_index_info() override { + RETURN_IF_ERROR(IndexBuilder::update_inverted_index_info()); + // Create a fake error to trigger cleanup + return Status::Error<ErrorCode::INTERNAL_ERROR>("Simulated error for testing cleanup"); + } + }; + + // 6. Create our test builder + TestIndexBuilder builder(_tablet, _columns, _alter_indexes, false); + + // 7. Initialize and verify + status = builder.init(); + EXPECT_TRUE(status.ok()) << status.to_string(); + EXPECT_EQ(builder._alter_index_ids.size(), 1); + + // 8. 
Build index - should fail with our simulated error + status = builder.do_build_inverted_index(); + EXPECT_FALSE(status.ok()) << "Expected failure, but got success"; + EXPECT_TRUE(status.is<ErrorCode::INTERNAL_ERROR>()) << "Expected internal error"; + EXPECT_EQ(status.to_string(), "[INTERNAL_ERROR]Simulated error for testing cleanup") + << "Error message doesn't match expected"; + + // Verify the paths haven't been modified since the operation failed + bool exists = false; + EXPECT_TRUE(io::global_local_filesystem()->exists(tablet_path, &exists).ok()); + EXPECT_TRUE(exists); + + auto rowset_id = extract_rowset_id("020000000000000100000000000000000000000000000000_0.dat"); + EXPECT_TRUE(_engine->check_rowset_id_in_unused_rowsets(rowset_id)) + << "Rowset id should be in unused rowsets"; +} + +TEST_F(IndexBuilderTest, ArrayTypeIndexTest) { + // 1. Prepare tablet path + auto tablet_path = _absolute_dir + "/" + std::to_string(14685); + ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); + ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); + + // 2. 
Create tablet schema with array type + auto tablet_schema = std::make_shared<TabletSchema>(); + TabletColumn column_1(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE, + FieldType::OLAP_FIELD_TYPE_INT, true); + column_1.set_unique_id(1); + column_1.set_is_key(true); + column_1.set_name("k1"); + tablet_schema->append_column(column_1); + + // Array type column + TabletColumn column_2; + column_2.set_unique_id(2); + column_2.set_is_key(false); + column_2.set_name("array_col"); + column_2.set_type(FieldType::OLAP_FIELD_TYPE_ARRAY); + column_2.set_is_nullable(false); + // Add a primitive type for array items + TabletColumn array_item_column(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE, + FieldType::OLAP_FIELD_TYPE_VARCHAR, true); + array_item_column.set_is_key(false); + array_item_column.set_length(64); + column_2.add_sub_column(array_item_column); + tablet_schema->append_column(column_2); + + // 3. Create tablet + auto tablet_meta = create_tablet_meta(); + auto tablet = std::make_shared<Tablet>(*_engine, tablet_meta, _data_dir.get()); + tablet->_tablet_path = tablet_path; + ASSERT_TRUE(tablet->init().ok()); + + // 4. Add inverted index for array column + _columns.clear(); + TColumn tc1; + tc1.column_name = "array_col"; + _columns.push_back(tc1); + + _alter_indexes.clear(); + TOlapTableIndex tt_index; + tt_index.index_id = 1; + tt_index.index_name = "array_index"; + tt_index.columns.emplace_back("array_col"); + tt_index.column_unique_ids.push_back(2); + tt_index.index_type = TIndexType::type::INVERTED; + _alter_indexes.push_back(tt_index); + + // 5. 
Create a rowset writer + RowsetWriterContext writer_context; + writer_context.rowset_id.init(14685); + writer_context.tablet_id = 14685; + writer_context.tablet_schema_hash = 567997577; + writer_context.partition_id = 10; + writer_context.rowset_type = BETA_ROWSET; + writer_context.rowset_dir = tablet_path; + writer_context.rowset_state = VISIBLE; + writer_context.tablet_schema = tablet_schema; + writer_context.version.first = 10; + writer_context.version.second = 10; + + ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.rowset_dir).ok()); + + // 6. Create rowset writer + std::unique_ptr<RowsetWriter> rowset_writer; + auto status = RowsetFactory::create_rowset_writer(writer_context, false, &rowset_writer); + ASSERT_TRUE(status.ok()) << status.to_string(); + + // 7. Create data block and write data + { + vectorized::Block block = tablet_schema->create_block(); + auto columns = block.mutate_columns(); + + // Prepare columns for k1 and array_col + for (int i = 0; i < 1000; i++) { + // k1 column (int) + int32_t k1 = i; + columns[0]->insert_data((const char*)&k1, sizeof(k1)); + + // array_col column + // Create array data with 1-5 elements + int array_size = i % 5 + 1; + + // For array type, we need to create a complex nested column structure + auto& array_col = static_cast<vectorized::ColumnArray&>(*columns[1]); + vectorized::Array arr; + // Add string elements to the array + for (int j = 0; j < array_size; j++) { + std::string val = "item_" + std::to_string(i) + "_" + std::to_string(j); + arr.push_back(vectorized::Field(val)); + } + array_col.insert(arr); + } + + // Add block to rowset + Status s = rowset_writer->add_block(&block); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Flush writer + s = rowset_writer->flush(); + ASSERT_TRUE(s.ok()) << s.to_string(); + } + + // 8. 
Build rowset + RowsetSharedPtr rowset; + ASSERT_TRUE(rowset_writer->build(rowset).ok()); + ASSERT_TRUE(rowset != nullptr); + ASSERT_TRUE(tablet->add_rowset(rowset).ok()); + + // 9. Initialize and build inverted index + IndexBuilder builder(tablet, _columns, _alter_indexes, false); + status = builder.init(); + EXPECT_TRUE(status.ok()) << status.to_string(); + + status = builder.do_build_inverted_index(); + EXPECT_TRUE(status.ok()) << status.to_string(); + + // 10. Verify that the index has been created + std::string segment_path = local_segment_path( + tablet->tablet_path(), + extract_rowset_id("020000000000000100000000000000000000000000000000_0.dat").to_string(), + 0); + + if (tablet_schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) { + // V1 format + auto index_path = InvertedIndexDescriptor::inverted_index_file_path( + tablet->tablet_path(), + extract_rowset_id("020000000000000100000000000000000000000000000000_0.dat"), 0, 1, + ""); + bool exists = false; + EXPECT_TRUE(io::global_local_filesystem()->exists(index_path, &exists).ok()); + EXPECT_TRUE(exists) << "Index file not found: " << index_path; + } else { + // V2+ format + auto index_path = InvertedIndexDescriptor::get_index_file_name(segment_path); + bool exists = false; + EXPECT_TRUE(io::global_local_filesystem()->exists(index_path, &exists).ok()); + EXPECT_TRUE(exists) << "Index file not found: " << index_path; + } +} + +TEST_F(IndexBuilderTest, UniqueKeysTableIndexTest) { + // 0. prepare tablet path + auto tablet_path = _absolute_dir + "/" + std::to_string(14688); + _tablet->_tablet_path = tablet_path; + _tablet->_tablet_meta->_schema = _tablet_schema; + _tablet->_tablet_meta->_schema->_keys_type = KeysType::UNIQUE_KEYS; + _tablet->_tablet_meta->_enable_unique_key_merge_on_write = true; + ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); + ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); + + // 1. 
Prepare data for writing + RowsetSharedPtr rowset; + const int rows_per_segment = 500; + + // 2. Create a rowset writer context with segment size set to trigger multiple segments + RowsetWriterContext writer_context; + writer_context.rowset_id.init(15677); + writer_context.tablet_id = 15677; + writer_context.tablet_schema_hash = 567997577; + writer_context.partition_id = 10; + writer_context.rowset_type = BETA_ROWSET; + writer_context.rowset_dir = tablet_path; + writer_context.rowset_state = VISIBLE; + writer_context.tablet_schema = _tablet_schema; + writer_context.version.first = 10; + writer_context.version.second = 10; + // Set small segment size to ensure we create multiple segments + writer_context.max_rows_per_segment = rows_per_segment; + + ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.rowset_dir).ok()); + + std::unique_ptr<RowsetWriter> rowset_writer; + auto status = RowsetFactory::create_rowset_writer(writer_context, false, &rowset_writer); + ASSERT_TRUE(status.ok()) << status.to_string(); + + { + vectorized::Block block = _tablet_schema->create_block(); + auto columns = block.mutate_columns(); + + // Add data for k1 and k2 columns + for (int i = 0; i < 1000; ++i) { + // k1 column (int) + int32_t k1 = i * 10; + columns[0]->insert_data((const char*)&k1, sizeof(k1)); + + // k2 column (int) + int32_t k2 = i % 100; + columns[1]->insert_data((const char*)&k2, sizeof(k2)); + } + + // Add the block to the rowset + Status s = rowset_writer->add_block(&block); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Flush the writer + s = rowset_writer->flush(); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Build the rowset + ASSERT_TRUE(rowset_writer->build(rowset).ok()); + + // Add the rowset to the tablet + ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); + } + + // 6. 
Create test class that overrides methods to simulate unique key table behavior + class TestIndexBuilder : public IndexBuilder { + public: + TestIndexBuilder(TabletSharedPtr tablet, const std::vector<TColumn>& columns, + const std::vector<doris::TOlapTableIndex>& alter_inverted_indexes, + bool is_drop_op) + : IndexBuilder(tablet, columns, alter_inverted_indexes, is_drop_op) {} + + ~TestIndexBuilder() override = default; + + // Override to make sure modify_rowsets with UNIQUE_KEYS path is called + Status modify_rowsets(const Merger::Statistics* stats = nullptr) override { + // Call parent method which should use the UNIQUE_KEYS path + return IndexBuilder::modify_rowsets(stats); + } + }; + + _alter_indexes.clear(); + TOlapTableIndex tt_index; + tt_index.index_id = 1; + tt_index.index_name = "k1_index"; + tt_index.columns.emplace_back("k1"); + tt_index.column_unique_ids.push_back(1); + tt_index.index_type = TIndexType::type::INVERTED; + _alter_indexes.push_back(tt_index); + // 7. Initialize and build inverted index + TestIndexBuilder builder(_tablet, _columns, _alter_indexes, false); + status = builder.init(); + EXPECT_TRUE(status.ok()) << status.to_string(); + + // 8. Execute build index, which should go through UNIQUE_KEYS path in modify_rowsets + status = builder.do_build_inverted_index(); + EXPECT_TRUE(status.ok()) << status.to_string(); + + // 9. 
Verify that the index was created successfully + std::string segment_path = local_segment_path( + _tablet->tablet_path(), + extract_rowset_id("020000000000000100000000000000000000000000000000_0.dat").to_string(), + 0); + + if (_tablet_schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) { + auto index_path = InvertedIndexDescriptor::inverted_index_file_path( + _tablet->tablet_path(), + extract_rowset_id("020000000000000100000000000000000000000000000000_0.dat"), 0, 1, + ""); + bool exists = false; + EXPECT_TRUE(io::global_local_filesystem()->exists(index_path, &exists).ok()); + EXPECT_TRUE(exists) << "Index file not found: " << index_path; + } else { + auto index_path = InvertedIndexDescriptor::get_index_file_name(segment_path); + bool exists = false; + EXPECT_TRUE(io::global_local_filesystem()->exists(index_path, &exists).ok()); + EXPECT_TRUE(exists) << "Index file not found: " << index_path; + } +} + +TEST_F(IndexBuilderTest, HandleSingleRowsetErrorTest) { + // 1. Create a test class that overrides handle_single_rowset to simulate error scenarios + class TestIndexBuilder : public IndexBuilder { + public: + TestIndexBuilder(TabletSharedPtr tablet, const std::vector<TColumn>& columns, + const std::vector<doris::TOlapTableIndex>& alter_inverted_indexes, + bool is_drop_op, bool simulate_non_local_rowset_error = false) + : IndexBuilder(tablet, columns, alter_inverted_indexes, is_drop_op), + _simulate_non_local_rowset_error(simulate_non_local_rowset_error) {} + + ~TestIndexBuilder() override = default; + + // Override to simulate error conditions + Status handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta, + std::vector<segment_v2::SegmentSharedPtr>& segments) override { + if (_simulate_non_local_rowset_error) { + // Simulate the condition where is_local_rowset is false + return Status::InternalError("should be local rowset. 
tablet_id={} rowset_id={}", + 123, "test_rowset_id"); + } + + // Call parent method for normal processing + return IndexBuilder::handle_single_rowset(output_rowset_meta, segments); + } + + private: + bool _simulate_non_local_rowset_error; + }; + + // 2. Prepare tablet path + std::string tablet_path = _absolute_dir + "/" + std::to_string(14687); + EXPECT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); + EXPECT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); + + // 3. Set up tablet schema and tablet + TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>(); + create_tablet_schema(tablet_schema, KeysType::DUP_KEYS, 2); + + auto tablet_meta = create_tablet_meta(); + tablet_meta->_schema = tablet_schema; + auto tablet = std::make_shared<Tablet>(*_engine, tablet_meta, _data_dir.get()); + tablet->_tablet_path = tablet_path; + ASSERT_TRUE(tablet->init().ok()); + + // 4. Create inverted index definition + _alter_indexes.clear(); + TOlapTableIndex tt_index; + tt_index.index_id = 1; + tt_index.index_name = "k1_index"; + tt_index.columns.emplace_back("k1"); + tt_index.column_unique_ids.push_back(1); + tt_index.index_type = TIndexType::type::INVERTED; + _alter_indexes.push_back(tt_index); + + // 5. 
Create a rowset + RowsetWriterContext writer_context; + writer_context.rowset_id = _engine->next_rowset_id(); + writer_context.tablet_id = 14687; + writer_context.rowset_dir = tablet_path; + writer_context.tablet_schema_hash = 1111; + writer_context.partition_id = 10; + writer_context.rowset_type = BETA_ROWSET; + writer_context.segments_overlap = NONOVERLAPPING; + writer_context.tablet_schema = tablet_schema; + writer_context.version.first = 10; + writer_context.version.second = 10; + + auto result = tablet->create_rowset_writer(writer_context, false); + EXPECT_TRUE(result.has_value()) << result.error(); + auto rowset_writer = std::move(result).value(); + + { + vectorized::Block block = _tablet_schema->create_block(); + auto columns = block.mutate_columns(); + + // Add data for k1 and k2 columns + for (int i = 0; i < 1000; ++i) { + // k1 column (int) + int32_t k1 = i * 10; + columns[0]->insert_data((const char*)&k1, sizeof(k1)); + + // k2 column (int) + int32_t k2 = i % 100; + columns[1]->insert_data((const char*)&k2, sizeof(k2)); + } + + // Add the block to the rowset + Status s = rowset_writer->add_block(&block); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Flush the writer + s = rowset_writer->flush(); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Build the rowset + RowsetSharedPtr rowset; + ASSERT_TRUE(rowset_writer->build(rowset).ok()); + + // Add the rowset to the tablet + ASSERT_TRUE(tablet->add_rowset(rowset).ok()); + } + + // 6. 
Test error scenario with non-local rowset + TestIndexBuilder builder(tablet, _columns, _alter_indexes, false, true); + auto status = builder.init(); + EXPECT_TRUE(status.ok()) << status.to_string(); + + // Execute build_index, which should fail due to simulated error + status = builder.do_build_inverted_index(); + EXPECT_FALSE(status.ok()) << "Expected failure but got success"; + EXPECT_TRUE(status.is<ErrorCode::INTERNAL_ERROR>()) + << "Expected internal error but got: " << status.to_string(); + EXPECT_TRUE(status.to_string().find("should be local rowset") != std::string::npos) + << "Error message doesn't match expected: " << status.to_string(); +} + +TEST_F(IndexBuilderTest, UpdateInvertedIndexInfoErrorTest) { + // 1. Create a test class that overrides update_inverted_index_info to simulate error scenarios + class TestIndexBuilder : public IndexBuilder { + public: + TestIndexBuilder(TabletSharedPtr tablet, const std::vector<TColumn>& columns, + const std::vector<doris::TOlapTableIndex>& alter_inverted_indexes, + bool is_drop_op, int error_type = 0) + : IndexBuilder(tablet, columns, alter_inverted_indexes, is_drop_op), + _error_type(error_type) {} + + ~TestIndexBuilder() override = default; + + // Override update_inverted_index_info to inject errors + Status update_inverted_index_info() override { + if (_error_type == 1) { + // Simulate non-local rowset error in update_inverted_index_info + return Status::InternalError("should be local rowset. tablet_id={} rowset_id={}", + 123, "test_rowset_id"); + } else if (_error_type == 2) { + // Simulate size retrieval error + return Status::Error<ErrorCode::INIT_FAILED>("debug point: get fs failed"); + } + + // Call parent method for normal processing + return IndexBuilder::update_inverted_index_info(); + } + + private: + int _error_type; // 0: no error, 1: non-local rowset error, 2: size retrieval error + }; + + // 2. 
Prepare tablet path + std::string tablet_path = _absolute_dir + "/" + std::to_string(14688); + EXPECT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); + EXPECT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); + + // 3. Set up tablet schema and tablet + TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>(); + create_tablet_schema(tablet_schema, KeysType::DUP_KEYS, 2); + + auto tablet_meta = create_tablet_meta(); + tablet_meta->_schema = tablet_schema; + auto tablet = std::make_shared<Tablet>(*_engine, tablet_meta, _data_dir.get()); + tablet->_tablet_path = tablet_path; + ASSERT_TRUE(tablet->init().ok()); + + // 4. Create inverted index definition + _alter_indexes.clear(); + TOlapTableIndex tt_index; + tt_index.index_id = 1; + tt_index.index_name = "k1_index"; + tt_index.columns.emplace_back("k1"); + tt_index.column_unique_ids.push_back(1); + tt_index.index_type = TIndexType::type::INVERTED; + _alter_indexes.push_back(tt_index); + + // 5. 
Create a rowset + RowsetWriterContext writer_context; + writer_context.rowset_id = _engine->next_rowset_id(); + writer_context.tablet_id = 14688; + writer_context.rowset_dir = tablet_path; + writer_context.tablet_schema_hash = 1111; + writer_context.partition_id = 10; + writer_context.rowset_type = BETA_ROWSET; + writer_context.segments_overlap = NONOVERLAPPING; + writer_context.tablet_schema = tablet_schema; + writer_context.version.first = 10; + writer_context.version.second = 10; + + auto result = tablet->create_rowset_writer(writer_context, false); + EXPECT_TRUE(result.has_value()) << result.error(); + auto rowset_writer = std::move(result).value(); + + // Write data + { + vectorized::Block block = tablet_schema->create_block(); + auto columns = block.mutate_columns(); + + // Add data for k1 and k2 columns + for (int i = 0; i < 1000; ++i) { + // k1 column (int) + int32_t k1 = i * 10; + columns[0]->insert_data((const char*)&k1, sizeof(k1)); + + // k2 column (int) + int32_t k2 = i % 100; + columns[1]->insert_data((const char*)&k2, sizeof(k2)); + } + + // Add the block to the rowset + Status s = rowset_writer->add_block(&block); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Flush the writer + s = rowset_writer->flush(); + ASSERT_TRUE(s.ok()) << s.to_string(); + + // Build the rowset + RowsetSharedPtr rowset; + ASSERT_TRUE(rowset_writer->build(rowset).ok()); + + // Add the rowset to the tablet + ASSERT_TRUE(tablet->add_rowset(rowset).ok()); + } + // 6. 
Test error scenarios + + // 6.1 Test non-local rowset error + { + TestIndexBuilder builder(tablet, _columns, _alter_indexes, false, 1); + auto status = builder.init(); + EXPECT_TRUE(status.ok()) << status.to_string(); + + // Execute build_index, which should fail due to simulated error + status = builder.do_build_inverted_index(); + EXPECT_FALSE(status.ok()) << "Expected failure but got success"; + EXPECT_TRUE(status.is<ErrorCode::INTERNAL_ERROR>()) + << "Expected internal error but got: " << status.to_string(); + EXPECT_TRUE(status.to_string().find("should be local rowset") != std::string::npos) + << "Error message doesn't match expected: " << status.to_string(); + } + + // 6.2 Test size retrieval error + { + TestIndexBuilder builder(tablet, _columns, _alter_indexes, false, 2); + auto status = builder.init(); + EXPECT_TRUE(status.ok()) << status.to_string(); + + // Execute build_index, which should fail due to simulated error + status = builder.do_build_inverted_index(); + EXPECT_FALSE(status.ok()) << "Expected failure but got success"; + EXPECT_TRUE(status.is<ErrorCode::INIT_FAILED>()) + << "Expected INIT_FAILED but got: " << status.to_string(); + EXPECT_TRUE(status.to_string().find("debug point: get fs failed") != std::string::npos) + << "Error message doesn't match expected: " << status.to_string(); + } +} + +} // namespace doris diff --git a/regression-test/data/inverted_index_p0/index_change/test_index_change_on_new_column.out b/regression-test/data/inverted_index_p0/index_change/test_index_change_on_new_column.out index 78a797ead64..d538508a204 100644 Binary files a/regression-test/data/inverted_index_p0/index_change/test_index_change_on_new_column.out and b/regression-test/data/inverted_index_p0/index_change/test_index_change_on_new_column.out differ diff --git a/regression-test/suites/inverted_index_p0/index_change/test_index_change_on_new_column.groovy b/regression-test/suites/inverted_index_p0/index_change/test_index_change_on_new_column.groovy index 
eba77b8396c..45cd76debaa 100644 --- a/regression-test/suites/inverted_index_p0/index_change/test_index_change_on_new_column.groovy +++ b/regression-test/suites/inverted_index_p0/index_change/test_index_change_on_new_column.groovy @@ -99,5 +99,93 @@ suite("test_index_change_on_new_column") { assertEquals(show_result[0][2], "idx_s1") qt_select2 """ SELECT * FROM ${tableName} order by id; """ - qt_select3 """ SELECT * FROM ${tableName} where s1 match 'welcome'; """ + qt_select3 """ SELECT /*+SET_VAR(enable_fallback_on_missing_inverted_index=false) */ * FROM ${tableName} where s1 match 'welcome'; """ + + tableName = "test_index_change_on_new_column1" + + sql """ DROP TABLE IF EXISTS ${tableName} """ + sql """ + CREATE TABLE IF NOT EXISTS ${tableName} ( + `id` INT COMMENT "", + `s` STRING COMMENT "", + INDEX idx_s(s) USING INVERTED + ) + DUPLICATE KEY(`id`) DISTRIBUTED BY HASH(`id`) + PROPERTIES ( "replication_num" = "1" ); + """ + + sql """ INSERT INTO ${tableName} VALUES + (1, 'hello world') + """ + + // add new column + sql """ alter table ${tableName} add column s1 varchar(50) default null after s; """ + + qt_select1 """ SELECT * FROM ${tableName}; """ + + // create inverted index on new column + sql """ alter table ${tableName} add index idx_s1(s1) USING INVERTED PROPERTIES('parser' = 'english')""" + wait_for_latest_op_on_table_finish(tableName, timeout) + + sql """ INSERT INTO ${tableName} VALUES + (2, 'hello wold', 'welcome to the world') + """ + // build inverted index on new column + if (!isCloudMode()) { + sql """ build index idx_s1 on ${tableName} """ + wait_for_build_index_on_partition_finish(tableName, timeout) + } + + show_result = sql "show index from ${tableName}" + logger.info("show index from " + tableName + " result: " + show_result) + assertEquals(show_result.size(), 2) + assertEquals(show_result[0][2], "idx_s") + assertEquals(show_result[1][2], "idx_s1") + qt_select2 """ SELECT * FROM ${tableName} order by id; """ + qt_select3 """ SELECT 
/*+SET_VAR(enable_fallback_on_missing_inverted_index=false) */ * FROM ${tableName} where s1 match 'welcome'; """ + qt_select4 """ SELECT /*+SET_VAR(enable_fallback_on_missing_inverted_index=false) */ * FROM ${tableName} where s match 'hello world'; """ + + tableName = "test_index_change_on_new_column1_index_v1" + + sql """ DROP TABLE IF EXISTS ${tableName} """ + sql """ + CREATE TABLE IF NOT EXISTS ${tableName} ( + `id` INT COMMENT "", + `s` STRING COMMENT "", + INDEX idx_s(s) USING INVERTED + ) + DUPLICATE KEY(`id`) DISTRIBUTED BY HASH(`id`) + PROPERTIES ( "inverted_index_storage_format" = "v1", "replication_num" = "1" ); + """ + + sql """ INSERT INTO ${tableName} VALUES + (1, 'hello world') + """ + + // add new column + sql """ alter table ${tableName} add column s1 varchar(50) default null after s; """ + + qt_select1 """ SELECT * FROM ${tableName}; """ + + // create inverted index on new column + sql """ alter table ${tableName} add index idx_s1(s1) USING INVERTED PROPERTIES('parser' = 'english')""" + wait_for_latest_op_on_table_finish(tableName, timeout) + + sql """ INSERT INTO ${tableName} VALUES + (2, 'hello wold', 'welcome to the world') + """ + // build inverted index on new column + if (!isCloudMode()) { + sql """ build index idx_s1 on ${tableName} """ + wait_for_build_index_on_partition_finish(tableName, timeout) + } + + show_result = sql "show index from ${tableName}" + logger.info("show index from " + tableName + " result: " + show_result) + assertEquals(show_result.size(), 2) + assertEquals(show_result[0][2], "idx_s") + assertEquals(show_result[1][2], "idx_s1") + qt_select2 """ SELECT * FROM ${tableName} order by id; """ + qt_select3 """ SELECT /*+SET_VAR(enable_fallback_on_missing_inverted_index=false) */ * FROM ${tableName} where s1 match 'welcome'; """ + qt_select4 """ SELECT /*+SET_VAR(enable_fallback_on_missing_inverted_index=false) */ * FROM ${tableName} where s match 'hello world'; """ } 
--------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org