This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push: new 7d7e3a3 [refactor] Remove snapshot converter and unused Protobuf Definitions (#8026) 7d7e3a3 is described below commit 7d7e3a39f56baafa76266d8dc3c26226a5ce2709 Author: yiguolei <guole...@tencent.com> AuthorDate: Sat Feb 12 16:06:04 2022 +0800 [refactor] Remove snapshot converter and unused Protobuf Definitions (#8026) 1. remove snapshot converter 2. remove unused protobuf definitions 3. move some macro as const variables --- be/src/olap/CMakeLists.txt | 1 - be/src/olap/data_dir.cpp | 1 - be/src/olap/olap_snapshot_converter.cpp | 306 -------------------------- be/src/olap/olap_snapshot_converter.h | 65 ------ be/src/olap/rowset/alpha_rowset.h | 2 - be/src/olap/snapshot_manager.cpp | 1 - be/src/olap/storage_engine.cpp | 1 - be/src/olap/task/engine_clone_task.cpp | 1 - be/src/vec/io/io_helper.h | 8 +- be/test/olap/CMakeLists.txt | 1 - be/test/olap/olap_snapshot_converter_test.cpp | 250 --------------------- gensrc/proto/olap_file.proto | 90 -------- 12 files changed, 5 insertions(+), 722 deletions(-) diff --git a/be/src/olap/CMakeLists.txt b/be/src/olap/CMakeLists.txt index 36f4515..ced9307 100644 --- a/be/src/olap/CMakeLists.txt +++ b/be/src/olap/CMakeLists.txt @@ -120,7 +120,6 @@ add_library(Olap STATIC task/engine_storage_migration_task.cpp task/engine_publish_version_task.cpp task/engine_alter_tablet_task.cpp - olap_snapshot_converter.cpp column_vector.cpp segment_loader.cpp ) diff --git a/be/src/olap/data_dir.cpp b/be/src/olap/data_dir.cpp index a3ddab2..5bb86df 100644 --- a/be/src/olap/data_dir.cpp +++ b/be/src/olap/data_dir.cpp @@ -35,7 +35,6 @@ #include "gutil/strings/substitute.h" #include "olap/file_helper.h" #include "olap/olap_define.h" -#include "olap/olap_snapshot_converter.h" #include "olap/rowset/alpha_rowset_meta.h" #include "olap/rowset/rowset_factory.h" #include "olap/rowset/rowset_meta_manager.h" diff --git a/be/src/olap/olap_snapshot_converter.cpp b/be/src/olap/olap_snapshot_converter.cpp deleted file mode 100644 index 334af43..0000000 --- a/be/src/olap/olap_snapshot_converter.cpp +++ /dev/null @@ -1,306 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "olap/olap_snapshot_converter.h" - -#include "olap/rowset/alpha_rowset.h" -#include "olap/rowset/alpha_rowset_meta.h" -#include "olap/rowset/rowset_id_generator.h" -#include "olap/storage_engine.h" - -namespace doris { - -OLAPStatus OlapSnapshotConverter::convert_to_pdelta(const RowsetMetaPB& rowset_meta_pb, - PDelta* delta) { - if (!rowset_meta_pb.has_start_version()) { - LOG(FATAL) << "rowset does not have start_version." - << " rowset id = " << rowset_meta_pb.rowset_id(); - } - delta->set_start_version(rowset_meta_pb.start_version()); - if (!rowset_meta_pb.has_end_version()) { - LOG(FATAL) << "rowset does not have end_version." - << " rowset id = " << rowset_meta_pb.rowset_id(); - } - delta->set_end_version(rowset_meta_pb.end_version()); - if (!rowset_meta_pb.has_version_hash()) { - LOG(FATAL) << "rowset does not have version_hash." - << " rowset id = " << rowset_meta_pb.rowset_id(); - } - delta->set_version_hash(rowset_meta_pb.version_hash()); - if (!rowset_meta_pb.has_creation_time()) { - LOG(FATAL) << "rowset does not have creation_time." - << " rowset id = " << rowset_meta_pb.rowset_id(); - } - delta->set_creation_time(rowset_meta_pb.creation_time()); - AlphaRowsetExtraMetaPB extra_meta_pb = rowset_meta_pb.alpha_rowset_extra_meta_pb(); - - for (auto& segment_group : extra_meta_pb.segment_groups()) { - SegmentGroupPB* new_segment_group = delta->add_segment_group(); - *new_segment_group = segment_group; - } - if (rowset_meta_pb.has_delete_predicate()) { - DeletePredicatePB* delete_condition = delta->mutable_delete_condition(); - *delete_condition = rowset_meta_pb.delete_predicate(); - } - return OLAP_SUCCESS; -} - -OLAPStatus OlapSnapshotConverter::convert_to_rowset_meta(const PDelta& delta, - const RowsetId& rowset_id, - int64_t tablet_id, int32_t schema_hash, - RowsetMetaPB* rowset_meta_pb) { - rowset_meta_pb->set_rowset_id(0); - rowset_meta_pb->set_rowset_id_v2(rowset_id.to_string()); - rowset_meta_pb->set_tablet_id(tablet_id); - rowset_meta_pb->set_tablet_schema_hash(schema_hash); - rowset_meta_pb->set_rowset_type(RowsetTypePB::ALPHA_ROWSET); - rowset_meta_pb->set_rowset_state(RowsetStatePB::VISIBLE); - rowset_meta_pb->set_start_version(delta.start_version()); - rowset_meta_pb->set_end_version(delta.end_version()); - rowset_meta_pb->set_version_hash(delta.version_hash()); - - bool empty = true; - int64_t num_rows = 0; - int64_t index_size = 0; - int64_t data_size = 0; - AlphaRowsetExtraMetaPB* extra_meta_pb = rowset_meta_pb->mutable_alpha_rowset_extra_meta_pb(); - for (auto& segment_group : delta.segment_group()) { - SegmentGroupPB* new_segment_group = extra_meta_pb->add_segment_groups(); - *new_segment_group = segment_group; - // if segment group does not has empty property, then it is not empty - // if segment group's empty == false, then it is not empty - if (!segment_group.has_empty() || !segment_group.empty()) { - empty = false; - } - num_rows += segment_group.num_rows(); - index_size += segment_group.index_size(); - data_size += segment_group.data_size(); - } - - rowset_meta_pb->set_empty(empty); - rowset_meta_pb->set_num_rows(num_rows); - rowset_meta_pb->set_data_disk_size(data_size); - rowset_meta_pb->set_index_disk_size(index_size); - rowset_meta_pb->set_total_disk_size(data_size + index_size); - if (delta.has_delete_condition()) { - DeletePredicatePB* delete_condition = rowset_meta_pb->mutable_delete_predicate(); - *delete_condition = delta.delete_condition(); - } - rowset_meta_pb->set_creation_time(delta.creation_time()); - LOG(INFO) << "convert visible delta start_version = " << delta.start_version() - << " end_version = " << delta.end_version() - << " version_hash = " << delta.version_hash() << " to rowset id = " << rowset_id - << " tablet_id = " << tablet_id; - return OLAP_SUCCESS; -} - -OLAPStatus OlapSnapshotConverter::convert_to_rowset_meta(const PPendingDelta& pending_delta, - const RowsetId& rowset_id, - int64_t tablet_id, int32_t schema_hash, - RowsetMetaPB* rowset_meta_pb) { - rowset_meta_pb->set_rowset_id(0); - rowset_meta_pb->set_rowset_id_v2(rowset_id.to_string()); - rowset_meta_pb->set_tablet_id(tablet_id); - rowset_meta_pb->set_tablet_schema_hash(schema_hash); - rowset_meta_pb->set_rowset_type(RowsetTypePB::ALPHA_ROWSET); - rowset_meta_pb->set_rowset_state(RowsetStatePB::COMMITTED); - rowset_meta_pb->set_partition_id(pending_delta.partition_id()); - rowset_meta_pb->set_txn_id(pending_delta.transaction_id()); - rowset_meta_pb->set_creation_time(pending_delta.creation_time()); - - bool empty = true; - int64_t num_rows = 0; - int64_t index_size = 0; - int64_t data_size = 0; - AlphaRowsetExtraMetaPB* extra_meta_pb = rowset_meta_pb->mutable_alpha_rowset_extra_meta_pb(); - for (auto& pending_segment_group : pending_delta.pending_segment_group()) { - SegmentGroupPB* new_segment_group = extra_meta_pb->add_segment_groups(); - new_segment_group->set_segment_group_id(pending_segment_group.pending_segment_group_id()); - new_segment_group->set_num_segments(pending_segment_group.num_segments()); - new_segment_group->set_index_size(0); - new_segment_group->set_data_size(0); - new_segment_group->set_num_rows(0); - for (auto& pending_zone_map : pending_segment_group.zone_maps()) { - ZoneMap* zone_map = new_segment_group->add_zone_maps(); - *zone_map = pending_zone_map; - } - new_segment_group->set_empty(pending_segment_group.empty()); - PUniqueId* load_id = new_segment_group->mutable_load_id(); - *load_id = pending_segment_group.load_id(); - - if (!pending_segment_group.empty()) { - empty = false; - } - } - - rowset_meta_pb->set_empty(empty); - rowset_meta_pb->set_num_rows(num_rows); - rowset_meta_pb->set_data_disk_size(data_size); - rowset_meta_pb->set_index_disk_size(index_size); - rowset_meta_pb->set_total_disk_size(data_size + index_size); - if (pending_delta.has_delete_condition()) { - DeletePredicatePB* delete_condition = rowset_meta_pb->mutable_delete_predicate(); - *delete_condition = pending_delta.delete_condition(); - } - rowset_meta_pb->set_creation_time(pending_delta.creation_time()); - LOG(INFO) << "convert pending delta txn id = " << pending_delta.transaction_id() - << " tablet_id = " << tablet_id << " schema_hash = " << schema_hash - << " to rowset id = " << rowset_id; - return OLAP_SUCCESS; -} - -OLAPStatus OlapSnapshotConverter::to_column_pb(const ColumnMessage& column_msg, - ColumnPB* column_pb) { - if (column_msg.has_unique_id()) { - column_pb->set_unique_id(column_msg.unique_id()); - } - column_pb->set_name(column_msg.name()); - column_pb->set_type(column_msg.type()); - column_pb->set_is_key(column_msg.is_key()); - column_pb->set_aggregation(column_msg.aggregation()); - if (column_msg.has_is_allow_null()) { - column_pb->set_is_nullable(column_msg.is_allow_null()); - } - if (column_msg.has_default_value()) { - column_pb->set_default_value(column_msg.default_value()); - } - if (column_msg.has_precision()) { - column_pb->set_precision(column_msg.precision()); - } - if (column_msg.has_frac()) { - column_pb->set_frac(column_msg.frac()); - } - column_pb->set_length(column_msg.length()); - if (column_msg.has_index_length()) { - column_pb->set_index_length(column_msg.index_length()); - } - if (column_msg.has_is_bf_column()) { - column_pb->set_is_bf_column(column_msg.is_bf_column()); - } - if (column_msg.has_has_bitmap_index()) { - column_pb->set_has_bitmap_index(column_msg.has_bitmap_index()); - } - // TODO(ygl) calculate column id from column list - // column_pb->set_referenced_column_id(column_msg.()); - - if (column_msg.has_referenced_column()) { - column_pb->set_referenced_column(column_msg.referenced_column()); - } - return OLAP_SUCCESS; -} - -OLAPStatus OlapSnapshotConverter::to_column_msg(const ColumnPB& column_pb, - ColumnMessage* column_msg) { - if (!column_pb.has_name()) { - LOG(FATAL) << "column pb does not have name" - << " column id " << column_pb.unique_id(); - } - column_msg->set_name(column_pb.name()); - column_msg->set_type(column_pb.type()); - if (!column_pb.has_aggregation()) { - LOG(FATAL) << "column pb does not have aggregation" - << " column id " << column_pb.unique_id(); - } - column_msg->set_aggregation(column_pb.aggregation()); - if (!column_pb.has_length()) { - LOG(FATAL) << "column pb does not have length" - << " column id " << column_pb.unique_id(); - } - column_msg->set_length(column_pb.length()); - if (!column_pb.has_is_key()) { - LOG(FATAL) << "column pb does not have is_key" - << " column id " << column_pb.unique_id(); - } - column_msg->set_is_key(column_pb.is_key()); - if (column_pb.has_default_value()) { - column_msg->set_default_value(column_pb.default_value()); - } - if (column_pb.has_referenced_column()) { - column_msg->set_referenced_column(column_pb.referenced_column()); - } - if (column_pb.has_index_length()) { - column_msg->set_index_length(column_pb.index_length()); - } - if (column_pb.has_precision()) { - column_msg->set_precision(column_pb.precision()); - } - if (column_pb.has_frac()) { - column_msg->set_frac(column_pb.frac()); - } - if (column_pb.has_is_nullable()) { - column_msg->set_is_allow_null(column_pb.is_nullable()); - } - column_msg->set_unique_id(column_pb.unique_id()); - if (column_pb.has_is_bf_column()) { - column_msg->set_is_bf_column(column_pb.is_bf_column()); - } - if (column_pb.has_has_bitmap_index()) { - column_msg->set_has_bitmap_index(column_pb.has_bitmap_index()); - } - column_msg->set_is_root_column(true); - return OLAP_SUCCESS; -} - -OLAPStatus OlapSnapshotConverter::save(const string& file_path, - const OLAPHeaderMessage& olap_header) { - DCHECK(!file_path.empty()); - - FileHeader<OLAPHeaderMessage> file_header; - FileHandler file_handler; - - if (file_handler.open_with_mode(file_path.c_str(), O_CREAT | O_WRONLY | O_TRUNC, - S_IRUSR | S_IWUSR) != OLAP_SUCCESS) { - LOG(WARNING) << "fail to open header file. file='" << file_path; - return OLAP_ERR_IO_ERROR; - } - - try { - file_header.mutable_message()->CopyFrom(olap_header); - } catch (...) { - LOG(WARNING) << "fail to copy protocol buffer object. file='" << file_path; - return OLAP_ERR_OTHER_ERROR; - } - - if (file_header.prepare(&file_handler) != OLAP_SUCCESS || - file_header.serialize(&file_handler) != OLAP_SUCCESS) { - LOG(WARNING) << "fail to serialize to file header. file='" << file_path; - return OLAP_ERR_SERIALIZE_PROTOBUF_ERROR; - } - - return OLAP_SUCCESS; -} - -void OlapSnapshotConverter::_modify_old_segment_group_id(RowsetMetaPB& rowset_meta) { - if (!rowset_meta.has_alpha_rowset_extra_meta_pb()) { - return; - } - AlphaRowsetExtraMetaPB* alpha_rowset_extra_meta_pb = - rowset_meta.mutable_alpha_rowset_extra_meta_pb(); - for (auto& segment_group_pb : alpha_rowset_extra_meta_pb->segment_groups()) { - if (segment_group_pb.segment_group_id() == -1) { - // check if segment groups size == 1 - if (alpha_rowset_extra_meta_pb->segment_groups().size() != 1) { - LOG(FATAL) << "the rowset has a segment group's id == -1 but it contains more than " - "one segment group" - << " it should not happen"; - } - (const_cast<SegmentGroupPB&>(segment_group_pb)).set_segment_group_id(0); - } - } -} - -} // namespace doris diff --git a/be/src/olap/olap_snapshot_converter.h b/be/src/olap/olap_snapshot_converter.h deleted file mode 100644 index 05a8233..0000000 --- a/be/src/olap/olap_snapshot_converter.h +++ /dev/null @@ -1,65 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#ifndef DORIS_BE_SRC_OLAP_OLAP_SNAPSHOT_CONVERTER_H -#define DORIS_BE_SRC_OLAP_OLAP_SNAPSHOT_CONVERTER_H - -#include <functional> -#include <map> -#include <string> - -#include "gen_cpp/olap_file.pb.h" -#include "olap/data_dir.h" -#include "olap/delete_handler.h" -#include "olap/olap_common.h" -#include "olap/olap_define.h" -#include "olap/rowset/rowset.h" -#include "olap/rowset/rowset_meta.h" -#include "olap/tablet_schema.h" - -using std::ifstream; -using std::string; -using std::vector; - -namespace doris { - -class OlapSnapshotConverter { -public: - - OLAPStatus convert_to_pdelta(const RowsetMetaPB& rowset_meta_pb, PDelta* delta); - - OLAPStatus convert_to_rowset_meta(const PDelta& delta, const RowsetId& rowset_id, - int64_t tablet_id, int32_t schema_hash, - RowsetMetaPB* rowset_meta_pb); - - OLAPStatus convert_to_rowset_meta(const PPendingDelta& pending_delta, const RowsetId& rowset_id, - int64_t tablet_id, int32_t schema_hash, - RowsetMetaPB* rowset_meta_pb); - - OLAPStatus to_column_pb(const ColumnMessage& column_msg, ColumnPB* column_pb); - - OLAPStatus to_column_msg(const ColumnPB& column_pb, ColumnMessage* column_msg); - - OLAPStatus save(const string& file_path, const OLAPHeaderMessage& olap_header); - -private: - void _modify_old_segment_group_id(RowsetMetaPB& rowset_meta); -}; - -} // namespace doris - -#endif // DORIS_BE_SRC_OLAP_OLAP_SNAPSHOT_CONVERTER_H diff --git a/be/src/olap/rowset/alpha_rowset.h b/be/src/olap/rowset/alpha_rowset.h index 84b17ad..78ad5f1 100644 --- a/be/src/olap/rowset/alpha_rowset.h +++ b/be/src/olap/rowset/alpha_rowset.h @@ -33,7 +33,6 @@ class AlphaRowset; using AlphaRowsetSharedPtr = std::shared_ptr<AlphaRowset>; class AlphaRowsetWriter; class AlphaRowsetReader; -class OlapSnapshotConverter; class RowsetFactory; class AlphaRowset : public Rowset { @@ -89,7 +88,6 @@ private: private: friend class AlphaRowsetWriter; friend class AlphaRowsetReader; - friend class OlapSnapshotConverter; std::vector<std::shared_ptr<SegmentGroup>> _segment_groups; }; diff --git a/be/src/olap/snapshot_manager.cpp b/be/src/olap/snapshot_manager.cpp index 3789a1e..52836f7 100644 --- a/be/src/olap/snapshot_manager.cpp +++ b/be/src/olap/snapshot_manager.cpp @@ -29,7 +29,6 @@ #include "env/env.h" #include "gen_cpp/Types_constants.h" -#include "olap/olap_snapshot_converter.h" #include "olap/rowset/alpha_rowset_meta.h" #include "olap/rowset/rowset.h" #include "olap/rowset/rowset_converter.h" diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp index aeac350..201bffb 100644 --- a/be/src/olap/storage_engine.cpp +++ b/be/src/olap/storage_engine.cpp @@ -42,7 +42,6 @@ #include "olap/fs/file_block_manager.h" #include "olap/lru_cache.h" #include "olap/memtable_flush_executor.h" -#include "olap/olap_snapshot_converter.h" #include "olap/push_handler.h" #include "olap/reader.h" #include "olap/rowset/alpha_rowset.h" diff --git a/be/src/olap/task/engine_clone_task.cpp b/be/src/olap/task/engine_clone_task.cpp index 25a863c..71e73a3 100644 --- a/be/src/olap/task/engine_clone_task.cpp +++ b/be/src/olap/task/engine_clone_task.cpp @@ -26,7 +26,6 @@ #include "gutil/strings/stringpiece.h" #include "gutil/strings/substitute.h" #include "http/http_client.h" -#include "olap/olap_snapshot_converter.h" #include "olap/rowset/rowset.h" #include "olap/rowset/rowset_factory.h" #include "olap/snapshot_manager.h" diff --git a/be/src/vec/io/io_helper.h b/be/src/vec/io/io_helper.h index 87d3683..bc09fe8 100644 --- a/be/src/vec/io/io_helper.h +++ b/be/src/vec/io/io_helper.h @@ -33,11 +33,13 @@ #include "vec/io/var_int.h" #include "vec/runtime/vdatetime_value.h" -#define DEFAULT_MAX_STRING_SIZE (1ULL << 30) -#define WRITE_HELPERS_MAX_INT_WIDTH 40U - namespace doris::vectorized { +// Define in the namespace and avoid defining global macros, +// because it maybe conflict with other libs +static constexpr size_t DEFAULT_MAX_STRING_SIZE = 1073741824; // 1GB +static constexpr auto WRITE_HELPERS_MAX_INT_WIDTH = 40U; + template <typename T> inline T decimal_scale_multiplier(UInt32 scale); template <> diff --git a/be/test/olap/CMakeLists.txt b/be/test/olap/CMakeLists.txt index 442314c..5e4d15f 100644 --- a/be/test/olap/CMakeLists.txt +++ b/be/test/olap/CMakeLists.txt @@ -77,7 +77,6 @@ ADD_BE_TEST(rowset/alpha_rowset_test) ADD_BE_TEST(rowset/beta_rowset_test) ADD_BE_TEST(rowset/unique_rowset_id_generator_test) ADD_BE_TEST(rowset/rowset_converter_test) -# ADD_BE_TEST(olap_snapshot_converter_test) ADD_BE_TEST(txn_manager_test) ADD_BE_TEST(generic_iterators_test) ADD_BE_TEST(key_coder_test) diff --git a/be/test/olap/olap_snapshot_converter_test.cpp b/be/test/olap/olap_snapshot_converter_test.cpp deleted file mode 100644 index ab5a4e4..0000000 --- a/be/test/olap/olap_snapshot_converter_test.cpp +++ /dev/null @@ -1,250 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "olap/olap_snapshot_converter.h" - -#include <boost/algorithm/string.hpp> -#include <filesystem> -#include <fstream> -#include <iostream> -#include <sstream> -#include <string> - -#include "gmock/gmock.h" -#include "gtest/gtest.h" -#include "json2pb/json_to_pb.h" -#include "olap/lru_cache.h" -#include "olap/olap_meta.h" -#include "olap/rowset/alpha_rowset.h" -#include "olap/rowset/alpha_rowset_meta.h" -#include "olap/rowset/rowset_meta_manager.h" -#include "olap/storage_engine.h" -#include "olap/txn_manager.h" -#include "util/file_utils.h" - -#ifndef BE_TEST -#define BE_TEST -#endif - -using ::testing::_; -using ::testing::Return; -using ::testing::SetArgPointee; -using std::string; - -namespace doris { - -static StorageEngine* k_engine = nullptr; - -class OlapSnapshotConverterTest : public testing::Test { -public: - virtual void SetUp() { - config::tablet_map_shard_size = 1; - config::txn_map_shard_size = 1; - config::txn_shard_size = 1; - EngineOptions options; - // won't open engine, options.path is needless - options.backend_uid = UniqueId::gen_uid(); - if (k_engine == nullptr) { - k_engine = new StorageEngine(options); - } - - string test_engine_data_path = "./be/test/olap/test_data/converter_test_data/data"; - _engine_data_path = "./be/test/olap/test_data/converter_test_data/tmp"; - std::filesystem::remove_all(_engine_data_path); - FileUtils::create_dir(_engine_data_path); - - _data_dir = new DataDir(_engine_data_path, 1000000000); - _data_dir->init(); - _meta_path = "./meta"; - string tmp_data_path = _engine_data_path + "/data"; - if (std::filesystem::exists(tmp_data_path)) { - std::filesystem::remove_all(tmp_data_path); - } - FileUtils::copy_file(test_engine_data_path, tmp_data_path); - _tablet_id = 15007; - _schema_hash = 368169781; - _tablet_data_path = tmp_data_path + "/" + std::to_string(0) + "/" + - std::to_string(_tablet_id) + "/" + std::to_string(_schema_hash); - if (std::filesystem::exists(_meta_path)) { - std::filesystem::remove_all(_meta_path); - } - ASSERT_TRUE(std::filesystem::create_directory(_meta_path)); - ASSERT_TRUE(std::filesystem::exists(_meta_path)); - _meta = new (std::nothrow) OlapMeta(_meta_path); - ASSERT_NE(nullptr, _meta); - OLAPStatus st = _meta->init(); - ASSERT_TRUE(st == OLAP_SUCCESS); - } - - virtual void TearDown() { - delete _meta; - delete _data_dir; - if (std::filesystem::exists(_meta_path)) { - ASSERT_TRUE(std::filesystem::remove_all(_meta_path)); - } - if (std::filesystem::exists(_engine_data_path)) { - ASSERT_TRUE(std::filesystem::remove_all(_engine_data_path)); - } - } - -private: - DataDir* _data_dir; - OlapMeta* _meta; - std::string _json_rowset_meta; - std::string _engine_data_path; - std::string _meta_path; - int64_t _tablet_id; - int32_t _schema_hash; - string _tablet_data_path; -}; - -TEST_F(OlapSnapshotConverterTest, ToNewAndToOldSnapshot) { - // --- start to convert old snapshot to new snapshot - string header_file_path = _tablet_data_path + "/" + "olap_header.json"; - std::ifstream infile(header_file_path); - string buffer; - std::string json_header; - while (getline(infile, buffer)) { - json_header = json_header + buffer; - } - boost::algorithm::trim(json_header); - OLAPHeaderMessage header_msg; - bool ret = json2pb::JsonToProtoMessage(json_header, &header_msg); - ASSERT_TRUE(ret); - OlapSnapshotConverter converter; - TabletMetaPB tablet_meta_pb; - std::vector<RowsetMetaPB> pending_rowsets; - OLAPStatus status = converter.to_new_snapshot(header_msg, _tablet_data_path, _tablet_data_path, - &tablet_meta_pb, &pending_rowsets, true); - ASSERT_TRUE(status == OLAP_SUCCESS); - - TabletSchema tablet_schema; - tablet_schema.init_from_pb(tablet_meta_pb.schema()); - string data_path_prefix = _data_dir->get_absolute_tablet_path( - tablet_meta_pb.shard_id(), tablet_meta_pb.tablet_id(), tablet_meta_pb.schema_hash()); - // check converted new tabletmeta pb and its files - // check visible delta - ASSERT_TRUE(tablet_meta_pb.rs_metas().size() == header_msg.delta().size()); - for (auto& pdelta : header_msg.delta()) { - int64_t start_version = pdelta.start_version(); - int64_t end_version = pdelta.end_version(); - bool found = false; - for (auto& visible_rowset : tablet_meta_pb.rs_metas()) { - if (visible_rowset.start_version() == start_version && - visible_rowset.end_version() == end_version) { - found = true; - } - } - ASSERT_TRUE(found); - } - for (auto& visible_rowset : tablet_meta_pb.rs_metas()) { - RowsetMetaSharedPtr alpha_rowset_meta(new AlphaRowsetMeta()); - alpha_rowset_meta->init_from_pb(visible_rowset); - AlphaRowset rowset(&tablet_schema, data_path_prefix, alpha_rowset_meta); - ASSERT_TRUE(rowset.init() == OLAP_SUCCESS); - ASSERT_TRUE(rowset.load() == OLAP_SUCCESS); - std::vector<std::string> old_files; - rowset.remove_old_files(&old_files); - } - // check incremental delta - ASSERT_TRUE(tablet_meta_pb.inc_rs_metas().size() == header_msg.incremental_delta().size()); - for (auto& pdelta : header_msg.incremental_delta()) { - int64_t start_version = pdelta.start_version(); - int64_t end_version = pdelta.end_version(); - int64_t version_hash = pdelta.version_hash(); - bool found = false; - for (auto& inc_rowset : tablet_meta_pb.inc_rs_metas()) { - if (inc_rowset.start_version() == start_version && - inc_rowset.end_version() == end_version && - inc_rowset.version_hash() == version_hash) { - found = true; - } - } - ASSERT_TRUE(found); - } - for (auto& inc_rowset : tablet_meta_pb.inc_rs_metas()) { - RowsetMetaSharedPtr alpha_rowset_meta(new AlphaRowsetMeta()); - alpha_rowset_meta->init_from_pb(inc_rowset); - AlphaRowset rowset(&tablet_schema, data_path_prefix, alpha_rowset_meta); - ASSERT_TRUE(rowset.init() == OLAP_SUCCESS); - ASSERT_TRUE(rowset.load() == OLAP_SUCCESS); - AlphaRowset tmp_rowset(&tablet_schema, data_path_prefix + "/incremental_delta", - alpha_rowset_meta); - ASSERT_TRUE(tmp_rowset.init() == OLAP_SUCCESS); - std::vector<std::string> old_files; - tmp_rowset.remove_old_files(&old_files); - } - // check pending delta - ASSERT_TRUE(pending_rowsets.size() == header_msg.pending_delta().size()); - for (auto& pdelta : header_msg.pending_delta()) { - int64_t partition_id = pdelta.partition_id(); - int64_t transaction_id = pdelta.transaction_id(); - bool found = false; - for (auto& pending_rowset : pending_rowsets) { - if (pending_rowset.partition_id() == partition_id && - pending_rowset.txn_id() == transaction_id && - pending_rowset.tablet_uid().hi() == tablet_meta_pb.tablet_uid().hi() && - pending_rowset.tablet_uid().lo() == tablet_meta_pb.tablet_uid().lo()) { - found = true; - } - } - ASSERT_TRUE(found); - } - for (auto& pending_rowset : pending_rowsets) { - RowsetMetaSharedPtr alpha_rowset_meta(new AlphaRowsetMeta()); - alpha_rowset_meta->init_from_pb(pending_rowset); - AlphaRowset rowset(&tablet_schema, data_path_prefix, alpha_rowset_meta); - ASSERT_TRUE(rowset.init() == OLAP_SUCCESS); - ASSERT_TRUE(rowset.load() == OLAP_SUCCESS); - std::vector<std::string> old_files; - rowset.remove_old_files(&old_files); - } - - // old files are removed, then convert new snapshot to old snapshot - OLAPHeaderMessage old_header_msg; - status = converter.to_old_snapshot(tablet_meta_pb, _tablet_data_path, _tablet_data_path, - &old_header_msg); - ASSERT_TRUE(status == OLAP_SUCCESS); - for (auto& pdelta : header_msg.delta()) { - bool found = false; - for (auto& converted_pdelta : old_header_msg.delta()) { - if (converted_pdelta.start_version() == pdelta.start_version() && - converted_pdelta.end_version() == pdelta.end_version()) { - found = true; - } - } - ASSERT_TRUE(found); - } - for (auto& pdelta : header_msg.incremental_delta()) { - bool found = false; - for (auto& converted_pdelta : old_header_msg.incremental_delta()) { - if (converted_pdelta.start_version() == pdelta.start_version() && - converted_pdelta.end_version() == pdelta.end_version() && - converted_pdelta.version_hash() == pdelta.version_hash()) { - found = true; - } - } - ASSERT_TRUE(found); - } -} - -} // namespace doris - -int main(int argc, char** argv) { - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/gensrc/proto/olap_file.proto b/gensrc/proto/olap_file.proto index 8dce011..5f3b226 100644 --- a/gensrc/proto/olap_file.proto +++ b/gensrc/proto/olap_file.proto @@ -30,37 +30,6 @@ message ZoneMap { optional bool null_flag = 3; } -message DeltaPruning { - repeated ZoneMap zone_maps = 1; -} - -// define OLAP FileVersion Message, Base, delta and cumulative delta -// are both instance of Version -// Base Version: (start_version == 0 && end_version > start_version) || [0, 0] -// Cumulative Version: (start_version >= 1 && end_version > start_version) -// Delta Version: start_version == end_version -message FileVersionMessage { // Deprecated, Use PDelta instead - required uint32 num_segments = 1 [default = 0]; - required int32 start_version = 2; - required int32 end_version = 3; - required int64 version_hash = 4 [default = 0]; - required int64 max_timestamp = 5 [default = 0]; - required int64 index_size = 6 [default = 0]; - required int64 data_size = 7 [default = 0]; - optional int64 num_rows = 8 [default = 0]; - required int64 creation_time = 9 [default = 0]; - optional DeltaPruning delta_pruning = 10; -} - -message PDelta { - required int64 start_version = 1; // RowsetMetaPB.start_version - required int64 end_version = 2; // RowsetMetaPB.end_version - required int64 version_hash = 3; // RowsetMetaPB.version_hash - required int64 creation_time = 4; // RowsetMetaPB.creation_time - repeated SegmentGroupPB segment_group = 5; // RowsetMetaPB.extra_properties - optional DeletePredicatePB delete_condition = 6; // RowsetMetaPB.delete_predicate -} - enum RowsetTypePB { ALPHA_ROWSET = 0; // doris原有的列存格式 BETA_ROWSET = 1; // 新列存 @@ -140,33 +109,6 @@ message SegmentGroupPB { optional PUniqueId load_id = 8; } -message PPendingDelta { - required int64 partition_id = 1; // RowsetMetaPB.partition_id - required int64 transaction_id = 2; // RowsetMetaPB.txn_id - required int64 creation_time = 3; // RowsetMetaPB.creation_time - repeated PendingSegmentGroupPB pending_segment_group = 4; // RowsetMetaPB.extra_properties - optional DeletePredicatePB delete_condition = 5; // RowsetMetaPB.delete_predicate -} - -message PendingSegmentGroupPB { - required int32 pending_segment_group_id = 1; - required int32 num_segments = 2; - required PUniqueId load_id = 3; - repeated ZoneMap zone_maps = 4; - optional bool empty = 5; -} - -message SchemaChangeStatusMessage { - required int64 related_tablet_id = 1; - required int32 related_schema_hash = 2; - - repeated FileVersionMessage versions_to_be_changed = 3; // Deprecated. Use PDelta instead - - required int32 schema_change_type = 4; - - repeated PDelta versions_to_changed = 5; -} - enum DataFileType { OLAP_DATA_FILE = 0; //Deprecated. Only columnar-wise format is supported. COLUMN_ORIENTED_FILE = 1; @@ -190,38 +132,6 @@ message InPredicatePB { repeated string values = 3; } -message OLAPHeaderMessage { - required uint32 num_rows_per_data_block = 1; // TabletSchemaPB.num_rows_per_row_block - - repeated FileVersionMessage file_version = 2; // Deprecated. Use PDelta instead after stream load - - required int32 cumulative_layer_point = 3; // TabletMetaPB.cumulative_layer_point - required uint32 num_short_key_fields = 4; // TabletSchemaPB.num_short_key_columns - repeated ColumnMessage column = 5; // TabletSchemaPB.column - required int64 creation_time = 6; // TabletMetaPB.creation_time - repeated int32 selectivity = 7; // Deprecated. - optional SchemaChangeStatusMessage schema_change_status = 8; // TabletMetaPB.alter_task - optional DataFileType data_file_type = 9 [default = OLAP_DATA_FILE]; // ? only column oriented - optional uint32 next_column_unique_id = 10 [default = 0]; // TabletSchemaPB.next_column_unique_id - optional CompressKind compress_kind = 11 [default = COMPRESS_LZO]; // TabletSchemaPB.compress_kind - optional uint32 segment_size = 12 [default = 4292870144]; // ? not used - repeated DeletePredicatePB delete_data_conditions = 13; // not serialized any more, just read from PDelta - // bloom filter false positive probability - optional double bf_fpp = 14; // TabletSchemaPB.bf_fpp - optional KeysType keys_type = 15; // TabletSchemaPB.keys_type - repeated PDelta delta = 16; // TabletMetaPB.rs_metas - repeated PPendingDelta pending_delta = 17; // need write to olap meta store - // @Deprecated - repeated PDelta incremental_delta = 18; // TabletMetaPB.inc_rs_metas - - // if true, this tablet will not do compaction, - // and does not create init version - optional bool in_restore_mode = 19 [default = false]; // TabletMetaPB.is_restore_mode - optional int64 tablet_id = 20; // TabletMetaPB.tablet_id - optional int32 schema_hash = 21; // TabletMetaPB.schema_hash? int32 vs int64 - optional uint64 shard_id = 22; // TabletMetaPB.shard_id? int64 vs int32 -} - enum AlterTabletState { ALTER_PREPARED = 0; ALTER_RUNNING = 1; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org