github-actions[bot] commented on code in PR #33298:
URL: https://github.com/apache/doris/pull/33298#discussion_r1554806695
##########
be/src/cloud/pb_convert.h:
##########
@@ -17,17 +17,23 @@
#include <gen_cpp/olap_file.pb.h>
Review Comment:
warning: 'gen_cpp/olap_file.pb.h' file not found [clang-diagnostic-error]
```cpp
#include <gen_cpp/olap_file.pb.h>
^
```
##########
cloud/src/meta-service/meta_service.cpp:
##########
@@ -1066,14 +1067,11 @@ void
MetaServiceImpl::commit_rowset(::google::protobuf::RpcController* controlle
DCHECK(rowset_meta.tablet_schema().has_schema_version());
DCHECK_GE(rowset_meta.tablet_schema().schema_version(), 0);
rowset_meta.set_schema_version(rowset_meta.tablet_schema().schema_version());
- std::string schema_key;
- if (rowset_meta.has_variant_type_in_schema()) {
- // encodes schema in a seperate kv, since variant schema is
volatile
- schema_key = meta_rowset_schema_key(
- {instance_id, rowset_meta.tablet_id(),
rowset_meta.rowset_id_v2()});
- } else {
- schema_key = meta_schema_key(
+ std::string schema_key = meta_schema_key(
{instance_id, rowset_meta.index_id(),
rowset_meta.schema_version()});
+ if (rowset_meta.has_variant_type_in_schema()) {
+ std::tie(code, msg) = write_schema_dict(instance_id, txn.get(),
&rowset_meta);
+ if (code != MetaServiceCode::OK) return;
Review Comment:
warning: statement should be inside braces
[readability-braces-around-statements]
```suggestion
if (code != MetaServiceCode::OK) { return;
}
```
##########
cloud/src/meta-service/meta_service_schema.cpp:
##########
@@ -18,6 +18,14 @@
#include "meta-service/meta_service_schema.h"
Review Comment:
warning: 'meta-service/meta_service_schema.h' file not found
[clang-diagnostic-error]
```cpp
#include "meta-service/meta_service_schema.h"
^
```
##########
cloud/src/meta-service/meta_service_schema.cpp:
##########
@@ -119,4 +127,163 @@
return buf.to_pb(schema);
}
+// Map item to dictionary key, and add key to rowset meta, if it is a new one,
generate it and increase item id
+// Need to remove dynamic parts from original RowsetMeta's TabletSchema, to
make fdb schema kv stable
+template<typename ItemPB>
+void process_dictionary(
+ SchemaCloudDictionary& dict,
+ const google::protobuf::Map<int32_t, ItemPB>& item_dict,
+ google::protobuf::RepeatedPtrField<ItemPB>* result,
+ RowsetMetaCloudPB* rowset_meta,
+ const google::protobuf::RepeatedPtrField<ItemPB>& items,
+ const std::function<bool(const ItemPB&)>& filter,
+ const std::function<void(int32_t)>& add_dict_key_fn) {
+ if (items.empty()) {
+ return;
+ }
+ // Use deterministic method to do serialization since structure like
+ // `google::protobuf::Map`'s serialization is unstable
+ auto serialize_fn = [](const ItemPB& item) -> std::string {
+ std::string output;
+ google::protobuf::io::StringOutputStream string_output_stream(&output);
+ google::protobuf::io::CodedOutputStream
output_stream(&string_output_stream);
+ output_stream.SetSerializationDeterministic(true);
+ item.SerializeToCodedStream(&output_stream);
+ return output;
+ };
+
+ google::protobuf::RepeatedPtrField<ItemPB> none_ext_items;
+ std::unordered_map<std::string, int> reversed_dict;
+ for (const auto& [key, val] : item_dict) {
+ reversed_dict[serialize_fn(val)] = key;
+ }
+
+ for (const auto& item : items) {
+ if (filter(item)) {
+ // Filter none extended items, mainly extended columns and
extended indexes
+ *none_ext_items.Add() = item;
+ continue;
+ }
+ const std::string serialized_key = serialize_fn(item);
+ auto it = reversed_dict.find(serialized_key);
+ if (it != reversed_dict.end()) {
+ // Add existed dict key to related dict
+ add_dict_key_fn(it->second);
+ } else {
+ // Add new dictionary key-value pair and update
current_xxx_dict_id.
+ int64_t current_dict_id = 0;
+ if constexpr (std::is_same_v<ItemPB, ColumnPB>) {
+ current_dict_id = dict.current_column_dict_id() + 1;
+ dict.set_current_column_dict_id(current_dict_id);
+ dict.mutable_column_dict()->emplace(current_dict_id, item);
+ }
+ if constexpr (std::is_same_v<ItemPB, doris::TabletIndexPB>) {
+ current_dict_id = dict.current_index_dict_id() + 1;
+ dict.set_current_index_dict_id(current_dict_id);
+ dict.mutable_index_dict()->emplace(current_dict_id, item);
+ }
+ add_dict_key_fn(current_dict_id);
+ reversed_dict[serialized_key] = current_dict_id;
+ // LOG(INFO) << "Add dict key = " << current_dict_id << " dict
value = " << item.ShortDebugString();
+ }
+ }
+ if (result != nullptr) {
+ result->Swap(&none_ext_items);
+ }
+}
+
+// Writes schema dictionary metadata to RowsetMetaCloudPB.
+// Schema was extended in BE side, we need to reset schema to original
frontend schema and store
+// such restored schema in fdb. And also add extra dict key info to
RowsetMetaCloudPB.
+std::pair<MetaServiceCode, std::string> write_schema_dict(
Review Comment:
warning: function 'write_schema_dict' exceeds recommended size/complexity
thresholds [readability-function-size]
```cpp
std::pair<MetaServiceCode, std::string> write_schema_dict(
^
```
<details>
<summary>Additional context</summary>
**cloud/src/meta-service/meta_service_schema.cpp:197:** 86 lines including
whitespace and comments (threshold 80)
```cpp
std::pair<MetaServiceCode, std::string> write_schema_dict(
^
```
</details>
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]