github-actions[bot] commented on code in PR #17594:
URL: https://github.com/apache/doris/pull/17594#discussion_r1130374070


##########
be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp:
##########
@@ -43,6 +43,17 @@
             assert_cast<ColumnDictI32&>(*doris_column)
                     .insert_many_dict_data(&dict_items[0], dict_items.size());
         }
+        if (doris_column->is_column_dictionary()) {
+            ColumnDictI32& dict_column = 
assert_cast<ColumnDictI32&>(*doris_column);
+            if (dict_column.is_copy_dict_to_column() && 
dict_column.dict_size() == 0) {
+                std::vector<StringRef> dict_items;
+                dict_items.reserve(_dict_items.size());
+                for (int i = 0; i < _dict_items.size(); ++i) {
+                    dict_items.emplace_back((char*)(&_dict_items[i]), 
_type_length);
+                }
+                dict_column.insert_many_dict_data(&dict_items[0], 
dict_items.size());
+            }
+        }
         _indexes.resize(non_null_size);

Review Comment:
   warning: use of undeclared identifier '_indexes' [clang-diagnostic-error]
   ```cpp
           _indexes.resize(non_null_size);
           ^
   ```
   



##########
be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp:
##########
@@ -43,6 +43,17 @@
             assert_cast<ColumnDictI32&>(*doris_column)
                     .insert_many_dict_data(&dict_items[0], dict_items.size());
         }
+        if (doris_column->is_column_dictionary()) {
+            ColumnDictI32& dict_column = 
assert_cast<ColumnDictI32&>(*doris_column);
+            if (dict_column.is_copy_dict_to_column() && 
dict_column.dict_size() == 0) {
+                std::vector<StringRef> dict_items;
+                dict_items.reserve(_dict_items.size());
+                for (int i = 0; i < _dict_items.size(); ++i) {
+                    dict_items.emplace_back((char*)(&_dict_items[i]), 
_type_length);
+                }
+                dict_column.insert_many_dict_data(&dict_items[0], 
dict_items.size());
+            }
+        }
         _indexes.resize(non_null_size);
         _index_batch_decoder->GetBatch(&_indexes[0], non_null_size);

Review Comment:
   warning: use of undeclared identifier '_index_batch_decoder' [clang-diagnostic-error]
   ```cpp
           _index_batch_decoder->GetBatch(&_indexes[0], non_null_size);
           ^
   ```
   



##########
be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp:
##########
@@ -43,6 +43,17 @@ class FixLengthDictDecoder final : public BaseDictDecoder {
             assert_cast<ColumnDictI32&>(*doris_column)
                     .insert_many_dict_data(&dict_items[0], dict_items.size());
         }
+        if (doris_column->is_column_dictionary()) {
+            ColumnDictI32& dict_column = 
assert_cast<ColumnDictI32&>(*doris_column);
+            if (dict_column.is_copy_dict_to_column() && 
dict_column.dict_size() == 0) {
+                std::vector<StringRef> dict_items;
+                dict_items.reserve(_dict_items.size());
+                for (int i = 0; i < _dict_items.size(); ++i) {
+                    dict_items.emplace_back((char*)(&_dict_items[i]), _type_length);

Review Comment:
   warning: use of undeclared identifier '_type_length' [clang-diagnostic-error]
   ```cpp
                       dict_items.emplace_back((char*)(&_dict_items[i]), _type_length);
                                                                         ^
   ```
   



##########
be/src/vec/exec/format/parquet/vparquet_group_reader.cpp:
##########
@@ -608,4 +907,77 @@ ParquetColumnReader::Statistics 
RowGroupReader::statistics() {
     return st;
 }
 
+// TODO Performance Optimization
+Status RowGroupReader::_execute_conjuncts(const std::vector<VExprContext*>& 
ctxs,
+                                          const std::vector<IColumn::Filter*>& 
filters,
+                                          Block* block, IColumn::Filter* 
result_filter,
+                                          bool* can_filter_all) {
+    *can_filter_all = false;
+    auto* __restrict result_filter_data = result_filter->data();
+    for (auto* ctx : ctxs) {
+        int result_column_id = -1;
+        RETURN_IF_ERROR(ctx->execute(block, &result_column_id));
+        ColumnPtr& filter_column = 
block->get_by_position(result_column_id).column;
+        if (auto* nullable_column = 
check_and_get_column<ColumnNullable>(*filter_column)) {
+            size_t column_size = nullable_column->size();
+            if (column_size == 0) {
+                *can_filter_all = true;
+                return Status::OK();
+            } else {
+                const ColumnPtr& nested_column = 
nullable_column->get_nested_column_ptr();
+                const IColumn::Filter& filter =
+                        assert_cast<const 
ColumnUInt8&>(*nested_column).get_data();
+                auto* __restrict filter_data = filter.data();
+                const size_t size = filter.size();
+                auto* __restrict null_map_data = 
nullable_column->get_null_map_data().data();
+
+                for (size_t i = 0; i < size; ++i) {
+                    result_filter_data[i] &= (!null_map_data[i]) & 
filter_data[i];
+                }
+            }
+        } else if (auto* const_column = 
check_and_get_column<ColumnConst>(*filter_column)) {
+            // filter all
+            if (!const_column->get_bool(0)) {
+                *can_filter_all = true;
+                return Status::OK();
+            }
+        } else {
+            const IColumn::Filter& filter =
+                    assert_cast<const ColumnUInt8&>(*filter_column).get_data();
+            auto* __restrict filter_data = filter.data();
+
+            const size_t size = filter.size();
+            for (size_t i = 0; i < size; ++i) {
+                result_filter_data[i] &= filter_data[i];
+            }
+        }
+    }
+    for (auto* filter : filters) {
+        auto* __restrict filter_data = filter->data();
+        const size_t size = filter->size();
+        for (size_t i = 0; i < size; ++i) {
+            result_filter_data[i] &= filter_data[i];
+        }
+    }
+    return Status::OK();
+}
+
+// TODO Performance Optimization
+Status RowGroupReader::_execute_conjuncts_and_filter_block(
+        const std::vector<VExprContext*>& ctxs, const 
std::vector<IColumn::Filter*>& filters,
+        Block* block, std::vector<uint32_t>& columns_to_filter, int 
column_to_keep) {
+    IColumn::Filter result_filter(block->rows(), 1);
+    bool can_filter_all;
+    RETURN_IF_ERROR(_execute_conjuncts(ctxs, filters, block, &result_filter, 
&can_filter_all));
+    if (can_filter_all) {
+        for (auto& col : columns_to_filter) {
+            std::move(*block->get_by_position(col).column).assume_mutable()->clear();

Review Comment:
   warning: std::move of the const expression has no effect; remove std::move() [performance-move-const-arg]
   
   ```suggestion
               (*block->get_by_position(col).column).assume_mutable()->clear();
   ```
   



##########
be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp:
##########
@@ -43,6 +43,17 @@
             assert_cast<ColumnDictI32&>(*doris_column)
                     .insert_many_dict_data(&dict_items[0], dict_items.size());
         }
+        if (doris_column->is_column_dictionary()) {
+            ColumnDictI32& dict_column = 
assert_cast<ColumnDictI32&>(*doris_column);
+            if (dict_column.is_copy_dict_to_column() && 
dict_column.dict_size() == 0) {
+                std::vector<StringRef> dict_items;
+                dict_items.reserve(_dict_items.size());
+                for (int i = 0; i < _dict_items.size(); ++i) {
+                    dict_items.emplace_back((char*)(&_dict_items[i]), 
_type_length);
+                }
+                dict_column.insert_many_dict_data(&dict_items[0], 
dict_items.size());
+            }
+        }
         _indexes.resize(non_null_size);
         _index_batch_decoder->GetBatch(&_indexes[0], non_null_size);

Review Comment:
   warning: use of undeclared identifier '_indexes' [clang-diagnostic-error]
   ```cpp
           _index_batch_decoder->GetBatch(&_indexes[0], non_null_size);
                                           ^
   ```
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to