morningman commented on code in PR #12048:
URL: https://github.com/apache/doris/pull/12048#discussion_r958461562


##########
be/src/vec/exec/scan/vscanner.h:
##########
@@ -151,6 +154,11 @@ class VScanner {
     // watch to count the time wait for scanner thread
     MonotonicStopWatch _watch;
     int64_t _scanner_wait_worker_timer = 0;
+    int64_t _raw_rows_read = 0;

Review Comment:
   Is `_raw_rows_read` used anywhere? It looks unused.



##########
be/src/vec/exec/scan/new_file_scanner.h:
##########
@@ -0,0 +1,85 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "exec/text_converter.h"
+#include "exprs/bloomfilter_predicate.h"
+#include "exprs/function_filter.h"
+#include "vec/exec/scan/vscanner.h"
+
+namespace doris::vectorized {
+
+class NewFileScanNode;
+
+class NewFileScanner : public VScanner {
+public:
+    NewFileScanner(RuntimeState* state, NewFileScanNode* parent, int64_t limit,
+                   const TFileScanRange& scan_range, MemTracker* tracker, 
RuntimeProfile* profile);
+
+    Status open(RuntimeState *state) override;
+
+    Status prepare(VExprContext** vconjunct_ctx_ptr);
+
+protected:
+    virtual void _init_profiles(RuntimeProfile* profile) = 0;
+
+    Status finalize_block(vectorized::Block* dest_block, bool* eof);
+    Status _fill_columns_from_path(vectorized::Block* output_block, size_t 
rows);
+    Status init_block(vectorized::Block* block);
+
+    std::unique_ptr<TextConverter> _text_converter;
+
+//    RuntimeState* _state;
+    const TFileScanRangeParams& _params;
+
+    const std::vector<TFileRangeDesc>& _ranges;
+    int _next_range;
+
+    // Used for constructing tuple
+    std::vector<SlotDescriptor*> _required_slot_descs;
+    std::vector<SlotDescriptor*> _file_slot_descs;
+    std::map<SlotId, int> _file_slot_index_map;
+    std::vector<SlotDescriptor*> _partition_slot_descs;
+    std::map<SlotId, int> _partition_slot_index_map;
+
+    std::unique_ptr<RowDescriptor> _row_desc;
+
+    // Mem pool used to allocate _src_tuple and _src_tuple_row
+    std::unique_ptr<MemPool> _mem_pool;
+
+    const std::vector<TExpr> _pre_filter_texprs;
+
+    // Profile
+    RuntimeProfile* _profile;
+    RuntimeProfile::Counter* _rows_read_counter;
+    RuntimeProfile::Counter* _read_timer;
+
+    bool _scanner_eof = false;
+    int _rows = 0;
+    long _read_row_counter = 0;
+
+    std::unique_ptr<vectorized::VExprContext*> _vpre_filter_ctx_ptr;
+    int _num_of_columns_from_file;
+
+    // to record which runtime filters have been used
+    std::vector<bool> _runtime_filter_marks;

Review Comment:
   `_runtime_filter_marks` is not used.



##########
be/src/vec/exec/scan/new_file_scanner.cpp:
##########
@@ -0,0 +1,167 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/exec/scan/new_file_scanner.h"
+
+#include <fmt/format.h>
+
+#include <vec/data_types/data_type_factory.hpp>
+
+#include "common/logging.h"
+#include "common/utils.h"
+#include "exec/exec_node.h"
+#include "exec/text_converter.hpp"
+#include "exprs/expr_context.h"
+#include "runtime/descriptors.h"
+#include "runtime/raw_value.h"
+#include "runtime/runtime_state.h"
+#include "runtime/tuple.h"
+#include "vec/exec/scan/new_file_scan_node.h"
+
+namespace doris::vectorized {
+
+    NewFileScanner::NewFileScanner(RuntimeState* state, NewFileScanNode* 
parent, int64_t limit,
+                                   const TFileScanRange& scan_range, 
MemTracker* tracker, RuntimeProfile* profile)
+            : VScanner(state, static_cast<VScanNode*>(parent), limit, tracker),
+//            _state(state),

Review Comment:
   Remove this commented-out initializer.



##########
be/src/vec/exec/scan/new_file_scan_node.cpp:
##########
@@ -0,0 +1,116 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/exec/scan/new_file_scan_node.h"
+
+#include "vec/columns/column_const.h"
+#include "vec/exec/scan/new_olap_scanner.h"
+#include "vec/functions/in.h"
+#include "vec/exec/scan/new_file_arrow_scanner.h"
+#include "vec/exec/scan/new_file_text_scanner.h"
+
+namespace doris::vectorized {
+
+NewFileScanNode::NewFileScanNode(ObjectPool* pool, const TPlanNode& tnode,
+                                 const DescriptorTbl& descs)
+        : VScanNode(pool, tnode, descs), _file_scan_node(tnode.file_scan_node) 
{
+    _output_tuple_id = tnode.file_scan_node.tuple_id;
+    LOG(INFO) << "Using NewFileScanNode";

Review Comment:
   Remove this `LOG(INFO)` line.



##########
be/src/vec/exec/scan/vscan_node.h:
##########
@@ -34,7 +34,9 @@ class VSlotRef;
 class VScanNode : public ExecNode {
 public:
     VScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& 
descs)
-            : ExecNode(pool, tnode, descs), 
_runtime_filter_descs(tnode.runtime_filters) {}
+            : ExecNode(pool, tnode, descs), 
_runtime_filter_descs(tnode.runtime_filters) {
+        _runtime_filter_descs = tnode.runtime_filters;

Review Comment:
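   Duplicated? `_runtime_filter_descs` is already initialized from `tnode.runtime_filters` in the member initializer list.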



##########
be/src/vec/exec/scan/new_file_scanner.h:
##########
@@ -0,0 +1,85 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "exec/text_converter.h"
+#include "exprs/bloomfilter_predicate.h"
+#include "exprs/function_filter.h"
+#include "vec/exec/scan/vscanner.h"
+
+namespace doris::vectorized {
+
+class NewFileScanNode;
+
+class NewFileScanner : public VScanner {
+public:
+    NewFileScanner(RuntimeState* state, NewFileScanNode* parent, int64_t limit,
+                   const TFileScanRange& scan_range, MemTracker* tracker, 
RuntimeProfile* profile);
+
+    Status open(RuntimeState *state) override;
+
+    Status prepare(VExprContext** vconjunct_ctx_ptr);
+
+protected:
+    virtual void _init_profiles(RuntimeProfile* profile) = 0;
+
+    Status finalize_block(vectorized::Block* dest_block, bool* eof);
+    Status _fill_columns_from_path(vectorized::Block* output_block, size_t 
rows);
+    Status init_block(vectorized::Block* block);
+
+    std::unique_ptr<TextConverter> _text_converter;
+
+//    RuntimeState* _state;
+    const TFileScanRangeParams& _params;
+
+    const std::vector<TFileRangeDesc>& _ranges;
+    int _next_range;
+
+    // Used for constructing tuple
+    std::vector<SlotDescriptor*> _required_slot_descs;
+    std::vector<SlotDescriptor*> _file_slot_descs;
+    std::map<SlotId, int> _file_slot_index_map;
+    std::vector<SlotDescriptor*> _partition_slot_descs;
+    std::map<SlotId, int> _partition_slot_index_map;
+
+    std::unique_ptr<RowDescriptor> _row_desc;
+
+    // Mem pool used to allocate _src_tuple and _src_tuple_row
+    std::unique_ptr<MemPool> _mem_pool;

Review Comment:
   Is `_mem_pool` used anywhere? It looks unused.



##########
be/src/vec/exec/scan/new_file_scan_node.h:
##########
@@ -0,0 +1,44 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "vec/exec/scan/vscan_node.h"
+
+namespace doris::vectorized {
+
+class NewFileScanNode : public VScanNode {
+public:
+    NewFileScanNode(ObjectPool* pool, const TPlanNode& tnode, const 
DescriptorTbl& descs);
+
+    Status prepare(RuntimeState* state) override;
+
+    void set_scan_ranges(const std::vector<TScanRangeParams>& scan_ranges) 
override;
+
+protected:
+    Status _init_profile() override;
+    Status _process_conjuncts() override;
+    Status _init_scanners(std::list<VScanner*>* scanners) override;
+private:
+    VScanner* create_scanner(const TFileScanRange& scan_range);

Review Comment:
   ```suggestion
       VScanner* _create_scanner(const TFileScanRange& scan_range);
   ```



##########
be/src/vec/exec/scan/vscanner.h:
##########
@@ -151,6 +154,11 @@ class VScanner {
     // watch to count the time wait for scanner thread
     MonotonicStopWatch _watch;
     int64_t _scanner_wait_worker_timer = 0;
+    int64_t _raw_rows_read = 0;
+    int64_t _num_rows_return = 0;

Review Comment:
   Is `_num_rows_return` used anywhere? It looks unused.



##########
be/src/vec/exec/scan/new_file_scanner.h:
##########
@@ -0,0 +1,85 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "exec/text_converter.h"
+#include "exprs/bloomfilter_predicate.h"
+#include "exprs/function_filter.h"
+#include "vec/exec/scan/vscanner.h"
+
+namespace doris::vectorized {
+
+class NewFileScanNode;
+
+class NewFileScanner : public VScanner {
+public:
+    NewFileScanner(RuntimeState* state, NewFileScanNode* parent, int64_t limit,
+                   const TFileScanRange& scan_range, MemTracker* tracker, 
RuntimeProfile* profile);
+
+    Status open(RuntimeState *state) override;
+
+    Status prepare(VExprContext** vconjunct_ctx_ptr);
+
+protected:
+    virtual void _init_profiles(RuntimeProfile* profile) = 0;
+
+    Status finalize_block(vectorized::Block* dest_block, bool* eof);
+    Status _fill_columns_from_path(vectorized::Block* output_block, size_t 
rows);
+    Status init_block(vectorized::Block* block);
+
+    std::unique_ptr<TextConverter> _text_converter;
+
+//    RuntimeState* _state;
+    const TFileScanRangeParams& _params;
+
+    const std::vector<TFileRangeDesc>& _ranges;
+    int _next_range;
+
+    // Used for constructing tuple

Review Comment:
   Please add more comments describing these fields.



##########
be/src/vec/exec/scan/new_file_scanner.h:
##########
@@ -0,0 +1,85 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "exec/text_converter.h"
+#include "exprs/bloomfilter_predicate.h"
+#include "exprs/function_filter.h"
+#include "vec/exec/scan/vscanner.h"
+
+namespace doris::vectorized {
+
+class NewFileScanNode;
+
+class NewFileScanner : public VScanner {
+public:
+    NewFileScanner(RuntimeState* state, NewFileScanNode* parent, int64_t limit,
+                   const TFileScanRange& scan_range, MemTracker* tracker, 
RuntimeProfile* profile);
+
+    Status open(RuntimeState *state) override;
+
+    Status prepare(VExprContext** vconjunct_ctx_ptr);
+
+protected:
+    virtual void _init_profiles(RuntimeProfile* profile) = 0;
+
+    Status finalize_block(vectorized::Block* dest_block, bool* eof);
+    Status _fill_columns_from_path(vectorized::Block* output_block, size_t 
rows);
+    Status init_block(vectorized::Block* block);
+
+    std::unique_ptr<TextConverter> _text_converter;
+
+//    RuntimeState* _state;
+    const TFileScanRangeParams& _params;
+
+    const std::vector<TFileRangeDesc>& _ranges;
+    int _next_range;
+
+    // Used for constructing tuple
+    std::vector<SlotDescriptor*> _required_slot_descs;
+    std::vector<SlotDescriptor*> _file_slot_descs;
+    std::map<SlotId, int> _file_slot_index_map;
+    std::vector<SlotDescriptor*> _partition_slot_descs;
+    std::map<SlotId, int> _partition_slot_index_map;
+
+    std::unique_ptr<RowDescriptor> _row_desc;
+
+    // Mem pool used to allocate _src_tuple and _src_tuple_row
+    std::unique_ptr<MemPool> _mem_pool;
+
+    const std::vector<TExpr> _pre_filter_texprs;

Review Comment:
   The pre-filter expressions should live in the parent class `VScanner`.



##########
be/src/vec/exec/scan/new_file_scanner.h:
##########
@@ -0,0 +1,85 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "exec/text_converter.h"
+#include "exprs/bloomfilter_predicate.h"
+#include "exprs/function_filter.h"
+#include "vec/exec/scan/vscanner.h"
+
+namespace doris::vectorized {
+
+class NewFileScanNode;
+
+class NewFileScanner : public VScanner {
+public:
+    NewFileScanner(RuntimeState* state, NewFileScanNode* parent, int64_t limit,
+                   const TFileScanRange& scan_range, MemTracker* tracker, 
RuntimeProfile* profile);
+
+    Status open(RuntimeState *state) override;
+
+    Status prepare(VExprContext** vconjunct_ctx_ptr);
+
+protected:
+    virtual void _init_profiles(RuntimeProfile* profile) = 0;
+
+    Status finalize_block(vectorized::Block* dest_block, bool* eof);
+    Status _fill_columns_from_path(vectorized::Block* output_block, size_t 
rows);
+    Status init_block(vectorized::Block* block);
+
+    std::unique_ptr<TextConverter> _text_converter;
+
+//    RuntimeState* _state;

Review Comment:
   Remove this commented-out member.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to