duongcongtoai commented on code in PR #16016: URL: https://github.com/apache/datafusion/pull/16016#discussion_r2134056643
########## datafusion/optimizer/src/rewrite_dependent_join.rs: ########## @@ -0,0 +1,1901 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! [`DependentJoinRewriter`] converts correlated subqueries to `DependentJoin` + +use std::ops::Deref; +use std::sync::Arc; + +use crate::{ApplyOrder, OptimizerConfig, OptimizerRule}; + +use arrow::datatypes::DataType; +use datafusion_common::alias::AliasGenerator; +use datafusion_common::tree_node::{ + Transformed, TreeNode, TreeNodeRecursion, TreeNodeRewriter, +}; +use datafusion_common::{internal_err, Column, HashMap, Result}; +use datafusion_expr::{ + col, lit, Aggregate, Expr, Filter, LogicalPlan, LogicalPlanBuilder, Projection, +}; + +use indexmap::map::Entry; +use indexmap::IndexMap; +use itertools::Itertools; + +pub struct DependentJoinRewriter { + // each logical plan traversal will assign it an integer id + current_id: usize, + subquery_depth: usize, + // each newly visited `LogicalPlan` is inserted inside this map for tracking + nodes: IndexMap<usize, Node>, + // all the node ids from root to the current node + // this is mutated during traversal + stack: Vec<usize>, + // track for each column, the nodes/logical plans that reference it within the tree + 
all_outer_ref_columns: IndexMap<Column, Vec<ColumnAccess>>, + alias_generator: Arc<AliasGenerator>, +} + +#[derive(Debug, Hash, PartialEq, PartialOrd, Eq, Clone)] +struct ColumnAccess { + // node ids from root to the node that is referencing the column + stack: Vec<usize>, + // the node referencing the column + node_id: usize, + col: Column, + data_type: DataType, + subquery_depth: usize, +} + +impl DependentJoinRewriter { + fn rewrite_filter( + &mut self, + filter: &Filter, + dependent_join_node: &Node, + current_subquery_depth: usize, + mut current_plan: LogicalPlanBuilder, + subquery_alias_by_offset: HashMap<usize, String>, + ) -> Result<LogicalPlanBuilder> { + // every time we meet a subquery during traversal, we increment this by 1 + // we can use this offset to lookup the original subquery info + // in subquery_alias_by_offset + // the reason why we cannot create a hashmap keyed by Subquery object HashMap<Subquery,String> + // is that the subquery inside this filter expr may have been rewritten in + // the lower level + let mut offset = 0; + let offset_ref = &mut offset; + let mut subquery_expr_by_offset = HashMap::new(); + let new_predicate = filter + .predicate + .clone() + .transform(|e| { + // replace any subquery expr with subquery_alias.output + // column + let alias = match e { + Expr::InSubquery(_) | Expr::Exists(_) | Expr::ScalarSubquery(_) => { + subquery_alias_by_offset.get(offset_ref).unwrap() Review Comment: Resolved: the `unwrap()` has been converted into a returned error for the case where the lookup fails. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org