(doris) 07/16: [fix](Nereids) should derive stats asap to avoid npe (#34238)

yiguolei Mon, 06 May 2024 16:59:54 -0700

This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


commit 8ee7bc430da56c23fd3a5f886be5bfd1cf3fc650
Author: morrySnow <[email protected]>
AuthorDate: Mon Apr 29 11:12:03 2024 +0800

    [fix](Nereids) should derive stats asap to avoid npe (#34238)
    
    Run the derive-stats jobs eagerly, so that no stats are left underived
    because of group merging and group optimization.
    Consider:
      two groups are created in order: G1 and G2
      the jobs are then ordered as: derive G2, optimize group expression in G2,
        derive G1, optimize group expression in G1
      if G1 is merged into G2, an optimize-group job for G2 may be generated
        before G1 has been derived
      in that case, we read stats from G1's child before the stats of G1's
        child have been derived
      and we then hit an NPE in CostModel.
---
 .../doris/nereids/jobs/cascades/ApplyRuleJob.java      | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/ApplyRuleJob.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/ApplyRuleJob.java
index 5560c369dd6..eb4f86bb0ca 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/ApplyRuleJob.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/ApplyRuleJob.java
@@ -34,6 +34,8 @@ import org.apache.doris.nereids.rules.RuleType;
 import org.apache.doris.nereids.trees.plans.Plan;
 import org.apache.doris.nereids.trees.plans.logical.LogicalPlan;
 
+import com.google.common.collect.Lists;
+
 import java.util.HashMap;
 import java.util.List;
 
@@ -68,6 +70,7 @@ public class ApplyRuleJob extends Job {
         }
         countJobExecutionTimesOfGroupExpressions(groupExpression);
 
+        List<DeriveStatsJob> deriveStatsJobs = Lists.newArrayList();
         GroupExpressionMatching groupExpressionMatching
                 = new GroupExpressionMatching(rule.getPattern(), 
groupExpression);
         for (Plan plan : groupExpressionMatching) {
@@ -87,7 +90,7 @@ public class ApplyRuleJob extends Job {
                 if (newPlan instanceof LogicalPlan) {
                     pushJob(new OptimizeGroupExpressionJob(newGroupExpression, 
context));
                     if 
(!rule.getRuleType().equals(RuleType.LOGICAL_JOIN_COMMUTE)) {
-                        pushJob(new DeriveStatsJob(newGroupExpression, 
context));
+                        deriveStatsJobs.add(new 
DeriveStatsJob(newGroupExpression, context));
                     } else {
                         // The Join Commute rule preserves the operator's 
expression and children,
                         // thereby not altering the statistics. Hence, there 
is no need to derive statistics for it.
@@ -101,7 +104,7 @@ public class ApplyRuleJob extends Job {
                         // logicalTopN ==> GlobalPhysicalTopN
                         //                   -> localPhysicalTopN
                         // These implementation rules integrate rules for plan 
shape transformation.
-                        pushJob(new DeriveStatsJob(newGroupExpression, 
context));
+                        deriveStatsJobs.add(new 
DeriveStatsJob(newGroupExpression, context));
                     } else {
                         newGroupExpression.setStatDerived(true);
                     }
@@ -111,6 +114,17 @@ public class ApplyRuleJob extends Job {
                 APPLY_RULE_TRACER.log(TransformEvent.of(groupExpression, plan, 
newPlans, rule.getRuleType()),
                         rule::isRewrite);
             }
+            // we do derive stats job eager to avoid un derive stats due to 
merge group and optimize group
+            // consider:
+            //   we have two groups burned by order: G1 and G2
+            //   then we have job by order derive G2, optimize group 
expression in G2,
+            //     derive G1, optimize group expression in G1
+            //   if G1 merged into G2, then we maybe generated job optimize 
group G2 before derive G1
+            //   in this case, we will do get stats from G1's child before 
derive G1's child stats
+            //   then we will meet NPE in CostModel.
+            for (DeriveStatsJob deriveStatsJob : deriveStatsJobs) {
+                pushJob(deriveStatsJob);
+            }
         }
         groupExpression.setApplied(rule);
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(doris) 07/16: [fix](Nereids) should derive stats asap to avoid npe (#34238)

Reply via email to