ygerzhedovich commented on code in PR #5211:
URL: https://github.com/apache/ignite-3/pull/5211#discussion_r1952814262


##########
modules/sql-engine/src/main/java/org/apache/ignite/internal/sql/engine/metadata/IgniteMdSelectivity.java:
##########
@@ -17,49 +17,226 @@
 
 package org.apache.ignite.internal.sql.engine.metadata;
 
+import static org.apache.calcite.rex.RexUtil.expandSearch;
+
+import com.google.common.collect.ImmutableList;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import org.apache.calcite.plan.RelOptUtil;
 import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
 import org.apache.calcite.rel.metadata.RelMdSelectivity;
 import org.apache.calcite.rel.metadata.RelMdUtil;
 import org.apache.calcite.rel.metadata.RelMetadataProvider;
 import org.apache.calcite.rel.metadata.RelMetadataQuery;
 import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexLocalRef;
 import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexVisitor;
+import org.apache.calcite.rex.RexVisitorImpl;
 import org.apache.calcite.sql.SqlKind;
 import org.apache.calcite.util.BuiltInMethod;
+import org.apache.calcite.util.Util;
 import org.apache.ignite.internal.sql.engine.prepare.bounds.ExactBounds;
 import org.apache.ignite.internal.sql.engine.prepare.bounds.MultiBounds;
 import org.apache.ignite.internal.sql.engine.prepare.bounds.RangeBounds;
 import org.apache.ignite.internal.sql.engine.prepare.bounds.SearchBounds;
 import org.apache.ignite.internal.sql.engine.rel.IgniteHashIndexSpool;
 import org.apache.ignite.internal.sql.engine.rel.IgniteSortedIndexSpool;
 import 
org.apache.ignite.internal.sql.engine.rel.ProjectableFilterableTableScan;
+import org.apache.ignite.internal.sql.engine.util.Commons;
 import org.apache.ignite.internal.sql.engine.util.RexUtils;
+import org.checkerframework.checker.nullness.qual.Nullable;
 
 /**
- * IgniteMdSelectivity.
- * TODO Documentation https://issues.apache.org/jira/browse/IGNITE-15859
+ * IgniteMdSelectivity supplies implementation of {@link 
RelMetadataQuery#getSelectivity}.
  */
 public class IgniteMdSelectivity extends RelMdSelectivity {
     public static final RelMetadataProvider SOURCE =
             ReflectiveRelMetadataProvider.reflectiveSource(
                     BuiltInMethod.SELECTIVITY.method, new 
IgniteMdSelectivity());
 
+    private static final double EQ_SELECTIVITY = 0.333;
+
+    private static double computeOpsSelectivity(Map<RexNode, List<SqlKind>> 
operands, double baseSelectivity) {

Review Comment:
   let's extract these magic numbers into constants, like :
   ```
   private static final double IS_NOT_NULL_SELECTIVITY = 0.9;
   private static final double COMPARISON_SELECTIVITY = 0.5;
   private static final double DEFAULT_SELECTIVITY_INCREMENT = 0.05;
   ```



##########
modules/sql-engine/src/main/java/org/apache/ignite/internal/sql/engine/metadata/IgniteMdSelectivity.java:
##########
@@ -17,49 +17,226 @@
 
 package org.apache.ignite.internal.sql.engine.metadata;
 
+import static org.apache.calcite.rex.RexUtil.expandSearch;
+
+import com.google.common.collect.ImmutableList;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import org.apache.calcite.plan.RelOptUtil;
 import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
 import org.apache.calcite.rel.metadata.RelMdSelectivity;
 import org.apache.calcite.rel.metadata.RelMdUtil;
 import org.apache.calcite.rel.metadata.RelMetadataProvider;
 import org.apache.calcite.rel.metadata.RelMetadataQuery;
 import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexLocalRef;
 import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexVisitor;
+import org.apache.calcite.rex.RexVisitorImpl;
 import org.apache.calcite.sql.SqlKind;
 import org.apache.calcite.util.BuiltInMethod;
+import org.apache.calcite.util.Util;
 import org.apache.ignite.internal.sql.engine.prepare.bounds.ExactBounds;
 import org.apache.ignite.internal.sql.engine.prepare.bounds.MultiBounds;
 import org.apache.ignite.internal.sql.engine.prepare.bounds.RangeBounds;
 import org.apache.ignite.internal.sql.engine.prepare.bounds.SearchBounds;
 import org.apache.ignite.internal.sql.engine.rel.IgniteHashIndexSpool;
 import org.apache.ignite.internal.sql.engine.rel.IgniteSortedIndexSpool;
 import 
org.apache.ignite.internal.sql.engine.rel.ProjectableFilterableTableScan;
+import org.apache.ignite.internal.sql.engine.util.Commons;
 import org.apache.ignite.internal.sql.engine.util.RexUtils;
+import org.checkerframework.checker.nullness.qual.Nullable;
 
 /**
- * IgniteMdSelectivity.
- * TODO Documentation https://issues.apache.org/jira/browse/IGNITE-15859
+ * IgniteMdSelectivity supplies implementation of {@link 
RelMetadataQuery#getSelectivity}.
  */
 public class IgniteMdSelectivity extends RelMdSelectivity {
     public static final RelMetadataProvider SOURCE =
             ReflectiveRelMetadataProvider.reflectiveSource(
                     BuiltInMethod.SELECTIVITY.method, new 
IgniteMdSelectivity());
 
+    private static final double EQ_SELECTIVITY = 0.333;
+
+    private static double computeOpsSelectivity(Map<RexNode, List<SqlKind>> 
operands, double baseSelectivity) {
+        double result = baseSelectivity;
+
+        for (Map.Entry<RexNode, List<SqlKind>> e : operands.entrySet()) {
+            int eqNum = 0;
+            double result0 = 0.0;
+
+            for (SqlKind kind : e.getValue()) {
+                switch (kind) {
+                    case IS_NOT_NULL:
+                        result0 = Math.max(result0, 0.9);
+                        break;
+                    case EQUALS:
+                        // Take into account Zipf`s distribution.
+                        result0 = Math.min(result0 + (EQ_SELECTIVITY / 
Math.sqrt(++eqNum)), 1.0);
+                        break;
+                    case GREATER_THAN:
+                    case LESS_THAN:
+                    case GREATER_THAN_OR_EQUAL:
+                    case LESS_THAN_OR_EQUAL:
+                        result0 = Math.min(result0 + 0.5, 1.0);
+                        break;
+                    default:
+                        // Not clear here, proceed with default.
+                        result0 += 0.05;
+                }
+            }
+
+            result = Math.max(result, result0);
+        }
+
+        return result;
+    }
+
+    /**
+     * Implements selectivity prediction algorithm. <br>
+     * Current implementation work as follows: <br><br>
+     * 1. If mixed OR-related operands are processed i.e: OR(=($t1, 'D'), 
=($t1, 'M'), =($t2, 'W')) selectivity computes separately
+     * for each local ref and a big one is chosen <br>
+     * 2. If mixed OR or AND related operands are processed i.e:
+     * OR(<($t3, 110), >($t3, 150), AND(>=($t2, -($t1, 2)), <=($t2, +($t3, 
2))), >($t4, $t2), <($t4, $t3)) selectivity computes separately
+     * for each local ref with AND selectivity adjustment. <br>
+     */
+    private static double computeSelectivity(RexCall call) {
+        ImmutableList<RexNode> operands = call.operands;
+        List<RexNode> andOperands = null;
+        List<RexNode> otherOperands = null;
+        double baseSelectivity = 0.0;
+        Map<RexNode, List<SqlKind>> processOperands = new HashMap<>();
+
+        assert !operands.isEmpty();
+
+        boolean andConsist = operands.stream().anyMatch(op -> 
op.isA(SqlKind.AND));
+
+        if (andConsist) {
+            for (RexNode op : operands) {
+                if (op.isA(SqlKind.AND)) {
+                    if (andOperands == null) {

Review Comment:
   it will be more readable if we initialize `andOperands` and `otherOperands` 
in the beginning and avoid this check everytime.



##########
modules/sql-engine/src/main/java/org/apache/ignite/internal/sql/engine/metadata/IgniteMdSelectivity.java:
##########
@@ -17,49 +17,226 @@
 
 package org.apache.ignite.internal.sql.engine.metadata;
 
+import static org.apache.calcite.rex.RexUtil.expandSearch;
+
+import com.google.common.collect.ImmutableList;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import org.apache.calcite.plan.RelOptUtil;
 import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
 import org.apache.calcite.rel.metadata.RelMdSelectivity;
 import org.apache.calcite.rel.metadata.RelMdUtil;
 import org.apache.calcite.rel.metadata.RelMetadataProvider;
 import org.apache.calcite.rel.metadata.RelMetadataQuery;
 import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexLocalRef;
 import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexVisitor;
+import org.apache.calcite.rex.RexVisitorImpl;
 import org.apache.calcite.sql.SqlKind;
 import org.apache.calcite.util.BuiltInMethod;
+import org.apache.calcite.util.Util;
 import org.apache.ignite.internal.sql.engine.prepare.bounds.ExactBounds;
 import org.apache.ignite.internal.sql.engine.prepare.bounds.MultiBounds;
 import org.apache.ignite.internal.sql.engine.prepare.bounds.RangeBounds;
 import org.apache.ignite.internal.sql.engine.prepare.bounds.SearchBounds;
 import org.apache.ignite.internal.sql.engine.rel.IgniteHashIndexSpool;
 import org.apache.ignite.internal.sql.engine.rel.IgniteSortedIndexSpool;
 import 
org.apache.ignite.internal.sql.engine.rel.ProjectableFilterableTableScan;
+import org.apache.ignite.internal.sql.engine.util.Commons;
 import org.apache.ignite.internal.sql.engine.util.RexUtils;
+import org.checkerframework.checker.nullness.qual.Nullable;
 
 /**
- * IgniteMdSelectivity.
- * TODO Documentation https://issues.apache.org/jira/browse/IGNITE-15859
+ * IgniteMdSelectivity supplies implementation of {@link 
RelMetadataQuery#getSelectivity}.
  */
 public class IgniteMdSelectivity extends RelMdSelectivity {
     public static final RelMetadataProvider SOURCE =
             ReflectiveRelMetadataProvider.reflectiveSource(
                     BuiltInMethod.SELECTIVITY.method, new 
IgniteMdSelectivity());
 
+    private static final double EQ_SELECTIVITY = 0.333;
+
+    private static double computeOpsSelectivity(Map<RexNode, List<SqlKind>> 
operands, double baseSelectivity) {
+        double result = baseSelectivity;
+
+        for (Map.Entry<RexNode, List<SqlKind>> e : operands.entrySet()) {
+            int eqNum = 0;
+            double result0 = 0.0;
+
+            for (SqlKind kind : e.getValue()) {
+                switch (kind) {
+                    case IS_NOT_NULL:
+                        result0 = Math.max(result0, 0.9);
+                        break;
+                    case EQUALS:
+                        // Take into account Zipf`s distribution.
+                        result0 = Math.min(result0 + (EQ_SELECTIVITY / 
Math.sqrt(++eqNum)), 1.0);
+                        break;
+                    case GREATER_THAN:
+                    case LESS_THAN:
+                    case GREATER_THAN_OR_EQUAL:
+                    case LESS_THAN_OR_EQUAL:
+                        result0 = Math.min(result0 + 0.5, 1.0);
+                        break;
+                    default:
+                        // Not clear here, proceed with default.
+                        result0 += 0.05;
+                }
+            }
+
+            result = Math.max(result, result0);
+        }
+
+        return result;
+    }
+
+    /**
+     * Implements selectivity prediction algorithm. <br>
+     * Current implementation work as follows: <br><br>
+     * 1. If mixed OR-related operands are processed i.e: OR(=($t1, 'D'), 
=($t1, 'M'), =($t2, 'W')) selectivity computes separately
+     * for each local ref and a big one is chosen <br>
+     * 2. If mixed OR or AND related operands are processed i.e:
+     * OR(<($t3, 110), >($t3, 150), AND(>=($t2, -($t1, 2)), <=($t2, +($t3, 
2))), >($t4, $t2), <($t4, $t3)) selectivity computes separately
+     * for each local ref with AND selectivity adjustment. <br>
+     */
+    private static double computeSelectivity(RexCall call) {
+        ImmutableList<RexNode> operands = call.operands;
+        List<RexNode> andOperands = null;
+        List<RexNode> otherOperands = null;
+        double baseSelectivity = 0.0;
+        Map<RexNode, List<SqlKind>> processOperands = new HashMap<>();
+
+        assert !operands.isEmpty();
+
+        boolean andConsist = operands.stream().anyMatch(op -> 
op.isA(SqlKind.AND));
+
+        if (andConsist) {
+            for (RexNode op : operands) {
+                if (op.isA(SqlKind.AND)) {
+                    if (andOperands == null) {
+                        andOperands = new ArrayList<>();
+                    }
+
+                    andOperands.add(op);
+                } else {
+                    if (otherOperands == null) {
+                        otherOperands = new ArrayList<>();
+                    }
+
+                    otherOperands.add(op);
+                }
+            }
+        }
+
+        // AND inside OR
+        if (andOperands != null) {
+            for (RexNode andOp : andOperands) {
+                baseSelectivity = Math.max(baseSelectivity, 
RelMdUtil.guessSelectivity(andOp));
+            }
+        }
+
+        List<RexNode> operandsToProcess = otherOperands == null ? 
call.getOperands() : otherOperands;
+
+        for (RexNode node : operandsToProcess) {
+            RexNode res = getLocalRef(node);
+
+            // It can be null for case with expression for example: 
LENGTH('abc') = 3
+            if (res != null) {
+                List<SqlKind> vals = processOperands.computeIfAbsent(res, (k) 
-> new ArrayList<>());
+
+                vals.add(node.getKind());
+            } else {
+                baseSelectivity = Math.max(baseSelectivity, 
nonColumnRefSelectivity(call));
+            }
+        }
+
+        return computeOpsSelectivity(processOperands, baseSelectivity);
+    }
+
+    private static double nonColumnRefSelectivity(RexCall call) {
+        if (call.isA(SqlKind.EQUALS)) {
+            return EQ_SELECTIVITY;
+        } else if (call.isA(SqlKind.COMPARISON)) {
+            return 0.5;
+        } else {
+            // different OTHER_FUNCTION and other uncovered cases
+            return 0;
+        }
+    }
+
+    private static RexLocalRef getLocalRef(RexNode node) {
+        RexVisitor<Void> v = new RexVisitorImpl<>(true) {
+            @Override
+            public Void visitLocalRef(RexLocalRef locRef) {
+                throw new Util.FoundOne(locRef);
+            }
+        };
+
+        try {
+            node.accept(v);
+
+            return null;
+        } catch (Util.FoundOne e) {
+            return (RexLocalRef) e.getNode();
+        }
+    }
+
+    private static double guessSelectivity(@Nullable RexNode predicate) {
+        double sel = 1.0;
+        if ((predicate == null) || predicate.isAlwaysTrue()) {
+            return sel;
+        }
+
+        double artificialSel = 1.0;
+
+        // make set of AND ... AND calls
+        List<RexNode> conjunctions = RelOptUtil.conjunctions(predicate);
+
+        for (RexNode pred : conjunctions) {
+            // Expand sarg`s
+            RexNode predicateExpanded = expandSearch(Commons.rexBuilder(), 
null, pred);
+
+            if (predicateExpanded.isA(SqlKind.OR)) {
+                double processed = computeSelectivity((RexCall) 
predicateExpanded);
+                sel *= processed;
+            } else if (predicateExpanded.getKind() == SqlKind.IS_NOT_NULL) {
+                sel *= 0.9;
+            } else if (
+                    (predicateExpanded instanceof RexCall)
+                            && (((RexCall) predicateExpanded).getOperator()
+                            == RelMdUtil.ARTIFICIAL_SELECTIVITY_FUNC)) {
+                artificialSel *= 
RelMdUtil.getSelectivityValue(predicateExpanded);
+            } else if (predicateExpanded.isA(SqlKind.EQUALS)) {
+                sel *= 0.15;
+            } else if (predicateExpanded.isA(SqlKind.COMPARISON)) {
+                sel *= 0.5;
+            } else {
+                sel *= 0.25;
+            }
+        }
+
+        return sel * artificialSel;
+    }
+
     /**
      * GetSelectivity.
      * TODO Documentation https://issues.apache.org/jira/browse/IGNITE-15859

Review Comment:
   we have a few documentation TODO in the class - will be great to fix it.



##########
modules/sql-engine/src/main/java/org/apache/ignite/internal/sql/engine/metadata/IgniteMdSelectivity.java:
##########
@@ -17,49 +17,226 @@
 
 package org.apache.ignite.internal.sql.engine.metadata;
 
+import static org.apache.calcite.rex.RexUtil.expandSearch;
+
+import com.google.common.collect.ImmutableList;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import org.apache.calcite.plan.RelOptUtil;
 import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
 import org.apache.calcite.rel.metadata.RelMdSelectivity;
 import org.apache.calcite.rel.metadata.RelMdUtil;
 import org.apache.calcite.rel.metadata.RelMetadataProvider;
 import org.apache.calcite.rel.metadata.RelMetadataQuery;
 import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexLocalRef;
 import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexVisitor;
+import org.apache.calcite.rex.RexVisitorImpl;
 import org.apache.calcite.sql.SqlKind;
 import org.apache.calcite.util.BuiltInMethod;
+import org.apache.calcite.util.Util;
 import org.apache.ignite.internal.sql.engine.prepare.bounds.ExactBounds;
 import org.apache.ignite.internal.sql.engine.prepare.bounds.MultiBounds;
 import org.apache.ignite.internal.sql.engine.prepare.bounds.RangeBounds;
 import org.apache.ignite.internal.sql.engine.prepare.bounds.SearchBounds;
 import org.apache.ignite.internal.sql.engine.rel.IgniteHashIndexSpool;
 import org.apache.ignite.internal.sql.engine.rel.IgniteSortedIndexSpool;
 import 
org.apache.ignite.internal.sql.engine.rel.ProjectableFilterableTableScan;
+import org.apache.ignite.internal.sql.engine.util.Commons;
 import org.apache.ignite.internal.sql.engine.util.RexUtils;
+import org.checkerframework.checker.nullness.qual.Nullable;
 
 /**
- * IgniteMdSelectivity.
- * TODO Documentation https://issues.apache.org/jira/browse/IGNITE-15859
+ * IgniteMdSelectivity supplies implementation of {@link 
RelMetadataQuery#getSelectivity}.
  */
 public class IgniteMdSelectivity extends RelMdSelectivity {
     public static final RelMetadataProvider SOURCE =
             ReflectiveRelMetadataProvider.reflectiveSource(
                     BuiltInMethod.SELECTIVITY.method, new 
IgniteMdSelectivity());
 
+    private static final double EQ_SELECTIVITY = 0.333;
+
+    private static double computeOpsSelectivity(Map<RexNode, List<SqlKind>> 
operands, double baseSelectivity) {
+        double result = baseSelectivity;
+
+        for (Map.Entry<RexNode, List<SqlKind>> e : operands.entrySet()) {
+            int eqNum = 0;
+            double result0 = 0.0;
+
+            for (SqlKind kind : e.getValue()) {
+                switch (kind) {
+                    case IS_NOT_NULL:
+                        result0 = Math.max(result0, 0.9);
+                        break;
+                    case EQUALS:
+                        // Take into account Zipf`s distribution.
+                        result0 = Math.min(result0 + (EQ_SELECTIVITY / 
Math.sqrt(++eqNum)), 1.0);
+                        break;
+                    case GREATER_THAN:
+                    case LESS_THAN:
+                    case GREATER_THAN_OR_EQUAL:
+                    case LESS_THAN_OR_EQUAL:
+                        result0 = Math.min(result0 + 0.5, 1.0);
+                        break;
+                    default:
+                        // Not clear here, proceed with default.
+                        result0 += 0.05;
+                }
+            }
+
+            result = Math.max(result, result0);
+        }
+
+        return result;
+    }
+
+    /**
+     * Implements selectivity prediction algorithm. <br>
+     * Current implementation work as follows: <br><br>
+     * 1. If mixed OR-related operands are processed i.e: OR(=($t1, 'D'), 
=($t1, 'M'), =($t2, 'W')) selectivity computes separately
+     * for each local ref and a big one is chosen <br>
+     * 2. If mixed OR or AND related operands are processed i.e:
+     * OR(<($t3, 110), >($t3, 150), AND(>=($t2, -($t1, 2)), <=($t2, +($t3, 
2))), >($t4, $t2), <($t4, $t3)) selectivity computes separately
+     * for each local ref with AND selectivity adjustment. <br>
+     */
+    private static double computeSelectivity(RexCall call) {
+        ImmutableList<RexNode> operands = call.operands;

Review Comment:
   Let's avoid use guava classes, just simple List<RexNode>



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscr...@ignite.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to