This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new c22bb8d7973 [opt](nereids) improve filter estimation on string column 
#35610 #34542 (#35667)
c22bb8d7973 is described below

commit c22bb8d7973e0cf24f776cd0ca8addeb45bb3b88
Author: minghong <engle...@gmail.com>
AuthorDate: Sun Jun 2 09:34:40 2024 +0800

    [opt](nereids) improve filter estimation on string column #35610 #34542 
(#35667)
    
    Some users define date (or datetime) columns as VARCHAR.
    When estimating the selectivity of a predicate such as A > '2020-01-01',
    treating both A and '2020-01-01' as date values gives a more accurate
    selectivity than treating them as strings.
---
 .../org/apache/doris/analysis/DateLiteral.java     |  58 +++++++--
 .../doris/nereids/stats/FilterEstimation.java      | 145 ++++++++++++++++++---
 .../expressions/literal/StringLikeLiteral.java     |  11 +-
 .../org/apache/doris/nereids/types/TimeType.java   |   3 +-
 .../org/apache/doris/nereids/types/TimeV2Type.java |   3 +-
 .../doris/nereids/types/coercion/DateLikeType.java |   2 +-
 .../doris/nereids/types/coercion/NumericType.java  |   2 +-
 .../{TimeType.java => coercion/RangeScalable.java} |  37 ++----
 .../doris/nereids/stats/FilterEstimationTest.java  | 118 +++++++++++++++++
 .../data/nereids_ssb_shape_sf100_p0/shape/q2.2.out |  27 ++--
 10 files changed, 332 insertions(+), 74 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java
index 9f05c0e6574..1dc58b4c382 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java
@@ -66,6 +66,7 @@ import java.util.Set;
 import java.util.TimeZone;
 import java.util.regex.Pattern;
 import java.util.stream.Collectors;
+import javax.annotation.Nullable;
 
 public class DateLiteral extends LiteralExpr {
     private static final Logger LOG = LogManager.getLogger(DateLiteral.class);
@@ -367,13 +368,21 @@ public class DateLiteral extends LiteralExpr {
         type = other.type;
     }
 
+    public DateLiteral(String s) throws AnalysisException {
+        super();
+        init(s, null);
+        analysisDone();
+    }
+
     public static DateLiteral createMinValue(Type type) throws 
AnalysisException {
         return new DateLiteral(type, false);
     }
 
-    private void init(String s, Type type) throws AnalysisException {
+    private void init(String s, @Nullable Type type) throws AnalysisException {
         try {
-            Preconditions.checkArgument(type.isDateType());
+            if (type != null) {
+                Preconditions.checkArgument(type.isDateType());
+            }
             TemporalAccessor dateTime = null;
             boolean parsed = false;
             int offset = 0;
@@ -445,25 +454,29 @@ public class DateLiteral extends LiteralExpr {
                     builder.appendLiteral(" ");
                 }
                 String[] timePart = s.contains(" ") ? s.split(" 
")[1].split(":") : new String[] {};
-                if (timePart.length > 0 && (type.equals(Type.DATE) || 
type.equals(Type.DATEV2))) {
+                if (timePart.length > 0 && type != null && 
(type.equals(Type.DATE) || type.equals(Type.DATEV2))) {
                     throw new AnalysisException("Invalid date value: " + s);
                 }
-                if (timePart.length == 0 && (type.equals(Type.DATETIME) || 
type.equals(Type.DATETIMEV2))) {
+                if (timePart.length == 0 && type != null
+                        && (type.equals(Type.DATETIME) || 
type.equals(Type.DATETIMEV2))) {
                     throw new AnalysisException("Invalid datetime value: " + 
s);
                 }
                 for (int i = 0; i < timePart.length; i++) {
                     switch (i) {
                         case 0:
-                            builder.appendPattern(String.join("", 
Collections.nCopies(timePart[i].length(), "H")));
+                            builder.appendPattern(String.join("",
+                                    Collections.nCopies(timePart[i].length(), 
"H")));
                             break;
                         case 1:
-                            builder.appendPattern(String.join("", 
Collections.nCopies(timePart[i].length(), "m")));
+                            builder.appendPattern(String.join("",
+                                    Collections.nCopies(timePart[i].length(), 
"m")));
                             break;
                         case 2:
                             builder.appendPattern(String.join("", 
Collections.nCopies(timePart[i].contains(".")
                                     ? timePart[i].split("\\.")[0].length() : 
timePart[i].length(), "s")));
                             if (timePart[i].contains(".")) {
-                                
builder.appendFraction(ChronoField.MICRO_OF_SECOND, 0, 6, true);
+                                
builder.appendFraction(ChronoField.MICRO_OF_SECOND,
+                                        0, 6, true);
                             }
                             break;
                         default:
@@ -488,10 +501,29 @@ public class DateLiteral extends LiteralExpr {
             minute = getOrDefault(dateTime, ChronoField.MINUTE_OF_HOUR, 0);
             second = getOrDefault(dateTime, ChronoField.SECOND_OF_MINUTE, 0);
             microsecond = getOrDefault(dateTime, ChronoField.MICRO_OF_SECOND, 
0);
-            if (microsecond != 0 && type.isDatetime()) {
-                int dotIndex = s.lastIndexOf(".");
-                int scale = s.length() - dotIndex - 1;
-                type = ScalarType.createDatetimeV2Type(scale);
+            if (type != null) {
+                if (microsecond != 0 && type.isDatetime()) {
+                    int dotIndex = s.lastIndexOf(".");
+                    int scale = s.length() - dotIndex - 1;
+                    type = ScalarType.createDatetimeV2Type(scale);
+                }
+            } else {
+                if (hour == 0 && minute == 0 && second == 0 && microsecond == 
0) {
+                    type = ScalarType.getDefaultDateType(Type.DATE);
+                } else {
+                    type = ScalarType.getDefaultDateType(Type.DATETIME);
+                    if (type.isDatetimeV2() && microsecond != 0) {
+                        int scale = 6;
+                        for (int i = 0; i < 6; i++) {
+                            if (microsecond % Math.pow(10.0, i + 1) > 0) {
+                                break;
+                            } else {
+                                scale -= 1;
+                            }
+                        }
+                        type = ScalarType.createDatetimeV2Type(scale);
+                    }
+                }
             }
             this.type = type;
 
@@ -720,6 +752,10 @@ public class DateLiteral extends LiteralExpr {
         return getLongValue();
     }
 
+    public double getDoubleValueAsDateTime() {
+        return (year * 10000 + month * 100 + day) * 1000000L + hour * 10000 + 
minute * 100 + second;
+    }
+
     @Override
     protected void toThrift(TExprNode msg) {
         if (type.isDatetimeV2()) {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
index c56a7d9f5bc..33f6318808b 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
@@ -17,7 +17,9 @@
 
 package org.apache.doris.nereids.stats;
 
+import org.apache.doris.analysis.DateLiteral;
 import org.apache.doris.analysis.LiteralExpr;
+import org.apache.doris.analysis.StringLiteral;
 import org.apache.doris.nereids.stats.FilterEstimation.EstimationContext;
 import org.apache.doris.nereids.trees.TreeNode;
 import org.apache.doris.nereids.trees.expressions.And;
@@ -39,7 +41,11 @@ import org.apache.doris.nereids.trees.expressions.Slot;
 import org.apache.doris.nereids.trees.expressions.SlotReference;
 import org.apache.doris.nereids.trees.expressions.functions.Function;
 import org.apache.doris.nereids.trees.expressions.literal.Literal;
+import org.apache.doris.nereids.trees.expressions.literal.StringLikeLiteral;
 import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
+import org.apache.doris.nereids.types.DataType;
+import org.apache.doris.nereids.types.DateTimeType;
+import org.apache.doris.nereids.types.coercion.RangeScalable;
 import org.apache.doris.statistics.ColumnStatistic;
 import org.apache.doris.statistics.ColumnStatisticBuilder;
 import org.apache.doris.statistics.StatisticRange;
@@ -49,7 +55,10 @@ import org.apache.doris.statistics.StatisticsBuilder;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Sets;
 
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
+import java.util.Optional;
 import java.util.Set;
 import java.util.function.Predicate;
 
@@ -176,22 +185,22 @@ public class FilterEstimation extends 
ExpressionVisitor<Statistics, EstimationCo
         }
     }
 
-    private Statistics updateLessThanLiteral(Expression leftExpr, 
ColumnStatistic statsForLeft,
+    private Statistics updateLessThanLiteral(Expression leftExpr, DataType 
dataType, ColumnStatistic statsForLeft,
             ColumnStatistic statsForRight, EstimationContext context) {
         StatisticRange rightRange = new StatisticRange(statsForLeft.minValue, 
statsForLeft.minExpr,
                 statsForRight.maxValue, statsForRight.maxExpr,
-                statsForLeft.ndv, leftExpr.getDataType());
-        return estimateBinaryComparisonFilter(leftExpr,
+                statsForLeft.ndv, dataType);
+        return estimateBinaryComparisonFilter(leftExpr, dataType,
                 statsForLeft,
                 rightRange, context);
     }
 
-    private Statistics updateGreaterThanLiteral(Expression leftExpr, 
ColumnStatistic statsForLeft,
+    private Statistics updateGreaterThanLiteral(Expression leftExpr, DataType 
dataType, ColumnStatistic statsForLeft,
             ColumnStatistic statsForRight, EstimationContext context) {
         StatisticRange rightRange = new StatisticRange(statsForRight.minValue, 
statsForRight.minExpr,
                 statsForLeft.maxValue, statsForLeft.maxExpr,
-                statsForLeft.ndv, leftExpr.getDataType());
-        return estimateBinaryComparisonFilter(leftExpr, statsForLeft, 
rightRange, context);
+                statsForLeft.ndv, dataType);
+        return estimateBinaryComparisonFilter(leftExpr, dataType, 
statsForLeft, rightRange, context);
     }
 
     private Statistics calculateWhenLiteralRight(ComparisonPredicate cp,
@@ -203,14 +212,111 @@ public class FilterEstimation extends 
ExpressionVisitor<Statistics, EstimationCo
         if (cp instanceof EqualPredicate) {
             return estimateEqualTo(cp, statsForLeft, statsForRight, context);
         } else {
+            // literal map used to convert a DateLiteral back to its original StringLiteral
+            Map<DateLiteral, StringLiteral> literalMap = new HashMap<>();
+            DataType compareType = cp.left().getDataType();
+            Optional<ColumnStatistic> statsForLeftMayConvertedOpt =
+                    tryConvertStringColStatsToDateColStats(statsForLeft, 
literalMap);
+            Optional<ColumnStatistic> statsForRightMayConvertedOpt = 
(statsForLeftMayConvertedOpt.isPresent())
+                    ? tryConvertStringColStatsToDateColStats(statsForRight, 
literalMap)
+                    : Optional.empty();
+
+            boolean converted = false;
+            ColumnStatistic statsForLeftMayConverted = statsForLeft;
+            ColumnStatistic statsForRightMayConverted = statsForRight;
+            if (statsForLeftMayConvertedOpt.isPresent() && 
statsForRightMayConvertedOpt.isPresent()
+                    && statsForRightMayConvertedOpt.get().minExpr.getType()
+                    == statsForLeftMayConvertedOpt.get().minExpr.getType()) {
+                // string type is converted to date type
+                converted = true;
+                compareType = DateTimeType.INSTANCE;
+                statsForLeftMayConverted = statsForLeftMayConvertedOpt.get();
+                statsForRightMayConverted = statsForRightMayConvertedOpt.get();
+            }
+            Statistics result = null;
             if (cp instanceof LessThan || cp instanceof LessThanEqual) {
-                return updateLessThanLiteral(cp.left(), statsForLeft, 
statsForRight, context);
+                result = updateLessThanLiteral(cp.left(), compareType, 
statsForLeftMayConverted,
+                        statsForRightMayConverted, context);
             } else if (cp instanceof GreaterThan || cp instanceof 
GreaterThanEqual) {
-                return updateGreaterThanLiteral(cp.left(), statsForLeft, 
statsForRight, context);
+                result = updateGreaterThanLiteral(cp.left(), compareType, 
statsForLeftMayConverted,
+                        statsForRightMayConverted, context);
             } else {
                 throw new RuntimeException(String.format("Unexpected 
expression : %s", cp.toSql()));
             }
+            if (converted) {
+                // convert min/max of left.colStats back to string type
+                ColumnStatistic newLeftStats = 
result.findColumnStatistics(cp.left());
+                result.addColumnStats(cp.left(), 
convertDateColStatsToStringColStats(newLeftStats, literalMap));
+            }
+            return result;
+        }
+    }
+
+    private ColumnStatistic 
convertDateColStatsToStringColStats(ColumnStatistic colStats,
+            Map<DateLiteral, StringLiteral> literalMap) {
+        if (colStats.minExpr == null && colStats.maxExpr == null) {
+            // when sel=0, minExpr and maxExpr are both null
+            return colStats;
+        }
+        Preconditions.checkArgument(colStats.minExpr instanceof DateLiteral
+                        && colStats.maxExpr instanceof DateLiteral,
+                "cannot convert colStats back to stringType %s", 
colStats.toString());
+        ColumnStatisticBuilder builder = new ColumnStatisticBuilder(colStats);
+        StringLiteral newMinLiteral = new 
StringLiteral(colStats.maxExpr.toString());
+        return builder.setMaxExpr(newMinLiteral)
+                .setMaxExpr(literalMap.get(colStats.maxExpr))
+                
.setMaxValue(StringLikeLiteral.getDouble(colStats.maxExpr.toString()))
+                .setMinExpr(literalMap.get(colStats.minExpr))
+                
.setMinValue(StringLikeLiteral.getDouble(colStats.minExpr.getStringValue()))
+                .build();
+    }
+
+    private Optional<ColumnStatistic> 
tryConvertStringColStatsToDateColStats(ColumnStatistic colStats,
+            Map<DateLiteral, StringLiteral> literalMap) {
+        if (colStats.minExpr == null || colStats.maxExpr == null) {
+            return Optional.empty();
+        }
+        if (!(colStats.minExpr instanceof StringLiteral) || !(colStats.maxExpr 
instanceof StringLiteral)) {
+            return Optional.empty();
+        }
+        Optional<DateLiteral> newMinExpr = 
tryConvertStrLiteralToDateLiteral(colStats.minExpr);
+        if (!newMinExpr.isPresent()) {
+            return Optional.empty();
         }
+        Optional<DateLiteral> newMaxExpr = 
tryConvertStrLiteralToDateLiteral(colStats.maxExpr);
+        if (!newMaxExpr.isPresent()) {
+            return Optional.empty();
+        }
+        if (newMaxExpr.get().getType() != newMinExpr.get().getType()) {
+            return Optional.empty();
+        }
+        literalMap.put(newMinExpr.get(), (StringLiteral) colStats.minExpr);
+        literalMap.put(newMaxExpr.get(), (StringLiteral) colStats.maxExpr);
+
+        ColumnStatisticBuilder builder = new ColumnStatisticBuilder(colStats);
+        return 
Optional.of(builder.setMinValue(newMinExpr.get().getDoubleValueAsDateTime())
+                .setMinExpr(newMinExpr.get())
+                .setMaxValue(newMaxExpr.get().getDoubleValueAsDateTime())
+                .setMaxExpr(newMaxExpr.get())
+                .build());
+    }
+
+    private Optional<DateLiteral> 
tryConvertStrLiteralToDateLiteral(LiteralExpr literal) {
+        if (literal == null) {
+            return Optional.empty();
+        }
+        if (!(literal instanceof StringLiteral)) {
+            return Optional.empty();
+        }
+
+        DateLiteral dt = null;
+        try {
+            dt = new DateLiteral(literal.getStringValue());
+            dt.checkValueValid();
+        } catch (Exception e) {
+            // ignore
+        }
+        return dt == null ? Optional.empty() : Optional.of(dt);
     }
 
     private Statistics estimateEqualTo(ComparisonPredicate cp, ColumnStatistic 
statsForLeft,
@@ -451,11 +557,11 @@ public class FilterEstimation extends 
ExpressionVisitor<Statistics, EstimationCo
         }
     }
 
-    private Statistics estimateBinaryComparisonFilter(Expression leftExpr, 
ColumnStatistic leftStats,
+    private Statistics estimateBinaryComparisonFilter(Expression leftExpr, 
DataType dataType, ColumnStatistic leftStats,
             StatisticRange rightRange, EstimationContext context) {
         StatisticRange leftRange =
                 new StatisticRange(leftStats.minValue, leftStats.minExpr, 
leftStats.maxValue, leftStats.maxExpr,
-                        leftStats.ndv, leftExpr.getDataType());
+                        leftStats.ndv, dataType);
         StatisticRange intersectRange = leftRange.cover(rightRange);
 
         ColumnStatisticBuilder leftColumnStatisticBuilder;
@@ -479,6 +585,9 @@ public class FilterEstimation extends 
ExpressionVisitor<Statistics, EstimationCo
                     .setNdv(intersectRange.getDistinctValues())
                     .setNumNulls(0);
             double sel = leftRange.overlapPercentWith(rightRange);
+            if (!(dataType instanceof RangeScalable) && (sel != 0.0 && sel != 
1.0)) {
+                sel = DEFAULT_INEQUALITY_COEFFICIENT;
+            }
             sel = getNotNullSelectivity(leftStats, sel);
             updatedStatistics = context.statistics.withSel(sel);
             
leftColumnStatisticBuilder.setCount(updatedStatistics.getRowCount());
@@ -535,8 +644,9 @@ public class FilterEstimation extends 
ExpressionVisitor<Statistics, EstimationCo
         }
 
         double leftOverlapPercent = leftRange.overlapPercentWith(rightRange);
-        // Left always greater than right
-        if (leftOverlapPercent == 0) {
+
+        if (leftOverlapPercent == 0.0) {
+            // Left always greater than right
             return context.statistics.withRowCount(0.0);
         }
         StatisticRange leftAlwaysLessThanRightRange = new 
StatisticRange(leftStats.minValue, leftStats.minExpr,
@@ -565,9 +675,14 @@ public class FilterEstimation extends 
ExpressionVisitor<Statistics, EstimationCo
                 .setNdv(rightStats.ndv * (rightAlwaysGreaterRangeFraction + 
rightOverlappingRangeFraction))
                 .setNumNulls(0)
                 .build();
-        double sel = leftAlwaysLessThanRightPercent
-                + leftOverlapPercent * rightOverlappingRangeFraction * 
DEFAULT_INEQUALITY_COEFFICIENT
-                + leftOverlapPercent * rightAlwaysGreaterRangeFraction;
+        double sel = DEFAULT_INEQUALITY_COEFFICIENT;
+        if (leftExpr.getDataType() instanceof RangeScalable) {
+            sel = leftAlwaysLessThanRightPercent
+                    + leftOverlapPercent * rightOverlappingRangeFraction * 
DEFAULT_INEQUALITY_COEFFICIENT
+                    + leftOverlapPercent * rightAlwaysGreaterRangeFraction;
+        } else if (leftOverlapPercent == 1.0) {
+            sel = 1.0;
+        }
         context.addKeyIfSlot(leftExpr);
         context.addKeyIfSlot(rightExpr);
         return context.statistics.withSel(sel)
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/StringLikeLiteral.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/StringLikeLiteral.java
index 5b437021f53..30cb9580195 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/StringLikeLiteral.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/StringLikeLiteral.java
@@ -39,11 +39,18 @@ public abstract class StringLikeLiteral extends Literal {
 
     @Override
     public double getDouble() {
+        return getDouble(value);
+    }
+
+    /**
+     * get double value
+     */
+    public static double getDouble(String str) {
         long v = 0;
         int pos = 0;
-        int len = Math.min(value.length(), 7);
+        int len = Math.min(str.length(), 7);
         while (pos < len) {
-            v += Byte.toUnsignedLong(value.getBytes()[pos]) << ((6 - pos) * 8);
+            v += Byte.toUnsignedLong(str.getBytes()[pos]) << ((6 - pos) * 8);
             pos++;
         }
         return (double) v;
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/TimeType.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/TimeType.java
index 9fb438fd79c..1111038ed91 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/TimeType.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/TimeType.java
@@ -19,11 +19,12 @@ package org.apache.doris.nereids.types;
 
 import org.apache.doris.catalog.Type;
 import org.apache.doris.nereids.types.coercion.PrimitiveType;
+import org.apache.doris.nereids.types.coercion.RangeScalable;
 
 /**
  * Datetime type in Nereids.
  */
-public class TimeType extends PrimitiveType {
+public class TimeType extends PrimitiveType implements RangeScalable {
 
     public static final TimeType INSTANCE = new TimeType();
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/TimeV2Type.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/TimeV2Type.java
index ec625d0cd17..b436fe31d39 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/TimeV2Type.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/TimeV2Type.java
@@ -20,11 +20,12 @@ package org.apache.doris.nereids.types;
 import org.apache.doris.catalog.ScalarType;
 import org.apache.doris.catalog.Type;
 import org.apache.doris.nereids.types.coercion.PrimitiveType;
+import org.apache.doris.nereids.types.coercion.RangeScalable;
 
 /**
  * Datetime type in Nereids.
  */
-public class TimeV2Type extends PrimitiveType {
+public class TimeV2Type extends PrimitiveType implements RangeScalable {
 
     public static final TimeV2Type INSTANCE = new TimeV2Type();
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/coercion/DateLikeType.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/coercion/DateLikeType.java
index 22ea99f00bc..1f8130215b0 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/coercion/DateLikeType.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/coercion/DateLikeType.java
@@ -33,7 +33,7 @@ import java.time.LocalDateTime;
 /**
  * date like type.
  */
-public abstract class DateLikeType extends PrimitiveType {
+public abstract class DateLikeType extends PrimitiveType implements 
RangeScalable {
 
     protected LocalDate toLocalDate(double d) {
         // d = (year * 10000 + month * 100 + day) * 1000000L;
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/coercion/NumericType.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/coercion/NumericType.java
index 18a41d3ffef..1d09a80a74f 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/coercion/NumericType.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/coercion/NumericType.java
@@ -24,7 +24,7 @@ import org.apache.doris.nereids.types.DoubleType;
 /**
  * Abstract class for all numeric type in Nereids.
  */
-public class NumericType extends PrimitiveType {
+public class NumericType extends PrimitiveType implements RangeScalable {
 
     public static final NumericType INSTANCE = new NumericType();
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/TimeType.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/coercion/RangeScalable.java
similarity index 57%
copy from fe/fe-core/src/main/java/org/apache/doris/nereids/types/TimeType.java
copy to 
fe/fe-core/src/main/java/org/apache/doris/nereids/types/coercion/RangeScalable.java
index 9fb438fd79c..8209e2f2b6e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/TimeType.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/coercion/RangeScalable.java
@@ -15,35 +15,16 @@
 // specific language governing permissions and limitations
 // under the License.
 
-package org.apache.doris.nereids.types;
-
-import org.apache.doris.catalog.Type;
-import org.apache.doris.nereids.types.coercion.PrimitiveType;
+package org.apache.doris.nereids.types.coercion;
 
 /**
- * Datetime type in Nereids.
+ * Numeric types and date related types are range scalable.
+ * For a RangeScalable column, a filter like "A < 10" can be estimated more accurately.
+ * For example, for a given relation R which contains 10 rows with R.A in (1, 100),
+ * the selectivity of the filter "A < 10" is "(10 - 1) / (100 - 1)".
+ * But for a string column A, the selectivity of "A < 'abc'" cannot be estimated by range,
+ * although we could have an order-preserving mapping from string values to doubles.
+ *
  */
-public class TimeType extends PrimitiveType {
-
-    public static final TimeType INSTANCE = new TimeType();
-
-    private static final int WIDTH = 8;
-
-    private TimeType() {
-    }
-
-    @Override
-    public Type toCatalogDataType() {
-        return Type.TIME;
-    }
-
-    @Override
-    public boolean equals(Object o) {
-        return o instanceof TimeType;
-    }
-
-    @Override
-    public int width() {
-        return WIDTH;
-    }
+public interface RangeScalable {
 }
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
index eca15eea364..da8159ef6b9 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
@@ -18,6 +18,7 @@
 package org.apache.doris.nereids.stats;
 
 import org.apache.doris.analysis.IntLiteral;
+import org.apache.doris.analysis.StringLiteral;
 import org.apache.doris.nereids.trees.expressions.And;
 import org.apache.doris.nereids.trees.expressions.Cast;
 import org.apache.doris.nereids.trees.expressions.EqualTo;
@@ -35,9 +36,11 @@ import 
org.apache.doris.nereids.trees.expressions.SlotReference;
 import org.apache.doris.nereids.trees.expressions.literal.DateLiteral;
 import org.apache.doris.nereids.trees.expressions.literal.DoubleLiteral;
 import org.apache.doris.nereids.trees.expressions.literal.IntegerLiteral;
+import org.apache.doris.nereids.trees.expressions.literal.VarcharLiteral;
 import org.apache.doris.nereids.types.DateType;
 import org.apache.doris.nereids.types.DoubleType;
 import org.apache.doris.nereids.types.IntegerType;
+import org.apache.doris.nereids.types.VarcharType;
 import org.apache.doris.statistics.ColumnStatistic;
 import org.apache.doris.statistics.ColumnStatisticBuilder;
 import org.apache.doris.statistics.Statistics;
@@ -1139,4 +1142,119 @@ class FilterEstimationTest {
         Statistics resultEq = estimator.estimate(eq, statsBuilder.build());
         Assertions.assertEquals(7, resultNse.getRowCount() - 
resultEq.getRowCount());
     }
+
+    /**
+     * for string literal, min-max range is only used for coverage, not for 
percentage
+     */
+    @Test
+    public void testStringRangeColToLiteral() {
+        SlotReference a = new SlotReference("a", new VarcharType(25));
+        ColumnStatisticBuilder columnStatisticBuilder = new 
ColumnStatisticBuilder()
+                .setNdv(100)
+                .setAvgSizeByte(25)
+                .setNumNulls(0)
+                .setMaxExpr(new StringLiteral("200"))
+                .setMaxValue(new VarcharLiteral("200").getDouble())
+                .setMinExpr(new StringLiteral("100"))
+                .setMinValue(new VarcharLiteral("100").getDouble())
+                .setCount(100);
+        StatisticsBuilder statsBuilder = new StatisticsBuilder();
+        statsBuilder.setRowCount(100);
+        statsBuilder.putColumnStatistics(a, columnStatisticBuilder.build());
+        Statistics baseStats = statsBuilder.build();
+        VarcharLiteral i500 = new VarcharLiteral("500");
+        Statistics filter500 = new FilterEstimation().estimate(new LessThan(a, 
i500), baseStats);
+        Assertions.assertEquals(100, filter500.getRowCount());
+
+        VarcharLiteral i10 = new VarcharLiteral("10");
+        Statistics filter10 = new FilterEstimation().estimate(new 
LessThan(i10, a), baseStats);
+        Assertions.assertEquals(100, filter10.getRowCount());
+
+        VarcharLiteral i199 = new VarcharLiteral("199");
+        Statistics filter199 = new FilterEstimation().estimate(new 
GreaterThan(a, i199), baseStats);
+        Assertions.assertEquals(50, filter199.getRowCount(), 0.01);
+    }
+
+    @Test
+    public void testStringRangeColToDateLiteral() {
+        SlotReference a = new SlotReference("a", new VarcharType(25));
+        ColumnStatisticBuilder columnStatisticBuilder = new 
ColumnStatisticBuilder()
+                .setNdv(100)
+                .setAvgSizeByte(25)
+                .setNumNulls(0)
+                .setMaxExpr(new StringLiteral("2022-01-01"))
+                .setMaxValue(new VarcharLiteral("2022-01-01").getDouble())
+                .setMinExpr(new StringLiteral("2020-01-01"))
+                .setMinValue(new VarcharLiteral("2020-01-01").getDouble())
+                .setCount(100);
+        StatisticsBuilder statsBuilder = new StatisticsBuilder();
+        statsBuilder.setRowCount(100);
+        statsBuilder.putColumnStatistics(a, columnStatisticBuilder.build());
+        Statistics baseStats = statsBuilder.build();
+        VarcharLiteral year2030 = new VarcharLiteral("2030-01-01");
+        Statistics filter2030 = new FilterEstimation().estimate(new 
LessThan(a, year2030), baseStats);
+        Assertions.assertEquals(100, filter2030.getRowCount());
+
+        VarcharLiteral year2000 = new VarcharLiteral("2000-01-01");
+        Statistics filter2k = new FilterEstimation().estimate(new 
LessThan(year2000, a), baseStats);
+        Assertions.assertEquals(100, filter2k.getRowCount());
+
+        VarcharLiteral year2021 = new VarcharLiteral("2021-12-01");
+        Statistics filter2021 = new FilterEstimation().estimate(new 
GreaterThan(a, year2021), baseStats);
+        Assertions.assertEquals(4.24, filter2021.getRowCount(), 0.01);
+    }
+
+    @Test
+    public void testStringRangeColToCol() {
+        SlotReference a = new SlotReference("a", new VarcharType(25));
+        ColumnStatisticBuilder columnStatisticBuilderA = new 
ColumnStatisticBuilder()
+                .setNdv(100)
+                .setAvgSizeByte(25)
+                .setNumNulls(0)
+                .setMaxExpr(new StringLiteral("2022-01-01"))
+                .setMaxValue(new VarcharLiteral("2022-01-01").getDouble())
+                .setMinExpr(new StringLiteral("2020-01-01"))
+                .setMinValue(new VarcharLiteral("2020-01-01").getDouble())
+                .setCount(100);
+
+        SlotReference b = new SlotReference("b", new VarcharType(25));
+        ColumnStatisticBuilder columnStatisticBuilderB = new 
ColumnStatisticBuilder()
+                .setNdv(100)
+                .setAvgSizeByte(25)
+                .setNumNulls(0)
+                .setMaxExpr(new StringLiteral("2012-01-01"))
+                .setMaxValue(new VarcharLiteral("2012-01-01").getDouble())
+                .setMinExpr(new StringLiteral("2010-01-01"))
+                .setMinValue(new VarcharLiteral("2010-01-01").getDouble())
+                .setCount(100);
+
+        SlotReference c = new SlotReference("c", new VarcharType(25));
+        ColumnStatisticBuilder columnStatisticBuilderC = new 
ColumnStatisticBuilder()
+                .setNdv(100)
+                .setAvgSizeByte(25)
+                .setNumNulls(0)
+                .setMaxExpr(new StringLiteral("2021-01-01"))
+                .setMaxValue(new VarcharLiteral("2021-01-01").getDouble())
+                .setMinExpr(new StringLiteral("2010-01-01"))
+                .setMinValue(new VarcharLiteral("2010-01-01").getDouble())
+                .setCount(100);
+
+        StatisticsBuilder statsBuilder = new StatisticsBuilder();
+        statsBuilder.setRowCount(100);
+        statsBuilder.putColumnStatistics(a, columnStatisticBuilderA.build());
+        statsBuilder.putColumnStatistics(b, columnStatisticBuilderB.build());
+        statsBuilder.putColumnStatistics(c, columnStatisticBuilderC.build());
+        Statistics baseStats = statsBuilder.build();
+
+        // (2020-2022) > (2010,2012), sel=1
+        Statistics agrtb = new FilterEstimation().estimate(new GreaterThan(a, 
b), baseStats);
+        Assertions.assertEquals(100, agrtb.getRowCount());
+        // (2020-2022) < (2010,2012), sel=0
+        Statistics alessb = new FilterEstimation().estimate(new LessThan(a, 
b), baseStats);
+        Assertions.assertEquals(0, alessb.getRowCount());
+
+        // (2020-2022) > (2010-2021), sel = DEFAULT (0.5)
+        Statistics agrtc = new FilterEstimation().estimate(new GreaterThan(a, 
c), baseStats);
+        Assertions.assertEquals(50, agrtc.getRowCount());
+    }
 }
diff --git a/regression-test/data/nereids_ssb_shape_sf100_p0/shape/q2.2.out 
b/regression-test/data/nereids_ssb_shape_sf100_p0/shape/q2.2.out
index de0b1d9ce05..0a148822bf8 100644
--- a/regression-test/data/nereids_ssb_shape_sf100_p0/shape/q2.2.out
+++ b/regression-test/data/nereids_ssb_shape_sf100_p0/shape/q2.2.out
@@ -10,21 +10,20 @@ PhysicalResultSink
 --------------hashAgg[LOCAL]
 ----------------PhysicalProject
 ------------------hashJoin[INNER_JOIN](lineorder.lo_orderdate = dates.d_datekey)
---------------------PhysicalDistribute
-----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN](lineorder.lo_suppkey = supplier.s_suppkey)
---------------------------PhysicalDistribute
-----------------------------hashJoin[INNER_JOIN](lineorder.lo_partkey = part.p_partkey)
-------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[lineorder]
-------------------------------PhysicalDistribute
---------------------------------PhysicalProject
-----------------------------------filter((part.p_brand >= 'MFGR#2221')(part.p_brand <= 'MFGR#2228'))
-------------------------------------PhysicalOlapScan[part]
---------------------------PhysicalDistribute
+--------------------PhysicalProject
+----------------------hashJoin[INNER_JOIN](lineorder.lo_partkey = part.p_partkey)
+------------------------PhysicalDistribute
+--------------------------hashJoin[INNER_JOIN](lineorder.lo_suppkey = supplier.s_suppkey)
 ----------------------------PhysicalProject
-------------------------------filter((supplier.s_region = 'ASIA'))
---------------------------------PhysicalOlapScan[supplier]
+------------------------------PhysicalOlapScan[lineorder]
+----------------------------PhysicalDistribute
+------------------------------PhysicalProject
+--------------------------------filter((supplier.s_region = 'ASIA'))
+----------------------------------PhysicalOlapScan[supplier]
+------------------------PhysicalDistribute
+--------------------------PhysicalProject
+----------------------------filter((part.p_brand >= 'MFGR#2221')(part.p_brand <= 'MFGR#2228'))
+------------------------------PhysicalOlapScan[part]
 --------------------PhysicalDistribute
 ----------------------PhysicalProject
 ------------------------PhysicalOlapScan[dates]


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org


Reply via email to