This is an automated email from the ASF dual-hosted git repository. morrysnow pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 5dfdacd278 [enhancement](histogram) add histogram syntax and perstist histogram statistics (#15490) 5dfdacd278 is described below commit 5dfdacd278de0025baa51e1f52c31be65701f78a Author: ElvinWei <zhengte....@outlook.com> AuthorDate: Sat Jan 7 00:55:42 2023 +0800 [enhancement](histogram) add histogram syntax and perstist histogram statistics (#15490) Histogram statistics are more expensive to collect, so we collect and persist them separately. This PR does the following work: 1. Add histogram syntax and add keyword `TABLE` 2. Add the task of collecting histogram statistics 3. Persist histogram statistics 4. Replace fastjson with gson 5. Add unit tests... Relevant syntax examples: > Following other databases such as MySQL, the keyword `TABLE` is added. ```SQL -- collect column statistics ANALYZE TABLE statistics_test; -- collect histogram statistics ANALYZE TABLE statistics_test UPDATE HISTOGRAM ON col1,col2; ``` Based on #15317 --- fe/fe-core/src/main/cup/sql_parser.cup | 9 +- .../doris/analysis/AlterColumnStatsStmt.java | 1 - .../org/apache/doris/analysis/AnalyzeStmt.java | 14 +- .../java/org/apache/doris/catalog/FunctionSet.java | 2 + .../doris/catalog/InternalSchemaInitializer.java | 35 +++- .../java/org/apache/doris/catalog/OlapTable.java | 4 + .../doris/nereids/stats/StatsCalculator.java | 3 +- .../trees/expressions/functions/table/Numbers.java | 2 +- .../apache/doris/statistics/AnalysisManager.java | 7 +- .../apache/doris/statistics/AnalysisTaskInfo.java | 7 +- .../apache/doris/statistics/BaseAnalysisTask.java | 7 +- .../java/org/apache/doris/statistics/Bucket.java | 25 +++ .../apache/doris/statistics/ColumnStatistic.java | 42 +---- .../doris/statistics/ColumnStatisticBuilder.java | 13 +- .../org/apache/doris/statistics/Histogram.java | 187 ++++++++++----------- .../apache/doris/statistics/HistogramBuilder.java | 100 +++++++++++ .../org/apache/doris/statistics/HistogramTask.java | 90 
++++++++++ .../apache/doris/statistics/HiveAnalysisTask.java | 4 +- .../doris/statistics/IcebergAnalysisTask.java | 2 +- .../statistics/{Bucket.java => Statistic.java} | 55 +++--- .../doris/statistics/StatisticConstants.java | 2 + .../apache/doris/statistics/StatisticsCache.java | 36 +++- .../doris/statistics/StatisticsCacheLoader.java | 40 ++++- .../doris/statistics/StatisticsRepository.java | 20 ++- .../doris/statistics/util/StatisticsUtil.java | 30 ++++ fe/fe-core/src/main/jflex/sql_scanner.flex | 1 + .../doris/nereids/util/HyperGraphBuilder.java | 2 +- .../apache/doris/statistics/AnalysisJobTest.java | 2 +- .../org/apache/doris/statistics/CacheTest.java | 78 ++++++++- .../org/apache/doris/statistics/HistogramTest.java | 16 +- .../apache/doris/statistics/MVStatisticsTest.java | 2 +- .../doris/statistics/StatsDeriveResultTest.java | 2 +- .../suites/statistics/alter_col_stats.groovy | 3 +- 33 files changed, 603 insertions(+), 240 deletions(-) diff --git a/fe/fe-core/src/main/cup/sql_parser.cup b/fe/fe-core/src/main/cup/sql_parser.cup index 77260ba95f..f1acb245ea 100644 --- a/fe/fe-core/src/main/cup/sql_parser.cup +++ b/fe/fe-core/src/main/cup/sql_parser.cup @@ -615,7 +615,8 @@ terminal String KW_WRITE, KW_YEAR, KW_MTMV, - KW_TYPECAST; + KW_TYPECAST, + KW_HISTOGRAM; terminal COMMA, COLON, DOT, DOTDOTDOT, AT, STAR, LPAREN, RPAREN, SEMICOLON, LBRACKET, RBRACKET, DIVIDE, MOD, ADD, SUBTRACT; terminal BITAND, BITOR, BITXOR, BITNOT; @@ -2648,10 +2649,14 @@ show_create_routine_load_stmt ::= // analyze statment analyze_stmt ::= - KW_ANALYZE table_name:tbl opt_col_list:cols opt_partition_names:partitionNames opt_properties:properties + KW_ANALYZE KW_TABLE table_name:tbl opt_col_list:cols opt_partition_names:partitionNames opt_properties:properties {: RESULT = new AnalyzeStmt(tbl, cols, partitionNames, properties); :} + | KW_ANALYZE KW_TABLE table_name:tbl KW_UPDATE KW_HISTOGRAM KW_ON ident_list:cols opt_properties:properties + {: + RESULT = new AnalyzeStmt(tbl, cols, 
properties); + :} ; // Grant statement diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java index 1af3dffff9..440a6acca9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java @@ -62,7 +62,6 @@ public class AlterColumnStatsStmt extends DdlStmt { .add(ColumnStatistic.NUM_NULLS) .add(ColumnStatistic.MIN_VALUE) .add(ColumnStatistic.MAX_VALUE) - .add(ColumnStatistic.HISTOGRAM) .add(StatsType.DATA_SIZE) .build(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java index ecf34cd23c..d80dda6d43 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java @@ -64,6 +64,8 @@ public class AnalyzeStmt extends DdlStmt { // time to wait for collect statistics public static final String CBO_STATISTICS_TASK_TIMEOUT_SEC = "cbo_statistics_task_timeout_sec"; + public boolean isHistogram = false; + private static final ImmutableSet<String> PROPERTIES_SET = new ImmutableSet.Builder<String>() .add(CBO_STATISTICS_TASK_TIMEOUT_SEC) .build(); @@ -76,7 +78,7 @@ public class AnalyzeStmt extends DdlStmt { private TableIf table; - private final PartitionNames optPartitionNames; + private PartitionNames optPartitionNames; private List<String> optColumnNames; private Map<String, String> optProperties; @@ -85,6 +87,16 @@ public class AnalyzeStmt extends DdlStmt { private final List<String> partitionNames = Lists.newArrayList(); + public AnalyzeStmt(TableName tableName, + List<String> optColumnNames, + Map<String, String> optProperties) { + this.tableName = tableName; + this.optColumnNames = optColumnNames; + wholeTbl = CollectionUtils.isEmpty(optColumnNames); + isHistogram = 
true; + this.optProperties = optProperties; + } + public AnalyzeStmt(TableName tableName, List<String> optColumnNames, PartitionNames optPartitionNames, diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java index 1b437e3e92..d25395177a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java @@ -2612,6 +2612,8 @@ public class FunctionSet<T> { "", "", "", "", "", true, false, true, true)); addBuiltin(AggregateFunction.createBuiltin(HISTOGRAM, Lists.newArrayList(t, Type.DOUBLE, Type.INT), Type.VARCHAR, t, "", "", "", "", "", true, false, true, true)); + addBuiltin(AggregateFunction.createBuiltin(HISTOGRAM, Lists.newArrayList(t, Type.DOUBLE, Type.INT), Type.VARCHAR, t, + "", "", "", "", "", true, false, true, true)); } // Avg diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchemaInitializer.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchemaInitializer.java index eccadac0f6..45061e5287 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchemaInitializer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchemaInitializer.java @@ -79,6 +79,7 @@ public class InternalSchemaInitializer extends Thread { private void createTbl() throws UserException { Env.getCurrentEnv().getInternalCatalog().createTable(buildStatisticsTblStmt()); + Env.getCurrentEnv().getInternalCatalog().createTable(buildHistogramTblStmt()); Env.getCurrentEnv().getInternalCatalog().createTable(buildAnalysisJobTblStmt()); } @@ -115,7 +116,6 @@ public class InternalSchemaInitializer extends Thread { columnDefs.add(new ColumnDef("null_count", TypeDef.create(PrimitiveType.BIGINT))); columnDefs.add(new ColumnDef("min", TypeDef.createVarchar(ScalarType.MAX_VARCHAR_LENGTH))); columnDefs.add(new ColumnDef("max", 
TypeDef.createVarchar(ScalarType.MAX_VARCHAR_LENGTH))); - columnDefs.add(new ColumnDef("histogram", TypeDef.createVarchar(ScalarType.MAX_VARCHAR_LENGTH))); columnDefs.add(new ColumnDef("data_size_in_bytes", TypeDef.create(PrimitiveType.BIGINT))); columnDefs.add(new ColumnDef("update_time", TypeDef.create(PrimitiveType.DATETIME))); String engineName = "olap"; @@ -138,6 +138,39 @@ public class InternalSchemaInitializer extends Thread { return createTableStmt; } + @VisibleForTesting + public CreateTableStmt buildHistogramTblStmt() throws UserException { + TableName tableName = new TableName("", + FeConstants.INTERNAL_DB_NAME, StatisticConstants.HISTOGRAM_TBL_NAME); + List<ColumnDef> columnDefs = new ArrayList<>(); + columnDefs.add(new ColumnDef("id", TypeDef.createVarchar(StatisticConstants.ID_LEN))); + columnDefs.add(new ColumnDef("catalog_id", TypeDef.createVarchar(StatisticConstants.MAX_NAME_LEN))); + columnDefs.add(new ColumnDef("db_id", TypeDef.createVarchar(StatisticConstants.MAX_NAME_LEN))); + columnDefs.add(new ColumnDef("tbl_id", TypeDef.createVarchar(StatisticConstants.MAX_NAME_LEN))); + columnDefs.add(new ColumnDef("idx_id", TypeDef.createVarchar(StatisticConstants.MAX_NAME_LEN))); + columnDefs.add(new ColumnDef("col_id", TypeDef.createVarchar(StatisticConstants.MAX_NAME_LEN))); + columnDefs.add(new ColumnDef("sample_rate", TypeDef.create(PrimitiveType.DOUBLE))); + columnDefs.add(new ColumnDef("buckets", TypeDef.createVarchar(ScalarType.MAX_VARCHAR_LENGTH))); + columnDefs.add(new ColumnDef("update_time", TypeDef.create(PrimitiveType.DATETIME))); + String engineName = "olap"; + KeysDesc keysDesc = new KeysDesc(KeysType.UNIQUE_KEYS, + Lists.newArrayList("id")); + DistributionDesc distributionDesc = new HashDistributionDesc( + StatisticConstants.STATISTIC_TABLE_BUCKET_COUNT, + Lists.newArrayList("id")); + Map<String, String> properties = new HashMap<String, String>() { + { + put("replication_num", String.valueOf(Config.statistic_internal_table_replica_num)); + 
} + }; + CreateTableStmt createTableStmt = new CreateTableStmt(true, false, + tableName, columnDefs, engineName, keysDesc, null, distributionDesc, + properties, null, "Doris internal statistics table, don't modify it", null); + StatisticsUtil.analyze(createTableStmt); + // createTableStmt.setClusterName(SystemInfoService.DEFAULT_CLUSTER); + return createTableStmt; + } + @VisibleForTesting public CreateTableStmt buildAnalysisJobTblStmt() throws UserException { TableName tableName = new TableName("", diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java index 2cde04593d..db628618d6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java @@ -51,6 +51,7 @@ import org.apache.doris.statistics.AnalysisTaskInfo; import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisType; import org.apache.doris.statistics.AnalysisTaskScheduler; import org.apache.doris.statistics.BaseAnalysisTask; +import org.apache.doris.statistics.HistogramTask; import org.apache.doris.statistics.MVAnalysisTask; import org.apache.doris.statistics.OlapAnalysisTask; import org.apache.doris.system.Backend; @@ -1005,6 +1006,9 @@ public class OlapTable extends Table { @Override public BaseAnalysisTask createAnalysisTask(AnalysisTaskScheduler scheduler, AnalysisTaskInfo info) { + if (info.analysisType.equals(AnalysisType.HISTOGRAM)) { + return new HistogramTask(scheduler, info); + } if (info.analysisType.equals(AnalysisType.COLUMN)) { return new OlapAnalysisTask(scheduler, info); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java index 2b2bf83e6b..e6ae621ca8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java @@ -290,6 +290,7 @@ public class StatsCalculator extends DefaultPlanVisitor<StatsDeriveResult, Void> return computeTopN(topN); } + @Override public StatsDeriveResult visitPhysicalLocalQuickSort(PhysicalLocalQuickSort<? extends Plan> sort, Void context) { return groupExpression.childStatistics(0); } @@ -447,7 +448,6 @@ public class StatsCalculator extends DefaultPlanVisitor<StatsDeriveResult, Void> stats.dataSize < 0 ? stats.dataSize : stats.dataSize * groupingSetNum, stats.minValue, stats.maxValue, - stats.histogram, stats.selectivity, stats.minExpr, stats.maxExpr, @@ -538,7 +538,6 @@ public class StatsCalculator extends DefaultPlanVisitor<StatsDeriveResult, Void> leftStats.dataSize + rightStats.dataSize, Math.min(leftStats.minValue, rightStats.minValue), Math.max(leftStats.maxValue, rightStats.maxValue), - null, 1.0 / (leftStats.ndv + rightStats.ndv), leftStats.minExpr, leftStats.maxExpr, diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/table/Numbers.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/table/Numbers.java index 0aa0fb93a7..fc94a3dc09 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/table/Numbers.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/table/Numbers.java @@ -71,7 +71,7 @@ public class Numbers extends TableValuedFunction { Map<Id, ColumnStatistic> columnToStatistics = Maps.newHashMap(); ColumnStatistic columnStat = new ColumnStatistic(rowNum, rowNum, 8, 0, 8, 0, rowNum - 1, - null, 1.0 / rowNum, new IntLiteral(0, Type.BIGINT), new IntLiteral(rowNum - 1, Type.BIGINT), false); + 1.0 / rowNum, new IntLiteral(0, Type.BIGINT), new IntLiteral(rowNum - 1, Type.BIGINT), false); columnToStatistics.put(slots.get(0).getExprId(), columnStat); return new StatsDeriveResult(rowNum, columnToStatistics); } catch (Exception t) { 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index cbfd217158..28fdd4d450 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -43,6 +43,8 @@ import java.util.concurrent.ConcurrentMap; public class AnalysisManager { + public final AnalysisTaskScheduler taskScheduler; + private static final Logger LOG = LogManager.getLogger(AnalysisManager.class); private static final String UPDATE_JOB_STATE_SQL_TEMPLATE = "UPDATE " @@ -51,8 +53,6 @@ public class AnalysisManager { private final ConcurrentMap<Long, Map<Long, AnalysisTaskInfo>> analysisJobIdToTaskMap; - public final AnalysisTaskScheduler taskScheduler; - private StatisticsCache statisticsCache; private final AnalysisTaskExecutor taskExecutor; @@ -76,10 +76,11 @@ public class AnalysisManager { if (colNames != null) { for (String colName : colNames) { long taskId = Env.getCurrentEnv().getNextId(); + AnalysisType analType = analyzeStmt.isHistogram ? 
AnalysisType.HISTOGRAM : AnalysisType.COLUMN; AnalysisTaskInfo analysisTaskInfo = new AnalysisTaskInfoBuilder().setJobId(jobId) .setTaskId(taskId).setCatalogName(catalogName).setDbName(db) .setTblName(tbl.getTbl()).setColName(colName).setJobType(JobType.MANUAL) - .setAnalysisMethod(AnalysisMethod.FULL).setAnalysisType(AnalysisType.COLUMN) + .setAnalysisMethod(AnalysisMethod.FULL).setAnalysisType(analType) .setState(AnalysisState.PENDING) .setScheduleType(ScheduleType.ONCE).build(); try { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfo.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfo.java index 6c2243b261..b0b1c370cb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfo.java @@ -34,7 +34,8 @@ public class AnalysisTaskInfo { public enum AnalysisType { COLUMN, - INDEX + INDEX, + HISTOGRAM } public enum JobType { @@ -69,6 +70,10 @@ public class AnalysisTaskInfo { public final AnalysisType analysisType; + // TODO: define constants or get them from configuration properties + public final double sampleRate = 0.2; + public final int maxBucketNum = 128; + public String message; // finished or failed diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java index 81c4601647..54793881ab 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java @@ -26,9 +26,13 @@ import org.apache.doris.qe.StmtExecutor; import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisType; import com.google.common.annotations.VisibleForTesting; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; public abstract class BaseAnalysisTask { + public static final Logger LOG = 
LogManager.getLogger(BaseAnalysisTask.class); + protected static final String INSERT_PART_STATISTICS = "INSERT INTO " + "${internalDB}.${columnStatTbl}" + " SELECT " @@ -119,7 +123,8 @@ public abstract class BaseAnalysisTask { info, AnalysisState.FAILED, String.format("Table with name %s not exists", info.tblName), System.currentTimeMillis()); } - if (info.analysisType != null && info.analysisType.equals(AnalysisType.COLUMN)) { + if (info.analysisType != null && (info.analysisType.equals(AnalysisType.COLUMN) + || info.analysisType.equals(AnalysisType.HISTOGRAM))) { col = tbl.getColumn(info.colName); if (col == null) { Env.getCurrentEnv().getAnalysisManager().updateTaskStatus( diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/Bucket.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/Bucket.java index adcbd27d73..645a9d68e1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/Bucket.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/Bucket.java @@ -18,6 +18,15 @@ package org.apache.doris.statistics; import org.apache.doris.analysis.LiteralExpr; +import org.apache.doris.catalog.Type; +import org.apache.doris.common.AnalysisException; +import org.apache.doris.statistics.util.StatisticsUtil; + +import com.google.common.collect.Lists; +import com.google.gson.JsonArray; +import com.google.gson.JsonObject; + +import java.util.List; public class Bucket { public LiteralExpr lower; @@ -65,4 +74,20 @@ public class Bucket { public void setNdv(int ndv) { this.ndv = ndv; } + + public static List<Bucket> deserializeFromjson(Type datatype, JsonArray jsonArray) + throws AnalysisException { + List<Bucket> buckets = Lists.newArrayList(); + for (int i = 0; i < jsonArray.size(); i++) { + JsonObject bucketJson = jsonArray.get(i).getAsJsonObject(); + Bucket bucket = new Bucket(); + bucket.lower = StatisticsUtil.readableValue(datatype, bucketJson.get("lower").getAsString()); + bucket.upper = StatisticsUtil.readableValue(datatype, 
bucketJson.get("upper").getAsString()); + bucket.count = bucketJson.get("count").getAsInt(); + bucket.preSum = bucketJson.get("pre_sum").getAsInt(); + bucket.ndv = bucketJson.get("ndv").getAsInt(); + buckets.add(bucket); + } + return buckets; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java index d128e66c50..3a47e368c8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java @@ -19,12 +19,7 @@ package org.apache.doris.statistics; import org.apache.doris.analysis.LiteralExpr; import org.apache.doris.catalog.Column; -import org.apache.doris.catalog.DatabaseIf; -import org.apache.doris.catalog.Env; -import org.apache.doris.catalog.OlapTable; -import org.apache.doris.catalog.TableIf; import org.apache.doris.catalog.Type; -import org.apache.doris.datasource.CatalogIf; import org.apache.doris.statistics.util.InternalQueryResult.ResultRow; import org.apache.doris.statistics.util.StatisticsUtil; @@ -42,13 +37,12 @@ public class ColumnStatistic { public static final StatsType NUM_NULLS = StatsType.NUM_NULLS; public static final StatsType MIN_VALUE = StatsType.MIN_VALUE; public static final StatsType MAX_VALUE = StatsType.MAX_VALUE; - public static final StatsType HISTOGRAM = StatsType.HISTOGRAM; private static final Logger LOG = LogManager.getLogger(ColumnStatistic.class); public static ColumnStatistic DEFAULT = new ColumnStatisticBuilder().setAvgSizeByte(1).setNdv(1) .setNumNulls(1).setCount(1).setMaxValue(Double.MAX_VALUE).setMinValue(Double.MIN_VALUE) - .setHistogram(Histogram.defaultHistogram()).setSelectivity(1.0).setIsUnknown(true) + .setSelectivity(1.0).setIsUnknown(true) .build(); public static final Set<Type> MAX_MIN_UNSUPPORTED_TYPE = new HashSet<>(); @@ -68,7 +62,6 @@ public class ColumnStatistic { public final double avgSizeByte; public 
final double minValue; public final double maxValue; - public final Histogram histogram; public final boolean isUnKnown; /* selectivity of Column T1.A: @@ -90,8 +83,7 @@ public class ColumnStatistic { public ColumnStatistic(double count, double ndv, double avgSizeByte, double numNulls, double dataSize, double minValue, double maxValue, - Histogram histogram, double selectivity, LiteralExpr minExpr, - LiteralExpr maxExpr, boolean isUnKnown) { + double selectivity, LiteralExpr minExpr, LiteralExpr maxExpr, boolean isUnKnown) { this.count = count; this.ndv = ndv; this.avgSizeByte = avgSizeByte; @@ -99,7 +91,6 @@ public class ColumnStatistic { this.dataSize = dataSize; this.minValue = minValue; this.maxValue = maxValue; - this.histogram = histogram; this.selectivity = selectivity; this.minExpr = minExpr; this.maxExpr = maxExpr; @@ -127,7 +118,7 @@ public class ColumnStatistic { long dbID = Long.parseLong(resultRow.getColumnValue("db_id")); long tblId = Long.parseLong(resultRow.getColumnValue("tbl_id")); String colName = resultRow.getColumnValue("col_id"); - Column col = findColumn(catalogId, dbID, tblId, idxId, colName); + Column col = StatisticsUtil.findColumn(catalogId, dbID, tblId, idxId, colName); if (col == null) { LOG.warn("Failed to deserialize column statistics, ctlId: {} dbId: {}" + "tblId: {} column: {} not exists", @@ -136,10 +127,8 @@ public class ColumnStatistic { } String min = resultRow.getColumnValue("min"); String max = resultRow.getColumnValue("max"); - String histogram = resultRow.getColumnValue("histogram"); columnStatisticBuilder.setMinValue(StatisticsUtil.convertToDouble(col.getType(), min)); columnStatisticBuilder.setMaxValue(StatisticsUtil.convertToDouble(col.getType(), max)); - columnStatisticBuilder.setHistogram(Histogram.deserializeFromJson(col.getType(), histogram)); columnStatisticBuilder.setMaxExpr(StatisticsUtil.readableValue(col.getType(), max)); columnStatisticBuilder.setMinExpr(StatisticsUtil.readableValue(col.getType(), min)); 
columnStatisticBuilder.setSelectivity(1.0); @@ -158,7 +147,7 @@ public class ColumnStatistic { public ColumnStatistic copy() { return new ColumnStatisticBuilder().setCount(count).setNdv(ndv).setAvgSizeByte(avgSizeByte) .setNumNulls(numNulls).setDataSize(dataSize).setMinValue(minValue) - .setMaxValue(maxValue).setHistogram(histogram).setMinExpr(minExpr).setMaxExpr(maxExpr) + .setMaxValue(maxValue).setMinExpr(minExpr).setMaxExpr(maxExpr) .setSelectivity(selectivity).setIsUnknown(isUnKnown).build(); } @@ -182,7 +171,6 @@ public class ColumnStatistic { .setDataSize(Math.ceil(dataSize * ratio)) .setMinValue(minValue) .setMaxValue(maxValue) - .setHistogram(histogram) .setMinExpr(minExpr) .setMaxExpr(maxExpr) .setSelectivity(newSelectivity) @@ -194,28 +182,6 @@ public class ColumnStatistic { return Math.max(this.minValue, other.minValue) <= Math.min(this.maxValue, other.maxValue); } - public static Column findColumn(long catalogId, long dbId, long tblId, long idxId, String columnName) { - CatalogIf<DatabaseIf<TableIf>> catalogIf = Env.getCurrentEnv().getCatalogMgr().getCatalog(catalogId); - if (catalogIf == null) { - return null; - } - DatabaseIf<TableIf> db = catalogIf.getDb(dbId).orElse(null); - if (db == null) { - return null; - } - TableIf tblIf = db.getTable(tblId).orElse(null); - if (tblIf == null) { - return null; - } - if (idxId != -1) { - if (tblIf instanceof OlapTable) { - OlapTable olapTable = (OlapTable) tblIf; - return olapTable.getIndexIdToMeta().get(idxId).getColumnByName(columnName); - } - } - return tblIf.getColumn(columnName); - } - public ColumnStatistic updateBySelectivity(double selectivity, double rowCount) { if (isUnKnown) { return DEFAULT; diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticBuilder.java index 4714b3a1bd..57353eb22b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticBuilder.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticBuilder.java @@ -27,7 +27,6 @@ public class ColumnStatisticBuilder { private double dataSize; private double minValue; private double maxValue; - private Histogram histogram; private double selectivity = 1.0; private LiteralExpr minExpr; private LiteralExpr maxExpr; @@ -45,7 +44,6 @@ public class ColumnStatisticBuilder { this.dataSize = columnStatistic.dataSize; this.minValue = columnStatistic.minValue; this.maxValue = columnStatistic.maxValue; - this.histogram = columnStatistic.histogram; this.selectivity = columnStatistic.selectivity; this.minExpr = columnStatistic.minExpr; this.maxExpr = columnStatistic.maxExpr; @@ -87,11 +85,6 @@ public class ColumnStatisticBuilder { return this; } - public ColumnStatisticBuilder setHistogram(Histogram histogram) { - this.histogram = histogram; - return this; - } - public ColumnStatisticBuilder setSelectivity(double selectivity) { this.selectivity = selectivity; return this; @@ -140,10 +133,6 @@ public class ColumnStatisticBuilder { return maxValue; } - public Histogram getHistogram() { - return histogram; - } - public double getSelectivity() { return selectivity; } @@ -162,6 +151,6 @@ public class ColumnStatisticBuilder { public ColumnStatistic build() { return new ColumnStatistic(count, ndv, avgSizeByte, numNulls, - dataSize, minValue, maxValue, histogram, selectivity, minExpr, maxExpr, isUnknown); + dataSize, minValue, maxValue, selectivity, minExpr, maxExpr, isUnknown); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/Histogram.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/Histogram.java index 0516c1fabf..fec57ac4c6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/Histogram.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/Histogram.java @@ -17,14 +17,15 @@ package org.apache.doris.statistics; -import org.apache.doris.catalog.PrimitiveType; +import org.apache.doris.catalog.Column; import 
org.apache.doris.catalog.Type; +import org.apache.doris.statistics.util.InternalQueryResult.ResultRow; import org.apache.doris.statistics.util.StatisticsUtil; -import com.alibaba.fastjson2.JSON; -import com.alibaba.fastjson2.JSONArray; -import com.alibaba.fastjson2.JSONObject; import com.google.common.collect.Lists; +import com.google.gson.JsonArray; +import com.google.gson.JsonObject; +import com.google.gson.JsonParser; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.parquet.Strings; @@ -34,71 +35,71 @@ import java.util.List; public class Histogram { private static final Logger LOG = LogManager.getLogger(Histogram.class); - private Type dataType; + public final Type dataType; - private int maxBucketSize; - private int bucketSize; - private float sampleRate; + public final int maxBucketNum; - private List<Bucket> buckets; + public final int bucketNum; - public Histogram(Type dataType) { - this.dataType = dataType; - } + public final double sampleRate; - public Type getDataType() { - return dataType; - } + public final List<Bucket> buckets; - public void setDataType(Type dataType) { + public Histogram(Type dataType, int maxBucketNum, int bucketNum, + double sampleRate, List<Bucket> buckets) { this.dataType = dataType; + this.maxBucketNum = maxBucketNum; + this.bucketNum = bucketNum; + this.sampleRate = sampleRate; + this.buckets = buckets; } - public int getMaxBucketSize() { - return maxBucketSize; - } + public static Histogram DEFAULT = new HistogramBuilder() + .setDataType(Type.INVALID).setMaxBucketNum(1) + .setBucketNum(0).setSampleRate(1.0).setBuckets(Lists.newArrayList()).build(); - public void setMaxBucketSize(int maxBucketSize) { - this.maxBucketSize = maxBucketSize; - } + // TODO: use thrift + public static Histogram fromResultRow(ResultRow resultRow) { + try { + HistogramBuilder histogramBuilder = new HistogramBuilder(); + + long catalogId = Long.parseLong(resultRow.getColumnValue("catalog_id")); + long 
idxId = Long.parseLong(resultRow.getColumnValue("idx_id")); + long dbId = Long.parseLong(resultRow.getColumnValue("db_id")); + long tblId = Long.parseLong(resultRow.getColumnValue("tbl_id")); + + String colName = resultRow.getColumnValue("col_id"); + Column col = StatisticsUtil.findColumn(catalogId, dbId, tblId, idxId, colName); + if (col == null) { + LOG.warn("Failed to deserialize histogram statistics, ctlId: {} dbId: {}" + + "tblId: {} column: {} not exists", + catalogId, dbId, tblId, colName); + return Histogram.DEFAULT; + } - public int getBucketSize() { - return bucketSize; - } + double sampleRate = Double.parseDouble(resultRow.getColumnValue("sample_rate")); + histogramBuilder.setSampleRate(sampleRate); + histogramBuilder.setDataType(col.getType()); - public void setBucketSize(int bucketSize) { - this.bucketSize = bucketSize; - } + String json = resultRow.getColumnValue("buckets"); + JsonObject jsonObj = JsonParser.parseString(json).getAsJsonObject(); - public float getSampleRate() { - return sampleRate; - } + int maxBucketNum = jsonObj.get("max_bucket_num").getAsInt(); + histogramBuilder.setMaxBucketNum(maxBucketNum); - public void setSampleRate(float sampleRate) { - if (sampleRate < 0f || sampleRate > 1f) { - this.sampleRate = 1f; - } else { - this.sampleRate = sampleRate; - } - } + int bucketNum = jsonObj.get("bucket_num").getAsInt(); + histogramBuilder.setBucketNum(bucketNum); - public void setBuckets(List<Bucket> buckets) { - this.buckets = buckets; - } - - public List<Bucket> getBuckets() { - return buckets; - } + JsonArray jsonArray = jsonObj.getAsJsonArray("buckets"); + List<Bucket> buckets = Bucket.deserializeFromjson(col.getType(), jsonArray); + histogramBuilder.setBuckets(buckets); - public static Histogram defaultHistogram() { - Type type = Type.fromPrimitiveType(PrimitiveType.INVALID_TYPE); - List<Bucket> buckets = Lists.newArrayList(); - Histogram histogram = new Histogram(type); - histogram.setMaxBucketSize(0); - histogram.setBucketSize(0); - 
histogram.setSampleRate(1.0f); - histogram.setBuckets(buckets); - return histogram; + return histogramBuilder.build(); + } catch (Exception e) { + e.printStackTrace(); + LOG.warn("Failed to deserialize histogram statistics.", e); + return Histogram.DEFAULT; + } } /** @@ -111,37 +112,28 @@ public class Histogram { } try { - Histogram histogram = new Histogram(datatype); - JSONObject histogramJson = JSON.parseObject(json); - - List<Bucket> buckets = Lists.newArrayList(); - JSONArray jsonArray = histogramJson.getJSONArray("buckets"); - - for (int i = 0; i < jsonArray.size(); i++) { - JSONObject bucketJson = jsonArray.getJSONObject(i); - Bucket bucket = new Bucket(); - bucket.lower = StatisticsUtil.readableValue(datatype, bucketJson.get("lower").toString()); - bucket.upper = StatisticsUtil.readableValue(datatype, bucketJson.get("upper").toString()); - bucket.count = bucketJson.getIntValue("count"); - bucket.preSum = bucketJson.getIntValue("pre_sum"); - bucket.ndv = bucketJson.getIntValue("ndv"); - buckets.add(bucket); - } + HistogramBuilder histogramBuilder = new HistogramBuilder(); - histogram.setBuckets(buckets); + histogramBuilder.setDataType(datatype); - int maxBucketSize = histogramJson.getIntValue("max_bucket_size"); - histogram.setMaxBucketSize(maxBucketSize); + JsonObject histogramJson = JsonParser.parseString(json).getAsJsonObject(); + JsonArray jsonArray = histogramJson.getAsJsonArray("buckets"); + List<Bucket> buckets = Bucket.deserializeFromjson(datatype, jsonArray); - int bucketSize = histogramJson.getIntValue("bucket_size"); - histogram.setBucketSize(bucketSize); + histogramBuilder.setBuckets(buckets); - float sampleRate = histogramJson.getFloatValue("sample_rate"); - histogram.setSampleRate(sampleRate); + int maxBucketSize = histogramJson.get("max_bucket_num").getAsInt(); + histogramBuilder.setMaxBucketNum(maxBucketSize); - return histogram; + int bucketSize = histogramJson.get("bucket_num").getAsInt(); + histogramBuilder.setBucketNum(bucketSize); + + 
float sampleRate = histogramJson.get("sample_rate").getAsFloat(); + histogramBuilder.setSampleRate(sampleRate); + + return histogramBuilder.build(); } catch (Throwable e) { - LOG.warn("deserialize from json error, input json string: {}", json, e); + LOG.error("deserialize from json error.", e); } return null; @@ -155,26 +147,25 @@ public class Histogram { return ""; } - JSONObject histogramJson = new JSONObject(); - histogramJson.put("max_bucket_size", histogram.maxBucketSize); - histogramJson.put("bucket_size", histogram.bucketSize); - histogramJson.put("sample_rate", histogram.sampleRate); - - JSONArray bucketsJsonArray = new JSONArray(); - histogramJson.put("buckets", bucketsJsonArray); - - if (histogram.buckets != null) { - for (Bucket bucket : histogram.buckets) { - JSONObject bucketJson = new JSONObject(); - bucketJson.put("count", bucket.count); - bucketJson.put("pre_sum", bucket.preSum); - bucketJson.put("ndv", bucket.ndv); - bucketJson.put("upper", bucket.upper.getStringValue()); - bucketJson.put("lower", bucket.lower.getStringValue()); - bucketsJsonArray.add(bucketJson); - } + JsonObject histogramJson = new JsonObject(); + + histogramJson.addProperty("max_bucket_num", histogram.maxBucketNum); + histogramJson.addProperty("bucket_num", histogram.bucketNum); + histogramJson.addProperty("sample_rate", histogram.sampleRate); + + JsonArray bucketsJsonArray = new JsonArray(); + histogramJson.add("buckets", bucketsJsonArray); + + for (Bucket bucket : histogram.buckets) { + JsonObject bucketJson = new JsonObject(); + bucketJson.addProperty("count", bucket.count); + bucketJson.addProperty("pre_sum", bucket.preSum); + bucketJson.addProperty("ndv", bucket.ndv); + bucketJson.addProperty("upper", bucket.upper.getStringValue()); + bucketJson.addProperty("lower", bucket.lower.getStringValue()); + bucketsJsonArray.add(bucketJson); } - return histogramJson.toJSONString(); + return histogramJson.toString(); } } diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramBuilder.java new file mode 100644 index 0000000000..dcc2299882 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramBuilder.java @@ -0,0 +1,100 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.statistics; + +import org.apache.doris.catalog.Type; + +import java.util.Comparator; +import java.util.List; + +public class HistogramBuilder { + private Type dataType; + + private int maxBucketNum; + + private int bucketNum; + + private double sampleRate; + + private List<Bucket> buckets; + + public HistogramBuilder() { + } + + public HistogramBuilder(Histogram histogram) { + this.dataType = histogram.dataType; + this.maxBucketNum = histogram.maxBucketNum; + this.bucketNum = histogram.bucketNum; + this.sampleRate = histogram.sampleRate; + this.buckets = histogram.buckets; + } + + public HistogramBuilder setDataType(Type dataType) { + this.dataType = dataType; + return this; + } + + public HistogramBuilder setMaxBucketNum(int maxBucketNum) { + this.maxBucketNum = maxBucketNum; + return this; + } + + public HistogramBuilder setBucketNum(int bucketNum) { + this.bucketNum = bucketNum; + return this; + } + + public HistogramBuilder setSampleRate(double sampleRate) { + if (sampleRate < 0 || sampleRate > 1.0) { + this.sampleRate = 1.0; + } else { + this.sampleRate = sampleRate; + } + return this; + } + + public HistogramBuilder setBuckets(List<Bucket> buckets) { + buckets.sort(Comparator.comparing(Bucket::getLower)); + this.buckets = buckets; + return this; + } + + public Type getDataType() { + return dataType; + } + + public int getMaxBucketNum() { + return maxBucketNum; + } + + public int getBucketNum() { + return bucketNum; + } + + public double getSampleRate() { + return sampleRate; + } + + public List<Bucket> getBuckets() { + return buckets; + } + + public Histogram build() { + return new Histogram(dataType, maxBucketNum, bucketNum, sampleRate, buckets); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramTask.java new file mode 100644 index 0000000000..cc349e0425 --- /dev/null +++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramTask.java @@ -0,0 +1,90 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.statistics; + +import org.apache.doris.catalog.Env; +import org.apache.doris.common.FeConstants; +import org.apache.doris.qe.AutoCloseConnectContext; +import org.apache.doris.qe.StmtExecutor; +import org.apache.doris.statistics.util.StatisticsUtil; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.commons.text.StringSubstitutor; + +import java.util.HashMap; +import java.util.Map; + +/** + * Each task analyze one column. 
+ */ +public class HistogramTask extends BaseAnalysisTask { + + /** To avoid too much data, use the following efficient sampling method */ + private static final String ANALYZE_HISTOGRAM_SQL_TEMPLATE = "INSERT INTO " + + "${internalDB}.${histogramStatTbl} " + + "SELECT " + + " CONCAT(${tblId}, '-', ${idxId}, '-', '${colId}') AS id, " + + " ${catalogId} AS catalog_id, " + + " ${dbId} AS db_id, " + + " ${tblId} AS tbl_id, " + + " ${idxId} AS idx_id, " + + " '${colId}' AS col_id, " + + " ${sampleRate} AS sample_rate, " + + " `HISTOGRAM`(`${colName}`, 1, ${maxBucketNum}) AS buckets, " + + " NOW() AS create_time " + + "FROM " + + " `${dbName}`.`${tblName}` TABLESAMPLE (${percentValue} PERCENT)"; + + @VisibleForTesting + public HistogramTask() { + super(); + } + + public HistogramTask(AnalysisTaskScheduler analysisTaskScheduler, AnalysisTaskInfo info) { + super(analysisTaskScheduler, info); + } + + @Override + public void execute() throws Exception { + Map<String, String> params = new HashMap<>(); + params.put("internalDB", FeConstants.INTERNAL_DB_NAME); + params.put("histogramStatTbl", StatisticConstants.HISTOGRAM_TBL_NAME); + params.put("catalogId", String.valueOf(catalog.getId())); + params.put("dbId", String.valueOf(db.getId())); + params.put("tblId", String.valueOf(tbl.getId())); + params.put("idxId", "-1"); + params.put("colId", String.valueOf(info.colName)); + params.put("dbName", info.dbName); + params.put("tblName", String.valueOf(info.tblName)); + params.put("colName", String.valueOf(info.colName)); + params.put("sampleRate", String.valueOf(info.sampleRate)); + params.put("maxBucketNum", String.valueOf(info.maxBucketNum)); + params.put("percentValue", String.valueOf((int) (info.sampleRate * 100))); + + StringSubstitutor stringSubstitutor = new StringSubstitutor(params); + String histogramSql = stringSubstitutor.replace(ANALYZE_HISTOGRAM_SQL_TEMPLATE); + LOG.info("SQL to collect the histogram:\n {}", histogramSql); + + try (AutoCloseConnectContext r = 
StatisticsUtil.buildConnectContext()) { + this.stmtExecutor = new StmtExecutor(r.connectContext, histogramSql); + this.stmtExecutor.execute(); + } + + Env.getCurrentEnv().getStatisticsCache().refreshSync(tbl.getId(), -1, col.getName()); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/HiveAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/HiveAnalysisTask.java index af2103f13a..44be5bf078 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HiveAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HiveAnalysisTask.java @@ -58,12 +58,12 @@ public class HiveAnalysisTask extends HMSAnalysisTask { private static final String ANALYZE_PARTITION_SQL_TEMPLATE = "INSERT INTO " + "${internalDB}.${columnStatTbl}" + " values ('${id}','${catalogId}', '${dbId}', '${tblId}', '-1', '${colId}', '${partId}', " - + "${numRows}, ${ndv}, ${nulls}, '${min}', '${max}', NULL, ${dataSize}, '${update_time}')"; + + "${numRows}, ${ndv}, ${nulls}, '${min}', '${max}', ${dataSize}, '${update_time}')"; private static final String ANALYZE_TABLE_SQL_TEMPLATE = "INSERT INTO " + "${internalDB}.${columnStatTbl}" + " values ('${id}','${catalogId}', '${dbId}', '${tblId}', NULL, '${colId}', NULL, " - + "${numRows}, ${ndv}, ${nulls}, '${min}', '${max}', NULL, ${dataSize}, '${update_time}')"; + + "${numRows}, ${ndv}, ${nulls}, '${min}', '${max}', ${dataSize}, '${update_time}')"; @Override protected void getColumnStatsByMeta() throws Exception { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/IcebergAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/IcebergAnalysisTask.java index 4dfc68a946..fcb6abf457 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/IcebergAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/IcebergAnalysisTask.java @@ -52,7 +52,7 @@ public class IcebergAnalysisTask extends HMSAnalysisTask { private static final String 
INSERT_TABLE_SQL_TEMPLATE = "INSERT INTO " + "${internalDB}.${columnStatTbl}" + " values ('${id}','${catalogId}', '${dbId}', '${tblId}', '-1', '${colId}', NULL, " - + "${numRows}, 0, ${nulls}, '0', '0', NULL, ${dataSize}, '${update_time}')"; + + "${numRows}, 0, ${nulls}, '0', '0', ${dataSize}, '${update_time}')"; @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/Bucket.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistic.java similarity index 51% copy from fe/fe-core/src/main/java/org/apache/doris/statistics/Bucket.java copy to fe/fe-core/src/main/java/org/apache/doris/statistics/Statistic.java index adcbd27d73..b1ecded2f4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/Bucket.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistic.java @@ -17,52 +17,39 @@ package org.apache.doris.statistics; -import org.apache.doris.analysis.LiteralExpr; +public class Statistic { -public class Bucket { - public LiteralExpr lower; - public LiteralExpr upper; - public int count; - public int preSum; - public int ndv; + public Histogram histogram; - public LiteralExpr getLower() { - return lower; - } - - public void setLower(LiteralExpr lower) { - this.lower = lower; - } - - public LiteralExpr getUpper() { - return upper; - } - - public void setUpper(LiteralExpr upper) { - this.upper = upper; - } + public ColumnStatistic columnStatistic; - public int getCount() { - return count; + public Statistic() { } - public void setCount(int count) { - this.count = count; + public Statistic(Histogram histogram, ColumnStatistic columnStatistic) { + this.histogram = histogram; + this.columnStatistic = columnStatistic; } - public int getPreSum() { - return preSum; + public Histogram getHistogram() { + if (histogram != null) { + return histogram; + } + return Histogram.DEFAULT; } - public void setPreSum(int preSum) { - this.preSum = preSum; + public void setHistogram(Histogram histogram) { + this.histogram = histogram; } 
- public int getNdv() { - return ndv; + public ColumnStatistic getColumnStatistic() { + if (columnStatistic != null) { + return columnStatistic; + } + return ColumnStatistic.DEFAULT; } - public void setNdv(int ndv) { - this.ndv = ndv; + public void setColumnStatistic(ColumnStatistic columnStatistic) { + this.columnStatistic = columnStatistic; } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java index dc89cc42fa..df34c2f9d6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java @@ -22,6 +22,8 @@ import java.util.concurrent.TimeUnit; public class StatisticConstants { public static final String STATISTIC_TBL_NAME = "column_statistics"; + public static final String HISTOGRAM_TBL_NAME = "histogram_statistics"; + public static final String ANALYSIS_JOB_TABLE = "analysis_jobs"; public static final int MAX_NAME_LEN = 64; diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java index b92873460c..08c58800ac 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java @@ -31,7 +31,7 @@ public class StatisticsCache { private static final Logger LOG = LogManager.getLogger(StatisticsCache.class); - private final AsyncLoadingCache<StatisticsCacheKey, ColumnStatistic> cache = Caffeine.newBuilder() + private final AsyncLoadingCache<StatisticsCacheKey, Statistic> cache = Caffeine.newBuilder() .maximumSize(StatisticConstants.STATISTICS_RECORDS_CACHE_SIZE) .expireAfterAccess(Duration.ofHours(StatisticConstants.STATISTICS_CACHE_VALID_DURATION_IN_HOURS)) .refreshAfterWrite(Duration.ofHours(StatisticConstants.STATISTICS_CACHE_REFRESH_INTERVAL)) 
@@ -42,29 +42,49 @@ public class StatisticsCache { } public ColumnStatistic getColumnStatistics(long tblId, long idxId, String colName) { - if (ConnectContext.get().getSessionVariable().internalSession) { + ConnectContext ctx = ConnectContext.get(); + if (ctx != null && ctx.getSessionVariable().internalSession) { return ColumnStatistic.DEFAULT; } StatisticsCacheKey k = new StatisticsCacheKey(tblId, idxId, colName); try { - CompletableFuture<ColumnStatistic> f = cache.get(k); - if (f.isDone()) { - return f.get(); + CompletableFuture<Statistic> f = cache.get(k); + if (f.isDone() && f.get() != null) { + return f.get().getColumnStatistic(); } } catch (Exception e) { LOG.warn("Unexpected exception while returning ColumnStatistic", e); - return ColumnStatistic.DEFAULT; } return ColumnStatistic.DEFAULT; } + public Histogram getHistogram(long tblId, String colName) { + return getHistogram(tblId, -1, colName); + } + + public Histogram getHistogram(long tblId, long idxId, String colName) { + ConnectContext ctx = ConnectContext.get(); + if (ctx != null && ctx.getSessionVariable().internalSession) { + return Histogram.DEFAULT; + } + StatisticsCacheKey k = new StatisticsCacheKey(tblId, idxId, colName); + try { + CompletableFuture<Statistic> f = cache.get(k); + if (f.isDone() && f.get() != null) { + return f.get().getHistogram(); + } + } catch (Exception e) { + LOG.warn("Unexpected exception while returning Histogram", e); + } + return Histogram.DEFAULT; + } + // TODO: finish this method. 
public void eraseExpiredCache(long tblId, long idxId, String colName) { cache.synchronous().invalidate(new StatisticsCacheKey(tblId, idxId, colName)); } - public void updateCache(long tblId, long idxId, String colName, ColumnStatistic statistic) { - + public void updateCache(long tblId, long idxId, String colName, Statistic statistic) { cache.synchronous().put(new StatisticsCacheKey(tblId, idxId, colName), statistic); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCacheLoader.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCacheLoader.java index dea76b6201..08781e5689 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCacheLoader.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCacheLoader.java @@ -35,7 +35,7 @@ import java.util.concurrent.CompletableFuture; import java.util.concurrent.CompletionException; import java.util.concurrent.Executor; -public class StatisticsCacheLoader implements AsyncCacheLoader<StatisticsCacheKey, ColumnStatistic> { +public class StatisticsCacheLoader implements AsyncCacheLoader<StatisticsCacheKey, Statistic> { private static final Logger LOG = LogManager.getLogger(StatisticsCacheLoader.class); @@ -43,13 +43,17 @@ public class StatisticsCacheLoader implements AsyncCacheLoader<StatisticsCacheKe + "." + StatisticConstants.STATISTIC_TBL_NAME + " WHERE " + "id = CONCAT('${tblId}', '-', ${idxId}, '-', '${colId}')"; + private static final String QUERY_HISTOGRAM_STATISTICS = "SELECT * FROM " + FeConstants.INTERNAL_DB_NAME + + "." + StatisticConstants.HISTOGRAM_TBL_NAME + " WHERE " + + "id = CONCAT('${tblId}', '-', ${idxId}, '-', '${colId}')"; + private static int CUR_RUNNING_LOAD = 0; private static final Object LOCK = new Object(); // TODO: Maybe we should trigger a analyze job when the required ColumnStatistic doesn't exists. 
@Override - public @NonNull CompletableFuture<ColumnStatistic> asyncLoad(@NonNull StatisticsCacheKey key, + public @NonNull CompletableFuture<Statistic> asyncLoad(@NonNull StatisticsCacheKey key, @NonNull Executor executor) { synchronized (LOCK) { if (CUR_RUNNING_LOAD > StatisticConstants.LOAD_TASK_LIMITS) { @@ -61,31 +65,53 @@ public class StatisticsCacheLoader implements AsyncCacheLoader<StatisticsCacheKe } CUR_RUNNING_LOAD++; return CompletableFuture.supplyAsync(() -> { + Statistic statistic = new Statistic(); + try { Map<String, String> params = new HashMap<>(); params.put("tblId", String.valueOf(key.tableId)); params.put("idxId", String.valueOf(key.idxId)); params.put("colId", String.valueOf(key.colName)); - List<ResultRow> resultBatches = + + List<ColumnStatistic> columnStatistics; + List<ResultRow> columnResult = StatisticsUtil.execStatisticQuery(new StringSubstitutor(params) .replace(QUERY_COLUMN_STATISTICS)); - List<ColumnStatistic> columnStatistics = null; try { - columnStatistics = StatisticsUtil.deserializeToColumnStatistics(resultBatches); + columnStatistics = StatisticsUtil.deserializeToColumnStatistics(columnResult); } catch (Exception e) { LOG.warn("Failed to deserialize column statistics", e); throw new CompletionException(e); } if (CollectionUtils.isEmpty(columnStatistics)) { - return ColumnStatistic.DEFAULT; + statistic.setColumnStatistic(ColumnStatistic.DEFAULT); + } else { + statistic.setColumnStatistic(columnStatistics.get(0)); + } + + List<Histogram> histogramStatistics; + List<ResultRow> histogramResult = + StatisticsUtil.execStatisticQuery(new StringSubstitutor(params) + .replace(QUERY_HISTOGRAM_STATISTICS)); + try { + histogramStatistics = StatisticsUtil.deserializeToHistogramStatistics(histogramResult); + } catch (Exception e) { + LOG.warn("Failed to deserialize histogram statistics", e); + throw new CompletionException(e); + } + if (CollectionUtils.isEmpty(histogramStatistics)) { + statistic.setHistogram(Histogram.DEFAULT); + } else { + 
statistic.setHistogram(histogramStatistics.get(0)); } - return columnStatistics.get(0); } finally { synchronized (LOCK) { CUR_RUNNING_LOAD--; LOCK.notify(); } } + + return statistic; }); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java index 7901c1c918..475c972a22 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java @@ -74,8 +74,7 @@ public class StatisticsRepository { private static final String INSERT_INTO_COLUMN_STATISTICS = "INSERT INTO " + FULL_QUALIFIED_COLUMN_STATISTICS_NAME + " VALUES('${id}', ${catalogId}, ${dbId}, ${tblId}, '${idxId}'," - + "'${colId}', ${partId}, ${count}, ${ndv}, ${nullCount}, '${min}', '${max}', " - + "'${histogram}', ${dataSize}, NOW())"; + + "'${colId}', ${partId}, ${count}, ${ndv}, ${nullCount}, '${min}', '${max}', ${dataSize}, NOW())"; public static ColumnStatistic queryColumnStatisticsByName(long tableId, String colName) { ResultRow resultRow = queryColumnStatisticById(tableId, colName); @@ -159,7 +158,6 @@ public class StatisticsRepository { String nullCount = alterColumnStatsStmt.getValue(StatsType.NUM_NULLS); String min = alterColumnStatsStmt.getValue(StatsType.MIN_VALUE); String max = alterColumnStatsStmt.getValue(StatsType.MAX_VALUE); - String histogram = alterColumnStatsStmt.getValue(StatsType.HISTOGRAM); String dataSize = alterColumnStatsStmt.getValue(StatsType.DATA_SIZE); ColumnStatisticBuilder builder = new ColumnStatisticBuilder(); String colName = alterColumnStatsStmt.getColumnName(); @@ -181,12 +179,10 @@ public class StatisticsRepository { builder.setMaxExpr(StatisticsUtil.readableValue(column.getType(), max)); builder.setMaxValue(StatisticsUtil.convertToDouble(column.getType(), max)); } - if (histogram != null) { - 
builder.setHistogram(Histogram.deserializeFromJson(column.getType(), histogram)); - } if (dataSize != null) { builder.setDataSize(Double.parseDouble(dataSize)); } + ColumnStatistic columnStatistic = builder.build(); Map<String, String> params = new HashMap<>(); params.put("id", constructId(objects.table.getId(), -1, colName)); @@ -201,9 +197,17 @@ public class StatisticsRepository { params.put("nullCount", String.valueOf(columnStatistic.numNulls)); params.put("min", min == null ? "NULL" : min); params.put("max", max == null ? "NULL" : max); - params.put("histogram", (columnStatistic.histogram == null) ? "NULL" : histogram); params.put("dataSize", String.valueOf(columnStatistic.dataSize)); StatisticsUtil.execUpdate(INSERT_INTO_COLUMN_STATISTICS, params); - Env.getCurrentEnv().getStatisticsCache().updateCache(objects.table.getId(), -1, colName, builder.build()); + + Histogram histogram = Env.getCurrentEnv().getStatisticsCache() + .getHistogram(objects.table.getId(), -1, colName); + + Statistic statistic = new Statistic(); + statistic.setHistogram(histogram); + statistic.setColumnStatistic(builder.build()); + + Env.getCurrentEnv().getStatisticsCache() + .updateCache(objects.table.getId(), -1, colName, statistic); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java index 08822bb27c..8ceabf4897 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java @@ -29,8 +29,10 @@ import org.apache.doris.analysis.StatementBase; import org.apache.doris.analysis.StringLiteral; import org.apache.doris.analysis.TableName; import org.apache.doris.analysis.UserIdentity; +import org.apache.doris.catalog.Column; import org.apache.doris.catalog.DatabaseIf; import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.OlapTable; import 
org.apache.doris.catalog.PrimitiveType; import org.apache.doris.catalog.ScalarType; import org.apache.doris.catalog.TableIf; @@ -47,6 +49,7 @@ import org.apache.doris.qe.SessionVariable; import org.apache.doris.qe.StmtExecutor; import org.apache.doris.statistics.AnalysisTaskInfo; import org.apache.doris.statistics.ColumnStatistic; +import org.apache.doris.statistics.Histogram; import org.apache.doris.statistics.StatisticConstants; import org.apache.doris.statistics.util.InternalQueryResult.ResultRow; import org.apache.doris.system.SystemInfoService; @@ -107,6 +110,11 @@ public class StatisticsUtil { return resultBatches.stream().map(ColumnStatistic::fromResultRow).collect(Collectors.toList()); } + public static List<Histogram> deserializeToHistogramStatistics(List<ResultRow> resultBatches) + throws Exception { + return resultBatches.stream().map(Histogram::fromResultRow).collect(Collectors.toList()); + } + public static AutoCloseConnectContext buildConnectContext() { ConnectContext connectContext = new ConnectContext(); SessionVariable sessionVariable = connectContext.getSessionVariable(); @@ -253,4 +261,26 @@ public class StatisticsUtil { } return new DBObjects(catalogIf, databaseIf, tableIf); } + + public static Column findColumn(long catalogId, long dbId, long tblId, long idxId, String columnName) { + CatalogIf<DatabaseIf<TableIf>> catalogIf = Env.getCurrentEnv().getCatalogMgr().getCatalog(catalogId); + if (catalogIf == null) { + return null; + } + DatabaseIf<TableIf> db = catalogIf.getDb(dbId).orElse(null); + if (db == null) { + return null; + } + TableIf tblIf = db.getTable(tblId).orElse(null); + if (tblIf == null) { + return null; + } + if (idxId != -1) { + if (tblIf instanceof OlapTable) { + OlapTable olapTable = (OlapTable) tblIf; + return olapTable.getIndexIdToMeta().get(idxId).getColumnByName(columnName); + } + } + return tblIf.getColumn(columnName); + } } diff --git a/fe/fe-core/src/main/jflex/sql_scanner.flex b/fe/fe-core/src/main/jflex/sql_scanner.flex 
index d9c89907de..b66a91004a 100644 --- a/fe/fe-core/src/main/jflex/sql_scanner.flex +++ b/fe/fe-core/src/main/jflex/sql_scanner.flex @@ -478,6 +478,7 @@ import org.apache.doris.qe.SqlModeHelper; keywordMap.put("write", new Integer(SqlParserSymbols.KW_WRITE)); keywordMap.put("year", new Integer(SqlParserSymbols.KW_YEAR)); keywordMap.put("mtmv", new Integer(SqlParserSymbols.KW_MTMV)); + keywordMap.put("histogram", new Integer(SqlParserSymbols.KW_HISTOGRAM)); } // map from token id to token description diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/util/HyperGraphBuilder.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/util/HyperGraphBuilder.java index 99e6a24338..49e80acccf 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/util/HyperGraphBuilder.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/util/HyperGraphBuilder.java @@ -191,7 +191,7 @@ public class HyperGraphBuilder { int count = rowCounts.get(Integer.parseInt(scanPlan.getTable().getName())); for (Slot slot : scanPlan.getOutput()) { slotIdToColumnStats.put(slot.getExprId(), - new ColumnStatistic(count, count, 0, 0, 0, 0, 0, null, 0, null, null, true)); + new ColumnStatistic(count, count, 0, 0, 0, 0, 0, 0, null, null, true)); } StatsDeriveResult stats = new StatsDeriveResult(count, slotIdToColumnStats); group.setStatistics(stats); diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisJobTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisJobTest.java index a01dd8f7cf..2cb428e402 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisJobTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisJobTest.java @@ -82,7 +82,7 @@ public class AnalysisJobTest extends TestWithFeService { return connectContext; } }; - String sql = "ANALYZE t1"; + String sql = "ANALYZE TABLE t1"; Assertions.assertNotNull(getSqlStmtExecutor(sql)); } diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java index 96ae8fae9f..f04c846456 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java @@ -32,6 +32,7 @@ import org.junit.jupiter.api.Test; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.List; import java.util.concurrent.CompletableFuture; import java.util.concurrent.Executor; @@ -63,14 +64,12 @@ public class CacheTest extends TestWithFeService { @Test public void testLoad() throws Exception { - new MockUp<ColumnStatistic>() { + new MockUp<StatisticsUtil>() { @Mock public Column findColumn(long catalogId, long dbId, long tblId, long idxId, String columnName) { return new Column("abc", PrimitiveType.BIGINT); } - }; - new MockUp<StatisticsUtil>() { @Mock public List<ResultRow> execStatisticQuery(String sql) { @@ -91,7 +90,6 @@ public class CacheTest extends TestWithFeService { colNames.add("col_id"); colNames.add("min"); colNames.add("max"); - colNames.add("histogram"); List<PrimitiveType> primitiveTypes = new ArrayList<>(); primitiveTypes.add(PrimitiveType.BIGINT); primitiveTypes.add(PrimitiveType.BIGINT); @@ -103,6 +101,7 @@ public class CacheTest extends TestWithFeService { primitiveTypes.add(PrimitiveType.VARCHAR); primitiveTypes.add(PrimitiveType.VARCHAR); primitiveTypes.add(PrimitiveType.VARCHAR); + primitiveTypes.add(PrimitiveType.VARCHAR); List<String> values = new ArrayList<>(); values.add("1"); values.add("2"); @@ -115,7 +114,6 @@ public class CacheTest extends TestWithFeService { values.add("8"); values.add("9"); values.add("10"); - values.add(""); ResultRow resultRow = new ResultRow(colNames, primitiveTypes, values); return Arrays.asList(resultRow); } @@ -123,10 +121,78 @@ public class CacheTest extends TestWithFeService { StatisticsCache statisticsCache = new 
StatisticsCache(); ColumnStatistic columnStatistic = statisticsCache.getColumnStatistics(0, "col"); Assertions.assertEquals(ColumnStatistic.DEFAULT, columnStatistic); - Thread.sleep(100); + Thread.sleep(1000); columnStatistic = statisticsCache.getColumnStatistics(0, "col"); Assertions.assertEquals(1, columnStatistic.count); Assertions.assertEquals(2, columnStatistic.ndv); Assertions.assertEquals(10, columnStatistic.maxValue); } + + @Test + public void testLoadHistogram() throws Exception { + new MockUp<StatisticsUtil>() { + + @Mock + public Column findColumn(long catalogId, long dbId, long tblId, long idxId, String columnName) { + return new Column("abc", PrimitiveType.DATETIME); + } + + @Mock + public List<ResultRow> execStatisticQuery(String sql) { + try { + Thread.sleep(50); + } catch (InterruptedException e) { + // ignore + } + List<String> colNames = new ArrayList<>(); + colNames.add("catalog_id"); + colNames.add("db_id"); + colNames.add("idx_id"); + colNames.add("tbl_id"); + colNames.add("col_id"); + colNames.add("sample_rate"); + colNames.add("buckets"); + List<PrimitiveType> primitiveTypes = new ArrayList<>(); + primitiveTypes.add(PrimitiveType.VARCHAR); + primitiveTypes.add(PrimitiveType.VARCHAR); + primitiveTypes.add(PrimitiveType.VARCHAR); + primitiveTypes.add(PrimitiveType.VARCHAR); + primitiveTypes.add(PrimitiveType.VARCHAR); + primitiveTypes.add(PrimitiveType.VARCHAR); + primitiveTypes.add(PrimitiveType.VARCHAR); + List<String> values = new ArrayList<>(); + values.add("1"); + values.add("2"); + values.add("3"); + values.add("-1"); + values.add("4"); + values.add("0.2"); + String buckets = "{\"max_bucket_num\":128,\"bucket_num\":5,\"sample_rate\":1.0,\"buckets\":" + + "[{\"lower\":\"2022-09-21 17:30:29\",\"upper\":\"2022-09-21 22:30:29\"," + + "\"count\":9,\"pre_sum\":0,\"ndv\":1}," + + "{\"lower\":\"2022-09-22 17:30:29\",\"upper\":\"2022-09-22 22:30:29\"," + + "\"count\":10,\"pre_sum\":9,\"ndv\":1}," + + "{\"lower\":\"2022-09-23 
17:30:29\",\"upper\":\"2022-09-23 22:30:29\"," + + "\"count\":9,\"pre_sum\":19,\"ndv\":1}," + + "{\"lower\":\"2022-09-24 17:30:29\",\"upper\":\"2022-09-24 22:30:29\"," + + "\"count\":9,\"pre_sum\":28,\"ndv\":1}," + + "{\"lower\":\"2022-09-25 17:30:29\",\"upper\":\"2022-09-25 22:30:29\"," + + "\"count\":9,\"pre_sum\":37,\"ndv\":1}]}"; + values.add(buckets); + ResultRow resultRow = new ResultRow(colNames, primitiveTypes, values); + return Collections.singletonList(resultRow); + } + }; + + StatisticsCache statisticsCache = new StatisticsCache(); + Histogram histogram = statisticsCache.getHistogram(0, "col"); + Assertions.assertEquals(Histogram.DEFAULT, histogram); + Thread.sleep(1000); + histogram = statisticsCache.getHistogram(0, "col"); + Assertions.assertEquals("DATETIME", histogram.dataType.toString()); + Assertions.assertEquals(128, histogram.maxBucketNum); + Assertions.assertEquals(5, histogram.bucketNum); + Assertions.assertEquals(0.2, histogram.sampleRate); + Assertions.assertEquals(5, histogram.buckets.size()); + } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTest.java index f575872c23..df2546c843 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTest.java @@ -39,7 +39,7 @@ class HistogramTest { @BeforeEach void setUp() throws Exception { - String json = "{\"max_bucket_size\":128,\"bucket_size\":5,\"sample_rate\":1.0,\"buckets\":" + String json = "{\"max_bucket_num\":128,\"bucket_num\":5,\"sample_rate\":1.0,\"buckets\":" + "[{\"lower\":\"2022-09-21 17:30:29\",\"upper\":\"2022-09-21 22:30:29\"," + "\"count\":9,\"pre_sum\":0,\"ndv\":1}," + "{\"lower\":\"2022-09-22 17:30:29\",\"upper\":\"2022-09-22 22:30:29\"," @@ -58,19 +58,19 @@ class HistogramTest { @Test void testDeserializeFromJson() throws Exception { - Type dataType = 
histogramUnderTest.getDataType(); + Type dataType = histogramUnderTest.dataType; Assertions.assertTrue(dataType.isDatetime()); - int maxBucketSize = histogramUnderTest.getMaxBucketSize(); + int maxBucketSize = histogramUnderTest.maxBucketNum; Assertions.assertEquals(128, maxBucketSize); - int bucketSize = histogramUnderTest.getBucketSize(); + int bucketSize = histogramUnderTest.bucketNum; Assertions.assertEquals(5, bucketSize); - float sampleRate = histogramUnderTest.getSampleRate(); + double sampleRate = histogramUnderTest.sampleRate; Assertions.assertEquals(1.0, sampleRate); - List<Bucket> buckets = histogramUnderTest.getBuckets(); + List<Bucket> buckets = histogramUnderTest.buckets; Assertions.assertEquals(5, buckets.size()); LiteralExpr expectedLower = LiteralExpr.create("2022-09-21 17:30:29", @@ -97,10 +97,10 @@ class HistogramTest { String json = Histogram.serializeToJson(histogramUnderTest); JSONObject histogramJson = JSON.parseObject(json); - int maxBucketSize = histogramJson.getIntValue("max_bucket_size"); + int maxBucketSize = histogramJson.getIntValue("max_bucket_num"); Assertions.assertEquals(128, maxBucketSize); - int bucketSize = histogramJson.getIntValue("bucket_size"); + int bucketSize = histogramJson.getIntValue("bucket_num"); Assertions.assertEquals(5, bucketSize); float sampleRate = histogramJson.getFloat("sample_rate"); diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/MVStatisticsTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/MVStatisticsTest.java index 7ed261c831..6040731a14 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/MVStatisticsTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/MVStatisticsTest.java @@ -79,7 +79,7 @@ public class MVStatisticsTest extends TestWithFeService { }; AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager(); Deencapsulation.setField(analysisManager, "statisticsCache", statisticsCache); - getSqlStmtExecutor("analyze t1"); + 
getSqlStmtExecutor("analyze table t1"); Thread.sleep(3000); } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatsDeriveResultTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatsDeriveResultTest.java index df4f48858a..0f2cebf511 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatsDeriveResultTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatsDeriveResultTest.java @@ -27,7 +27,7 @@ public class StatsDeriveResultTest { public void testUpdateRowCountByLimit() { StatsDeriveResult stats = new StatsDeriveResult(100); ColumnStatistic a = new ColumnStatistic(100, 10, 1, 5, 10, - 1, 100, null, 0.5, null, null, false); + 1, 100, 0.5, null, null, false); Id id = new Id(1); stats.addColumnStats(id, a); StatsDeriveResult res = stats.updateByLimit(0); diff --git a/regression-test/suites/statistics/alter_col_stats.groovy b/regression-test/suites/statistics/alter_col_stats.groovy index 52714b9dfa..958e3341d4 100644 --- a/regression-test/suites/statistics/alter_col_stats.groovy +++ b/regression-test/suites/statistics/alter_col_stats.groovy @@ -33,7 +33,8 @@ suite("alter_column_stats") { sql """INSERT INTO statistics_test VALUES(2, 'b', '2012-01-01')""" sql """INSERT INTO statistics_test VALUES(3, 'c', '2013-01-01')""" - sql """ANALYZE statistics_test""" + sql """ANALYZE TABLE statistics_test""" + sql """ANALYZE TABLE statistics_test UPDATE HISTOGRAM ON col1,col2""" sleep(9000) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org