This is an automated email from the ASF dual-hosted git repository.
morrySnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new ff0fbf56dba [fix](statistics) full analyze not collect hot value by
default (#63625)
ff0fbf56dba is described below
commit ff0fbf56dba743ac6c97e51b4adbfd485f170526
Author: yujun <[email protected]>
AuthorDate: Wed May 27 17:13:18 2026 +0800
[fix](statistics) full analyze not collect hot value by default (#63625)
#62435 made full analyze always collect hot values, but executing it may
exceed the statistics SQL memory limit (default 2GB) for big tables.
Keep sample analyze hot value collection unchanged while making manual
full analyze require an explicit WITH HOT VALUE. Auto full analyze
continues to skip hot values, and auto sample analyze still collects
them, so the behaviour of both is unchanged.
usage:
```sql
analyze table t with sync with hot value
```
Tests:
- FE UT: AnalyzeTableCommandTest, OlapAnalysisTaskTest,
AnalysisManagerTest
- Regression: test_hot_value, test_full_analyze_hot_value
Docs PR: apache/doris-website#3769
---
.../antlr4/org/apache/doris/nereids/DorisLexer.g4 | 1 +
.../antlr4/org/apache/doris/nereids/DorisParser.g4 | 2 +
.../apache/doris/analysis/AnalyzeProperties.java | 20 ++++
.../doris/nereids/parser/LogicalPlanBuilder.java | 2 +
.../trees/plans/commands/AnalyzeCommand.java | 6 ++
.../org/apache/doris/statistics/AnalysisInfo.java | 10 +-
.../doris/statistics/AnalysisInfoBuilder.java | 11 +-
.../apache/doris/statistics/AnalysisManager.java | 3 +
.../apache/doris/statistics/BaseAnalysisTask.java | 23 +++++
.../doris/statistics/ExternalAnalysisTask.java | 16 +--
.../apache/doris/statistics/OlapAnalysisTask.java | 12 ++-
.../doris/statistics/StatisticsAutoCollector.java | 1 +
.../plans/commands/AnalyzeTableCommandTest.java | 70 ++++++++++++-
.../doris/statistics/AnalysisManagerTest.java | 102 +++++++++++++++++++
.../doris/statistics/HMSAnalysisTaskTest.java | 46 +++++++++
.../doris/statistics/OlapAnalysisTaskTest.java | 113 +++++++++++++++++++++
.../doris/statistics/util/StatisticsUtilTest.java | 4 +
.../distinct_split/disitinct_split.out | 57 ++++-------
.../statistics/test_full_analyze_hot_value.groovy | 59 +++++++----
.../suites/statistics/test_hot_value.groovy | 2 +-
20 files changed, 486 insertions(+), 74 deletions(-)
diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4
b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4
index 9915618253f..efd539d5e7e 100644
--- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4
+++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4
@@ -284,6 +284,7 @@ HISTOGRAM: 'HISTOGRAM';
HLL: 'HLL';
HLL_UNION: 'HLL_UNION';
HOSTNAME: 'HOSTNAME';
+HOT: 'HOT';
HOTSPOT: 'HOTSPOT';
HOUR: 'HOUR';
HOURS: 'HOURS';
diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4
b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4
index feb1f607477..1bf497f1cf9 100644
--- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4
+++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4
@@ -908,6 +908,7 @@ analyzeProperties
| FULL
| SQL
| HISTOGRAM
+ | (HOT VALUE)
| (SAMPLE ((ROWS rows=INTEGER_VALUE) | (PERCENT percent=INTEGER_VALUE)) )
| (BUCKETS bucket=INTEGER_VALUE)
| (PERIOD periodInSecond=INTEGER_VALUE)
@@ -2136,6 +2137,7 @@ nonReserved
| HISTOGRAM
| HLL_UNION
| HOSTNAME
+ | HOT
| HOTSPOT
| HOUR
| HOURS
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeProperties.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeProperties.java
index faa45082043..858b77ca015 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeProperties.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeProperties.java
@@ -45,6 +45,7 @@ public class AnalyzeProperties {
public static final String PROPERTY_FORCE_FULL = "force.full";
public static final String PROPERTY_EXTERNAL_TABLE_USE_SQL =
"external.table.use.sql";
public static final String PROPERTY_USE_AUTO_ANALYZER =
"use.auto.analyzer";
+ public static final String PROPERTY_COLLECT_HOT_VALUE =
"collect.hot.value";
public static final AnalyzeProperties DEFAULT_PROP = new
AnalyzeProperties(new HashMap<String, String>() {
{
@@ -74,6 +75,7 @@ public class AnalyzeProperties {
.add(PROPERTY_FORCE_FULL)
.add(PROPERTY_EXTERNAL_TABLE_USE_SQL)
.add(PROPERTY_USE_AUTO_ANALYZER)
+ .add(PROPERTY_COLLECT_HOT_VALUE)
.build();
public AnalyzeProperties(Map<String, String> properties) {
@@ -96,6 +98,7 @@ public class AnalyzeProperties {
checkAnalysisMode(msgTemplate);
checkAnalysisType(msgTemplate);
checkScheduleType(msgTemplate);
+ checkCollectHotValue(msgTemplate);
checkPeriod();
}
@@ -240,6 +243,15 @@ public class AnalyzeProperties {
}
}
+ private void checkCollectHotValue(String msgTemplate) throws
AnalysisException {
+ if (properties.containsKey(PROPERTY_COLLECT_HOT_VALUE)) {
+ String value = properties.get(PROPERTY_COLLECT_HOT_VALUE);
+ if (!"true".equalsIgnoreCase(value) &&
!"false".equalsIgnoreCase(value)) {
+ throw new AnalysisException(String.format(msgTemplate,
PROPERTY_COLLECT_HOT_VALUE, value));
+ }
+ }
+ }
+
private void checkPeriod() throws AnalysisException {
if (properties.containsKey(PROPERTY_PERIOD_SECONDS)
&& properties.containsKey(PROPERTY_PERIOD_CRON)) {
@@ -283,6 +295,14 @@ public class AnalyzeProperties {
return properties.containsKey(PROPERTY_EXTERNAL_TABLE_USE_SQL);
}
+ public boolean hasCollectHotValue() {
+ return properties.containsKey(PROPERTY_COLLECT_HOT_VALUE);
+ }
+
+ public boolean collectHotValue() {
+ return
Boolean.parseBoolean(properties.get(PROPERTY_COLLECT_HOT_VALUE));
+ }
+
public String toSQL() {
StringBuilder sb = new StringBuilder();
sb.append("PROPERTIES(");
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
index 53c3f09c46f..fbeb376e4da 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
@@ -8501,6 +8501,8 @@ public class LogicalPlanBuilder extends
DorisParserBaseVisitor<Object> {
properties.put(AnalyzeProperties.PROPERTY_EXTERNAL_TABLE_USE_SQL,
"true");
} else if (ctx.HISTOGRAM() != null) {
properties.put(AnalyzeProperties.PROPERTY_ANALYSIS_TYPE,
AnalysisInfo.AnalysisType.HISTOGRAM.toString());
+ } else if (ctx.HOT() != null) {
+ properties.put(AnalyzeProperties.PROPERTY_COLLECT_HOT_VALUE,
"true");
} else if (ctx.SAMPLE() != null) {
if (ctx.ROWS() != null) {
properties.put(AnalyzeProperties.PROPERTY_SAMPLE_ROWS,
ctx.INTEGER_VALUE().getText());
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/AnalyzeCommand.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/AnalyzeCommand.java
index 7c3d8ebe2c1..34323496fb4 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/AnalyzeCommand.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/AnalyzeCommand.java
@@ -138,10 +138,16 @@ public abstract class AnalyzeCommand extends Command
implements ForwardWithSync
return analyzeProperties.usingSqlForExternalTable();
}
+ /**
+ * Validate analyze command properties.
+ */
public void validate(ConnectContext ctx) throws UserException {
if (analyzeProperties != null) {
analyzeProperties.check();
}
+ if (analyzeProperties.hasCollectHotValue() && getAnalysisMethod() ==
AnalysisInfo.AnalysisMethod.SAMPLE) {
+ throw new AnalysisException("Sample analyze always collects hot
value");
+ }
}
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
index 58b2c3e3d1f..d2c293b33b1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
@@ -168,6 +168,9 @@ public class AnalysisInfo implements Writable {
@SerializedName("usingSqlForExternalTable")
public final boolean usingSqlForExternalTable;
+ @SerializedName("chv")
+ public final Boolean collectHotValue;
+
@SerializedName("createTime")
public final long createTime = System.currentTimeMillis();
@@ -209,8 +212,9 @@ public class AnalysisInfo implements Writable {
long lastExecTimeInMs, long timeCostInMs, AnalysisState state,
ScheduleType scheduleType,
boolean partitionOnly, boolean samplingPartition,
boolean isAllPartition, long partitionCount, CronExpression
cronExpression, boolean forceFull,
- boolean usingSqlForExternalTable, long tblUpdateTime, long
rowCount, boolean userInject, long updateRows,
- long tableVersion, JobPriority priority, Map<Long, Long>
partitionUpdateRows, boolean enablePartition) {
+ boolean usingSqlForExternalTable, Boolean collectHotValue, long
tblUpdateTime, long rowCount,
+ boolean userInject, long updateRows, long tableVersion,
JobPriority priority,
+ Map<Long, Long> partitionUpdateRows, boolean enablePartition) {
this.jobId = jobId;
this.taskId = taskId;
this.taskIds = taskIds;
@@ -243,6 +247,7 @@ public class AnalysisInfo implements Writable {
}
this.forceFull = forceFull;
this.usingSqlForExternalTable = usingSqlForExternalTable;
+ this.collectHotValue = collectHotValue;
this.tblUpdateTime = tblUpdateTime;
this.rowCount = rowCount;
this.userInject = userInject;
@@ -293,6 +298,7 @@ public class AnalysisInfo implements Writable {
}
sj.add("forceFull: " + forceFull);
sj.add("usingSqlForExternalTable: " + usingSqlForExternalTable);
+ sj.add("collectHotValue: " + collectHotValue);
sj.add("rowCount: " + rowCount);
sj.add("userInject: " + userInject);
sj.add("updateRows: " + updateRows);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java
index bbd0d616495..d9fe7c21220 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java
@@ -59,6 +59,7 @@ public class AnalysisInfoBuilder {
private CronExpression cronExpression;
private boolean forceFull;
private boolean usingSqlForExternalTable;
+ private Boolean collectHotValue;
private long tblUpdateTime;
private long rowCount;
private boolean userInject = false;
@@ -101,6 +102,7 @@ public class AnalysisInfoBuilder {
cronExpression = info.cronExpression;
forceFull = info.forceFull;
usingSqlForExternalTable = info.usingSqlForExternalTable;
+ collectHotValue = info.collectHotValue;
tblUpdateTime = info.tblUpdateTime;
rowCount = info.rowCount;
userInject = info.userInject;
@@ -256,6 +258,11 @@ public class AnalysisInfoBuilder {
return this;
}
+ public AnalysisInfoBuilder setCollectHotValue(Boolean collectHotValue) {
+ this.collectHotValue = collectHotValue;
+ return this;
+ }
+
public AnalysisInfoBuilder setTblUpdateTime(long tblUpdateTime) {
this.tblUpdateTime = tblUpdateTime;
return this;
@@ -301,8 +308,8 @@ public class AnalysisInfoBuilder {
colName, indexId, jobType, analysisMethod, analysisType,
samplePercent,
sampleRows, maxBucketNum, periodTimeInMs, message,
lastExecTimeInMs, timeCostInMs, state, scheduleType,
partitionOnly, samplingPartition, isAllPartition,
partitionCount,
- cronExpression, forceFull, usingSqlForExternalTable,
tblUpdateTime, rowCount, userInject, updateRows,
- tableVersion, priority, partitionUpdateRows, enablePartition);
+ cronExpression, forceFull, usingSqlForExternalTable,
collectHotValue, tblUpdateTime, rowCount,
+ userInject, updateRows, tableVersion, priority,
partitionUpdateRows, enablePartition);
}
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
index 67f3306298e..b2b4c0d57f6 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
@@ -381,6 +381,9 @@ public class AnalysisManager implements Writable {
infoBuilder.setCronExpression(cronExpression);
infoBuilder.setForceFull(command.forceFull());
infoBuilder.setUsingSqlForExternalTable(command.usingSqlForExternalTable());
+ AnalyzeProperties analyzeProperties = command.getAnalyzeProperties();
+ infoBuilder.setCollectHotValue((analyzeProperties.hasCollectHotValue()
+ && analyzeProperties.collectHotValue()) || analysisMethod ==
AnalysisMethod.SAMPLE);
if (analysisMethod == AnalysisMethod.SAMPLE) {
infoBuilder.setSamplePercent(samplePercent);
infoBuilder.setSampleRows(sampleRows);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
index 852ec0e6fde..1e68e01210b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
@@ -105,6 +105,25 @@ public abstract class BaseAnalysisTask {
+ "GROUP BY `hash_value` ORDER BY `count` DESC LIMIT
${hotValueCollectCount}) t) "
+ "SELECT * FROM cte2 CROSS JOIN cte3";
+ protected static final String FULL_ANALYZE_WITHOUT_HOT_VALUE_TEMPLATE =
+ "SELECT CONCAT(${tblId}, '-', ${idxId}, '-', '${colId}') AS `id`, "
+ + "${catalogId} AS `catalog_id`, "
+ + "${dbId} AS `db_id`, "
+ + "${tblId} AS `tbl_id`, "
+ + "${idxId} AS `idx_id`, "
+ + "'${colId}' AS `col_id`, "
+ + "NULL AS `part_id`, "
+ + "COUNT(1) AS `row_count`, "
+ + "NDV(`${colName}`) AS `ndv`, "
+ + "COUNT(1) - COUNT(`${colName}`) AS `null_count`, "
+ + "SUBSTRING(CAST(MIN(`${colName}`) AS STRING), 1, 1024) AS
`min`, "
+ + "SUBSTRING(CAST(MAX(`${colName}`) AS STRING), 1, 1024) AS
`max`, "
+ + "${dataSizeFunction} AS `data_size`, "
+ + "NOW() AS `update_time`, "
+ + "null as `hot_value` "
+ + "FROM (SELECT `${colName}`${lengthAssert} "
+ + "FROM `${catalogName}`.`${dbName}`.`${tblName}` ${index})
__lc_t";
+
protected static final String LINEAR_ANALYZE_TEMPLATE = "WITH cte1 AS ("
+ "SELECT `${colName}`${lengthAssert} "
+ "FROM `${catalogName}`.`${dbName}`.`${tblName}` ${index}
${sampleHints} ${limit} ${preAggHint}), "
@@ -576,6 +595,10 @@ public abstract class BaseAnalysisTask {
protected abstract void deleteNotExistPartitionStats(AnalysisInfo jobInfo)
throws DdlException;
+ protected boolean shouldCollectHotValue() {
+ return Boolean.TRUE.equals(info.collectHotValue);
+ }
+
protected String getPartitionInfo(String partitionName) {
return "";
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/ExternalAnalysisTask.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ExternalAnalysisTask.java
index 8c4eccb1165..d37b3740f9d 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/ExternalAnalysisTask.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ExternalAnalysisTask.java
@@ -58,18 +58,22 @@ public class ExternalAnalysisTask extends BaseAnalysisTask {
}
protected void doFull() throws Exception {
- StringBuilder sb = new StringBuilder();
Map<String, String> params = buildSqlParams();
params.put("dataSizeFunction", getDataSizeFunction(col, false));
- params.put("hotValueCollectCount",
String.valueOf(SessionVariable.getHotValueCollectCount()));
- params.put("subStringColName", getStringTypeColName(col));
- params.put("rowCount2", "(SELECT COUNT(1) FROM cte1 WHERE `${colName}`
IS NOT NULL)");
if (LOG.isDebugEnabled()) {
LOG.debug("Will do full collection for column {}", col.getName());
}
- sb.append(FULL_ANALYZE_TEMPLATE);
+ String template;
+ if (shouldCollectHotValue()) {
+ params.put("hotValueCollectCount",
String.valueOf(SessionVariable.getHotValueCollectCount()));
+ params.put("subStringColName", getStringTypeColName(col));
+ params.put("rowCount2", "(SELECT COUNT(1) FROM cte1 WHERE
`${colName}` IS NOT NULL)");
+ template = FULL_ANALYZE_TEMPLATE;
+ } else {
+ template = FULL_ANALYZE_WITHOUT_HOT_VALUE_TEMPLATE;
+ }
StringSubstitutor stringSubstitutor = new StringSubstitutor(params);
- String sql = stringSubstitutor.replace(sb.toString());
+ String sql = stringSubstitutor.replace(template);
runQuery(sql);
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
index 4312b31208f..45e84da9177 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
@@ -330,11 +330,15 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
doPartitionTable();
} else {
Map<String, String> params = buildSqlParams();
- params.put("hotValueCollectCount",
String.valueOf(SessionVariable.getHotValueCollectCount()));
- params.put("subStringColName", getStringTypeColName(col));
- params.put("rowCount2", "(SELECT COUNT(1) FROM cte1 WHERE
`${colName}` IS NOT NULL)");
StringSubstitutor stringSubstitutor = new
StringSubstitutor(params);
- runQuery(stringSubstitutor.replace(FULL_ANALYZE_TEMPLATE));
+ if (shouldCollectHotValue()) {
+ params.put("hotValueCollectCount",
String.valueOf(SessionVariable.getHotValueCollectCount()));
+ params.put("subStringColName", getStringTypeColName(col));
+ params.put("rowCount2", "(SELECT COUNT(1) FROM cte1 WHERE
`${colName}` IS NOT NULL)");
+ runQuery(stringSubstitutor.replace(FULL_ANALYZE_TEMPLATE));
+ } else {
+
runQuery(stringSubstitutor.replace(FULL_ANALYZE_WITHOUT_HOT_VALUE_TEMPLATE));
+ }
}
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
index da86284967d..4f142cc0587 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
@@ -258,6 +258,7 @@ public class StatisticsAutoCollector extends MasterDaemon {
.setPartitionNames(Collections.emptySet())
.setSampleRows(analysisMethod.equals(AnalysisMethod.SAMPLE)
? StatisticsUtil.getHugeTableSampleRows() : -1)
+
.setCollectHotValue(analysisMethod.equals(AnalysisMethod.SAMPLE))
.setScheduleType(ScheduleType.AUTOMATIC)
.setState(AnalysisState.PENDING)
.setTaskIds(new ArrayList<>())
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/AnalyzeTableCommandTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/AnalyzeTableCommandTest.java
index 4d73e001824..568fb613e7b 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/AnalyzeTableCommandTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/AnalyzeTableCommandTest.java
@@ -20,14 +20,19 @@ package org.apache.doris.nereids.trees.plans.commands;
import org.apache.doris.analysis.AnalyzeProperties;
import org.apache.doris.backup.CatalogMocker;
import org.apache.doris.catalog.Env;
+import org.apache.doris.catalog.NameSpaceContext;
import org.apache.doris.catalog.info.PartitionNamesInfo;
import org.apache.doris.catalog.info.TableNameInfo;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.datasource.InternalCatalog;
import org.apache.doris.mysql.privilege.AccessControllerManager;
import org.apache.doris.mysql.privilege.PrivPredicate;
+import org.apache.doris.nereids.parser.NereidsParser;
+import org.apache.doris.nereids.trees.plans.logical.LogicalPlan;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.qe.QueryState;
+import org.apache.doris.qe.SessionVariable;
+import org.apache.doris.statistics.AnalysisInfo;
import com.google.common.collect.ImmutableList;
import org.junit.jupiter.api.AfterEach;
@@ -37,6 +42,9 @@ import org.junit.jupiter.api.Test;
import org.mockito.MockedStatic;
import org.mockito.Mockito;
+import java.util.HashMap;
+import java.util.Map;
+
public class AnalyzeTableCommandTest {
private static final String internalCtl =
InternalCatalog.INTERNAL_CATALOG_NAME;
@@ -59,6 +67,8 @@ public class AnalyzeTableCommandTest {
Mockito.when(env.getAccessManager()).thenReturn(accessManager);
Mockito.when(ctx.getState()).thenReturn(new QueryState());
+ Mockito.when(ctx.getSessionVariable()).thenReturn(new
SessionVariable());
+ Mockito.when(ctx.getNameSpaceContext()).thenReturn(new
NameSpaceContext(internalCtl, null, -1));
}
@AfterEach
@@ -97,5 +107,63 @@ public class AnalyzeTableCommandTest {
() ->
analyzeTableCommand.checkAnalyzePrivilege(tableNameInfo2),
"ANALYZE command denied to user 'null'@'null' for table
'test_db: test_tbl2'");
}
-}
+ @Test
+ void testCollectHotValueDefaultAndOverride() {
+ TableNameInfo tableNameInfo = new TableNameInfo(internalCtl,
+ CatalogMocker.TEST_DB_NAME, CatalogMocker.TEST_TBL_NAME);
+
+ AnalyzeTableCommand fullAnalyzeCommand = new
AnalyzeTableCommand(tableNameInfo,
+ null, null, new AnalyzeProperties(defaultAnalyzeProperties()));
+
Assertions.assertFalse(fullAnalyzeCommand.getAnalyzeProperties().hasCollectHotValue());
+ Assertions.assertEquals(AnalysisInfo.AnalysisMethod.FULL,
fullAnalyzeCommand.getAnalysisMethod());
+
+ Map<String, String> sampleProperties = defaultAnalyzeProperties();
+ sampleProperties.put(AnalyzeProperties.PROPERTY_SAMPLE_ROWS, "100");
+ AnalyzeTableCommand sampleAnalyzeCommand = new
AnalyzeTableCommand(tableNameInfo,
+ null, null, new AnalyzeProperties(sampleProperties));
+
Assertions.assertFalse(sampleAnalyzeCommand.getAnalyzeProperties().hasCollectHotValue());
+ Assertions.assertEquals(AnalysisInfo.AnalysisMethod.SAMPLE,
sampleAnalyzeCommand.getAnalysisMethod());
+
+ Map<String, String> forcedFullHotValueProperties =
defaultAnalyzeProperties();
+
forcedFullHotValueProperties.put(AnalyzeProperties.PROPERTY_COLLECT_HOT_VALUE,
"true");
+ AnalyzeTableCommand forcedFullHotValueCommand = new
AnalyzeTableCommand(tableNameInfo,
+ null, null, new
AnalyzeProperties(forcedFullHotValueProperties));
+
Assertions.assertTrue(forcedFullHotValueCommand.getAnalyzeProperties().collectHotValue());
+ }
+
+ @Test
+ void testCollectHotValuePropertyValidation() {
+ Map<String, String> properties = defaultAnalyzeProperties();
+ properties.put(AnalyzeProperties.PROPERTY_COLLECT_HOT_VALUE,
"invalid");
+ AnalyzeProperties analyzeProperties = new
AnalyzeProperties(properties);
+ Assertions.assertThrows(AnalysisException.class,
analyzeProperties::check);
+ }
+
+ @Test
+ void testParseWithHotValue() {
+ NereidsParser parser = new NereidsParser();
+ LogicalPlan plan = parser.parseSingle("ANALYZE TABLE test_db.test_tbl
WITH HOT VALUE");
+ Assertions.assertTrue(plan instanceof AnalyzeTableCommand);
+ AnalyzeTableCommand command = (AnalyzeTableCommand) plan;
+
Assertions.assertTrue(command.getAnalyzeProperties().hasCollectHotValue());
+
Assertions.assertTrue(command.getAnalyzeProperties().collectHotValue());
+ }
+
+ @Test
+ void testSampleAnalyzeWithHotValueRejected() {
+ NereidsParser parser = new NereidsParser();
+ AnalyzeTableCommand command = (AnalyzeTableCommand) parser.parseSingle(
+ "ANALYZE TABLE test_db.test_tbl WITH SAMPLE ROWS 100 WITH HOT
VALUE");
+ AnalysisException exception =
Assertions.assertThrows(AnalysisException.class,
+ () -> command.validate(ctx));
+ Assertions.assertEquals("Sample analyze always collects hot value",
exception.getDetailMessage());
+ }
+
+ private Map<String, String> defaultAnalyzeProperties() {
+ Map<String, String> properties = new HashMap<>();
+ properties.put(AnalyzeProperties.PROPERTY_SYNC, "false");
+ properties.put(AnalyzeProperties.PROPERTY_ANALYSIS_TYPE,
"FUNDAMENTALS");
+ return properties;
+ }
+}
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java
b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java
index 577086b6c92..30b7bd55ed5 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java
@@ -17,27 +17,37 @@
package org.apache.doris.statistics;
+import org.apache.doris.analysis.AnalyzeProperties;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Database;
+import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.PrimitiveType;
+import org.apache.doris.catalog.TableIf;
+import org.apache.doris.catalog.info.TableNameInfo;
import org.apache.doris.common.Config;
import org.apache.doris.common.DdlException;
+import org.apache.doris.common.Pair;
import org.apache.doris.datasource.InternalCatalog;
import org.apache.doris.nereids.trees.expressions.ExprId;
import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.trees.expressions.SlotReference;
+import org.apache.doris.nereids.trees.plans.commands.AnalyzeTableCommand;
import org.apache.doris.nereids.types.IntegerType;
+import org.apache.doris.statistics.AnalysisInfo.AnalysisMethod;
import org.apache.doris.statistics.AnalysisInfo.AnalysisType;
import org.apache.doris.statistics.AnalysisInfo.JobType;
+import org.apache.doris.statistics.AnalysisInfo.ScheduleType;
import org.apache.doris.thrift.TQueryColumn;
import com.google.common.collect.ImmutableList;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
+import org.mockito.MockedStatic;
import org.mockito.Mockito;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
@@ -47,6 +57,67 @@ import java.util.concurrent.atomic.AtomicInteger;
// CHECKSTYLE OFF
public class AnalysisManagerTest {
+ @Test
+ public void testBuildAnalysisJobInfoCollectHotValueDefault() {
+ AnalysisManager manager = new AnalysisManager();
+ Env env = Mockito.mock(Env.class);
+ try (MockedStatic<Env> envMockedStatic =
Mockito.mockStatic(Env.class)) {
+ envMockedStatic.when(Env::getCurrentEnv).thenReturn(env);
+ Mockito.when(env.getNextId()).thenReturn(1L, 2L, 3L, 4L);
+
+ AnalysisInfo fullOnce = manager.buildAnalysisJobInfo(
+ mockAnalyzeCommand(AnalysisMethod.FULL, ScheduleType.ONCE,
false, false));
+ Assertions.assertFalse(fullOnce.collectHotValue);
+
+ AnalysisInfo samplePeriod = manager.buildAnalysisJobInfo(
+ mockAnalyzeCommand(AnalysisMethod.SAMPLE,
ScheduleType.PERIOD, false, true));
+ Assertions.assertTrue(samplePeriod.collectHotValue);
+
+ AnalysisInfo automatic = manager.buildAnalysisJobInfo(
+ mockAnalyzeCommand(AnalysisMethod.FULL,
ScheduleType.AUTOMATIC, false, true));
+ Assertions.assertFalse(automatic.collectHotValue);
+
+ AnalysisInfo automaticSample = manager.buildAnalysisJobInfo(
+ mockAnalyzeCommand(AnalysisMethod.SAMPLE,
ScheduleType.AUTOMATIC, false, true));
+ Assertions.assertTrue(automaticSample.collectHotValue);
+
+ AnalysisInfo explicitAutomatic = manager.buildAnalysisJobInfo(
+ mockAnalyzeCommand(AnalysisMethod.FULL,
ScheduleType.AUTOMATIC, true, false));
+ Assertions.assertFalse(explicitAutomatic.collectHotValue);
+ }
+ }
+
+ @Test
+ public void testBuildAnalysisJobInfoAutoSampleCommandCollectsHotValue() {
+ AnalysisManager manager = new AnalysisManager();
+ Env env = Mockito.mock(Env.class);
+ try (MockedStatic<Env> envMockedStatic =
Mockito.mockStatic(Env.class)) {
+ envMockedStatic.when(Env::getCurrentEnv).thenReturn(env);
+ Mockito.when(env.getNextId()).thenReturn(1L);
+
+ Map<String, String> properties = new HashMap<>();
+ properties.put(AnalyzeProperties.PROPERTY_SYNC, "false");
+ properties.put(AnalyzeProperties.PROPERTY_ANALYSIS_TYPE,
AnalysisType.FUNDAMENTALS.toString());
+ properties.put(AnalyzeProperties.PROPERTY_AUTOMATIC, "true");
+ properties.put(AnalyzeProperties.PROPERTY_SAMPLE_ROWS, "100");
+ AnalyzeTableCommand command = Mockito.spy(new AnalyzeTableCommand(
+ new TableNameInfo(InternalCatalog.INTERNAL_CATALOG_NAME,
"testDb", "testTbl"),
+ null, ImmutableList.of("testCol"), new
AnalyzeProperties(properties)));
+ TableIf table = Mockito.mock(TableIf.class);
+ Mockito.when(table.getId()).thenReturn(30001L);
+ Mockito.when(table.getColumnIndexPairs(Mockito.any()))
+ .thenReturn(Collections.singleton(Pair.of("testTbl",
"testCol")));
+ Mockito.doReturn(table).when(command).getTable();
+ Mockito.doReturn(10001L).when(command).getCatalogId();
+ Mockito.doReturn(20001L).when(command).getDbId();
+
+ AnalysisInfo analysisInfo = manager.buildAnalysisJobInfo(command);
+ Assertions.assertEquals(ScheduleType.AUTOMATIC,
analysisInfo.scheduleType);
+ Assertions.assertEquals(AnalysisMethod.SAMPLE,
analysisInfo.analysisMethod);
+ Assertions.assertTrue(analysisInfo.collectHotValue);
+ }
+ }
+
@Test
public void testUpdateTaskStatus() {
BaseAnalysisTask task1 = Mockito.mock(BaseAnalysisTask.class);
@@ -371,4 +442,35 @@ public class AnalysisManagerTest {
Assertions.assertTrue(count.get() > 0);
Assertions.assertTrue(count.get() <= 20);
}
+
+ private AnalyzeTableCommand mockAnalyzeCommand(AnalysisMethod
analysisMethod, ScheduleType scheduleType,
+ boolean hasCollectHotValue, boolean collectHotValue) {
+ AnalyzeTableCommand command = Mockito.mock(AnalyzeTableCommand.class);
+ TableIf table = Mockito.mock(TableIf.class);
+ Map<String, String> properties = new HashMap<>();
+ if (hasCollectHotValue) {
+ properties.put(AnalyzeProperties.PROPERTY_COLLECT_HOT_VALUE,
String.valueOf(collectHotValue));
+ }
+ AnalyzeProperties analyzeProperties = new
AnalyzeProperties(properties);
+ Mockito.when(table.getId()).thenReturn(30001L);
+ Mockito.when(command.getTable()).thenReturn(table);
+
Mockito.when(command.getColumnNames()).thenReturn(Collections.emptySet());
+ Mockito.when(command.isPartitionOnly()).thenReturn(false);
+ Mockito.when(command.isSamplingPartition()).thenReturn(false);
+ Mockito.when(command.isStarPartition()).thenReturn(false);
+ Mockito.when(command.getPartitionCount()).thenReturn(0L);
+ Mockito.when(command.getSamplePercent()).thenReturn(0);
+ Mockito.when(command.getSampleRows()).thenReturn(100);
+
Mockito.when(command.getAnalysisType()).thenReturn(AnalysisType.FUNDAMENTALS);
+ Mockito.when(command.getAnalysisMethod()).thenReturn(analysisMethod);
+ Mockito.when(command.getScheduleType()).thenReturn(scheduleType);
+ Mockito.when(command.getCron()).thenReturn(null);
+ Mockito.when(command.getCatalogId()).thenReturn(10001L);
+ Mockito.when(command.getDbId()).thenReturn(20001L);
+
Mockito.when(command.getPartitionNames()).thenReturn(Collections.emptySet());
+ Mockito.when(command.forceFull()).thenReturn(false);
+ Mockito.when(command.usingSqlForExternalTable()).thenReturn(false);
+
Mockito.when(command.getAnalyzeProperties()).thenReturn(analyzeProperties);
+ return command;
+ }
}
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java
b/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java
index 0df615319f1..3011a83101d 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java
@@ -158,6 +158,51 @@ public class HMSAnalysisTaskTest {
Mockito.when(databaseIf.getFullName()).thenReturn("default");
Mockito.when(tableIf.getPartitionNames()).thenReturn(ImmutableSet.of("date=20230101/hour=12"));
+ HMSAnalysisTask task = Mockito.spy(new HMSAnalysisTask());
+ Mockito.doAnswer(invocation -> {
+ String sql = invocation.getArgument(0);
+ Assertions.assertEquals("SELECT CONCAT(30001, '-', -1, '-',
'hour') AS `id`, "
+ + "10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS
`tbl_id`, "
+ + "-1 AS `idx_id`, 'hour' AS `col_id`, NULL AS `part_id`, "
+ + "COUNT(1) AS `row_count`, NDV(`hour`) AS `ndv`, "
+ + "COUNT(1) - COUNT(`hour`) AS `null_count`, "
+ + "SUBSTRING(CAST(MIN(`hour`) AS STRING), 1, 1024) AS
`min`, "
+ + "SUBSTRING(CAST(MAX(`hour`) AS STRING), 1, 1024) AS
`max`, "
+ + "COUNT(1) * 4 AS `data_size`, NOW() AS `update_time`,
null as `hot_value` "
+ + "FROM (SELECT `hour` FROM `hms`.`default`.`test` )
__lc_t", sql);
+ return null;
+ }).when(task).runQuery(Mockito.anyString());
+
+ task.col = new Column("hour", PrimitiveType.INT);
+ task.tbl = tableIf;
+ task.catalog = catalogIf;
+ task.db = databaseIf;
+ task.setTable(tableIf);
+
+ AnalysisInfoBuilder analysisInfoBuilder = new AnalysisInfoBuilder();
+ analysisInfoBuilder.setColName("hour");
+ analysisInfoBuilder.setJobType(AnalysisInfo.JobType.MANUAL);
+ analysisInfoBuilder.setUsingSqlForExternalTable(true);
+ task.info = analysisInfoBuilder.build();
+
+ task.doExecute();
+ }
+
+ @SuppressWarnings("unchecked")
+ @Test
+ public void testOrdinaryStatsWithHotValue() throws Exception {
+ CatalogIf catalogIf = Mockito.mock(CatalogIf.class);
+ DatabaseIf databaseIf = Mockito.mock(DatabaseIf.class);
+ HMSExternalTable tableIf = Mockito.mock(HMSExternalTable.class);
+
+ Mockito.when(tableIf.getId()).thenReturn(30001L);
+ Mockito.when(tableIf.getName()).thenReturn("test");
+ Mockito.when(catalogIf.getId()).thenReturn(10001L);
+ Mockito.when(catalogIf.getName()).thenReturn("hms");
+ Mockito.when(databaseIf.getId()).thenReturn(20001L);
+ Mockito.when(databaseIf.getFullName()).thenReturn("default");
+
Mockito.when(tableIf.getPartitionNames()).thenReturn(ImmutableSet.of("date=20230101/hour=12"));
+
try (MockedStatic<SessionVariable> mockedSessionVariable =
Mockito.mockStatic(SessionVariable.class)) {
mockedSessionVariable.when(SessionVariable::getHotValueCollectCount).thenReturn(10);
@@ -196,6 +241,7 @@ public class HMSAnalysisTaskTest {
analysisInfoBuilder.setColName("hour");
analysisInfoBuilder.setJobType(AnalysisInfo.JobType.MANUAL);
analysisInfoBuilder.setUsingSqlForExternalTable(true);
+ analysisInfoBuilder.setCollectHotValue(true);
task.info = analysisInfoBuilder.build();
task.doExecute();
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
index 2c93ad7dd9a..ae29fc4c9ce 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
@@ -39,6 +39,7 @@ import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.FeConstants;
import org.apache.doris.common.Pair;
import org.apache.doris.datasource.CatalogIf;
+import org.apache.doris.persist.gson.GsonUtils;
import org.apache.doris.qe.SessionVariable;
import org.apache.doris.statistics.AnalysisInfo.AnalysisMethod;
import org.apache.doris.statistics.AnalysisInfo.JobType;
@@ -86,6 +87,30 @@ public class OlapAnalysisTaskTest {
Assertions.assertFalse(tableSample.isPercent());
}
+ @Test
+ public void testShouldCollectHotValue() {
+ OlapAnalysisTask olapAnalysisTask = new OlapAnalysisTask();
+ olapAnalysisTask.info = new AnalysisInfoBuilder().build();
+ Assertions.assertFalse(olapAnalysisTask.shouldCollectHotValue());
+
+ olapAnalysisTask.info = new
AnalysisInfoBuilder().setCollectHotValue(true).build();
+ Assertions.assertTrue(olapAnalysisTask.shouldCollectHotValue());
+
+ olapAnalysisTask.info = new
AnalysisInfoBuilder().setCollectHotValue(false).build();
+ Assertions.assertFalse(olapAnalysisTask.shouldCollectHotValue());
+ }
+
+ @Test
+ public void testCollectHotValueUseShortSerializedName() {
+ AnalysisInfo info = new
AnalysisInfoBuilder().setCollectHotValue(false).build();
+ String json = GsonUtils.GSON.toJson(info);
+ Assertions.assertTrue(json.contains("\"chv\":false"));
+ Assertions.assertFalse(json.contains("collectHotValue"));
+
+ AnalysisInfo deserialized = GsonUtils.GSON.fromJson(json,
AnalysisInfo.class);
+ Assertions.assertFalse(deserialized.collectHotValue);
+ }
+
// test auto small table
@Test
public void testSample3() {
@@ -795,6 +820,7 @@ public class OlapAnalysisTaskTest {
AnalysisInfoBuilder builder = new AnalysisInfoBuilder();
builder.setJobType(AnalysisInfo.JobType.MANUAL);
builder.setColName("testCol");
+ builder.setCollectHotValue(true);
task.info = builder.build();
task.catalog = catalogIf;
task.db = databaseIf;
@@ -802,6 +828,92 @@ public class OlapAnalysisTaskTest {
}
}
+ @Test
+ public void testDoFullWithoutHotValue() throws Exception {
+ CatalogIf catalogIf = Mockito.mock(CatalogIf.class);
+ DatabaseIf databaseIf = Mockito.mock(DatabaseIf.class);
+ OlapTable tableIf = Mockito.mock(OlapTable.class);
+ Mockito.when(tableIf.getId()).thenReturn(30001L);
+ Mockito.when(tableIf.getName()).thenReturn("testTbl");
+ Mockito.when(catalogIf.getId()).thenReturn(10001L);
+ Mockito.when(catalogIf.getName()).thenReturn("catalogName");
+ Mockito.when(databaseIf.getId()).thenReturn(20001L);
+ Mockito.when(databaseIf.getFullName()).thenReturn("testDb");
+
+ try (MockedStatic<StatisticsUtil> mockedStatisticsUtil =
Mockito.mockStatic(
+ StatisticsUtil.class, Mockito.CALLS_REAL_METHODS)) {
+
mockedStatisticsUtil.when(StatisticsUtil::enablePartitionAnalyze).thenReturn(false);
+
+ OlapAnalysisTask task = Mockito.spy(new OlapAnalysisTask());
+ Mockito.doAnswer(invocation -> {
+ String sql = invocation.getArgument(0);
+ Assertions.assertTrue(sql.startsWith("SELECT CONCAT(30001,
'-', -1, '-', 'testCol') AS `id`"), sql);
+ Assertions.assertTrue(sql.contains("null as `hot_value`"),
sql);
+ Assertions.assertTrue(sql.contains(
+ "FROM (SELECT `testCol` FROM
`catalogName`.`testDb`.`testTbl` ) __lc_t"), sql);
+ Assertions.assertFalse(sql.contains("CROSS JOIN cte3"), sql);
+ Assertions.assertFalse(sql.contains("GROUP BY `hash_value`"),
sql);
+ return null;
+ }).when(task).runQuery(Mockito.anyString());
+
+ task.col = new Column("testCol",
Type.fromPrimitiveType(PrimitiveType.INT),
+ true, null, null, null);
+ task.tbl = tableIf;
+ AnalysisInfoBuilder builder = new AnalysisInfoBuilder();
+ builder.setJobType(AnalysisInfo.JobType.MANUAL);
+ builder.setColName("testCol");
+ builder.setCollectHotValue(false);
+ task.info = builder.build();
+ task.catalog = catalogIf;
+ task.db = databaseIf;
+ task.doFull();
+ }
+ }
+
+ @Test
+ public void testDoSampleAlwaysCollectsHotValue() throws Exception {
+ CatalogIf catalogIf = Mockito.mock(CatalogIf.class);
+ DatabaseIf databaseIf = Mockito.mock(DatabaseIf.class);
+ OlapTable tableIf = Mockito.mock(OlapTable.class);
+ Mockito.when(tableIf.getId()).thenReturn(30001L);
+ Mockito.when(tableIf.getName()).thenReturn("testTbl");
+ Mockito.when(tableIf.getRowCount()).thenReturn(1000L);
+ Mockito.when(catalogIf.getId()).thenReturn(10001L);
+ Mockito.when(catalogIf.getName()).thenReturn("catalogName");
+ Mockito.when(databaseIf.getId()).thenReturn(20001L);
+ Mockito.when(databaseIf.getFullName()).thenReturn("testDb");
+
+ OlapAnalysisTask task = Mockito.spy(new OlapAnalysisTask());
+ Mockito.doReturn(new ResultRow(Lists.newArrayList("1",
"2"))).when(task).collectMinMax();
+ Mockito.doNothing().when(task).getSampleParams(ArgumentMatchers.any(),
ArgumentMatchers.anyLong());
+ Mockito.doAnswer(invocation -> {
+ String sql = invocation.getArgument(0);
+ Assertions.assertTrue(sql.contains("as `hot_value`"), sql);
+ Assertions.assertTrue(sql.contains("cte3"), sql);
+ Assertions.assertTrue(sql.contains("CROSS JOIN cte3"), sql);
+ Assertions.assertTrue(sql.contains("LIMIT 10"), sql);
+ Assertions.assertFalse(sql.contains("null as `hot_value`"), sql);
+ return null;
+ }).when(task).runQuery(Mockito.anyString());
+
+ task.col = new Column("testCol",
Type.fromPrimitiveType(PrimitiveType.INT),
+ true, null, null, null);
+ task.tbl = tableIf;
+ AnalysisInfoBuilder builder = new AnalysisInfoBuilder();
+ builder.setJobType(AnalysisInfo.JobType.MANUAL);
+ builder.setColName("testCol");
+ builder.setCollectHotValue(false);
+ task.info = builder.build();
+ task.catalog = catalogIf;
+ task.db = databaseIf;
+ task.tableSample = new TableSample(false, 100L);
+
+ Mockito.doReturn(true).when(task).useLinearAnalyzeTemplate();
+ task.doSample();
+ Mockito.doReturn(false).when(task).useLinearAnalyzeTemplate();
+ task.doSample();
+ }
+
@Test
public void testDoFullHotValueStringColumn() throws Exception {
CatalogIf catalogIf = Mockito.mock(CatalogIf.class);
@@ -835,6 +947,7 @@ public class OlapAnalysisTaskTest {
AnalysisInfoBuilder builder = new AnalysisInfoBuilder();
builder.setJobType(AnalysisInfo.JobType.MANUAL);
builder.setColName("strCol");
+ builder.setCollectHotValue(true);
task.info = builder.build();
task.catalog = catalogIf;
task.db = databaseIf;
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java
b/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java
index b169a3ac73e..8bccbd2929f 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java
@@ -409,6 +409,10 @@ class StatisticsUtilTest {
@Test
void testGetHotValues() {
+ Assertions.assertNull(StatisticsUtil.getHotValues(null, Type.INT));
+ Assertions.assertNull(StatisticsUtil.getHotValues("null", Type.INT));
+ Assertions.assertTrue(StatisticsUtil.getHotValues("",
Type.INT).isEmpty());
+
String value1 = "1234 :0.35 ;222 :0.34";
Map<Literal, Float> hotValues = StatisticsUtil.getHotValues(value1,
Type.INT);
Map<Literal, Float> hotValuesAfterFilter =
StatisticsUtil.getHotValuesWithOriginalThreshold(hotValues, 100);
diff --git
a/regression-test/data/nereids_rules_p0/distinct_split/disitinct_split.out
b/regression-test/data/nereids_rules_p0/distinct_split/disitinct_split.out
index 9a18cab3879..b239f4d54f8 100644
--- a/regression-test/data/nereids_rules_p0/distinct_split/disitinct_split.out
+++ b/regression-test/data/nereids_rules_p0/distinct_split/disitinct_split.out
@@ -369,13 +369,11 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
------hashAgg[DISTINCT_GLOBAL]
--------hashAgg[DISTINCT_LOCAL]
----------hashAgg[GLOBAL]
-------------hashAgg[LOCAL]
---------------PhysicalCteConsumer ( cteId=CTEId#0 )
+------------PhysicalCteConsumer ( cteId=CTEId#0 )
------hashAgg[DISTINCT_GLOBAL]
--------hashAgg[DISTINCT_LOCAL]
----------hashAgg[GLOBAL]
-------------hashAgg[LOCAL]
---------------PhysicalCteConsumer ( cteId=CTEId#0 )
+------------PhysicalCteConsumer ( cteId=CTEId#0 )
-- !multi_count_mulitcols_with_gby --
PhysicalCteAnchor ( cteId=CTEId#0 )
@@ -386,13 +384,11 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
------hashAgg[GLOBAL]
--------hashAgg[LOCAL]
----------hashAgg[GLOBAL]
-------------hashAgg[LOCAL]
---------------PhysicalCteConsumer ( cteId=CTEId#0 )
+------------PhysicalCteConsumer ( cteId=CTEId#0 )
------hashAgg[GLOBAL]
--------hashAgg[LOCAL]
----------hashAgg[GLOBAL]
-------------hashAgg[LOCAL]
---------------PhysicalCteConsumer ( cteId=CTEId#0 )
+------------PhysicalCteConsumer ( cteId=CTEId#0 )
-- !three_count_mulitcols_without_gby --
PhysicalCteAnchor ( cteId=CTEId#0 )
@@ -404,18 +400,15 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
--------hashAgg[DISTINCT_GLOBAL]
----------hashAgg[DISTINCT_LOCAL]
------------hashAgg[GLOBAL]
---------------hashAgg[LOCAL]
-----------------PhysicalCteConsumer ( cteId=CTEId#0 )
+--------------PhysicalCteConsumer ( cteId=CTEId#0 )
--------hashAgg[DISTINCT_GLOBAL]
----------hashAgg[DISTINCT_LOCAL]
------------hashAgg[GLOBAL]
---------------hashAgg[LOCAL]
-----------------PhysicalCteConsumer ( cteId=CTEId#0 )
+--------------PhysicalCteConsumer ( cteId=CTEId#0 )
------hashAgg[DISTINCT_GLOBAL]
--------hashAgg[DISTINCT_LOCAL]
----------hashAgg[GLOBAL]
-------------hashAgg[LOCAL]
---------------PhysicalCteConsumer ( cteId=CTEId#0 )
+------------PhysicalCteConsumer ( cteId=CTEId#0 )
-- !four_count_mulitcols_with_gby --
PhysicalCteAnchor ( cteId=CTEId#0 )
@@ -428,23 +421,19 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
----------hashAgg[GLOBAL]
------------hashAgg[LOCAL]
--------------hashAgg[GLOBAL]
-----------------hashAgg[LOCAL]
-------------------PhysicalCteConsumer ( cteId=CTEId#0 )
+----------------PhysicalCteConsumer ( cteId=CTEId#0 )
----------hashAgg[GLOBAL]
------------hashAgg[LOCAL]
--------------hashAgg[GLOBAL]
-----------------hashAgg[LOCAL]
-------------------PhysicalCteConsumer ( cteId=CTEId#0 )
+----------------PhysicalCteConsumer ( cteId=CTEId#0 )
--------hashAgg[GLOBAL]
----------hashAgg[LOCAL]
------------hashAgg[GLOBAL]
---------------hashAgg[LOCAL]
-----------------PhysicalCteConsumer ( cteId=CTEId#0 )
+--------------PhysicalCteConsumer ( cteId=CTEId#0 )
------hashAgg[GLOBAL]
--------hashAgg[LOCAL]
----------hashAgg[GLOBAL]
-------------hashAgg[LOCAL]
---------------PhysicalCteConsumer ( cteId=CTEId#0 )
+------------PhysicalCteConsumer ( cteId=CTEId#0 )
-- !has_other_func --
PhysicalCteAnchor ( cteId=CTEId#0 )
@@ -478,13 +467,11 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
----------hashAgg[GLOBAL]
------------hashAgg[LOCAL]
--------------hashAgg[GLOBAL]
-----------------hashAgg[LOCAL]
-------------------PhysicalCteConsumer ( cteId=CTEId#0 )
+----------------PhysicalCteConsumer ( cteId=CTEId#0 )
----------hashAgg[GLOBAL]
------------hashAgg[LOCAL]
--------------hashAgg[GLOBAL]
-----------------hashAgg[LOCAL]
-------------------PhysicalCteConsumer ( cteId=CTEId#0 )
+----------------PhysicalCteConsumer ( cteId=CTEId#0 )
-- !multi_count_with_gby --
PhysicalCteAnchor ( cteId=CTEId#0 )
@@ -495,13 +482,11 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
------hashAgg[GLOBAL]
--------hashAgg[LOCAL]
----------hashAgg[GLOBAL]
-------------hashAgg[LOCAL]
---------------PhysicalCteConsumer ( cteId=CTEId#0 )
+------------PhysicalCteConsumer ( cteId=CTEId#0 )
------hashAgg[GLOBAL]
--------hashAgg[LOCAL]
----------hashAgg[GLOBAL]
-------------hashAgg[LOCAL]
---------------PhysicalCteConsumer ( cteId=CTEId#0 )
+------------PhysicalCteConsumer ( cteId=CTEId#0 )
-- !multi_sum_with_gby --
PhysicalCteAnchor ( cteId=CTEId#0 )
@@ -512,13 +497,11 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
------hashAgg[GLOBAL]
--------hashAgg[LOCAL]
----------hashAgg[GLOBAL]
-------------hashAgg[LOCAL]
---------------PhysicalCteConsumer ( cteId=CTEId#0 )
+------------PhysicalCteConsumer ( cteId=CTEId#0 )
------hashAgg[GLOBAL]
--------hashAgg[LOCAL]
----------hashAgg[GLOBAL]
-------------hashAgg[LOCAL]
---------------PhysicalCteConsumer ( cteId=CTEId#0 )
+------------PhysicalCteConsumer ( cteId=CTEId#0 )
-- !sum_count_with_gby --
PhysicalCteAnchor ( cteId=CTEId#0 )
@@ -529,12 +512,10 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
------hashAgg[GLOBAL]
--------hashAgg[LOCAL]
----------hashAgg[GLOBAL]
-------------hashAgg[LOCAL]
---------------PhysicalCteConsumer ( cteId=CTEId#0 )
+------------PhysicalCteConsumer ( cteId=CTEId#0 )
------hashAgg[GLOBAL]
--------hashAgg[GLOBAL]
-----------hashAgg[LOCAL]
-------------PhysicalCteConsumer ( cteId=CTEId#0 )
+----------PhysicalCteConsumer ( cteId=CTEId#0 )
-- !has_grouping --
PhysicalResultSink
diff --git
a/regression-test/suites/statistics/test_full_analyze_hot_value.groovy
b/regression-test/suites/statistics/test_full_analyze_hot_value.groovy
index 92211e69d05..f2e5031eb19 100644
--- a/regression-test/suites/statistics/test_full_analyze_hot_value.groovy
+++ b/regression-test/suites/statistics/test_full_analyze_hot_value.groovy
@@ -22,7 +22,7 @@ suite("test_full_analyze_hot_value") {
sql """use test_full_analyze_hot_value"""
sql """set global enable_auto_analyze=false"""
- // Test 1: Full analyze collects hot_value for varchar column with skewed distribution
+ // Test 1: Full analyze does not collect hot_value by default.
sql """drop table if exists full_hot_skew"""
sql """CREATE TABLE full_hot_skew (
key1 int NULL,
@@ -43,7 +43,20 @@ suite("test_full_analyze_hot_value") {
logger.info("Test1 result: " + result)
assertEquals(1, result.size())
assertEquals("100.0", result[0][2])
- // Full analyze should now collect hot_value
+ assertEquals("null", String.valueOf(result[0][17]))
+
+ result = sql """show column cached stats full_hot_skew(value1)"""
+ logger.info("Test1 cached result: " + result)
+ assertEquals(1, result.size())
+ assertEquals("null", String.valueOf(result[0][17]))
+
+ // Test 2: Full analyze collects hot_value when explicitly enabled.
+ sql """drop stats full_hot_skew"""
+ sql """analyze table full_hot_skew with sync with hot value"""
+ result = sql """show column stats full_hot_skew(value1)"""
+ logger.info("Test2 result: " + result)
+ assertEquals(1, result.size())
+ assertEquals("100.0", result[0][2])
assertTrue(result[0][17].contains(":"), "Full analyze should collect
hot_value, but got " + result[0][17])
String[] hotValues = result[0][17].split(";")
assertEquals(2, hotValues.length)
@@ -52,22 +65,22 @@ suite("test_full_analyze_hot_value") {
// Verify cached stats also have hot_value
result = sql """show column cached stats full_hot_skew(value1)"""
- logger.info("Test1 cached result: " + result)
+ logger.info("Test2 cached result: " + result)
assertEquals(1, result.size())
hotValues = result[0][17].split(";")
assertEquals(2, hotValues.length)
assertTrue(hotValues[0].trim() == "'1':0.5" || hotValues[0].trim() ==
"'0':0.5")
assertTrue(hotValues[1].trim() == "'1':0.5" || hotValues[1].trim() ==
"'0':0.5")
- // Test 2: Full analyze collects hot_value for int column
+ // Test 3: Explicit full hot value collection works for int column.
result = sql """show column stats full_hot_skew(key1)"""
- logger.info("Test2 result: " + result)
+ logger.info("Test3 result: " + result)
assertEquals(1, result.size())
assertEquals("100.0", result[0][2])
// key1 has 100 unique values, top 10 will each have proportion 0.01 -> ROUND to 0.01
assertTrue(result[0][17].contains(":"), "Full analyze should collect
hot_value for int column")
- // Test 3: Full analyze with special characters in values
+ // Test 4: Explicit full hot value collection works with special characters in values.
sql """drop table if exists full_hot_special"""
sql """CREATE TABLE full_hot_special (
key1 int NULL,
@@ -82,18 +95,18 @@ suite("test_full_analyze_hot_value") {
"""
sql """insert into full_hot_special select number, " : ;a" from
numbers("number"="100")"""
- sql """analyze table full_hot_special with sync"""
+ sql """analyze table full_hot_special with sync with hot value"""
result = sql """show column stats full_hot_special(value1)"""
- logger.info("Test3 result: " + result)
+ logger.info("Test4 result: " + result)
assertEquals(1, result.size())
// All 100 rows have the same value " : ;a", so it should appear with ratio 1.0
assertEquals("' : ;a':1.0", result[0][17])
- // Test 4: Full analyze then sample analyze, hot_value should be updated
+ // Test 5: Sample analyze collects hot_value by default.
sql """drop stats full_hot_skew"""
sql """analyze table full_hot_skew with sample rows 400 with sync"""
result = sql """show column stats full_hot_skew(value1)"""
- logger.info("Test4 result: " + result)
+ logger.info("Test5 result: " + result)
assertEquals(1, result.size())
assertTrue(result[0][17].contains(":"), "Sample analyze should also
collect hot_value")
hotValues = result[0][17].split(";")
@@ -101,11 +114,17 @@ suite("test_full_analyze_hot_value") {
assertTrue(hotValues[0].trim() == "'1':0.5" || hotValues[0].trim() ==
"'0':0.5")
assertTrue(hotValues[1].trim() == "'1':0.5" || hotValues[1].trim() ==
"'0':0.5")
- // Test 5: Verify full analyze produces same hot_value as sample analyze for same data
+ // Test 6: Sample analyze rejects WITH HOT VALUE because it always collects hot_value.
+ test {
+ sql """analyze table full_hot_skew with sample rows 400 with sync with
hot value"""
+ exception "Sample analyze always collects hot value"
+ }
+
+ // Test 7: Explicit full analyze produces same hot_value as sample analyze for same data.
sql """drop stats full_hot_skew"""
- sql """analyze table full_hot_skew with sync"""
+ sql """analyze table full_hot_skew with sync with hot value"""
def fullResult = sql """show column stats full_hot_skew(value1)"""
- logger.info("Test5 full result: " + fullResult)
+ logger.info("Test7 full result: " + fullResult)
assertEquals(1, fullResult.size())
assertTrue(fullResult[0][17].contains(":"))
def fullParts = fullResult[0][17].split(";").collect { it.trim() }.sort()
@@ -113,14 +132,14 @@ suite("test_full_analyze_hot_value") {
sql """drop stats full_hot_skew"""
sql """analyze table full_hot_skew with sample rows 40000 with sync"""
def sampleResult = sql """show column stats full_hot_skew(value1)"""
- logger.info("Test5 sample result: " + sampleResult)
+ logger.info("Test7 sample result: " + sampleResult)
assertEquals(1, sampleResult.size())
assertTrue(sampleResult[0][17].contains(":"))
// Both full and sample should produce the same hot_value entries (order may differ)
def sampleParts = sampleResult[0][17].split(";").collect { it.trim()
}.sort()
assertEquals(fullParts, sampleParts)
- // Test 6: Full analyze on empty table should produce an empty hot_value string
+ // Test 8: Explicit full analyze on empty table should produce an empty hot_value string.
sql """drop table if exists full_hot_empty"""
sql """CREATE TABLE full_hot_empty (
key1 int NULL,
@@ -133,14 +152,14 @@ suite("test_full_analyze_hot_value") {
"replication_num" = "1"
)
"""
- sql """analyze table full_hot_empty with sync"""
+ sql """analyze table full_hot_empty with sync with hot value"""
result = sql """show column stats full_hot_empty(value1)"""
- logger.info("Test6 empty table result: " + result)
+ logger.info("Test8 empty table result: " + result)
assertEquals(1, result.size())
assertEquals("0.0", result[0][2])
assertEquals("''", result[0][17])
- // Test 7: Full analyze on all-NULL column should produce an empty hot_value string
+ // Test 9: Explicit full analyze on all-NULL column should produce an empty hot_value string.
sql """drop table if exists full_hot_all_null"""
sql """CREATE TABLE full_hot_all_null (
key1 int NULL,
@@ -154,9 +173,9 @@ suite("test_full_analyze_hot_value") {
)
"""
sql """insert into full_hot_all_null select number, null from
numbers("number"="100")"""
- sql """analyze table full_hot_all_null with sync"""
+ sql """analyze table full_hot_all_null with sync with hot value"""
result = sql """show column stats full_hot_all_null(value1)"""
- logger.info("Test7 all-null result: " + result)
+ logger.info("Test9 all-null result: " + result)
assertEquals(1, result.size())
assertEquals("100.0", result[0][2])
assertEquals("100.0", result[0][4])
diff --git a/regression-test/suites/statistics/test_hot_value.groovy
b/regression-test/suites/statistics/test_hot_value.groovy
index 4d8ee5643df..8b62ff715ec 100644
--- a/regression-test/suites/statistics/test_hot_value.groovy
+++ b/regression-test/suites/statistics/test_hot_value.groovy
@@ -80,7 +80,7 @@ suite("test_hot_value") {
sql """insert into test2 select number, " : ;a" from
numbers("number"="10000")"""
wait_row_count_reported("test_hot_value", "test1", 0, 4, "10000")
wait_row_count_reported("test_hot_value", "test2", 0, 4, "10000")
- sql """analyze table test1 with sync"""
+ sql """analyze table test1 with sync with hot value"""
explain {
sql("memo plan select * from test1")
contains "hotValues=("
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]