This is an automated email from the ASF dual-hosted git repository. lijibing pushed a commit to branch high-priority-column in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/high-priority-column by this push: new e8b16d671c0 Unit test (#32398) e8b16d671c0 is described below commit e8b16d671c0bba4cc42faeefd5caa2d7b42d4a72 Author: Jibing-Li <64681310+jibing...@users.noreply.github.com> AuthorDate: Mon Mar 18 16:44:32 2024 +0800 Unit test (#32398) --- .../doris/statistics/FollowerColumnSender.java | 40 +-- .../doris/statistics/StatisticsJobAppender.java | 8 +- .../doris/statistics/AnalysisManagerTest.java | 306 +++++++++++++++++++++ .../doris/statistics/FollowerColumnSenderTest.java | 65 +++++ .../statistics/StatisticsAutoCollectorTest.java | 141 ++++++++++ .../statistics/StatisticsJobAppenderTest.java | 205 ++++++++++++++ .../doris/statistics/util/StatisticsUtilTest.java | 148 ++++++++++ 7 files changed, 892 insertions(+), 21 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/FollowerColumnSender.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/FollowerColumnSender.java index 51ff9501308..8c6064ebac1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/FollowerColumnSender.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/FollowerColumnSender.java @@ -34,6 +34,7 @@ import org.apache.logging.log4j.Logger; import java.net.InetSocketAddress; import java.util.ArrayList; import java.util.List; +import java.util.Queue; import java.util.Set; import java.util.stream.Collectors; @@ -70,28 +71,14 @@ public class FollowerColumnSender extends MasterDaemon { if (analysisManager.highPriorityColumns.isEmpty() && analysisManager.midPriorityColumns.isEmpty()) { return; } - Set<TQueryColumn> highPriorityColumns - = analysisManager.highPriorityColumns - .stream() - .filter(c -> StatisticsUtil.needAnalyzeColumn(c)) - .map(QueryColumn::toThrift) - .collect(Collectors.toSet()); - Set<TQueryColumn> midPriorityColumns - = analysisManager.midPriorityColumns - .stream() - .filter(c -> StatisticsUtil.needAnalyzeColumn(c)) - .filter(c -> 
!highPriorityColumns.contains(c)) - .map(QueryColumn::toThrift) - .collect(Collectors.toSet()); + Set<TQueryColumn> highs = getNeedAnalyzeColumns(analysisManager.highPriorityColumns); + Set<TQueryColumn> mids = getNeedAnalyzeColumns(analysisManager.midPriorityColumns); + mids.removeAll(highs); analysisManager.highPriorityColumns.clear(); analysisManager.midPriorityColumns.clear(); TSyncQueryColumns queryColumns = new TSyncQueryColumns(); - List<TQueryColumn> highs = new ArrayList<>(); - highs.addAll(highPriorityColumns); - queryColumns.highPriorityColumns = highs; - List<TQueryColumn> mids = new ArrayList<>(); - mids.addAll(midPriorityColumns); - queryColumns.midPriorityColumns = mids; + queryColumns.highPriorityColumns = convertSetToList(highs); + queryColumns.midPriorityColumns = convertSetToList(mids); Frontend master = null; try { InetSocketAddress masterAddress = currentEnv.getHaProtocol().getLeader(); @@ -117,7 +104,7 @@ public class FollowerColumnSender extends MasterDaemon { client = ClientPool.frontendPool.borrowObject(address); client.syncQueryColumns(queryColumns); LOG.info("Send {} high priority columns and {} mid priority columns to master.", - highPriorityColumns.size(), midPriorityColumns.size()); + highs.size(), mids.size()); } catch (Throwable t) { LOG.warn("Failed to sync stats to master: {}", address, t); } finally { @@ -126,4 +113,17 @@ public class FollowerColumnSender extends MasterDaemon { } } } + + protected Set<TQueryColumn> getNeedAnalyzeColumns(Queue<QueryColumn> columnQueue) { + return columnQueue.stream() + .filter(c -> StatisticsUtil.needAnalyzeColumn(c)) + .map(QueryColumn::toThrift) + .collect(Collectors.toSet()); + } + + protected List<TQueryColumn> convertSetToList(Set<TQueryColumn> set) { + List<TQueryColumn> list = new ArrayList<>(); + list.addAll(set); + return list; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobAppender.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobAppender.java index 336171d8858..b30093251d4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobAppender.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobAppender.java @@ -154,7 +154,7 @@ public class StatisticsJobAppender extends MasterDaemon { } } currentTableId = olapTable.getId(); - if (++processed > TABLE_BATCH_SIZE) { + if (++processed >= TABLE_BATCH_SIZE) { return; } } @@ -167,4 +167,10 @@ public class StatisticsJobAppender extends MasterDaemon { currentTableId = 0; lastRoundFinishTime = System.currentTimeMillis(); } + + // For unit test only. + public void setLastRoundFinishTime(long value) { + lastRoundFinishTime = value; + } + } diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java index 53cb0807b80..a998ac8a688 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java @@ -21,13 +21,28 @@ import org.apache.doris.analysis.AnalyzeProperties; import org.apache.doris.analysis.AnalyzeTblStmt; import org.apache.doris.analysis.PartitionNames; import org.apache.doris.analysis.ShowAnalyzeStmt; +import org.apache.doris.analysis.ShowAutoAnalyzeJobsStmt; +import org.apache.doris.analysis.StatementBase; import org.apache.doris.analysis.TableName; +import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.Database; +import org.apache.doris.catalog.DatabaseIf; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.catalog.PrimitiveType; +import org.apache.doris.catalog.Table; import org.apache.doris.common.Config; import org.apache.doris.common.DdlException; +import org.apache.doris.datasource.CatalogIf; +import org.apache.doris.datasource.InternalCatalog; +import 
org.apache.doris.nereids.trees.expressions.ExprId; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.types.IntegerType; import org.apache.doris.statistics.AnalysisInfo.AnalysisType; import org.apache.doris.statistics.AnalysisInfo.JobType; import org.apache.doris.statistics.AnalysisInfo.ScheduleType; import org.apache.doris.statistics.util.StatisticsUtil; +import org.apache.doris.thrift.TQueryColumn; import com.google.common.annotations.VisibleForTesting; import mockit.Expectations; @@ -44,6 +59,8 @@ import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Optional; +import java.util.Set; // CHECKSTYLE OFF public class AnalysisManagerTest { @@ -341,4 +358,293 @@ public class AnalysisManagerTest { Assertions.assertEquals(AnalysisState.FINISHED, analysisInfos.get(1).getState()); Assertions.assertEquals(AnalysisState.FAILED, analysisInfos.get(2).getState()); } + + @Test + public void testAddQuerySlotToQueue() throws DdlException { + AnalysisManager analysisManager = new AnalysisManager(); + InternalCatalog testCatalog = new InternalCatalog(); + Database db = new Database(100, "testDb"); + testCatalog.unprotectCreateDb(db); + Column column1 = new Column("placeholder", PrimitiveType.INT); + Column column2 = new Column("placeholder", PrimitiveType.INT); + Column column3 = new Column("test", PrimitiveType.INT); + List<Column> schema = new ArrayList<>(); + schema.add(column1); + OlapTable table = new OlapTable(200, "testTable", schema, null, null, null); + db.createTableWithLock(table, true, false); + + new MockUp<Table>() { + @Mock + public DatabaseIf getDatabase() { + return db; + } + }; + + new MockUp<Database>() { + @Mock + public CatalogIf getCatalog() { + return testCatalog; + } + }; + + SlotReference slot1 = new SlotReference(new ExprId(1), "slot1", IntegerType.INSTANCE, true, + new ArrayList<>(), table, 
column1, Optional.empty(), null); + SlotReference slot2 = new SlotReference(new ExprId(2), "slot2", IntegerType.INSTANCE, true, + new ArrayList<>(), table, column2, Optional.empty(), null); + SlotReference slot3 = new SlotReference(new ExprId(3), "slot3", IntegerType.INSTANCE, true, + new ArrayList<>(), table, column3, Optional.empty(), null); + Set<Slot> set1 = new HashSet<>(); + set1.add(slot1); + set1.add(slot2); + analysisManager.updateHighPriorityColumn(set1); + Assertions.assertEquals(2, analysisManager.highPriorityColumns.size()); + QueryColumn result = analysisManager.highPriorityColumns.poll(); + Assertions.assertEquals("placeholder", result.colName); + Assertions.assertEquals(testCatalog.getId(), result.catalogId); + Assertions.assertEquals(db.getId(), result.dbId); + Assertions.assertEquals(table.getId(), result.tblId); + + result = analysisManager.highPriorityColumns.poll(); + Assertions.assertEquals("placeholder", result.colName); + Assertions.assertEquals(testCatalog.getId(), result.catalogId); + Assertions.assertEquals(db.getId(), result.dbId); + Assertions.assertEquals(table.getId(), result.tblId); + Assertions.assertEquals(0, analysisManager.highPriorityColumns.size()); + Set<Slot> set2 = new HashSet<>(); + set2.add(slot3); + for (int i = 0; i < AnalysisManager.COLUMN_QUEUE_SIZE / 2 - 1; i++) { + analysisManager.updateHighPriorityColumn(set1); + } + Assertions.assertEquals(AnalysisManager.COLUMN_QUEUE_SIZE - 2, analysisManager.highPriorityColumns.size()); + analysisManager.updateHighPriorityColumn(set2); + Assertions.assertEquals(AnalysisManager.COLUMN_QUEUE_SIZE - 1, analysisManager.highPriorityColumns.size()); + analysisManager.updateHighPriorityColumn(set2); + Assertions.assertEquals(AnalysisManager.COLUMN_QUEUE_SIZE, analysisManager.highPriorityColumns.size()); + analysisManager.updateHighPriorityColumn(set2); + Assertions.assertEquals(AnalysisManager.COLUMN_QUEUE_SIZE, analysisManager.highPriorityColumns.size()); + + for (int i = 0; i < 
AnalysisManager.COLUMN_QUEUE_SIZE - 2; i++) { + result = analysisManager.highPriorityColumns.poll(); + Assertions.assertEquals("placeholder", result.colName); + Assertions.assertEquals(testCatalog.getId(), result.catalogId); + Assertions.assertEquals(db.getId(), result.dbId); + Assertions.assertEquals(table.getId(), result.tblId); + } + Assertions.assertEquals(2, analysisManager.highPriorityColumns.size()); + result = analysisManager.highPriorityColumns.poll(); + Assertions.assertEquals("test", result.colName); + Assertions.assertEquals(testCatalog.getId(), result.catalogId); + Assertions.assertEquals(db.getId(), result.dbId); + Assertions.assertEquals(table.getId(), result.tblId); + + Assertions.assertEquals(1, analysisManager.highPriorityColumns.size()); + result = analysisManager.highPriorityColumns.poll(); + Assertions.assertEquals("test", result.colName); + Assertions.assertEquals(testCatalog.getId(), result.catalogId); + Assertions.assertEquals(db.getId(), result.dbId); + Assertions.assertEquals(table.getId(), result.tblId); + + result = analysisManager.highPriorityColumns.poll(); + Assertions.assertNull(result); + } + + @Test + public void testMergeFollowerColumn() throws DdlException { + AnalysisManager analysisManager = new AnalysisManager(); + QueryColumn placeholder = new QueryColumn(1, 2, 3, "placeholder"); + QueryColumn high1 = new QueryColumn(10, 20, 30, "high1"); + QueryColumn high2 = new QueryColumn(11, 21, 31, "high2"); + QueryColumn mid1 = new QueryColumn(100, 200, 300, "mid1"); + QueryColumn mid2 = new QueryColumn(101, 201, 301, "mid2"); + List<TQueryColumn> highColumns = new ArrayList<>(); + highColumns.add(high1.toThrift()); + highColumns.add(high2.toThrift()); + List<TQueryColumn> midColumns = new ArrayList<>(); + midColumns.add(mid1.toThrift()); + midColumns.add(mid2.toThrift()); + for (int i = 0; i < AnalysisManager.COLUMN_QUEUE_SIZE - 1; i++) { + analysisManager.highPriorityColumns.offer(placeholder); + } + for (int i = 0; i < 
AnalysisManager.COLUMN_QUEUE_SIZE - 2; i++) { + analysisManager.midPriorityColumns.offer(placeholder); + } + Assertions.assertEquals(AnalysisManager.COLUMN_QUEUE_SIZE - 1, analysisManager.highPriorityColumns.size()); + Assertions.assertEquals(AnalysisManager.COLUMN_QUEUE_SIZE - 2, analysisManager.midPriorityColumns.size()); + analysisManager.mergeFollowerQueryColumns(highColumns, midColumns); + Assertions.assertEquals(AnalysisManager.COLUMN_QUEUE_SIZE, analysisManager.highPriorityColumns.size()); + Assertions.assertEquals(AnalysisManager.COLUMN_QUEUE_SIZE, analysisManager.midPriorityColumns.size()); + for (int i = 0; i < AnalysisManager.COLUMN_QUEUE_SIZE - 1; i++) { + QueryColumn poll = analysisManager.highPriorityColumns.poll(); + Assertions.assertEquals("placeholder", poll.colName); + Assertions.assertEquals(1, poll.catalogId); + Assertions.assertEquals(2, poll.dbId); + Assertions.assertEquals(3, poll.tblId); + } + QueryColumn poll = analysisManager.highPriorityColumns.poll(); + Assertions.assertEquals("high1", poll.colName); + Assertions.assertEquals(10, poll.catalogId); + Assertions.assertEquals(20, poll.dbId); + Assertions.assertEquals(30, poll.tblId); + Assertions.assertEquals(0, analysisManager.highPriorityColumns.size()); + + for (int i = 0; i < AnalysisManager.COLUMN_QUEUE_SIZE - 2; i++) { + QueryColumn pol2 = analysisManager.midPriorityColumns.poll(); + Assertions.assertEquals("placeholder", pol2.colName); + Assertions.assertEquals(1, pol2.catalogId); + Assertions.assertEquals(2, pol2.dbId); + Assertions.assertEquals(3, pol2.tblId); + } + QueryColumn pol2 = analysisManager.midPriorityColumns.poll(); + Assertions.assertEquals("mid1", pol2.colName); + Assertions.assertEquals(100, pol2.catalogId); + Assertions.assertEquals(200, pol2.dbId); + Assertions.assertEquals(300, pol2.tblId); + + pol2 = analysisManager.midPriorityColumns.poll(); + Assertions.assertEquals("mid2", pol2.colName); + Assertions.assertEquals(101, pol2.catalogId); + 
Assertions.assertEquals(201, pol2.dbId); + Assertions.assertEquals(301, pol2.tblId); + Assertions.assertEquals(0, analysisManager.midPriorityColumns.size()); + } + + @Test + public void testShowAutoJobs() { + AnalysisManager manager = new AnalysisManager(); + TableName high1 = new TableName("catalog1", "db1", "high1"); + TableName high2 = new TableName("catalog2", "db2", "high2"); + TableName mid1 = new TableName("catalog3", "db3", "mid1"); + TableName mid2 = new TableName("catalog4", "db4", "mid2"); + TableName low1 = new TableName("catalog5", "db5", "low1"); + + manager.highPriorityJobs.put(high1, new HashSet<String>()); + manager.highPriorityJobs.get(high1).add("col1"); + manager.highPriorityJobs.get(high1).add("col2"); + manager.highPriorityJobs.put(high2, new HashSet<String>()); + manager.highPriorityJobs.get(high2).add("col3"); + manager.midPriorityJobs.put(mid1, new HashSet<String>()); + manager.midPriorityJobs.get(mid1).add("col4"); + manager.midPriorityJobs.put(mid2, new HashSet<String>()); + manager.midPriorityJobs.get(mid2).add("col5"); + manager.lowPriorityJobs.put(low1, new HashSet<String>()); + manager.lowPriorityJobs.get(low1).add("col6"); + manager.lowPriorityJobs.get(low1).add("col7"); + + new MockUp<StatementBase>() { + @Mock + public boolean isAnalyzed() { + return true; + } + }; + ShowAutoAnalyzeJobsStmt stmt = new ShowAutoAnalyzeJobsStmt(null, null); + List<AutoAnalysisPendingJob> autoAnalysisPendingJobs = manager.showAutoPendingJobs(stmt); + Assertions.assertEquals(5, autoAnalysisPendingJobs.size()); + AutoAnalysisPendingJob job = autoAnalysisPendingJobs.get(0); + Assertions.assertEquals("catalog1", job.catalogName); + Assertions.assertEquals("db1", job.dbName); + Assertions.assertEquals("high1", job.tableName); + Assertions.assertEquals(2, job.columnNames.size()); + Assertions.assertTrue(job.columnNames.contains("col1")); + Assertions.assertTrue(job.columnNames.contains("col2")); + Assertions.assertEquals(JobPriority.HIGH, job.priority); + + 
job = autoAnalysisPendingJobs.get(1); + Assertions.assertEquals("catalog2", job.catalogName); + Assertions.assertEquals("db2", job.dbName); + Assertions.assertEquals("high2", job.tableName); + Assertions.assertEquals(1, job.columnNames.size()); + Assertions.assertTrue(job.columnNames.contains("col3")); + Assertions.assertEquals(JobPriority.HIGH, job.priority); + + job = autoAnalysisPendingJobs.get(2); + Assertions.assertEquals("catalog3", job.catalogName); + Assertions.assertEquals("db3", job.dbName); + Assertions.assertEquals("mid1", job.tableName); + Assertions.assertEquals(1, job.columnNames.size()); + Assertions.assertTrue(job.columnNames.contains("col4")); + Assertions.assertEquals(JobPriority.MID, job.priority); + + job = autoAnalysisPendingJobs.get(3); + Assertions.assertEquals("catalog4", job.catalogName); + Assertions.assertEquals("db4", job.dbName); + Assertions.assertEquals("mid2", job.tableName); + Assertions.assertEquals(1, job.columnNames.size()); + Assertions.assertTrue(job.columnNames.contains("col5")); + Assertions.assertEquals(JobPriority.MID, job.priority); + + job = autoAnalysisPendingJobs.get(4); + Assertions.assertEquals("catalog5", job.catalogName); + Assertions.assertEquals("db5", job.dbName); + Assertions.assertEquals("low1", job.tableName); + Assertions.assertEquals(2, job.columnNames.size()); + Assertions.assertTrue(job.columnNames.contains("col6")); + Assertions.assertTrue(job.columnNames.contains("col7")); + Assertions.assertEquals(JobPriority.LOW, job.priority); + + new MockUp<ShowAutoAnalyzeJobsStmt>() { + @Mock + public String getPriority() { + return JobPriority.HIGH.name().toUpperCase(); + } + }; + List<AutoAnalysisPendingJob> highJobs = manager.showAutoPendingJobs(stmt); + Assertions.assertEquals(2, highJobs.size()); + job = highJobs.get(0); + Assertions.assertEquals("catalog1", job.catalogName); + Assertions.assertEquals("db1", job.dbName); + Assertions.assertEquals("high1", job.tableName); + Assertions.assertEquals(2, 
job.columnNames.size()); + Assertions.assertTrue(job.columnNames.contains("col1")); + Assertions.assertTrue(job.columnNames.contains("col2")); + Assertions.assertEquals(JobPriority.HIGH, job.priority); + + job = highJobs.get(1); + Assertions.assertEquals("catalog2", job.catalogName); + Assertions.assertEquals("db2", job.dbName); + Assertions.assertEquals("high2", job.tableName); + Assertions.assertEquals(1, job.columnNames.size()); + Assertions.assertTrue(job.columnNames.contains("col3")); + Assertions.assertEquals(JobPriority.HIGH, job.priority); + + new MockUp<ShowAutoAnalyzeJobsStmt>() { + @Mock + public String getPriority() { + return JobPriority.MID.name().toUpperCase(); + } + }; + List<AutoAnalysisPendingJob> midJobs = manager.showAutoPendingJobs(stmt); + Assertions.assertEquals(2, midJobs.size()); + job = midJobs.get(0); + Assertions.assertEquals("catalog3", job.catalogName); + Assertions.assertEquals("db3", job.dbName); + Assertions.assertEquals("mid1", job.tableName); + Assertions.assertEquals(1, job.columnNames.size()); + Assertions.assertTrue(job.columnNames.contains("col4")); + Assertions.assertEquals(JobPriority.MID, job.priority); + + job = midJobs.get(1); + Assertions.assertEquals("catalog4", job.catalogName); + Assertions.assertEquals("db4", job.dbName); + Assertions.assertEquals("mid2", job.tableName); + Assertions.assertEquals(1, job.columnNames.size()); + Assertions.assertTrue(job.columnNames.contains("col5")); + Assertions.assertEquals(JobPriority.MID, job.priority); + + new MockUp<ShowAutoAnalyzeJobsStmt>() { + @Mock + public String getPriority() { + return JobPriority.LOW.name().toUpperCase(); + } + }; + List<AutoAnalysisPendingJob> lowJobs = manager.showAutoPendingJobs(stmt); + Assertions.assertEquals(1, lowJobs.size()); + job = lowJobs.get(0); + Assertions.assertEquals("catalog5", job.catalogName); + Assertions.assertEquals("db5", job.dbName); + Assertions.assertEquals("low1", job.tableName); + Assertions.assertEquals(2, 
job.columnNames.size()); + Assertions.assertTrue(job.columnNames.contains("col6")); + Assertions.assertTrue(job.columnNames.contains("col7")); + Assertions.assertEquals(JobPriority.LOW, job.priority); + } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/FollowerColumnSenderTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/FollowerColumnSenderTest.java new file mode 100644 index 00000000000..7cbad753994 --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/FollowerColumnSenderTest.java @@ -0,0 +1,65 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.statistics; + +import org.apache.doris.statistics.util.StatisticsUtil; +import org.apache.doris.thrift.TQueryColumn; + +import mockit.Mock; +import mockit.MockUp; +import org.eclipse.jetty.util.BlockingArrayQueue; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.Queue; +import java.util.Set; + +public class FollowerColumnSenderTest { + + @Test + public void testGetNeedAnalyzeColumns() { + new MockUp<StatisticsUtil>() { + boolean[] result = {false, true, false, true, true}; + int i = 0; + @Mock + public boolean needAnalyzeColumn(QueryColumn column) { + return result[i++]; + } + }; + QueryColumn column1 = new QueryColumn(1, 2, 3, "col1"); + QueryColumn column2 = new QueryColumn(1, 2, 3, "col2"); + QueryColumn column3 = new QueryColumn(1, 2, 3, "col3"); + QueryColumn column4 = new QueryColumn(1, 2, 3, "col4"); + Queue<QueryColumn> queue = new BlockingArrayQueue<>(); + queue.add(column1); + queue.add(column2); + queue.add(column3); + queue.add(column4); + queue.add(column4); + Assertions.assertEquals(5, queue.size()); + + FollowerColumnSender sender = new FollowerColumnSender(); + Set<TQueryColumn> needAnalyzeColumns = sender.getNeedAnalyzeColumns(queue); + Assertions.assertEquals(2, needAnalyzeColumns.size()); + Assertions.assertFalse(needAnalyzeColumns.contains(column1.toThrift())); + Assertions.assertTrue(needAnalyzeColumns.contains(column2.toThrift())); + Assertions.assertFalse(needAnalyzeColumns.contains(column3.toThrift())); + Assertions.assertTrue(needAnalyzeColumns.contains(column4.toThrift())); + } + +} diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java new file mode 100644 index 00000000000..45bb521455a --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java @@ -0,0 +1,141 @@ +// Licensed to the 
Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.statistics; + +import org.apache.doris.analysis.TableName; +import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.catalog.PrimitiveType; +import org.apache.doris.common.Pair; +import org.apache.doris.datasource.ExternalTable; +import org.apache.doris.datasource.hive.HMSExternalTable; +import org.apache.doris.datasource.hive.HMSExternalTable.DLAType; +import org.apache.doris.datasource.jdbc.JdbcExternalTable; + +import mockit.Mock; +import mockit.MockUp; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Map.Entry; +import java.util.Set; + +public class StatisticsAutoCollectorTest { + + @Test + public void testFetchJob() { + AnalysisManager manager = new AnalysisManager(); + TableName high1 = new TableName("catalog", "db", "high1"); + TableName high2 = new TableName("catalog", "db", "high2"); + TableName mid1 = new TableName("catalog", "db", "mid1"); + TableName mid2 = new TableName("catalog", "db", "mid2"); + TableName low1 = new 
TableName("catalog", "db", "low1"); + + manager.highPriorityJobs.put(high1, new HashSet<String>()); + manager.highPriorityJobs.get(high1).add("col1"); + manager.highPriorityJobs.get(high1).add("col2"); + manager.highPriorityJobs.put(high2, new HashSet<String>()); + manager.highPriorityJobs.get(high2).add("col3"); + manager.midPriorityJobs.put(mid1, new HashSet<String>()); + manager.midPriorityJobs.get(mid1).add("col4"); + manager.midPriorityJobs.put(mid2, new HashSet<String>()); + manager.midPriorityJobs.get(mid2).add("col5"); + manager.lowPriorityJobs.put(low1, new HashSet<String>()); + manager.lowPriorityJobs.get(low1).add("col6"); + manager.lowPriorityJobs.get(low1).add("col7"); + + + new MockUp<Env>() { + @Mock + public AnalysisManager getAnalysisManager() { + return manager; + } + }; + StatisticsAutoCollector collector = new StatisticsAutoCollector(); + Pair<Entry<TableName, Set<String>>, JobPriority> job = collector.getJob(); + Assertions.assertEquals(high1, job.first.getKey()); + Assertions.assertEquals(2, job.first.getValue().size()); + Assertions.assertTrue(job.first.getValue().contains("col1")); + Assertions.assertTrue(job.first.getValue().contains("col2")); + Assertions.assertEquals(JobPriority.HIGH, job.second); + + job = collector.getJob(); + Assertions.assertEquals(high2, job.first.getKey()); + Assertions.assertEquals(1, job.first.getValue().size()); + Assertions.assertTrue(job.first.getValue().contains("col3")); + Assertions.assertEquals(JobPriority.HIGH, job.second); + + job = collector.getJob(); + Assertions.assertEquals(mid1, job.first.getKey()); + Assertions.assertEquals(1, job.first.getValue().size()); + Assertions.assertTrue(job.first.getValue().contains("col4")); + Assertions.assertEquals(JobPriority.MID, job.second); + + job = collector.getJob(); + Assertions.assertEquals(mid2, job.first.getKey()); + Assertions.assertEquals(1, job.first.getValue().size()); + Assertions.assertTrue(job.first.getValue().contains("col5")); + 
Assertions.assertEquals(JobPriority.MID, job.second); + + job = collector.getJob(); + Assertions.assertEquals(low1, job.first.getKey()); + Assertions.assertEquals(2, job.first.getValue().size()); + Assertions.assertTrue(job.first.getValue().contains("col6")); + Assertions.assertTrue(job.first.getValue().contains("col7")); + Assertions.assertEquals(JobPriority.LOW, job.second); + + job = collector.getJob(); + Assertions.assertNull(job); + } + + @Test + public void testSupportAutoAnalyze() { + StatisticsAutoCollector collector = new StatisticsAutoCollector(); + Assertions.assertFalse(collector.supportAutoAnalyze(null)); + Column column1 = new Column("placeholder", PrimitiveType.INT); + List<Column> schema = new ArrayList<>(); + schema.add(column1); + OlapTable table1 = new OlapTable(200, "testTable", schema, null, null, null); + Assertions.assertTrue(collector.supportAutoAnalyze(table1)); + + ExternalTable externalTable = new JdbcExternalTable(1, "jdbctable", "jdbcdb", null); + Assertions.assertFalse(collector.supportAutoAnalyze(externalTable)); + + new MockUp<HMSExternalTable>() { + @Mock + public DLAType getDlaType() { + return DLAType.ICEBERG; + } + }; + ExternalTable icebergExternalTable = new HMSExternalTable(1, "hmsTable", "hmsDb", null); + Assertions.assertFalse(collector.supportAutoAnalyze(icebergExternalTable)); + + new MockUp<HMSExternalTable>() { + @Mock + public DLAType getDlaType() { + return DLAType.HIVE; + } + }; + ExternalTable hiveExternalTable = new HMSExternalTable(1, "hmsTable", "hmsDb", null); + Assertions.assertTrue(collector.supportAutoAnalyze(hiveExternalTable)); + } +} diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsJobAppenderTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsJobAppenderTest.java new file mode 100644 index 00000000000..cdb8fd6d8d7 --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsJobAppenderTest.java @@ -0,0 +1,205 @@ +// Licensed to the 
Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.statistics; + +import org.apache.doris.analysis.TableName; +import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.Database; +import org.apache.doris.catalog.DatabaseIf; +import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.catalog.PrimitiveType; +import org.apache.doris.catalog.Table; +import org.apache.doris.catalog.TableIf; +import org.apache.doris.common.DdlException; +import org.apache.doris.datasource.InternalCatalog; +import org.apache.doris.statistics.util.StatisticsUtil; + +import mockit.Mock; +import mockit.MockUp; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Queue; +import java.util.Set; +import java.util.UUID; +import java.util.concurrent.ArrayBlockingQueue; + +public class StatisticsJobAppenderTest { + + @Test + public void testAppendQueryColumnToHighAndMidJobMap() throws DdlException { + InternalCatalog testCatalog = new InternalCatalog(); + Database db = new Database(100, "testDb"); + 
testCatalog.unprotectCreateDb(db); + Column column1 = new Column("placeholder", PrimitiveType.INT); + List<Column> schema = new ArrayList<>(); + schema.add(column1); + OlapTable table1 = new OlapTable(200, "testTable", schema, null, null, null); + OlapTable table2 = new OlapTable(200, "testTable2", schema, null, null, null); + OlapTable table3 = new OlapTable(200, "testTable3", schema, null, null, null); + new MockUp<StatisticsUtil>() { + int i = 0; + Table[] tables = {table1, table2, table1, table3, table2}; + + @Mock + public boolean needAnalyzeColumn(QueryColumn column) { + return true; + } + + @Mock + public TableIf findTable(long catalogId, long dbId, long tblId) { + return tables[i++]; + } + }; + + new MockUp<Table>() { + @Mock + public DatabaseIf getDatabase() { + return db; + } + }; + + Queue<QueryColumn> testQueue = new ArrayBlockingQueue<>(100); + Map<TableName, Set<String>> testMap = new HashMap<TableName, Set<String>>(); + QueryColumn high1 = new QueryColumn(10, 20, 30, "high1"); + testQueue.add(high1); + + StatisticsJobAppender appender = new StatisticsJobAppender(); + appender.appendColumnsToJobs(testQueue, testMap); + Assertions.assertEquals(1, testMap.size()); + Assertions.assertEquals(1, testMap.values().size()); + Assertions.assertTrue(testMap.get(new TableName("internal", "testDb", "testTable")).contains("high1")); + + QueryColumn high2 = new QueryColumn(10, 20, 30, "high2"); + QueryColumn high3 = new QueryColumn(10, 20, 30, "high3"); + testQueue.add(high2); + testQueue.add(high3); + appender.appendColumnsToJobs(testQueue, testMap); + Assertions.assertEquals(2, testMap.size()); + + Set<String> table1Column = testMap.get(new TableName("internal", "testDb", "testTable")); + Assertions.assertEquals(2, table1Column.size()); + Assertions.assertTrue(table1Column.contains("high1")); + Assertions.assertTrue(table1Column.contains("high3")); + + Set<String> table2Column = testMap.get(new TableName("internal", "testDb", "testTable2")); + 
Assertions.assertEquals(1, table2Column.size()); + Assertions.assertTrue(table2Column.contains("high2")); + + for (int i = 0; i < StatisticsJobAppender.JOB_MAP_SIZE - 2; i++) { + testMap.put(new TableName("a", "b", UUID.randomUUID().toString()), new HashSet<>()); + } + Assertions.assertEquals(StatisticsJobAppender.JOB_MAP_SIZE, testMap.size()); + + QueryColumn high4 = new QueryColumn(10, 20, 30, "high4"); + testQueue.add(high4); + appender.appendColumnsToJobs(testQueue, testMap); + Assertions.assertEquals(StatisticsJobAppender.JOB_MAP_SIZE, testMap.size()); + + QueryColumn high5 = new QueryColumn(10, 20, 30, "high5"); + testQueue.add(high5); + appender.appendColumnsToJobs(testQueue, testMap); + table2Column = testMap.get(new TableName("internal", "testDb", "testTable2")); + Assertions.assertEquals(2, table2Column.size()); + Assertions.assertTrue(table2Column.contains("high2")); + Assertions.assertTrue(table2Column.contains("high5")); + } + + @Test + public void testAppendQueryColumnToLowJobMap() throws DdlException { + InternalCatalog testCatalog = new InternalCatalog(); + int id = 10; + for (int i = 0; i < 70; i++) { + Database db = new Database(id++, "testDb" + i); + testCatalog.unprotectCreateDb(db); + Column column1 = new Column("placeholder", PrimitiveType.INT); + List<Column> schema = new ArrayList<>(); + schema.add(column1); + OlapTable table1 = new OlapTable(id++, "testTable" + id + "_1", schema, null, null, null); + OlapTable table2 = new OlapTable(id++, "testTable" + id + "_1", schema, null, null, null); + db.createTableWithLock(table1, true, false); + db.createTableWithLock(table2, true, false); + } + + new MockUp<Env>() { + @Mock + public InternalCatalog getCurrentInternalCatalog() { + return testCatalog; + } + }; + + Map<TableName, Set<String>> testMap = new HashMap<TableName, Set<String>>(); + StatisticsJobAppender appender = new StatisticsJobAppender(); + appender.appendToLowJobs(testMap); + Assertions.assertEquals(100, testMap.size()); + 
testMap.clear(); + appender.appendToLowJobs(testMap); + Assertions.assertEquals(40, testMap.size()); + + for (int i = 0; i < StatisticsJobAppender.JOB_MAP_SIZE; i++) { + Database db = new Database(id++, "testDb" + i); + testCatalog.unprotectCreateDb(db); + Column column1 = new Column("placeholder", PrimitiveType.INT); + List<Column> schema = new ArrayList<>(); + schema.add(column1); + OlapTable table1 = new OlapTable(id++, "testTable" + id + "_1", schema, null, null, null); + OlapTable table2 = new OlapTable(id++, "testTable" + id + "_1", schema, null, null, null); + db.createTableWithLock(table1, true, false); + db.createTableWithLock(table2, true, false); + } + + testMap.clear(); + appender.setLastRoundFinishTime(0); + appender.appendToLowJobs(testMap); + appender.setLastRoundFinishTime(0); + appender.appendToLowJobs(testMap); + appender.setLastRoundFinishTime(0); + appender.appendToLowJobs(testMap); + appender.setLastRoundFinishTime(0); + appender.appendToLowJobs(testMap); + appender.setLastRoundFinishTime(0); + appender.appendToLowJobs(testMap); + appender.setLastRoundFinishTime(0); + appender.appendToLowJobs(testMap); + appender.setLastRoundFinishTime(0); + appender.appendToLowJobs(testMap); + appender.setLastRoundFinishTime(0); + appender.appendToLowJobs(testMap); + appender.setLastRoundFinishTime(0); + appender.appendToLowJobs(testMap); + appender.setLastRoundFinishTime(0); + appender.appendToLowJobs(testMap); + appender.setLastRoundFinishTime(0); + appender.appendToLowJobs(testMap); + appender.setLastRoundFinishTime(0); + appender.appendToLowJobs(testMap); + appender.setLastRoundFinishTime(0); + appender.appendToLowJobs(testMap); + appender.setLastRoundFinishTime(0); + appender.appendToLowJobs(testMap); + appender.setLastRoundFinishTime(0); + appender.appendToLowJobs(testMap); + Assertions.assertEquals(StatisticsJobAppender.JOB_MAP_SIZE, testMap.size()); + } +} diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java
index 724e0363833..17555dcd41c 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java
@@ -17,10 +17,20 @@
 
 package org.apache.doris.statistics.util;
 
+import org.apache.doris.catalog.Column;
+import org.apache.doris.catalog.OlapTable;
+import org.apache.doris.catalog.PrimitiveType;
 import org.apache.doris.catalog.Type;
 import org.apache.doris.common.AnalysisException;
+import org.apache.doris.datasource.ExternalTable;
+import org.apache.doris.datasource.hive.HMSExternalTable;
+import org.apache.doris.datasource.hive.HMSExternalTable.DLAType;
+import org.apache.doris.datasource.jdbc.JdbcExternalTable;
 import org.apache.doris.qe.SessionVariable;
+import org.apache.doris.statistics.AnalysisManager;
+import org.apache.doris.statistics.ColStatsMeta;
 import org.apache.doris.statistics.ResultRow;
+import org.apache.doris.statistics.TableStatsMeta;
 
 import com.google.common.collect.Lists;
 import mockit.Mock;
@@ -33,6 +43,7 @@ import java.time.LocalTime;
 import java.time.format.DateTimeFormatter;
 import java.util.ArrayList;
 import java.util.Base64;
+import java.util.List;
 
 class StatisticsUtilTest {
     @Test
@@ -150,4 +161,141 @@ class StatisticsUtilTest {
         // \\''""
         Assertions.assertEquals("\\\\''\"", StatisticsUtil.escapeSQL(origin));
     }
+
+    // Drives StatisticsUtil.needAnalyzeColumn(table, columnName) through a series of scenarios and
+    // checks the boolean result for each one. The scenarios are order-dependent: tableMeta is a single
+    // object mutated along the way, and the fakes are installed one after another.
+    // NOTE(review): fakes installed by earlier MockUps presumably stay in effect for later scenarios
+    // unless re-faked - confirm against the JMockit version in use before reordering anything here.
+    @Test
+    void testNeedAnalyzeColumn() {
+        // One OlapTable (id 200, "testTable") with a single INT column "testColumn".
+        Column column = new Column("testColumn", PrimitiveType.INT);
+        List<Column> schema = new ArrayList<>();
+        schema.add(column);
+        OlapTable table = new OlapTable(200, "testTable", schema, null, null, null);
+        // Test table stats meta is null: findTableStatsStatus is faked to return null,
+        // and the column is expected to need analysis.
+        new MockUp<AnalysisManager>() {
+            @Mock
+            public TableStatsMeta findTableStatsStatus(long tblId) {
+                return null;
+            }
+        };
+        Assertions.assertTrue(StatisticsUtil.needAnalyzeColumn(table, column.getName()));
+
+        // Test user injected flag is set: findTableStatsStatus now returns tableMeta with
+        // userInjected == true, and the column is expected NOT to need analysis.
+        TableStatsMeta tableMeta = new TableStatsMeta();
+        tableMeta.userInjected = true;
+        new MockUp<AnalysisManager>() {
+            @Mock
+            public TableStatsMeta findTableStatsStatus(long tblId) {
+                return tableMeta;
+            }
+        };
+        Assertions.assertFalse(StatisticsUtil.needAnalyzeColumn(table, column.getName()));
+
+        // Test column meta is null: userInjected is cleared and findColumnStatsMeta is not faked yet,
+        // so the fresh TableStatsMeta presumably has no meta for the column; analysis is expected.
+        tableMeta.userInjected = false;
+        Assertions.assertTrue(StatisticsUtil.needAnalyzeColumn(table, column.getName()));
+
+        // From here on findColumnStatsMeta is faked to return a non-null ColStatsMeta.
+        // NOTE(review): judging by the scenarios below, the last two constructor arguments look like
+        // the recorded row count and the recorded updated rows - confirm against ColStatsMeta.
+        new MockUp<TableStatsMeta>() {
+            @Mock
+            public ColStatsMeta findColumnStatsMeta(String colName) {
+                return new ColStatsMeta(0, null, null, null, 0, 0, 0);
+            }
+        };
+
+        // Test not supported external table type: a JdbcExternalTable is expected NOT to need analysis.
+        ExternalTable externalTable = new JdbcExternalTable(1, "jdbctable", "jdbcdb", null);
+        Assertions.assertFalse(StatisticsUtil.needAnalyzeColumn(externalTable, column.getName()));
+
+        // Test hms external table not hive type: getDlaType is faked to return ICEBERG, and the
+        // HMSExternalTable is expected NOT to need analysis.
+        new MockUp<HMSExternalTable>() {
+            @Mock
+            public DLAType getDlaType() {
+                return DLAType.ICEBERG;
+            }
+        };
+        ExternalTable hmsExternalTable = new HMSExternalTable(1, "hmsTable", "hmsDb", null);
+        Assertions.assertFalse(StatisticsUtil.needAnalyzeColumn(hmsExternalTable, column.getName()));
+
+        // Test partition first load: every column is reported as a partition column and
+        // newPartitionLoaded is true; analysis is expected.
+        new MockUp<OlapTable>() {
+            @Mock
+            public boolean isPartitionColumn(String columnName) {
+                return true;
+            }
+        };
+        tableMeta.newPartitionLoaded.set(true);
+        Assertions.assertTrue(StatisticsUtil.needAnalyzeColumn(table, column.getName()));
+
+        // Test empty table to non-empty table: getRowCount now returns 100 while the faked column
+        // meta still carries 0; newPartitionLoaded is back to false; analysis is expected.
+        new MockUp<OlapTable>() {
+            @Mock
+            public long getRowCount() {
+                return 100;
+            }
+        };
+        tableMeta.newPartitionLoaded.set(false);
+        Assertions.assertTrue(StatisticsUtil.needAnalyzeColumn(table, column.getName()));
+
+        // Test non-empty table to empty table: getRowCount returns 0 while the faked column meta
+        // carries 100; analysis is expected.
+        new MockUp<OlapTable>() {
+            @Mock
+            public long getRowCount() {
+                return 0;
+            }
+        };
+        new MockUp<TableStatsMeta>() {
+            @Mock
+            public ColStatsMeta findColumnStatsMeta(String colName) {
+                return new ColStatsMeta(0, null, null, null, 0, 100, 0);
+            }
+        };
+        tableMeta.newPartitionLoaded.set(false);
+        Assertions.assertTrue(StatisticsUtil.needAnalyzeColumn(table, column.getName()));
+
+        // Test table still empty: the faked column meta goes back to 0 (getRowCount was last faked
+        // to 0); the column is expected NOT to need analysis.
+        new MockUp<TableStatsMeta>() {
+            @Mock
+            public ColStatsMeta findColumnStatsMeta(String colName) {
+                return new ColStatsMeta(0, null, null, null, 0, 0, 0);
+            }
+        };
+        tableMeta.newPartitionLoaded.set(false);
+        Assertions.assertFalse(StatisticsUtil.needAnalyzeColumn(table, column.getName()));
+
+        // Test row count changed more than threshold: getRowCount returns 1000 against 500 in the
+        // faked column meta; analysis is expected. (The threshold itself is not visible in this test.)
+        new MockUp<OlapTable>() {
+            @Mock
+            public long getRowCount() {
+                return 1000;
+            }
+        };
+        new MockUp<TableStatsMeta>() {
+            @Mock
+            public ColStatsMeta findColumnStatsMeta(String colName) {
+                return new ColStatsMeta(0, null, null, null, 0, 500, 0);
+            }
+        };
+        tableMeta.newPartitionLoaded.set(false);
+        Assertions.assertTrue(StatisticsUtil.needAnalyzeColumn(table, column.getName()));
+
+        // Test update rows changed more than threshold: getRowCount returns 120 against 100 in the
+        // faked column meta, whose last argument is 80, while tableMeta.updatedRows is 200;
+        // analysis is expected.
+        new MockUp<OlapTable>() {
+            @Mock
+            public long getRowCount() {
+                return 120;
+            }
+        };
+        new MockUp<TableStatsMeta>() {
+            @Mock
+            public ColStatsMeta findColumnStatsMeta(String colName) {
+                return new ColStatsMeta(0, null, null, null, 0, 100, 80);
+            }
+        };
+        tableMeta.newPartitionLoaded.set(false);
+        tableMeta.updatedRows.set(200);
+        Assertions.assertTrue(StatisticsUtil.needAnalyzeColumn(table, column.getName()));
+
+        // Test update rows changed less than threshold: same fakes as the previous step, but
+        // tableMeta.updatedRows is 100; the column is expected NOT to need analysis.
+        tableMeta.newPartitionLoaded.set(false);
+        tableMeta.updatedRows.set(100);
+        Assertions.assertFalse(StatisticsUtil.needAnalyzeColumn(table, column.getName()));
+
+    }
 }
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org