morrySnow commented on code in PR #8858: URL: https://github.com/apache/incubator-doris/pull/8858#discussion_r843721770
########## fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJob.java: ########## @@ -18,62 +18,187 @@ package org.apache.doris.statistics; import org.apache.doris.analysis.AnalyzeStmt; +import org.apache.doris.catalog.Catalog; +import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.Database; +import org.apache.doris.catalog.Table; +import org.apache.doris.common.UserException; +import com.clearspring.analytics.util.Lists; +import com.google.common.base.Strings; import com.google.common.collect.Maps; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; import org.glassfish.jersey.internal.guava.Sets; import java.util.List; import java.util.Map; import java.util.Set; -import com.clearspring.analytics.util.Lists; - /* Used to store statistics job info, including job status, progress, etc. */ public class StatisticsJob { + private static final Logger LOG = LogManager.getLogger(StatisticsJob.class); public enum JobState { PENDING, SCHEDULING, RUNNING, FINISHED, - CANCELLED + CANCELLED, + FAILED } - private long id = -1; + private final long id = Catalog.getCurrentCatalog().getNextId(); + + /** + * to be collected database stats. + */ + private final long dbId; + + /** + * to be collected table stats. + */ + private final List<Long> tableIds; + + /** + * to be collected column stats. + */ + private final Map<Long, List<String>> tableIdToColumnName; + + private final Map<String, String> properties; + + /** + * to be executed tasks. + */ + private List<StatisticsTask> tasks = Lists.newArrayList(); + private JobState jobState = JobState.PENDING; - // optional - // to be collected table stats - private List<Long> tableId = Lists.newArrayList(); - // to be collected column stats - private Map<Long, List<String>> tableIdToColumnName = Maps.newHashMap(); - private Map<String, String> properties; - // end - private List<StatisticsTask> taskList = Lists.newArrayList(); + private final long createTime = System.currentTimeMillis(); + private long scheduleTime = -1L; + private long finishTime = -1L; + private int progress = 0; + + public StatisticsJob(Long dbId, + List<Long> tableIdList, + Map<Long, List<String>> tableIdToColumnName, + Map<String, String> properties) { + this.dbId = dbId; + this.tableIds = tableIdList; + this.tableIdToColumnName = tableIdToColumnName; + this.properties = properties; + } public long getId() { - return id; + return this.id; + } + + public long getDbId() { + return this.dbId; + } + + public List<Long> getTableIds() { + return this.tableIds; + } + + public Map<Long, List<String>> getTableIdToColumnName() { + return this.tableIdToColumnName; + } + + public Map<String, String> getProperties() { + return this.properties; + } + + public List<StatisticsTask> getTasks() { + return this.tasks; + } + + public void setTasks(List<StatisticsTask> tasks) { + this.tasks = tasks; + } + + public JobState getJobState() { + return this.jobState; + } + + public void setJobState(JobState jobState) { + this.jobState = jobState; + } + + public long getCreateTime() { + return this.createTime; + } + + public long getScheduleTime() { + return this.scheduleTime; + } + + public void setScheduleTime(long scheduleTime) { + this.scheduleTime = scheduleTime; } - /* - AnalyzeStmt: Analyze t1(c1), t2 - StatisticsJob: - tableId [t1, t2] - tableIdToColumnName <t1, [c1]> <t2, [c1,c2,c3]> - */ - public static StatisticsJob fromAnalyzeStmt(AnalyzeStmt analyzeStmt) { - // TODO - return new StatisticsJob(); + public long getFinishTime() { + return this.finishTime; + } + + public void setFinishTime(long finishTime) { + this.finishTime = finishTime; + } + + public int getProgress() { + return this.progress; + } + + public void setProgress(int progress) { + this.progress = progress; + } + + /** + * get statisticsJob from analyzeStmt. + * AnalyzeStmt: analyze t1(c1,c2,c3) + * tableId: [t1] + * tableIdToColumnName <t1, [c1,c2,c3]> + */ + public static StatisticsJob fromAnalyzeStmt(AnalyzeStmt analyzeStmt) throws UserException { + List<Long> tableIdList = Lists.newArrayList(); + Map<Long, List<String>> tableIdToColumnName = Maps.newHashMap(); + List<String> columnNames = analyzeStmt.getColumnNames(); + + String dbName = analyzeStmt.getDbName(); + String tblName = analyzeStmt.getTblName(); + Database db = Catalog.getCurrentCatalog().getDbOrDdlException(dbName); + + if (Strings.isNullOrEmpty(tblName)) { Review Comment: duplicate code with AnalyzeStmt, could we save these infos in AnalyzeStmt, and reuse here? ########## fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobManager.java: ########## @@ -35,39 +44,74 @@ public class StatisticsJobManager { private static final Logger LOG = LogManager.getLogger(StatisticsJobManager.class); - // statistics job - private Map<Long, StatisticsJob> idToStatisticsJob = Maps.newConcurrentMap(); + /** + * save statistics job status information + */ + private final Map<Long, StatisticsJob> idToStatisticsJob = Maps.newConcurrentMap(); - public void createStatisticsJob(AnalyzeStmt analyzeStmt) { - // step0: init statistics job by analyzeStmt + public Map<Long, StatisticsJob> getIdToStatisticsJob() { + return this.idToStatisticsJob; + } + + public void createStatisticsJob(AnalyzeStmt analyzeStmt) throws UserException { + // step1: init statistics job by analyzeStmt StatisticsJob statisticsJob = StatisticsJob.fromAnalyzeStmt(analyzeStmt); - // step1: get statistics to be analyzed - Set<Long> tableIdList = statisticsJob.relatedTableId(); + // step2: check restrict - checkRestrict(tableIdList); - // step3: check permission - checkPermission(); - // step4: create it - createStatisticsJob(statisticsJob); + this.checkRestrict(statisticsJob.getDbId(), statisticsJob.relatedTableId()); + + // step3: create it + this.createStatisticsJob(statisticsJob); } - public void createStatisticsJob(StatisticsJob statisticsJob) { - idToStatisticsJob.put(statisticsJob.getId(), statisticsJob); + public void createStatisticsJob(StatisticsJob statisticsJob) throws DdlException { + this.idToStatisticsJob.put(statisticsJob.getId(), statisticsJob); try { Catalog.getCurrentCatalog().getStatisticsJobScheduler().addPendingJob(statisticsJob); } catch (IllegalStateException e) { - LOG.info("The pending statistics job is full. Please submit it again later."); + throw new DdlException("The pending statistics job is full, Please submit it again later."); } } - // Rule1: The same table cannot have two unfinished statistics jobs - // Rule2: The unfinished statistics job could not more then Config.max_statistics_job_num - // Rule3: The job for external table is not supported - private void checkRestrict(Set<Long> tableIdList) { - // TODO - } + /** + * The statistical job has the following restrict: + * - Rule1: The same table cannot have two unfinished statistics jobs + * - Rule2: The unfinished statistics job could not more then Config.max_statistics_job_num + * - Rule3: The job for external table is not supported + */ + private synchronized void checkRestrict(long dbId, Set<Long> tableIds) throws AnalysisException { + Database db = Catalog.getCurrentCatalog().getDbOrAnalysisException(dbId); - private void checkPermission() { - // TODO + // check table type + for (Long tableId : tableIds) { + Table table = db.getTableOrAnalysisException(tableId); + if (table.getType() != Table.TableType.OLAP) { + ErrorReport.reportAnalysisException(ErrorCode.ERR_NOT_OLAP_TABLE, db.getFullName(),table.getName(), "ANALYZE"); + } + } + + int unfinishedJobs = 0; + + // check table unfinished job + for (Map.Entry<Long, StatisticsJob> jobEntry : this.idToStatisticsJob.entrySet()) { Review Comment: just use values here, suggest code: ```java for (StatisticsJob : this.idToStatisticsJob.values()) ``` ########## fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobManager.java: ########## @@ -35,39 +44,74 @@ public class StatisticsJobManager { private static final Logger LOG = LogManager.getLogger(StatisticsJobManager.class); - // statistics job - private Map<Long, StatisticsJob> idToStatisticsJob = Maps.newConcurrentMap(); + /** + * save statistics job status information + */ + private final Map<Long, StatisticsJob> idToStatisticsJob = Maps.newConcurrentMap(); - public void createStatisticsJob(AnalyzeStmt analyzeStmt) { - // step0: init statistics job by analyzeStmt + public Map<Long, StatisticsJob> getIdToStatisticsJob() { + return this.idToStatisticsJob; + } + + public void createStatisticsJob(AnalyzeStmt analyzeStmt) throws UserException { + // step1: init statistics job by analyzeStmt StatisticsJob statisticsJob = StatisticsJob.fromAnalyzeStmt(analyzeStmt); - // step1: get statistics to be analyzed - Set<Long> tableIdList = statisticsJob.relatedTableId(); + // step2: check restrict - checkRestrict(tableIdList); - // step3: check permission - checkPermission(); - // step4: create it - createStatisticsJob(statisticsJob); + this.checkRestrict(statisticsJob.getDbId(), statisticsJob.relatedTableId()); + + // step3: create it + this.createStatisticsJob(statisticsJob); } - public void createStatisticsJob(StatisticsJob statisticsJob) { - idToStatisticsJob.put(statisticsJob.getId(), statisticsJob); + public void createStatisticsJob(StatisticsJob statisticsJob) throws DdlException { + this.idToStatisticsJob.put(statisticsJob.getId(), statisticsJob); try { Catalog.getCurrentCatalog().getStatisticsJobScheduler().addPendingJob(statisticsJob); } catch (IllegalStateException e) { - LOG.info("The pending statistics job is full. Please submit it again later."); + throw new DdlException("The pending statistics job is full, Please submit it again later."); } } - // Rule1: The same table cannot have two unfinished statistics jobs - // Rule2: The unfinished statistics job could not more then Config.max_statistics_job_num - // Rule3: The job for external table is not supported - private void checkRestrict(Set<Long> tableIdList) { - // TODO - } + /** + * The statistical job has the following restrict: + * - Rule1: The same table cannot have two unfinished statistics jobs + * - Rule2: The unfinished statistics job could not more then Config.max_statistics_job_num + * - Rule3: The job for external table is not supported + */ + private synchronized void checkRestrict(long dbId, Set<Long> tableIds) throws AnalysisException { + Database db = Catalog.getCurrentCatalog().getDbOrAnalysisException(dbId); - private void checkPermission() { - // TODO + // check table type + for (Long tableId : tableIds) { + Table table = db.getTableOrAnalysisException(tableId); + if (table.getType() != Table.TableType.OLAP) { + ErrorReport.reportAnalysisException(ErrorCode.ERR_NOT_OLAP_TABLE, db.getFullName(),table.getName(), "ANALYZE"); + } + } + + int unfinishedJobs = 0; + + // check table unfinished job + for (Map.Entry<Long, StatisticsJob> jobEntry : this.idToStatisticsJob.entrySet()) { + StatisticsJob statisticsJob = jobEntry.getValue(); + StatisticsJob.JobState jobState = statisticsJob.getJobState(); + List<Long> tableIdList = statisticsJob.getTableIds(); + if (jobState == StatisticsJob.JobState.PENDING + || jobState == StatisticsJob.JobState.SCHEDULING + || jobState == StatisticsJob.JobState.RUNNING) { + for (Long tableId : tableIds) { + if (tableIdList.contains(tableId)) { + throw new AnalysisException("The table(id=" + tableId + ") have two unfinished statistics jobs"); + } + } + unfinishedJobs++; + } + } + + // check the number of unfinished tasks + if (unfinishedJobs > Config.cbo_max_statistics_job_num) { + throw new AnalysisException("The unfinished statistics job could not more then cbo_max_statistics_job_num"); Review Comment: typo? then -> than BTW: should we print the number of unfinished jobs here? ########## fe/fe-core/src/main/java/org/apache/doris/common/Config.java: ########## @@ -1631,6 +1631,11 @@ */ @ConfField(mutable = true, masterOnly = true) public static int cbo_max_statistics_job_num = 20; + /* + * the timeout of a statistics task + */ + @ConfField(mutable = true, masterOnly = true) + public static int cbo_statistics_task_timeout = 60; Review Comment: nit: add unit in config name -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org