Kikyou1997 commented on code in PR #17790: URL: https://github.com/apache/doris/pull/17790#discussion_r1136989021
########## fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java: ########## @@ -436,12 +437,44 @@ private Statistics computeAggregate(Aggregate<? extends Plan> aggregate) { // TODO: since we have no column stats here. just use a fix ratio to compute the row count. List<Expression> groupByExpressions = aggregate.getGroupByExpressions(); Statistics childStats = groupExpression.childStatistics(0); - Map<Expression, ColumnStatistic> childSlotToColumnStats = childStats.columnStatistics(); - double resultSetCount = groupByExpressions.stream().flatMap(expr -> expr.getInputSlots().stream()) - .filter(childSlotToColumnStats::containsKey).map(childSlotToColumnStats::get).map(s -> s.ndv) - .reduce(1d, (a, b) -> a * b); - if (resultSetCount <= 0) { - resultSetCount = 1L; + double resultSetCount = 1; + if (!groupByExpressions.isEmpty()) { + Map<Expression, ColumnStatistic> childSlotToColumnStats = childStats.columnStatistics(); + double inputRowCount = childStats.getRowCount(); + if (inputRowCount == 0) { + //on empty relation, Agg output 1 tuple + resultSetCount = 1; + } else { + //TODO: add column correlation for agg estimation + // group by A1, A2 + // currently, the estimated row count is between max(ndv_A1, ndv_A2) and ndv_A1 * ndv_A2 + resultSetCount = groupByExpressions.stream().flatMap(expr -> expr.getInputSlots().stream()) + .map(Slot::getExprId) + .filter(childSlotToColumnStats::containsKey) + .map(childSlotToColumnStats::get) + .map(s -> { + double adjustedNdv = s.ndv; + if (s.isUnKnown) { + adjustedNdv = 0; + } + if (s.ndv > inputRowCount) { + adjustedNdv = inputRowCount; + } + return adjustedNdv; + }) + .reduce(1.0, (a, b) -> a * b); + if (resultSetCount == 0) { + //any group_by_key's stats is unknown + resultSetCount = inputRowCount * DEFAULT_AGGREGATE_RATIO; + } + if (resultSetCount > inputRowCount) { + // avoid ndv error propagation + resultSetCount = inputRowCount; + } + if (resultSetCount < 1) { + resultSetCount = 1; + } + } } 
resultSetCount = Math.min(resultSetCount, childStats.getRowCount()); Review Comment: 然后在这里把 `resultSetCount` 的赋值改成 `resultSetCount = Math.max(Math.min(resultSetCount, childStats.getRowCount()), 1);` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org