[ https://issues.apache.org/jira/browse/FLINK-4937?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15676842#comment-15676842 ]
ASF GitHub Bot commented on FLINK-4937: --------------------------------------- Github user fhueske commented on a diff in the pull request: https://github.com/apache/flink/pull/2792#discussion_r88659144 --- Diff: flink-libraries/flink-table/src/main/scala/org/apache/flink/api/table/plan/nodes/datastream/DataStreamAggregate.scala --- @@ -135,50 +128,123 @@ class DataStreamAggregate( namedProperties) val prepareOpName = s"prepare select: ($aggString)" - val mappedInput = inputDS - .map(aggregateResult._1) - .name(prepareOpName) - - val groupReduceFunction = aggregateResult._2 - val rowTypeInfo = new RowTypeInfo(fieldTypes) - - val result = { - // grouped / keyed aggregation - if (groupingKeys.length > 0) { - val aggOpName = s"groupBy: (${groupingToString(inputType, grouping)}), " + - s"window: ($window), " + - s"select: ($aggString)" - val aggregateFunction = - createWindowAggregationFunction(window, namedProperties, groupReduceFunction) - - val keyedStream = mappedInput.keyBy(groupingKeys: _*) - - val windowedStream = createKeyedWindowedStream(window, keyedStream) - .asInstanceOf[WindowedStream[Row, Tuple, DataStreamWindow]] - - windowedStream - .apply(aggregateFunction) - .returns(rowTypeInfo) - .name(aggOpName) - .asInstanceOf[DataStream[Any]] + val keyedAggOpName = s"groupBy: (${groupingToString(inputType, grouping)}), " + + s"window: ($window), " + + s"select: ($aggString)" + val nonKeyedAggOpName = s"window: ($window), select: ($aggString)" + + val (aggFieldIndexes, aggregates) = + AggregateUtil.transformToAggregateFunctions( + namedAggregates.map(_.getKey), inputType, grouping.length) + + val result: DataStream[Any] = { + // check whether all aggregates support partial aggregate + if (aggregates.forall(_.supportPartial)){ + // do Incremental Aggregation + // add grouping fields, position keys in the input, and input type + val (mapFunction, + reduceFunction, + groupingOffsetMapping, + aggOffsetMapping, + intermediateRowArity) = AggregateUtil.createOperatorFunctionsForIncrementalAggregates( + namedAggregates, + inputType, + getRowType, + grouping, + aggregates, + aggFieldIndexes) + + val mappedInput = inputDS + .map(mapFunction) + .name(prepareOpName) + + // grouped / keyed aggregation + if (groupingKeys.length > 0) { + + val winFunction = + createWindowIncrementalAggregationFunction( + aggregates, + groupingOffsetMapping, + aggOffsetMapping, + getRowType.getFieldCount, + intermediateRowArity, + window, + namedProperties) + + val keyedStream = mappedInput.keyBy(groupingKeys: _*) + val windowedStream = createKeyedWindowedStream(window, keyedStream) + .asInstanceOf[WindowedStream[Row, Tuple, DataStreamWindow]] + + windowedStream + .apply(reduceFunction, winFunction) + .returns(rowTypeInfo) + .name(keyedAggOpName) + .asInstanceOf[DataStream[Any]] + } + // global / non-keyed aggregation + else { + val winFunction = + createAllWindowIncrementalAggregationFunction( + aggregates, + groupingOffsetMapping, + aggOffsetMapping, + getRowType.getFieldCount, + intermediateRowArity, + window, + namedProperties) + + val windowedStream = createNonKeyedWindowedStream(window, mappedInput) + .asInstanceOf[AllWindowedStream[Row, DataStreamWindow]] + windowedStream + .apply(reduceFunction, winFunction) + .returns(rowTypeInfo) + .name(nonKeyedAggOpName) + .asInstanceOf[DataStream[Any]] + } } - // global / non-keyed aggregation else { - val aggOpName = s"window: ($window), select: ($aggString)" - val aggregateFunction = - createAllWindowAggregationFunction(window, namedProperties, groupReduceFunction) - - val windowedStream = createNonKeyedWindowedStream(window, mappedInput) - .asInstanceOf[AllWindowedStream[Row, DataStreamWindow]] - - windowedStream - .apply(aggregateFunction) - .returns(rowTypeInfo) - .name(aggOpName) - .asInstanceOf[DataStream[Any]] + // do non-Incremental Aggregation + // add grouping fields, position keys in the input, and input type + val (mapFunction, groupReduceFunction) = AggregateUtil.createOperatorFunctionsForAggregates( --- End diff -- Separate into `createPrepareMapFunction()` and `createAggregateGroupReduceFunction()`. > Add incremental group window aggregation for streaming Table API > ---------------------------------------------------------------- > > Key: FLINK-4937 > URL: https://issues.apache.org/jira/browse/FLINK-4937 > Project: Flink > Issue Type: Sub-task > Components: Table API & SQL > Affects Versions: 1.2.0 > Reporter: Fabian Hueske > Assignee: sunjincheng > > Group-window aggregates for streaming tables are currently not done in an > incremental fashion. This means that the window collects all records and > performs the aggregation when the window is closed instead of eagerly > updating a partial aggregate for every added record. Since records are > buffered, non-incremental aggregation requires more storage space than > incremental aggregation. > The DataStream API which is used under the hood of the streaming Table API > features [incremental > aggregation|https://ci.apache.org/projects/flink/flink-docs-release-1.2/dev/windows.html#windowfunction-with-incremental-aggregation] > using a {{ReduceFunction}}. > We should add support for incremental aggregation in group-windows. > This is a follow-up task of FLINK-4691. -- This message was sent by Atlassian JIRA (v6.3.4#6332)