[ https://issues.apache.org/jira/browse/FLINK-5803?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15882975#comment-15882975 ]
ASF GitHub Bot commented on FLINK-5803:
---------------------------------------

Github user fhueske commented on a diff in the pull request:

    https://github.com/apache/flink/pull/3397#discussion_r102951067

--- Diff: flink-libraries/flink-table/src/main/scala/org/apache/flink/table/plan/nodes/datastream/DataStreamOverAggregate.scala ---
@@ -0,0 +1,216 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.flink.table.plan.nodes.datastream
+
+import org.apache.calcite.plan.{RelOptCluster, RelTraitSet}
+import org.apache.calcite.rel.`type`.RelDataType
+import org.apache.calcite.rel.core.AggregateCall
+import org.apache.calcite.rel.{RelNode, RelWriter, SingleRel}
+import org.apache.flink.api.common.typeinfo.TypeInformation
+import org.apache.flink.api.java.typeutils.RowTypeInfo
+import org.apache.flink.streaming.api.datastream.DataStream
+import org.apache.flink.table.api.{StreamTableEnvironment, TableException}
+import org.apache.flink.table.calcite.FlinkRelBuilder.NamedWindowProperty
+import org.apache.flink.table.calcite.FlinkTypeFactory
+import org.apache.flink.table.runtime.aggregate.AggregateUtil.{CalcitePair, _}
+import org.apache.flink.table.runtime.aggregate._
+import org.apache.flink.table.plan.nodes.CommonAggregate
+import org.apache.flink.types.Row
+import org.apache.calcite.rel.core.Window
+import org.apache.calcite.rel.core.Window.Group
+
+import java.util.{List => JList}
+
+import scala.collection.JavaConverters._
+import scala.collection.immutable.IndexedSeq
+
+class DataStreamOverAggregate(
+    logicWindow: Window,
+    namedProperties: Seq[NamedWindowProperty],
+    cluster: RelOptCluster,
+    traitSet: RelTraitSet,
+    inputNode: RelNode,
+    rowRelDataType: RelDataType,
+    inputType: RelDataType)
+  extends SingleRel(cluster, traitSet, inputNode)
+  with CommonAggregate
+  with DataStreamRel {
+
+  override def deriveRowType(): RelDataType = rowRelDataType
+
+  override def copy(traitSet: RelTraitSet, inputs: JList[RelNode]): RelNode = {
+    new DataStreamOverAggregate(
+      logicWindow,
+      namedProperties,
+      cluster,
+      traitSet,
+      inputs.get(0),
+      getRowType,
+      inputType)
+  }
+
+  override def toString: String = {
+    val (
+      overWindow: Group,
+      partitionKeys: Array[Int],
+      namedAggregates: IndexedSeq[CalcitePair[AggregateCall, String]]
+    ) = genPartitionKeysAndNamedAggregates
+
+    s"Aggregate(${
+      if (!partitionKeys.isEmpty) {
+        s"partitionBy: (${groupingToString(inputType, partitionKeys)}), "
+      } else {
+        ""
+      }
+    }window: ($overWindow), " +
+      s"select: (${
+        aggregationToString(
+          inputType,
+          partitionKeys,
+          getRowType,
+          namedAggregates,
+          namedProperties)
+      }))"
+  }
+
+  override def explainTerms(pw: RelWriter): RelWriter = {
+    val (
+      overWindow: Group,
+      partitionKeys: Array[Int],
+      namedAggregates: IndexedSeq[CalcitePair[AggregateCall, String]]
+    ) = genPartitionKeysAndNamedAggregates
+
+    super.explainTerms(pw)
+      .itemIf("partitionBy", groupingToString(inputType, partitionKeys), !partitionKeys.isEmpty)
+      .item("overWindow", overWindow)
+      .item(
+        "select", aggregationToString(
+          inputType,
+          partitionKeys,
+          getRowType,
+          namedAggregates,
+          namedProperties))
+  }
+
+  override def translateToPlan(tableEnv: StreamTableEnvironment): DataStream[Row] = {
+
+    if (logicWindow.groups.size > 1) {
+      throw new UnsupportedOperationException(
+        "Unsupported different over window in the same projection")
+    }
+
+    val overWindow: org.apache.calcite.rel.core.Window.Group = logicWindow.groups.get(0)
+
+    val inputDS = input.asInstanceOf[DataStreamRel].translateToPlan(tableEnv)
+
+    // TODO timeDomain will be fixed, once FLINK-5884 is solved
+    // both ROWS and RANGE clause with UNBOUNDED PRECEDING and CURRENT ROW condition.
+    if (/*logicWindow.timeDomain.equals(
+      TimeDomain.PROCESSING_TIME && */
+      overWindow.lowerBound.isUnbounded && overWindow.upperBound.isCurrentRow) {
+      createUnboundedAndCurrentRowProcessingTimeOverWindow(inputDS)
+    } else {
+      throw new UnsupportedOperationException(
+        "Over window only support UNBOUNDED PRECEDING and CURRENT ROW condition.")
+    }
+
+  }
+
+  def createUnboundedAndCurrentRowProcessingTimeOverWindow(
+    inputDS: DataStream[Row]): DataStream[Row] = {
+
+    val (
+      overWindow: Group,
+      partitionKeys: Array[Int],
+      namedAggregates: IndexedSeq[CalcitePair[AggregateCall, String]]
+    ) = genPartitionKeysAndNamedAggregates
+
+    val allInputProjections = (for (i <- 0 until inputType.getFieldCount) yield i).toArray
--- End diff --

    `(0 until inputType.getFieldCount).toArray`
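As a quick standalone illustration of the simplification suggested above (not part of the PR; the object name and the field count of 5 are made up), both expressions build the same index array:

{code}
object ProjectionIndexCheck extends App {
  // Stand-in for inputType.getFieldCount; the value 5 is arbitrary.
  val fieldCount = 5

  val forYieldVersion = (for (i <- 0 until fieldCount) yield i).toArray
  val rangeVersion = (0 until fieldCount).toArray

  // Both produce Array(0, 1, 2, 3, 4); the Range version skips the intermediate for/yield.
  assert(forYieldVersion.sameElements(rangeVersion))
  println(rangeVersion.mkString(", "))
}
{code}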
> Add [partitioned] processing time OVER RANGE BETWEEN UNBOUNDED PRECEDING aggregation to SQL
> -------------------------------------------------------------------------------------------
>
>                 Key: FLINK-5803
>                 URL: https://issues.apache.org/jira/browse/FLINK-5803
>             Project: Flink
>          Issue Type: Sub-task
>          Components: Table API & SQL
>            Reporter: sunjincheng
>            Assignee: sunjincheng
>
> The goal of this issue is to add support for OVER RANGE aggregations on processing time streams to the SQL interface.
> Queries similar to the following should be supported:
> {code}
> SELECT
>   a,
>   SUM(b) OVER (PARTITION BY c ORDER BY procTime() RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS sumB,
>   MIN(b) OVER (PARTITION BY c ORDER BY procTime() RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS minB
> FROM myStream
> {code}
> The following restrictions should initially apply:
> - All OVER clauses in the same SELECT clause must be exactly the same.
> - The ORDER BY clause may only have procTime() as parameter. procTime() is a parameterless scalar function that just indicates processing time mode.
> - bounded PRECEDING is not supported (see FLINK-5654)
> - FOLLOWING is not supported.
> The restrictions will be resolved in follow up issues. If we find that some of the restrictions are trivial to address, we can add the functionality in this issue as well.
> This issue includes:
> - Design of the DataStream operator to compute OVER ROW aggregates
> - Translation from Calcite's RelNode representation (LogicalProject with RexOver expression).

--
This message was sent by Atlassian JIRA
(v6.3.15#6346)
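For readers new to the design sketched in the diff above: the core idea of the unbounded preceding / current row processing-time operator is to key the stream by the PARTITION BY fields, keep one accumulator per key in keyed state, and emit every incoming row together with the aggregate over all rows of that partition seen so far. The following is a minimal, self-contained sketch of that idea only; it is not the implementation in the pull request. The names Event, RunningSumFunction and "sumB" are hypothetical, and a single SUM stands in for the generic aggregate functions of the real operator.

{code}
import org.apache.flink.api.common.state.{ValueState, ValueStateDescriptor}
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.ProcessFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.util.Collector

// Stand-in input record: c is the partition key, b the aggregated value.
case class Event(c: String, b: Long)

// Emits (c, b, running sum of b) for every row, i.e. a per-partition
// UNBOUNDED PRECEDING .. CURRENT ROW aggregate in processing-time arrival order.
class RunningSumFunction extends ProcessFunction[Event, (String, Long, Long)] {

  // One accumulator per key, kept in keyed state so it survives failures.
  private var sumState: ValueState[java.lang.Long] = _

  override def open(parameters: Configuration): Unit = {
    sumState = getRuntimeContext.getState(
      new ValueStateDescriptor[java.lang.Long]("sumB", classOf[java.lang.Long]))
  }

  override def processElement(
      value: Event,
      ctx: ProcessFunction[Event, (String, Long, Long)]#Context,
      out: Collector[(String, Long, Long)]): Unit = {

    val previous = sumState.value()
    val newSum = (if (previous == null) 0L else previous.longValue()) + value.b
    sumState.update(java.lang.Long.valueOf(newSum))

    // Forward the row together with the aggregate over all rows of the
    // same partition seen so far.
    out.collect((value.c, value.b, newSum))
  }
}

object RunningSumExample {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    env
      .fromElements(Event("x", 1L), Event("x", 2L), Event("y", 5L))
      .keyBy(_.c)                      // PARTITION BY c
      .process(new RunningSumFunction) // running aggregate per partition
      .print()

    env.execute("unbounded over aggregate sketch")
  }
}
{code}

Because the accumulator lives in keyed state, it is maintained independently per partition and restored from checkpoints on recovery, which is the same property the real operator needs for its aggregate accumulators.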