[ https://issues.apache.org/jira/browse/FLINK-6232?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16085369#comment-16085369 ]
ASF GitHub Bot commented on FLINK-6232: --------------------------------------- Github user wuchong commented on a diff in the pull request: https://github.com/apache/flink/pull/4266#discussion_r127129047 --- Diff: flink-libraries/flink-table/src/main/scala/org/apache/flink/table/runtime/join/WindowJoinUtil.scala --- @@ -0,0 +1,349 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.flink.table.runtime.join + +import java.math.{BigDecimal => JBigDecimal} +import java.util + +import org.apache.calcite.plan.RelOptUtil +import org.apache.calcite.rel.`type`.RelDataType +import org.apache.calcite.rel.core.JoinRelType +import org.apache.calcite.rex._ +import org.apache.calcite.sql.SqlKind +import org.apache.flink.api.common.functions.FlatJoinFunction +import org.apache.flink.api.common.typeinfo.TypeInformation +import org.apache.flink.table.api.{TableConfig, TableException} +import org.apache.flink.table.calcite.FlinkTypeFactory +import org.apache.flink.table.codegen.{CodeGenerator, ExpressionReducer} +import org.apache.flink.table.plan.schema.{RowSchema, TimeIndicatorRelDataType} +import org.apache.flink.types.Row + +import scala.collection.JavaConverters._ + +/** + * An util class to help analyze and build join code . + */ +object WindowJoinUtil { + + /** + * Analyze time-condtion to get time boundary for each stream and get the time type + * and return remain condition. 
+ * + * @param condition join condition + * @param leftLogicalFieldCnt left stream logical field num + * @param inputSchema join rowtype schema + * @param rexBuilder util to build rexNode + * @param config table environment config + * @return isRowTime, left lower boundary, right lower boundary, remain condition + */ + private[flink] def analyzeTimeBoundary( + condition: RexNode, + leftLogicalFieldCnt: Int, + inputSchema: RowSchema, + rexBuilder: RexBuilder, + config: TableConfig): (Boolean, Long, Long, Option[RexNode]) = { + + // Converts the condition to conjunctive normal form (CNF) + val cnfCondition = RexUtil.toCnf(rexBuilder, condition) + + // split the condition into time indicator condition and other condition + val (timeTerms, remainTerms) = cnfCondition match { + case c: RexCall if cnfCondition.getKind == SqlKind.AND => + c.getOperands.asScala + .map(analyzeCondtionTermType(_, leftLogicalFieldCnt, inputSchema.logicalType)) + .reduceLeft((l, r) => { + (l._1 ++ r._1, l._2 ++ r._2) + }) + case _ => + throw new TableException("A time-based stream join requires exactly " + + "two join predicates that bound the time in both directions.") + } + + if (timeTerms.size != 2) { + throw new TableException("A time-based stream join requires exactly " + + "two join predicates that bound the time in both directions.") + } + + // extract time offset from the time indicator conditon + val streamTimeOffsets = + timeTerms.map(x => extractTimeOffsetFromCondition(x._3, x._2, rexBuilder, config)) + + val (leftLowerBound, leftUpperBound) = + streamTimeOffsets match { + case Seq((x, true), (y, false)) => (x, y) + case Seq((x, false), (y, true)) => (y, x) + case _ => + throw new TableException( + "Time-based join conditions must reference the time attribute of both input tables.") + } + + // compose the remain condition list into one condition + val remainCondition = + remainTerms match { + case Seq() => None + case _ => + // Converts logical field references to physical ones. 
+ Some(remainTerms.map(inputSchema.mapRexNode).reduceLeft((l, r) => { + RelOptUtil.andJoinFilters(rexBuilder, l, r) + })) + } + + val isRowTime: Boolean = timeTerms(0)._1 match { + case x if FlinkTypeFactory.isProctimeIndicatorType(x) => false + case _ => true + } + (isRowTime, leftLowerBound, leftUpperBound, remainCondition) + } + + /** + * Split the join conditions into time condition and non-time condition + * + * @return (Seq(timeTerms), Seq(remainTerms)), + */ + private def analyzeCondtionTermType( + conditionTerm: RexNode, + leftFieldCount: Int, + inputType: RelDataType): (Seq[(RelDataType, Boolean, RexNode)], Seq[RexNode]) = { + + conditionTerm match { + case c: RexCall if Seq(SqlKind.GREATER_THAN, SqlKind.GREATER_THAN_OR_EQUAL, + SqlKind.LESS_THAN, SqlKind.LESS_THAN_OR_EQUAL).contains(c.getKind) => + val timeIndicators = extractTimeIndicatorAccesses(c, leftFieldCount, inputType) + timeIndicators match { + case Seq() => + (Seq(), Seq(c)) + case Seq(v1, v2) => + if (v1._1 != v2._1) { + throw new TableException( + "Both time attributes in a join condition must be of the same type.") + } + if (v1._2 == v2._2) { + throw new TableException("Time-based join conditions " + + "must reference the time attribute of both input tables.") + } + (Seq((v1._1, v1._2, c)), Seq()) + case _ => + throw new TableException( + "Time-based join conditions must reference the time attribute of both input tables.") + } + case other => + val timeIndicators = extractTimeIndicatorAccesses(other, leftFieldCount, inputType) + timeIndicators match { + case Seq() => + (Seq(), Seq(other)) + case _ => + throw new TableException("Time indicators can not be used in non time-condition.") + } + } + } + + /** + * Extracts all time indicator attributes that are accessed in an expression. 
+ * + * @return seq(timeType, is left input time indicator) + */ + def extractTimeIndicatorAccesses( + expression: RexNode, + leftFieldCount: Int, + inputType: RelDataType): Seq[(RelDataType, Boolean)] = { + + expression match { + case i: RexInputRef => + val idx = i.getIndex + inputType.getFieldList.get(idx).getType match { + case t: TimeIndicatorRelDataType if idx < leftFieldCount => + // left table time indicator + Seq((t, true)) + case t: TimeIndicatorRelDataType => + // right table time indicator + Seq((t, false)) + case _ => Seq() + } + case c: RexCall => + c.operands.asScala + .map(extractTimeIndicatorAccesses(_, leftFieldCount, inputType)) + .reduce(_ ++ _) + case _ => Seq() + } + } + + /** + * Computes the absolute bound on the left operand of a comparison expression and + * whether the bound is an upper or lower bound. + * + * @return window boundary, is left lower bound + */ + def extractTimeOffsetFromCondition( + timeTerm: RexNode, + isLeftExprBelongLeftTable: Boolean, + rexBuilder: RexBuilder, + config: TableConfig): (Long, Boolean) = { + + val timeCall: RexCall = timeTerm.asInstanceOf[RexCall] + + val isLeftLowerBound: Boolean = + timeTerm.getKind match { + // e.g a.proctime > b.proctime - 5 sec, then it's the lower bound of a and the value is -5 + // e.g b.proctime > a.proctime - 5 sec, then it's not the lower bound of a but upper bound + case kind@(SqlKind.GREATER_THAN | SqlKind.GREATER_THAN_OR_EQUAL) => + isLeftExprBelongLeftTable + // e.g a.proctime < b.proctime + 5 sec, the the upper bound of a is 5 + case kind@(SqlKind.LESS_THAN | SqlKind.LESS_THAN_OR_EQUAL) => + !isLeftExprBelongLeftTable + case _ => + throw new TableException("Unsupported time-condition.") + } + + val (leftLiteral, rightLiteral) = + reduceTimeExpression( + timeCall.operands.get(0), + timeCall.operands.get(1), + rexBuilder, + config) + val tmpTimeOffset: Long = + if (isLeftExprBelongLeftTable) rightLiteral - leftLiteral else leftLiteral - rightLiteral + + val boundary = + 
tmpTimeOffset.signum * ( + if (timeTerm.getKind == SqlKind.LESS_THAN || timeTerm.getKind == SqlKind.GREATER_THAN) { + tmpTimeOffset.abs - 1 + } else { + tmpTimeOffset.abs + }) + + (boundary, isLeftLowerBound) + } + + /** + * Calculates the time boundary by replacing the time attribute by a zero literal + * and reducing the expression. + * For example: + * b.proctime - interval '1' second - interval '2' second will be translated to + * 0 - 1000 - 2000 + */ + private def reduceTimeExpression( + leftRexNode: RexNode, + rightRexNode: RexNode, + rexBuilder: RexBuilder, + config: TableConfig): (Long, Long) = { + + /** + * replace the rowtime/proctime with zero literal. + */ + def replaceTimeFieldWithLiteral(expr: RexNode): RexNode = { + expr match { + case c: RexCall => + // replace in call operands + val newOps = c.operands.asScala.map(replaceTimeFieldWithLiteral(_)).asJava + rexBuilder.makeCall(c.getType, c.getOperator, newOps) + case i: RexInputRef if FlinkTypeFactory.isTimeIndicatorType(i.getType) => + // replace with timestamp + rexBuilder.makeZeroLiteral(expr.getType) + case _: RexInputRef => + throw new TableException("Time join condition may only reference time indicator fields.") + case _ => expr + } + } + + val literalLeftRex = replaceTimeFieldWithLiteral(leftRexNode) + val literalRightRex = replaceTimeFieldWithLiteral(rightRexNode) + + val exprReducer = new ExpressionReducer(config) + val originList = new util.ArrayList[RexNode]() + originList.add(literalLeftRex) + originList.add(literalRightRex) + val reduceList = new util.ArrayList[RexNode]() + exprReducer.reduce(rexBuilder, originList, reduceList) + + val literals = reduceList.asScala.map(f => f match { --- End diff -- Can be simplified to ```scala val literals = reduceList.asScala.map { case literal: RexLiteral => literal.getValue2.asInstanceOf[Long] case _ => throw TableException( "Time condition may only consist of time attributes, literals, and arithmetic operators.") } ``` > Support proctime inner 
equi-join between two streams in the SQL API > ------------------------------------------------------------------- > > Key: FLINK-6232 > URL: https://issues.apache.org/jira/browse/FLINK-6232 > Project: Flink > Issue Type: Sub-task > Components: Table API & SQL > Reporter: hongyuhong > Assignee: hongyuhong > > The goal of this issue is to add support for inner equi-joins on proctime > streams to the SQL interface. > Queries similar to the following should be supported: > {code} > SELECT o.proctime, o.productId, o.orderId, s.proctime AS shipTime > FROM Orders AS o > JOIN Shipments AS s > ON o.orderId = s.orderId > AND o.proctime BETWEEN s.proctime AND s.proctime + INTERVAL '1' HOUR; > {code} > The following restrictions should initially apply: > * The join hint only supports inner join > * The ON clause should include an equi-join condition > * The time-condition {{o.proctime BETWEEN s.proctime AND s.proctime + > INTERVAL '1' HOUR}} can only use a proctime that is a system attribute; the > time condition only supports bounded time ranges like {{o.proctime BETWEEN > s.proctime - INTERVAL '1' HOUR AND s.proctime + INTERVAL '1' HOUR}}, not > unbounded ones like {{o.proctime > s.proctime}}, and should include the > proctime attributes of both streams; {{o.proctime between proctime() and > proctime() + 1}} should also not be supported. > This issue includes: > * Design of the DataStream operator to deal with stream join > * Translation from Calcite's RelNode representation (LogicalJoin). -- This message was sent by Atlassian JIRA (v6.4.14#64029)