[ https://issues.apache.org/jira/browse/FLINK-4391?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15635140#comment-15635140 ]
ASF GitHub Bot commented on FLINK-4391: --------------------------------------- Github user bjlovegithub commented on a diff in the pull request: https://github.com/apache/flink/pull/2629#discussion_r86486055 --- Diff: flink-examples/flink-examples-streaming/src/main/java/org/apache/flink/streaming/examples/async/AsyncIOExample.java --- @@ -0,0 +1,194 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.streaming.examples.async; + +import org.apache.flink.api.common.functions.FlatMapFunction; +import org.apache.flink.api.java.tuple.Tuple2; +import org.apache.flink.configuration.Configuration; +import org.apache.flink.runtime.state.filesystem.FsStateBackend; +import org.apache.flink.streaming.api.CheckpointingMode; +import org.apache.flink.streaming.api.TimeCharacteristic; +import org.apache.flink.streaming.api.checkpoint.Checkpointed; +import org.apache.flink.streaming.api.datastream.AsyncDataStream; +import org.apache.flink.streaming.api.datastream.DataStream; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.streaming.api.functions.async.AsyncFunction; +import org.apache.flink.streaming.api.functions.async.RichAsyncFunction; +import org.apache.flink.streaming.api.functions.source.SourceFunction; +import org.apache.flink.streaming.api.operators.async.AsyncCollector; +import org.apache.flink.util.Collector; + +import java.util.ArrayList; +import java.util.List; +import java.util.Random; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; + +/** + * Example that illustrates how to use {@link org.apache.flink.streaming.api.functions.async.AsyncFunction} + */ +public class AsyncIOExample { + + /** + * A checkpointed source. 
+ */ + private static class SimpleSource implements SourceFunction<Integer>, Checkpointed<Integer> { + private static final long serialVersionUID = 1L; + + private volatile boolean isRunning = true; + private int counter = 0; + private int start = 0; + + @Override + public void restoreState(Integer state) throws Exception { + this.start = state; + } + + @Override + public Integer snapshotState(long checkpointId, long checkpointTimestamp) throws Exception { + return start; + } + + public SimpleSource(int maxNum) { + this.counter = maxNum; + } + + @Override + public void run(SourceContext<Integer> ctx) throws Exception { + while (start < counter && isRunning) { + synchronized (ctx.getCheckpointLock()) { + ctx.collect(start); + ++start; + } + Thread.sleep(10); + } + } + + @Override + public void cancel() { + isRunning = false; + } + } + + + public static void main(String[] args) throws Exception { + + // obtain execution environment and set setBufferTimeout to 1 to enable + // continuous flushing of the output buffers (lowest latency) + StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment() + .setBufferTimeout(1); + + // configurations for the job + String statePath = args[0]; + String cpMode = args[1]; + int maxCount = Integer.valueOf(args[2]); + final int sleepFactor = Integer.valueOf(args[3]); + final float failRatio = Float.valueOf(args[4]); + String mode = args[5]; + int taskNum = Integer.valueOf(args[6]); + String timeType = args[7]; + + // setup state and checkpoint mode + env.setStateBackend(new FsStateBackend(statePath)); + if (cpMode.equals("exactly_once")) { + env.enableCheckpointing(1000, CheckpointingMode.EXACTLY_ONCE); + } + else { + env.enableCheckpointing(1000, CheckpointingMode.AT_LEAST_ONCE); + env.disableOperatorChaining(); + } + + // enable watermark or not + if (timeType.equals("EventTime")) { + env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); + } + else if (timeType.equals("IngestionTime")) { + 
env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime); + } + + // create input stream of integer pairs --- End diff -- fixed > Provide support for asynchronous operations over streams > -------------------------------------------------------- > > Key: FLINK-4391 > URL: https://issues.apache.org/jira/browse/FLINK-4391 > Project: Flink > Issue Type: New Feature > Components: DataStream API > Reporter: Jamie Grier > Assignee: david.wang > > Many Flink users need to do asynchronous processing driven by data from a > DataStream. The classic example would be joining against an external > database in order to enrich a stream with extra information. > It would be nice to add general support for this type of operation in the > Flink API. Ideally this could simply take the form of a new operator that > manages async operations, keeps so many of them in flight, and then emits > results to downstream operators as the async operations complete. -- This message was sent by Atlassian JIRA (v6.3.4#6332)