Flink version: 1.13.1
HDFS version: 3+
The error log is as follows:
2022-06-24 10:58:19,839 INFO
org.apache.flink.runtime.checkpoint.CheckpointCoordinator [] - Decline
checkpoint 1101 by task b3d88f9ef72bda003056856c4422742d of job
6bd7dc46451f01e008762c9b556cb08f at zhaohy4-test-taskmanager-1-1 @ 10.42.5.55
(dataPort=40558).
org.apache.flink.util.SerializedThrowable: Asynchronous task checkpoint failed.
at
org.apache.flink.streaming.runtime.tasks.AsyncCheckpointRunnable.handleExecutionException(AsyncCheckpointRunnable.java:279)
~[flink-dist_2.11-1.13.1.jar:1.13.1]
at
org.apache.flink.streaming.runtime.tasks.AsyncCheckpointRunnable.run(AsyncCheckpointRunnable.java:175)
~[flink-dist_2.11-1.13.1.jar:1.13.1]
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
[?:1.8.0_202]
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
[?:1.8.0_202]
at java.lang.Thread.run(Thread.java:748) [?:1.8.0_202]
Caused by: org.apache.flink.util.SerializedThrowable: Could not materialize
checkpoint 1101 for operator IntervalJoin(joinType=[InnerJoin],
windowBounds=[isRowTime=true, leftLowerBound=-1296000000,
leftUpperBound=1296000000, leftTimeIndex=4, rightTimeIndex=4],
where=[((hire_contract_id = id) AND (last_modify_time >= (last_modify_time0
- 1296000000:INTERVAL DAY)) AND (last_modify_time <= (last_modify_time0 +
1296000000:INTERVAL DAY)))], select=[hire_contract_id, hire_status_code,
sign_date, confirm_date, last_modify_time, proctime, id, hire_contract_code,
ziroom_version_id, is_del, last_modify_time0]) ->
Calc(select=[hire_contract_id, hire_status_code, sign_date, confirm_date,
last_modify_time, proctime, hire_contract_code, ziroom_version_id, is_del AS
is_del0, last_modify_time0]) (1/1)#3.
at
org.apache.flink.streaming.runtime.tasks.AsyncCheckpointRunnable.handleExecutionException(AsyncCheckpointRunnable.java:257)
~[flink-dist_2.11-1.13.1.jar:1.13.1]
... 4 more
Caused by: org.apache.flink.util.SerializedThrowable: java.io.IOException:
Could not flush to file and close the file system output stream to
hdfs://zrHdfsHa/user/flink/checkpointsdata/6bd7dc46451f01e008762c9b556cb08f/shared/5a5118ba-427f-4234-8e36-ec8d24418fe4
in order to obtain the stream state handle
at
java.util.concurrent.FutureTask.report(FutureTask.java:122) ~[?:1.8.0_202]
at
java.util.concurrent.FutureTask.get(FutureTask.java:192) ~[?:1.8.0_202]
at
org.apache.flink.runtime.concurrent.FutureUtils.runIfNotDoneAndGet(FutureUtils.java:636)
~[flink-dist_2.11-1.13.1.jar:1.13.1]
at
org.apache.flink.streaming.api.operators.OperatorSnapshotFinalizer.<init>(OperatorSnapshotFinalizer.java:54)
~[flink-dist_2.11-1.13.1.jar:1.13.1]
at
org.apache.flink.streaming.runtime.tasks.AsyncCheckpointRunnable.run(AsyncCheckpointRunnable.java:128)
~[flink-dist_2.11-1.13.1.jar:1.13.1]
... 3 more
Caused by: org.apache.flink.util.SerializedThrowable: Could not flush to file
and close the file system output stream to
hdfs://zrHdfsHa/user/flink/checkpointsdata/6bd7dc46451f01e008762c9b556cb08f/shared/5a5118ba-427f-4234-8e36-ec8d24418fe4
in order to obtain the stream state handle
at
org.apache.flink.runtime.state.filesystem.FsCheckpointStreamFactory$FsCheckpointStateOutputStream.closeAndGetHandle(FsCheckpointStreamFactory.java:373)
~[flink-dist_2.11-1.13.1.jar:1.13.1]
at
org.apache.flink.contrib.streaming.state.RocksDBStateUploader.uploadLocalFileToCheckpointFs(RocksDBStateUploader.java:143)
~[flink-dist_2.11-1.13.1.jar:1.13.1]
at
org.apache.flink.contrib.streaming.state.RocksDBStateUploader.lambda$createUploadFutures$0(RocksDBStateUploader.java:101)
~[flink-dist_2.11-1.13.1.jar:1.13.1]
at
org.apache.flink.util.function.CheckedSupplier.lambda$unchecked$0(CheckedSupplier.java:32)
~[flink-core-1.13.1.jar:1.13.1]
at
java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1590)
~[?:1.8.0_202]
... 3 more
Caused by: org.apache.flink.util.SerializedThrowable: Unable to close file
because the last
blockBP-1965840142-10.216.138.23-1585685654447:blk_2926076096_1852445656 does
not have enough number of replicas.
at
org.apache.hadoop.hdfs.DFSOutputStream.completeFile(DFSOutputStream.java:966)
~[flink-shaded-hadoop-3-uber-3.1.1.7.2.1.0-327-9.0.jar:3.1.1.7.2.1.0-327-9.0]
at
org.apache.hadoop.hdfs.DFSOutputStream.completeFile(DFSOutputStream.java:909)
~[flink-shaded-hadoop-3-uber-3.1.1.7.2.1.0-327-9.0.jar:3.1.1.7.2.1.0-327-9.0]
at
org.apache.hadoop.hdfs.DFSOutputStream.closeImpl(DFSOutputStream.java:892)
~[flink-shaded-hadoop-3-uber-3.1.1.7.2.1.0-327-9.0.jar:3.1.1.7.2.1.0-327-9.0]
at
org.apache.hadoop.hdfs.DFSOutputStream.close(DFSOutputStream.java:847)
~[flink-shaded-hadoop-3-uber-3.1.1.7.2.1.0-327-9.0.jar:3.1.1.7.2.1.0-327-9.0]
at
org.apache.hadoop.fs.FSDataOutputStream$PositionCache.close(FSDataOutputStream.java:72)
~[flink-shaded-hadoop-3-uber-3.1.1.7.2.1.0-327-9.0.jar:3.1.1.7.2.1.0-327-9.0]
at
org.apache.hadoop.fs.FSDataOutputStream.close(FSDataOutputStream.java:101)
~[flink-shaded-hadoop-3-uber-3.1.1.7.2.1.0-327-9.0.jar:3.1.1.7.2.1.0-327-9.0]
at
org.apache.flink.runtime.fs.hdfs.HadoopDataOutputStream.close(HadoopDataOutputStream.java:52)
~[flink-dist_2.11-1.13.1.jar:1.13.1]
at
org.apache.flink.core.fs.ClosingFSDataOutputStream.close(ClosingFSDataOutputStream.java:64)
~[flink-core-1.13.1.jar:1.13.1]
at
org.apache.flink.runtime.state.filesystem.FsCheckpointStreamFactory$FsCheckpointStateOutputStream.closeAndGetHandle(FsCheckpointStreamFactory.java:354)
~[flink-dist_2.11-1.13.1.jar:1.13.1]
at
org.apache.flink.contrib.streaming.state.RocksDBStateUploader.uploadLocalFileToCheckpointFs(RocksDBStateUploader.java:143)
~[flink-dist_2.11-1.13.1.jar:1.13.1]
at
org.apache.flink.contrib.streaming.state.RocksDBStateUploader.lambda$createUploadFutures$0(RocksDBStateUploader.java:101)
~[flink-dist_2.11-1.13.1.jar:1.13.1]
at
org.apache.flink.util.function.CheckedSupplier.lambda$unchecked$0(CheckedSupplier.java:32)
~[flink-core-1.13.1.jar:1.13.1]
at
java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1590)
~[?:1.8.0_202]
... 3 more
2022-06-24 10:58:19,844 INFO org.apache.flink.runtime.jobmaster.JobMaster
[] - Trying to recover from a global failure.
org.apache.flink.util.FlinkRuntimeException: Exceeded checkpoint tolerable
failure threshold.
at
org.apache.flink.runtime.checkpoint.CheckpointFailureManager.handleCheckpointException(CheckpointFailureManager.java:98)
~[flink-dist_2.11-1.13.1.jar:1.13.1]
at
org.apache.flink.runtime.checkpoint.CheckpointFailureManager.handleTaskLevelCheckpointException(CheckpointFailureManager.java:84)
~[flink-dist_2.11-1.13.1.jar:1.13.1]
at
org.apache.flink.runtime.checkpoint.CheckpointCoordinator.abortPendingCheckpoint(CheckpointCoordinator.java:1931)
~[flink-dist_2.11-1.13.1.jar:1.13.1]
at
org.apache.flink.runtime.checkpoint.CheckpointCoordinator.receiveDeclineMessage(CheckpointCoordinator.java:991)
~[flink-dist_2.11-1.13.1.jar:1.13.1]
at
org.apache.flink.runtime.scheduler.ExecutionGraphHandler.lambda$declineCheckpoint$2(ExecutionGraphHandler.java:103)
~[flink-dist_2.11-1.13.1.jar:1.13.1]
at
org.apache.flink.runtime.scheduler.ExecutionGraphHandler.lambda$processCheckpointCoordinatorMessage$3(ExecutionGraphHandler.java:119)
~[flink-dist_2.11-1.13.1.jar:1.13.1]
at
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
~[?:1.8.0_202]
at
java.util.concurrent.FutureTask.run(FutureTask.java:266) ~[?:1.8.0_202]
at
java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)
~[?:1.8.0_202]
at
java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)
~[?:1.8.0_202]
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
~[?:1.8.0_202]
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
~[?:1.8.0_202]
at java.lang.Thread.run(Thread.java:748) ~[?:1.8.0_202]
2022-06-24 10:58:19,846 INFO
org.apache.flink.runtime.executiongraph.ExecutionGraph [] - Job
zhaohy4-test (6bd7dc46451f01e008762c9b556cb08f) switched from state RUNNING to
RESTARTING.