Flink version: 1.13.1
HDFS version: 3+
??????????

2022-06-24 10:58:19,839 INFO  
org.apache.flink.runtime.checkpoint.CheckpointCoordinator    [] - Decline 
checkpoint 1101 by task b3d88f9ef72bda003056856c4422742d of job 
6bd7dc46451f01e008762c9b556cb08f at zhaohy4-test-taskmanager-1-1 @ 10.42.5.55 
(dataPort=40558).

org.apache.flink.util.SerializedThrowable: Asynchronous task checkpoint failed.

    at 
org.apache.flink.streaming.runtime.tasks.AsyncCheckpointRunnable.handleExecutionException(AsyncCheckpointRunnable.java:279)
 ~[flink-dist_2.11-1.13.1.jar:1.13.1]

    at 
org.apache.flink.streaming.runtime.tasks.AsyncCheckpointRunnable.run(AsyncCheckpointRunnable.java:175)
 ~[flink-dist_2.11-1.13.1.jar:1.13.1]

    at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) 
[?:1.8.0_202]

    at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) 
[?:1.8.0_202]

    at java.lang.Thread.run(Thread.java:748) [?:1.8.0_202]

Caused by: org.apache.flink.util.SerializedThrowable: Could not materialize 
checkpoint 1101 for operator IntervalJoin(joinType=[InnerJoin], 
windowBounds=[isRowTime=true, leftLowerBound=-1296000000, 
leftUpperBound=1296000000, leftTimeIndex=4, rightTimeIndex=4], 
where=[((hire_contract_id = id) AND (last_modify_time >= (last_modify_time0 
- 1296000000:INTERVAL DAY)) AND (last_modify_time <= (last_modify_time0 + 
1296000000:INTERVAL DAY)))], select=[hire_contract_id, hire_status_code, 
sign_date, confirm_date, last_modify_time, proctime, id, hire_contract_code, 
ziroom_version_id, is_del, last_modify_time0]) -> 
Calc(select=[hire_contract_id, hire_status_code, sign_date, confirm_date, 
last_modify_time, proctime, hire_contract_code, ziroom_version_id, is_del AS 
is_del0, last_modify_time0]) (1/1)#3.

    at 
org.apache.flink.streaming.runtime.tasks.AsyncCheckpointRunnable.handleExecutionException(AsyncCheckpointRunnable.java:257)
 ~[flink-dist_2.11-1.13.1.jar:1.13.1]

    ... 4 more

Caused by: org.apache.flink.util.SerializedThrowable: java.io.IOException: 
Could not flush to file and close the file system output stream to 
hdfs://zrHdfsHa/user/flink/checkpointsdata/6bd7dc46451f01e008762c9b556cb08f/shared/5a5118ba-427f-4234-8e36-ec8d24418fe4
 in order to obtain the stream state handle

    at 
java.util.concurrent.FutureTask.report(FutureTask.java:122) ~[?:1.8.0_202]

    at 
java.util.concurrent.FutureTask.get(FutureTask.java:192) ~[?:1.8.0_202]

    at 
org.apache.flink.runtime.concurrent.FutureUtils.runIfNotDoneAndGet(FutureUtils.java:636)
 ~[flink-dist_2.11-1.13.1.jar:1.13.1]

    at 
org.apache.flink.streaming.api.operators.OperatorSnapshotFinalizer.<init>(OperatorSnapshotFinalizer.java:54)
 ~[flink-dist_2.11-1.13.1.jar:1.13.1]

    at 
org.apache.flink.streaming.runtime.tasks.AsyncCheckpointRunnable.run(AsyncCheckpointRunnable.java:128)
 ~[flink-dist_2.11-1.13.1.jar:1.13.1]

    ... 3 more

Caused by: org.apache.flink.util.SerializedThrowable: Could not flush to file 
and close the file system output stream to 
hdfs://zrHdfsHa/user/flink/checkpointsdata/6bd7dc46451f01e008762c9b556cb08f/shared/5a5118ba-427f-4234-8e36-ec8d24418fe4
 in order to obtain the stream state handle

    at 
org.apache.flink.runtime.state.filesystem.FsCheckpointStreamFactory$FsCheckpointStateOutputStream.closeAndGetHandle(FsCheckpointStreamFactory.java:373)
 ~[flink-dist_2.11-1.13.1.jar:1.13.1]

    at 
org.apache.flink.contrib.streaming.state.RocksDBStateUploader.uploadLocalFileToCheckpointFs(RocksDBStateUploader.java:143)
 ~[flink-dist_2.11-1.13.1.jar:1.13.1]

    at 
org.apache.flink.contrib.streaming.state.RocksDBStateUploader.lambda$createUploadFutures$0(RocksDBStateUploader.java:101)
 ~[flink-dist_2.11-1.13.1.jar:1.13.1]

    at 
org.apache.flink.util.function.CheckedSupplier.lambda$unchecked$0(CheckedSupplier.java:32)
 ~[flink-core-1.13.1.jar:1.13.1]

    at 
java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1590)
 ~[?:1.8.0_202]

    ... 3 more

Caused by: org.apache.flink.util.SerializedThrowable: Unable to close file 
because the last 
blockBP-1965840142-10.216.138.23-1585685654447:blk_2926076096_1852445656 does 
not have enough number of replicas.

    at 
org.apache.hadoop.hdfs.DFSOutputStream.completeFile(DFSOutputStream.java:966) 
~[flink-shaded-hadoop-3-uber-3.1.1.7.2.1.0-327-9.0.jar:3.1.1.7.2.1.0-327-9.0]

    at 
org.apache.hadoop.hdfs.DFSOutputStream.completeFile(DFSOutputStream.java:909) 
~[flink-shaded-hadoop-3-uber-3.1.1.7.2.1.0-327-9.0.jar:3.1.1.7.2.1.0-327-9.0]

    at 
org.apache.hadoop.hdfs.DFSOutputStream.closeImpl(DFSOutputStream.java:892) 
~[flink-shaded-hadoop-3-uber-3.1.1.7.2.1.0-327-9.0.jar:3.1.1.7.2.1.0-327-9.0]

    at 
org.apache.hadoop.hdfs.DFSOutputStream.close(DFSOutputStream.java:847) 
~[flink-shaded-hadoop-3-uber-3.1.1.7.2.1.0-327-9.0.jar:3.1.1.7.2.1.0-327-9.0]

    at 
org.apache.hadoop.fs.FSDataOutputStream$PositionCache.close(FSDataOutputStream.java:72)
 ~[flink-shaded-hadoop-3-uber-3.1.1.7.2.1.0-327-9.0.jar:3.1.1.7.2.1.0-327-9.0]

    at 
org.apache.hadoop.fs.FSDataOutputStream.close(FSDataOutputStream.java:101) 
~[flink-shaded-hadoop-3-uber-3.1.1.7.2.1.0-327-9.0.jar:3.1.1.7.2.1.0-327-9.0]

    at 
org.apache.flink.runtime.fs.hdfs.HadoopDataOutputStream.close(HadoopDataOutputStream.java:52)
 ~[flink-dist_2.11-1.13.1.jar:1.13.1]

    at 
org.apache.flink.core.fs.ClosingFSDataOutputStream.close(ClosingFSDataOutputStream.java:64)
 ~[flink-core-1.13.1.jar:1.13.1]

    at 
org.apache.flink.runtime.state.filesystem.FsCheckpointStreamFactory$FsCheckpointStateOutputStream.closeAndGetHandle(FsCheckpointStreamFactory.java:354)
 ~[flink-dist_2.11-1.13.1.jar:1.13.1]

    at 
org.apache.flink.contrib.streaming.state.RocksDBStateUploader.uploadLocalFileToCheckpointFs(RocksDBStateUploader.java:143)
 ~[flink-dist_2.11-1.13.1.jar:1.13.1]

    at 
org.apache.flink.contrib.streaming.state.RocksDBStateUploader.lambda$createUploadFutures$0(RocksDBStateUploader.java:101)
 ~[flink-dist_2.11-1.13.1.jar:1.13.1]

    at 
org.apache.flink.util.function.CheckedSupplier.lambda$unchecked$0(CheckedSupplier.java:32)
 ~[flink-core-1.13.1.jar:1.13.1]

    at 
java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1590)
 ~[?:1.8.0_202]

    ... 3 more

2022-06-24 10:58:19,844 INFO  org.apache.flink.runtime.jobmaster.JobMaster      
           [] - Trying to recover from a global failure.

org.apache.flink.util.FlinkRuntimeException: Exceeded checkpoint tolerable 
failure threshold.

    at 
org.apache.flink.runtime.checkpoint.CheckpointFailureManager.handleCheckpointException(CheckpointFailureManager.java:98)
 ~[flink-dist_2.11-1.13.1.jar:1.13.1]

    at 
org.apache.flink.runtime.checkpoint.CheckpointFailureManager.handleTaskLevelCheckpointException(CheckpointFailureManager.java:84)
 ~[flink-dist_2.11-1.13.1.jar:1.13.1]

    at 
org.apache.flink.runtime.checkpoint.CheckpointCoordinator.abortPendingCheckpoint(CheckpointCoordinator.java:1931)
 ~[flink-dist_2.11-1.13.1.jar:1.13.1]

    at 
org.apache.flink.runtime.checkpoint.CheckpointCoordinator.receiveDeclineMessage(CheckpointCoordinator.java:991)
 ~[flink-dist_2.11-1.13.1.jar:1.13.1]

    at 
org.apache.flink.runtime.scheduler.ExecutionGraphHandler.lambda$declineCheckpoint$2(ExecutionGraphHandler.java:103)
 ~[flink-dist_2.11-1.13.1.jar:1.13.1]

    at 
org.apache.flink.runtime.scheduler.ExecutionGraphHandler.lambda$processCheckpointCoordinatorMessage$3(ExecutionGraphHandler.java:119)
 ~[flink-dist_2.11-1.13.1.jar:1.13.1]

    at 
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) 
~[?:1.8.0_202]

    at 
java.util.concurrent.FutureTask.run(FutureTask.java:266) ~[?:1.8.0_202]

    at 
java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)
 ~[?:1.8.0_202]

    at 
java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)
 ~[?:1.8.0_202]

    at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) 
~[?:1.8.0_202]

    at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) 
~[?:1.8.0_202]

    at java.lang.Thread.run(Thread.java:748) ~[?:1.8.0_202]

2022-06-24 10:58:19,846 INFO  
org.apache.flink.runtime.executiongraph.ExecutionGraph       [] - Job 
zhaohy4-test (6bd7dc46451f01e008762c9b556cb08f) switched from state RUNNING to 
RESTARTING.

回复