zentol commented on code in PR #24382:
URL: https://github.com/apache/flink/pull/24382#discussion_r1502844838
##########
flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/DefaultExecutionGraph.java:
##########
@@ -1031,34 +1028,18 @@ public void suspend(Throwable suspensionCause) {
                 return;
             } else if (transitionState(state, JobStatus.SUSPENDED, suspensionCause)) {
                 initFailureCause(suspensionCause, System.currentTimeMillis());
+                resetExecutionGraph(ExecutionJobVertex::suspend)
+                        .whenComplete(
+                                (Void ignored, Throwable throwable) -> {
+                                    if (throwable != null) {
+                                        LOG.debug(
+                                                "Could not properly suspend the execution graph.",
+                                                throwable);
+                                    }
-                incrementRestarts();
-
-                // cancel ongoing scheduling action
-                if (schedulingFuture != null) {
-                    schedulingFuture.cancel(false);
-                }
-                final ArrayList<CompletableFuture<Void>> executionJobVertexTerminationFutures =
-                        new ArrayList<>(verticesInCreationOrder.size());
-
-                for (ExecutionJobVertex ejv : verticesInCreationOrder) {
-                    executionJobVertexTerminationFutures.add(ejv.suspend());
-                }
-
-                final ConjunctFuture<Void> jobVerticesTerminationFuture =
-                        FutureUtils.waitForAll(executionJobVertexTerminationFutures);
-
-                checkState(jobVerticesTerminationFuture.isDone(), "Suspend needs to happen atomically");

Review Comment:
waitForAll returns a future that completes _at some point_. This check here asserted that it completed _immediately_.

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@flink.apache.org

For queries about this service, please contact Infrastructure at: us...@infra.apache.org