XComp commented on a change in pull request #14847:
URL: https://github.com/apache/flink/pull/14847#discussion_r577152942



##########
File path: 
flink-runtime/src/test/java/org/apache/flink/runtime/scheduler/DefaultSchedulerTest.java
##########
@@ -605,6 +625,414 @@ public void abortPendingCheckpointsWhenRestartingTasks() 
throws Exception {
         assertThat(checkpointCoordinator.getNumberOfPendingCheckpoints(), 
is(equalTo(0)));
     }
 
+    @Test
+    public void testStopWithSavepointFailingAfterSavepointCreation() throws 
Exception {
+        // initially, we don't allow any restarts since the first phase 
(savepoint creation)
+        // succeeds without any failures
+        testRestartBackoffTimeStrategy.setCanRestart(false);
+
+        final JobGraph jobGraph = 
createTwoVertexJobGraphWithCheckpointingEnabled();
+
+        final SimpleAckingTaskManagerGateway taskManagerGateway =
+                new SimpleAckingTaskManagerGateway();
+        final CountDownLatch checkpointTriggeredLatch =
+                getCheckpointTriggeredLatch(taskManagerGateway);
+
+        // collect executions to which the checkpoint completion was confirmed
+        final List<ExecutionAttemptID> 
executionAttemptIdsWithCompletedCheckpoint =
+                new ArrayList<>();
+        taskManagerGateway.setNotifyCheckpointCompleteConsumer(
+                (executionAttemptId, jobId, actualCheckpointId, timestamp) ->
+                        
executionAttemptIdsWithCompletedCheckpoint.add(executionAttemptId));
+        taskManagerGateway.setNotifyCheckpointAbortedConsumer(
+                (ignored0, ignored1, ignored2, ignored3) -> {
+                    throw new 
UnsupportedOperationException("notifyCheckpointAborted was called");
+                });
+
+        final DefaultScheduler scheduler = 
createSchedulerAndStartScheduling(jobGraph);
+
+        final ExecutionAttemptID succeedingExecutionAttemptId =
+                
Iterables.get(scheduler.getExecutionGraph().getAllExecutionVertices(), 0)
+                        .getCurrentExecutionAttempt()
+                        .getAttemptId();
+        final ExecutionAttemptID failingExecutionAttemptId =
+                
Iterables.getLast(scheduler.getExecutionGraph().getAllExecutionVertices())
+                        .getCurrentExecutionAttempt()
+                        .getAttemptId();
+
+        // we have to make sure that the tasks are running before 
stop-with-savepoint is triggered
+        scheduler.updateTaskExecutionState(
+                new TaskExecutionState(
+                        jobGraph.getJobID(), failingExecutionAttemptId, 
ExecutionState.RUNNING));
+        scheduler.updateTaskExecutionState(
+                new TaskExecutionState(
+                        jobGraph.getJobID(), succeedingExecutionAttemptId, 
ExecutionState.RUNNING));
+
+        final String savepointFolder = 
TEMPORARY_FOLDER.newFolder().getAbsolutePath();
+
+        // trigger savepoint and wait for checkpoint to be retrieved by 
TaskManagerGateway
+        final CompletableFuture<String> stopWithSavepointFuture =
+                scheduler.stopWithSavepoint(savepointFolder, false);
+        checkpointTriggeredLatch.await();
+
+        acknowledgePendingCheckpoint(scheduler, 1);
+
+        assertThat(
+                "Both the executions where notified about the completed 
checkpoint.",
+                executionAttemptIdsWithCompletedCheckpoint,
+                containsInAnyOrder(failingExecutionAttemptId, 
succeedingExecutionAttemptId));
+
+        // The savepoint creation succeeded a failure happens in the second 
phase when finishing
+        // the tasks. That's why, the restarting policy is enabled.
+        testRestartBackoffTimeStrategy.setCanRestart(true);
+
+        scheduler.updateTaskExecutionState(
+                new TaskExecutionState(
+                        jobGraph.getJobID(), failingExecutionAttemptId, 
ExecutionState.FAILED));
+        scheduler.updateTaskExecutionState(
+                new TaskExecutionState(
+                        jobGraph.getJobID(),
+                        succeedingExecutionAttemptId,
+                        ExecutionState.FINISHED));
+
+        // the restarts due to local failure handling and global job fail-over 
are triggered
+        assertThat(taskRestartExecutor.getNonPeriodicScheduledTask(), 
hasSize(2));

Review comment:
       `StopWithSavepointContextTest` replaces this test focussing more on the 
actually functionality we want to test.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to