tillrohrmann commented on a change in pull request #9663: [WIP][FLINK-12433][runtime] Implement DefaultScheduler stub URL: https://github.com/apache/flink/pull/9663#discussion_r326137598
########## File path: flink-runtime/src/main/java/org/apache/flink/runtime/scheduler/DefaultScheduler.java ########## @@ -75,10 +137,281 @@ public DefaultScheduler( slotRequestTimeout, shuffleMaster, partitionTracker); + + this.restartBackoffTimeStrategy = checkNotNull(restartBackoffTimeStrategy); + this.slotRequestTimeout = checkNotNull(slotRequestTimeout); + this.slotProvider = checkNotNull(slotProvider); + this.delayExecutor = checkNotNull(delayExecutor); + this.userCodeLoader = checkNotNull(userCodeLoader); + this.schedulingStrategyFactory = checkNotNull(schedulingStrategyFactory); + this.failoverStrategyFactory = checkNotNull(failoverStrategyFactory); + this.executionVertexOperations = checkNotNull(executionVertexOperations); + this.executionVertexVersioner = checkNotNull(executionVertexVersioner); + this.conditionalFutureHandlerFactory = new ConditionalFutureHandlerFactory(executionVertexVersioner); } + // ------------------------------------------------------------------------ + // SchedulerNG + // ------------------------------------------------------------------------ + @Override - public void startScheduling() { - throw new UnsupportedOperationException(); + public void startSchedulingInternal() { + initializeScheduling(); + schedulingStrategy.startScheduling(); + } + + private void initializeScheduling() { + executionFailureHandler = new ExecutionFailureHandler(failoverStrategyFactory.create(getFailoverTopology()), restartBackoffTimeStrategy); + schedulingStrategy = schedulingStrategyFactory.createInstance(this, getSchedulingTopology(), getJobGraph()); + executionSlotAllocator = new DefaultExecutionSlotAllocator(slotProvider, getInputsLocationsRetriever(), slotRequestTimeout); + setTaskFailureListener(new UpdateTaskExecutionStateInDefaultSchedulerListener(this, getJobGraph().getJobID())); + prepareExecutionGraphForScheduling(); + } + + @Override + public boolean updateTaskExecutionState(final TaskExecutionState taskExecutionState) { + final Optional<ExecutionVertexID> executionVertexIdOptional = getExecutionVertexId(taskExecutionState.getID()); + if (executionVertexIdOptional.isPresent()) { + final ExecutionVertexID executionVertexId = executionVertexIdOptional.get(); + updateState(taskExecutionState); + schedulingStrategy.onExecutionStateChange(executionVertexId, taskExecutionState.getExecutionState()); + maybeHandleTaskFailure(taskExecutionState, executionVertexId); + return true; + } + + return false; + } + + private void maybeHandleTaskFailure(final TaskExecutionState taskExecutionState, final ExecutionVertexID executionVertexId) { + if (taskExecutionState.getExecutionState() == ExecutionState.FAILED) { + final Throwable error = taskExecutionState.getError(userCodeLoader); + handleTaskFailure(executionVertexId, error); + } + } + + private void handleTaskFailure(final ExecutionVertexID executionVertexId, final Throwable error) { + final FailureHandlingResult failureHandlingResult = executionFailureHandler.getFailureHandlingResult(executionVertexId, error); + maybeRestartTasks(failureHandlingResult); + } + + private void maybeRestartTasks(final FailureHandlingResult failureHandlingResult) { + if (failureHandlingResult.canRestart()) { + restartTasksWithDelay(failureHandlingResult); + } else { + failJob(failureHandlingResult.getError()); + } + } + + private void restartTasksWithDelay(final FailureHandlingResult failureHandlingResult) { + final Set<ExecutionVertexID> verticesToRestart = failureHandlingResult.getVerticesToRestart(); + + final Set<ExecutionVertexVersion> executionVertexVersions = + new HashSet<>(executionVertexVersioner.recordVertexModifications(verticesToRestart).values()); + + final CompletableFuture<?> cancelFuture = cancelTasksAsync(verticesToRestart); + + delayExecutor.schedule( Review comment: I agree that this should be ok to do. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services