[ https://issues.apache.org/jira/browse/FLINK-10193?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16607137#comment-16607137 ]
ASF GitHub Bot commented on FLINK-10193: ---------------------------------------- asfgit closed pull request #6601: [FLINK-10193] Add @RpcTimeout to JobMasterGateway.triggerSavepoint URL: https://github.com/apache/flink/pull/6601 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/JobMaster.java b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/JobMaster.java index 01cb2b6b099..4a66d32a2ac 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/JobMaster.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/JobMaster.java @@ -95,7 +95,6 @@ import org.apache.flink.runtime.rpc.FatalErrorHandler; import org.apache.flink.runtime.rpc.FencedRpcEndpoint; import org.apache.flink.runtime.rpc.RpcService; -import org.apache.flink.runtime.rpc.RpcTimeout; import org.apache.flink.runtime.rpc.akka.AkkaRpcServiceUtils; import org.apache.flink.runtime.state.KeyGroupRange; import org.apache.flink.runtime.taskexecutor.AccumulatorReport; @@ -909,7 +908,7 @@ public void heartbeatFromResourceManager(final ResourceID resourceID) { } @Override - public CompletableFuture<JobDetails> requestJobDetails(@RpcTimeout Time timeout) { + public CompletableFuture<JobDetails> requestJobDetails(Time timeout) { final ExecutionGraph currentExecutionGraph = executionGraph; return CompletableFuture.supplyAsync(() -> WebMonitorUtils.createDetailsForJob(currentExecutionGraph), scheduledExecutorService); } diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/JobMasterGateway.java b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/JobMasterGateway.java index 981222d17a6..bc073c192bd 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/JobMasterGateway.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/JobMasterGateway.java @@ -268,7 +268,7 @@ void heartbeatFromTaskManager( CompletableFuture<String> triggerSavepoint( @Nullable final String targetDirectory, final boolean cancelJob, - final Time timeout); + @RpcTimeout final Time timeout); /** * Requests the statistics on operator back pressure. diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/JobMasterTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/JobMasterTest.java index 66ca769165a..9a2bc97b62b 100644 --- a/flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/JobMasterTest.java +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/JobMasterTest.java @@ -92,6 +92,7 @@ import org.apache.flink.runtime.taskmanager.TaskManagerLocation; import org.apache.flink.runtime.testtasks.NoOpInvokable; import org.apache.flink.runtime.util.TestingFatalErrorHandler; +import org.apache.flink.util.ExceptionUtils; import org.apache.flink.util.FlinkException; import org.apache.flink.util.InstantiationUtil; import org.apache.flink.util.TestLogger; @@ -108,6 +109,7 @@ import org.junit.rules.TemporaryFolder; import javax.annotation.Nonnull; +import javax.annotation.Nullable; import java.io.File; import java.io.FileOutputStream; @@ -122,24 +124,28 @@ import java.util.concurrent.BlockingQueue; import java.util.concurrent.CompletableFuture; import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.contains; import static org.hamcrest.Matchers.containsInAnyOrder; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.nullValue; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; /** * Tests for {@link JobMaster}. */ public class JobMasterTest extends TestLogger { - static final TestingInputSplit[] EMPTY_TESTING_INPUT_SPLITS = new TestingInputSplit[0]; + private static final TestingInputSplit[] EMPTY_TESTING_INPUT_SPLITS = new TestingInputSplit[0]; @ClassRule public static TemporaryFolder temporaryFolder = new TemporaryFolder(); @@ -418,7 +424,7 @@ public void testAutomaticRestartingWhenCheckpointing() throws Exception { } /** - * Tests that an existing checkpoint will have precedence over an savepoint + * Tests that an existing checkpoint will have precedence over an savepoint. */ @Test public void testCheckpointPrecedesSavepointRecovery() throws Exception { @@ -470,7 +476,7 @@ public void testCheckpointPrecedesSavepointRecovery() throws Exception { /** * Tests that the JobMaster retries the scheduling of a job - * in case of a missing slot offering from a registered TaskExecutor + * in case of a missing slot offering from a registered TaskExecutor. */ @Test public void testSlotRequestTimeoutWhenNoSlotOffering() throws Exception { @@ -874,9 +880,9 @@ public void testRequestPartitionState() throws Exception { final CompletableFuture<TaskDeploymentDescriptor> tddFuture = new CompletableFuture<>(); final TestingTaskExecutorGateway testingTaskExecutorGateway = new TestingTaskExecutorGatewayBuilder() .setSubmitTaskConsumer((taskDeploymentDescriptor, jobMasterId) -> { - tddFuture.complete(taskDeploymentDescriptor); - return CompletableFuture.completedFuture(Acknowledge.get()); - }) + tddFuture.complete(taskDeploymentDescriptor); + return CompletableFuture.completedFuture(Acknowledge.get()); + }) .createTestingTaskExecutorGateway(); rpcService.registerGateway(testingTaskExecutorGateway.getAddress(), testingTaskExecutorGateway); @@ -917,6 +923,58 @@ public void testRequestPartitionState() throws Exception { } } + /** + * Tests that the timeout in {@link JobMasterGateway#triggerSavepoint(String, boolean, Time)} + * is respected. + */ + @Test + public void testTriggerSavepointTimeout() throws Exception { + final JobMaster jobMaster = new JobMaster( + rpcService, + JobMasterConfiguration.fromConfiguration(configuration), + jmResourceId, + jobGraph, + haServices, + DefaultSlotPoolFactory.fromConfiguration(configuration, rpcService), + new TestingJobManagerSharedServicesBuilder().build(), + heartbeatServices, + blobServer, + UnregisteredJobManagerJobMetricGroupFactory.INSTANCE, + new NoOpOnCompletionActions(), + testingFatalErrorHandler, + JobMasterTest.class.getClassLoader()) { + + @Override + public CompletableFuture<String> triggerSavepoint( + @Nullable final String targetDirectory, + final boolean cancelJob, + final Time timeout) { + return new CompletableFuture<>(); + } + }; + + try { + final CompletableFuture<Acknowledge> startFuture = jobMaster.start(jobMasterId, testingTimeout); + startFuture.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS); + + final JobMasterGateway jobMasterGateway = jobMaster.getSelfGateway(JobMasterGateway.class); + final CompletableFuture<String> savepointFutureLowTimeout = jobMasterGateway.triggerSavepoint("/tmp", false, Time.milliseconds(1)); + final CompletableFuture<String> savepointFutureHighTimeout = jobMasterGateway.triggerSavepoint("/tmp", false, RpcUtils.INF_TIMEOUT); + + try { + savepointFutureLowTimeout.get(testingTimeout.getSize(), testingTimeout.getUnit()); + fail(); + } catch (final ExecutionException e) { + final Throwable cause = ExceptionUtils.stripExecutionException(e); + assertThat(cause, instanceOf(TimeoutException.class)); + } + + assertThat(savepointFutureHighTimeout.isDone(), is(equalTo(false))); + } finally { + RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout); + } + } + private JobGraph producerConsumerJobGraph() { final JobVertex producer = new JobVertex("Producer"); producer.setInvokableClass(NoOpInvokable.class); ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Default RPC timeout is used when triggering savepoint via JobMasterGateway > -------------------------------------------------------------------------- > > Key: FLINK-10193 > URL: https://issues.apache.org/jira/browse/FLINK-10193 > Project: Flink > Issue Type: Bug > Components: Distributed Coordination > Affects Versions: 1.5.3, 1.6.0, 1.7.0 > Reporter: Gary Yao > Assignee: Gary Yao > Priority: Critical > Labels: pull-request-available > Fix For: 1.6.1, 1.7.0, 1.5.4 > > Attachments: SlowToCheckpoint.java > > > When calling {{JobMasterGateway#triggerSavepoint(String, boolean, Time)}}, > the default timeout is used because the time parameter of the method is not > annotated with {{@RpcTimeout}}. > *Expected behavior* > * timeout for the RPC should be {{RpcUtils.INF_TIMEOUT}} -- This message was sent by Atlassian JIRA (v7.6.3#76005)