[ 
https://issues.apache.org/jira/browse/FLINK-10193?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16607137#comment-16607137
 ] 

ASF GitHub Bot commented on FLINK-10193:
----------------------------------------

asfgit closed pull request #6601: [FLINK-10193] Add @RpcTimeout to 
JobMasterGateway.triggerSavepoint
URL: https://github.com/apache/flink/pull/6601
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git 
a/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/JobMaster.java 
b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/JobMaster.java
index 01cb2b6b099..4a66d32a2ac 100644
--- 
a/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/JobMaster.java
+++ 
b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/JobMaster.java
@@ -95,7 +95,6 @@
 import org.apache.flink.runtime.rpc.FatalErrorHandler;
 import org.apache.flink.runtime.rpc.FencedRpcEndpoint;
 import org.apache.flink.runtime.rpc.RpcService;
-import org.apache.flink.runtime.rpc.RpcTimeout;
 import org.apache.flink.runtime.rpc.akka.AkkaRpcServiceUtils;
 import org.apache.flink.runtime.state.KeyGroupRange;
 import org.apache.flink.runtime.taskexecutor.AccumulatorReport;
@@ -909,7 +908,7 @@ public void heartbeatFromResourceManager(final ResourceID 
resourceID) {
        }
 
        @Override
-       public CompletableFuture<JobDetails> requestJobDetails(@RpcTimeout Time 
timeout) {
+       public CompletableFuture<JobDetails> requestJobDetails(Time timeout) {
                final ExecutionGraph currentExecutionGraph = executionGraph;
                return CompletableFuture.supplyAsync(() -> 
WebMonitorUtils.createDetailsForJob(currentExecutionGraph), 
scheduledExecutorService);
        }
diff --git 
a/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/JobMasterGateway.java
 
b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/JobMasterGateway.java
index 981222d17a6..bc073c192bd 100644
--- 
a/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/JobMasterGateway.java
+++ 
b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/JobMasterGateway.java
@@ -268,7 +268,7 @@ void heartbeatFromTaskManager(
        CompletableFuture<String> triggerSavepoint(
                @Nullable final String targetDirectory,
                final boolean cancelJob,
-               final Time timeout);
+               @RpcTimeout final Time timeout);
 
        /**
         * Requests the statistics on operator back pressure.
diff --git 
a/flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/JobMasterTest.java
 
b/flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/JobMasterTest.java
index 66ca769165a..9a2bc97b62b 100644
--- 
a/flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/JobMasterTest.java
+++ 
b/flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/JobMasterTest.java
@@ -92,6 +92,7 @@
 import org.apache.flink.runtime.taskmanager.TaskManagerLocation;
 import org.apache.flink.runtime.testtasks.NoOpInvokable;
 import org.apache.flink.runtime.util.TestingFatalErrorHandler;
+import org.apache.flink.util.ExceptionUtils;
 import org.apache.flink.util.FlinkException;
 import org.apache.flink.util.InstantiationUtil;
 import org.apache.flink.util.TestLogger;
@@ -108,6 +109,7 @@
 import org.junit.rules.TemporaryFolder;
 
 import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
 
 import java.io.File;
 import java.io.FileOutputStream;
@@ -122,24 +124,28 @@
 import java.util.concurrent.BlockingQueue;
 import java.util.concurrent.CompletableFuture;
 import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.ExecutionException;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
 
 import static org.hamcrest.MatcherAssert.assertThat;
 import static org.hamcrest.Matchers.contains;
 import static org.hamcrest.Matchers.containsInAnyOrder;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.hasSize;
+import static org.hamcrest.Matchers.instanceOf;
 import static org.hamcrest.Matchers.is;
 import static org.hamcrest.Matchers.nullValue;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
 
 /**
  * Tests for {@link JobMaster}.
  */
 public class JobMasterTest extends TestLogger {
 
-       static final TestingInputSplit[] EMPTY_TESTING_INPUT_SPLITS = new 
TestingInputSplit[0];
+       private static final TestingInputSplit[] EMPTY_TESTING_INPUT_SPLITS = 
new TestingInputSplit[0];
 
        @ClassRule
        public static TemporaryFolder temporaryFolder = new TemporaryFolder();
@@ -418,7 +424,7 @@ public void testAutomaticRestartingWhenCheckpointing() 
throws Exception {
        }
 
        /**
-        * Tests that an existing checkpoint will have precedence over an 
savepoint
+        * Tests that an existing checkpoint will have precedence over an 
savepoint.
         */
        @Test
        public void testCheckpointPrecedesSavepointRecovery() throws Exception {
@@ -470,7 +476,7 @@ public void testCheckpointPrecedesSavepointRecovery() 
throws Exception {
 
        /**
         * Tests that the JobMaster retries the scheduling of a job
-        * in case of a missing slot offering from a registered TaskExecutor
+        * in case of a missing slot offering from a registered TaskExecutor.
         */
        @Test
        public void testSlotRequestTimeoutWhenNoSlotOffering() throws Exception 
{
@@ -874,9 +880,9 @@ public void testRequestPartitionState() throws Exception {
                        final CompletableFuture<TaskDeploymentDescriptor> 
tddFuture = new CompletableFuture<>();
                        final TestingTaskExecutorGateway 
testingTaskExecutorGateway = new TestingTaskExecutorGatewayBuilder()
                                
.setSubmitTaskConsumer((taskDeploymentDescriptor, jobMasterId) -> {
-                                         
tddFuture.complete(taskDeploymentDescriptor);
-                                         return 
CompletableFuture.completedFuture(Acknowledge.get());
-                                 })
+                                       
tddFuture.complete(taskDeploymentDescriptor);
+                                       return 
CompletableFuture.completedFuture(Acknowledge.get());
+                               })
                                .createTestingTaskExecutorGateway();
                        
rpcService.registerGateway(testingTaskExecutorGateway.getAddress(), 
testingTaskExecutorGateway);
 
@@ -917,6 +923,58 @@ public void testRequestPartitionState() throws Exception {
                }
        }
 
+       /**
+        * Tests that the timeout in {@link 
JobMasterGateway#triggerSavepoint(String, boolean, Time)}
+        * is respected.
+        */
+       @Test
+       public void testTriggerSavepointTimeout() throws Exception {
+               final JobMaster jobMaster = new JobMaster(
+                       rpcService,
+                       JobMasterConfiguration.fromConfiguration(configuration),
+                       jmResourceId,
+                       jobGraph,
+                       haServices,
+                       DefaultSlotPoolFactory.fromConfiguration(configuration, 
rpcService),
+                       new TestingJobManagerSharedServicesBuilder().build(),
+                       heartbeatServices,
+                       blobServer,
+                       UnregisteredJobManagerJobMetricGroupFactory.INSTANCE,
+                       new NoOpOnCompletionActions(),
+                       testingFatalErrorHandler,
+                       JobMasterTest.class.getClassLoader()) {
+
+                       @Override
+                       public CompletableFuture<String> triggerSavepoint(
+                                       @Nullable final String targetDirectory,
+                                       final boolean cancelJob,
+                                       final Time timeout) {
+                               return new CompletableFuture<>();
+                       }
+               };
+
+               try {
+                       final CompletableFuture<Acknowledge> startFuture = 
jobMaster.start(jobMasterId, testingTimeout);
+                       startFuture.get(testingTimeout.toMilliseconds(), 
TimeUnit.MILLISECONDS);
+
+                       final JobMasterGateway jobMasterGateway = 
jobMaster.getSelfGateway(JobMasterGateway.class);
+                       final CompletableFuture<String> 
savepointFutureLowTimeout = jobMasterGateway.triggerSavepoint("/tmp", false, 
Time.milliseconds(1));
+                       final CompletableFuture<String> 
savepointFutureHighTimeout = jobMasterGateway.triggerSavepoint("/tmp", false, 
RpcUtils.INF_TIMEOUT);
+
+                       try {
+                               
savepointFutureLowTimeout.get(testingTimeout.getSize(), 
testingTimeout.getUnit());
+                               fail();
+                       } catch (final ExecutionException e) {
+                               final Throwable cause = 
ExceptionUtils.stripExecutionException(e);
+                               assertThat(cause, 
instanceOf(TimeoutException.class));
+                       }
+
+                       assertThat(savepointFutureHighTimeout.isDone(), 
is(equalTo(false)));
+               } finally {
+                       RpcUtils.terminateRpcEndpoint(jobMaster, 
testingTimeout);
+               }
+       }
+
        private JobGraph producerConsumerJobGraph() {
                final JobVertex producer = new JobVertex("Producer");
                producer.setInvokableClass(NoOpInvokable.class);


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> Default RPC timeout is used when triggering savepoint via JobMasterGateway
> --------------------------------------------------------------------------
>
>                 Key: FLINK-10193
>                 URL: https://issues.apache.org/jira/browse/FLINK-10193
>             Project: Flink
>          Issue Type: Bug
>          Components: Distributed Coordination
>    Affects Versions: 1.5.3, 1.6.0, 1.7.0
>            Reporter: Gary Yao
>            Assignee: Gary Yao
>            Priority: Critical
>              Labels: pull-request-available
>             Fix For: 1.6.1, 1.7.0, 1.5.4
>
>         Attachments: SlowToCheckpoint.java
>
>
> When calling {{JobMasterGateway#triggerSavepoint(String, boolean, Time)}}, 
> the default timeout is used because the time parameter of the method  is not 
> annotated with {{@RpcTimeout}}. 
> *Expected behavior*
> * timeout for the RPC should be {{RpcUtils.INF_TIMEOUT}}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to