Github user tzulitai commented on a diff in the pull request: https://github.com/apache/flink/pull/4728#discussion_r141817520 --- Diff: flink-runtime/src/test/java/org/apache/flink/runtime/rest/handler/legacy/ExecutionGraphCacheTest.java --- @@ -0,0 +1,357 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.runtime.rest.handler.legacy; + +import org.apache.flink.api.common.ArchivedExecutionConfig; +import org.apache.flink.api.common.ExecutionConfig; +import org.apache.flink.api.common.JobID; +import org.apache.flink.api.common.time.Time; +import org.apache.flink.runtime.accumulators.StringifiedAccumulatorResult; +import org.apache.flink.runtime.concurrent.FutureUtils; +import org.apache.flink.runtime.executiongraph.AccessExecutionGraph; +import org.apache.flink.runtime.executiongraph.ArchivedExecutionGraph; +import org.apache.flink.runtime.executiongraph.ErrorInfo; +import org.apache.flink.runtime.executiongraph.ExecutionGraph; +import org.apache.flink.runtime.jobgraph.JobStatus; +import org.apache.flink.runtime.jobmanager.JobManager; +import org.apache.flink.runtime.jobmaster.JobManagerGateway; +import org.apache.flink.runtime.messages.JobNotFoundException; +import org.apache.flink.runtime.testingUtils.TestingUtils; +import org.apache.flink.util.FlinkException; +import org.apache.flink.util.TestLogger; + +import org.junit.Test; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutionException; +import java.util.function.Function; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; +import static org.mockito.Matchers.any; +import static org.mockito.Matchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +/** + * Tests for the {@link ExecutionGraphCache}. + */ +public class ExecutionGraphCacheTest extends TestLogger { + + /** + * Tests that we can cache AccessExecutionGraphs over multiple accesses. 
+ */ + @Test + public void testExecutionGraphCaching() throws Exception { + final Time timeout = Time.milliseconds(100L); + final Time timeToLive = Time.hours(1L); + final JobID jobId = new JobID(); + final AccessExecutionGraph accessExecutionGraph = mock(AccessExecutionGraph.class); + + final JobManagerGateway jobManagerGateway = mock(JobManagerGateway.class); + when(jobManagerGateway.requestJob(eq(jobId), any(Time.class))).thenReturn(CompletableFuture.completedFuture(accessExecutionGraph)); + + try (ExecutionGraphCache executionGraphCache = new ExecutionGraphCache(timeout, timeToLive)) { + CompletableFuture<AccessExecutionGraph> accessExecutionGraphFuture = executionGraphCache.getExecutionGraph(jobId, jobManagerGateway); + + assertEquals(accessExecutionGraph, accessExecutionGraphFuture.get()); + + CompletableFuture<AccessExecutionGraph> accessExecutionGraphFuture2 = executionGraphCache.getExecutionGraph(jobId, jobManagerGateway); + + assertEquals(accessExecutionGraph, accessExecutionGraphFuture2.get()); + + // verify that we only issued a single request to the gateway + verify(jobManagerGateway, times(1)).requestJob(eq(jobId), any(Time.class)); + } + } + + /** + * Tests that an AccessExecutionGraph is invalidated after its TTL expired. 
+ */ + @Test + public void testExecutionGraphEntryInvalidation() throws Exception { + final Time timeout = Time.milliseconds(100L); + final Time timeToLive = Time.milliseconds(1L); + final JobID jobId = new JobID(); + final AccessExecutionGraph accessExecutionGraph = mock(AccessExecutionGraph.class); + + final JobManagerGateway jobManagerGateway = mock(JobManagerGateway.class); + when(jobManagerGateway.requestJob(eq(jobId), any(Time.class))).thenReturn(CompletableFuture.completedFuture(accessExecutionGraph)); + + try (ExecutionGraphCache executionGraphCache = new ExecutionGraphCache(timeout, timeToLive)) { + CompletableFuture<AccessExecutionGraph> executionGraphFuture = executionGraphCache.getExecutionGraph(jobId, jobManagerGateway); + + assertEquals(accessExecutionGraph, executionGraphFuture.get()); + + // sleep for the TTL + Thread.sleep(timeToLive.toMilliseconds()); + + CompletableFuture<AccessExecutionGraph> executionGraphFuture2 = executionGraphCache.getExecutionGraph(jobId, jobManagerGateway); + + assertEquals(accessExecutionGraph, executionGraphFuture2.get()); + + verify(jobManagerGateway, times(2)).requestJob(eq(jobId), any(Time.class)); + } + } + + + /** + * Tests that a failure in requesting an AccessExecutionGraph from the gateway, will not create + * a cache entry --> another cache request will trigger a new gateway request. 
+ */ + @Test + public void testImmediateCacheInvalidationAfterFailure() throws Exception { + final Time timeout = Time.milliseconds(100L); + final Time timeToLive = Time.hours(1L); + final JobID jobId = new JobID(); + + final AccessExecutionGraph accessExecutionGraph = mock(AccessExecutionGraph.class); + + final JobManagerGateway jobManagerGateway = mock(JobManagerGateway.class); + // let's first answer with a JobNotFoundException and then only with the correct result + when(jobManagerGateway.requestJob(eq(jobId), any(Time.class))).thenReturn( + FutureUtils.completedExceptionally(new JobNotFoundException(jobId)), + CompletableFuture.completedFuture(accessExecutionGraph)); + + try (ExecutionGraphCache executionGraphCache = new ExecutionGraphCache(timeout, timeToLive)) { + CompletableFuture<AccessExecutionGraph> executionGraphFuture = executionGraphCache.getExecutionGraph(jobId, jobManagerGateway); + + try { + executionGraphFuture.get(); + + fail("The execution graph future should have been completed exceptionally."); + } catch (ExecutionException ee) { + assertTrue(ee.getCause() instanceof FlinkException); + } + + CompletableFuture<AccessExecutionGraph> executionGraphFuture2 = executionGraphCache.getExecutionGraph(jobId, jobManagerGateway); + + assertEquals(accessExecutionGraph, executionGraphFuture2.get()); --- End diff -- I see. OK, let's leave it as is then.
---