mjsax commented on a change in pull request #9047: URL: https://github.com/apache/kafka/pull/9047#discussion_r458483259
########## File path: streams/src/test/java/org/apache/kafka/streams/processor/internals/GlobalStateManagerImplTest.java ########## @@ -612,72 +617,521 @@ public boolean lockGlobalState() throws IOException { } } - @SuppressWarnings("deprecation") // TODO revisit in follow up PR @Test - public void shouldRetryWhenEndOffsetsThrowsTimeoutException() { - final int retries = 2; + public void shouldNotRetryWhenEndOffsetsThrowsTimeoutExceptionAndTaskTimeoutIsZero() { final AtomicInteger numberOfCalls = new AtomicInteger(0); consumer = new MockConsumer<byte[], byte[]>(OffsetResetStrategy.EARLIEST) { @Override - public synchronized Map<TopicPartition, Long> endOffsets(final Collection<org.apache.kafka.common.TopicPartition> partitions) { + public synchronized Map<TopicPartition, Long> endOffsets(final Collection<TopicPartition> partitions) { numberOfCalls.incrementAndGet(); - throw new TimeoutException(); + throw new TimeoutException("KABOOM!"); } }; + initializeConsumer(0, 0, t1, t2, t3, t4); + streamsConfig = new StreamsConfig(new Properties() { { put(StreamsConfig.APPLICATION_ID_CONFIG, "appId"); put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "dummy:1234"); put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getPath()); - put(StreamsConfig.RETRIES_CONFIG, retries); + put(StreamsConfig.TASK_TIMEOUT_MS_CONFIG, 0L); } }); - try { - new GlobalStateManagerImpl( - new LogContext("mock"), - topology, - consumer, - stateDirectory, - stateRestoreListener, - streamsConfig); - } catch (final StreamsException expected) { - assertEquals(numberOfCalls.get(), retries); - } + stateManager = new GlobalStateManagerImpl( + new LogContext("mock"), + time, + topology, + consumer, + stateDirectory, + stateRestoreListener, + streamsConfig + ); + processorContext.setStateManger(stateManager); + stateManager.setGlobalProcessorContext(processorContext); + + final StreamsException expected = assertThrows( + StreamsException.class, + () -> stateManager.initialize() + ); + final Throwable cause = expected.getCause(); + assertThat(cause, instanceOf(TimeoutException.class)); + assertThat(cause.getMessage(), equalTo("KABOOM!")); + + assertEquals(numberOfCalls.get(), 1); } - @SuppressWarnings("deprecation") // TODO revisit in follow up PR @Test - public void shouldRetryWhenPartitionsForThrowsTimeoutException() { - final int retries = 2; + public void shouldRetryAtLeastOnceWhenEndOffsetsThrowsTimeoutException() { final AtomicInteger numberOfCalls = new AtomicInteger(0); consumer = new MockConsumer<byte[], byte[]>(OffsetResetStrategy.EARLIEST) { @Override - public synchronized List<PartitionInfo> partitionsFor(final String topic) { + public synchronized Map<TopicPartition, Long> endOffsets(final Collection<TopicPartition> partitions) { + time.sleep(100L); numberOfCalls.incrementAndGet(); - throw new TimeoutException(); + throw new TimeoutException("KABOOM!"); } }; + initializeConsumer(0, 0, t1, t2, t3, t4); + streamsConfig = new StreamsConfig(new Properties() { { put(StreamsConfig.APPLICATION_ID_CONFIG, "appId"); put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "dummy:1234"); put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getPath()); - put(StreamsConfig.RETRIES_CONFIG, retries); + put(StreamsConfig.TASK_TIMEOUT_MS_CONFIG, 1L); } }); - try { - new GlobalStateManagerImpl( - new LogContext("mock"), - topology, - consumer, - stateDirectory, - stateRestoreListener, - streamsConfig); - } catch (final StreamsException expected) { - assertEquals(numberOfCalls.get(), retries); - } + stateManager = new GlobalStateManagerImpl( + new LogContext("mock"), + time, + topology, + consumer, + stateDirectory, + stateRestoreListener, + streamsConfig + ); + processorContext.setStateManger(stateManager); + stateManager.setGlobalProcessorContext(processorContext); + + final TimeoutException expected = assertThrows( + TimeoutException.class, + () -> stateManager.initialize() + ); + assertThat(expected.getMessage(), equalTo("Global task did not make progress to restore state within 100 ms. Adjust `task.timeout.ms` if needed.")); + + assertEquals(numberOfCalls.get(), 2); + } + + @Test + public void shouldRetryWhenEndOffsetsThrowsTimeoutExceptionUntilTaskTimeoutExpired() { + final AtomicInteger numberOfCalls = new AtomicInteger(0); + consumer = new MockConsumer<byte[], byte[]>(OffsetResetStrategy.EARLIEST) { + @Override + public synchronized Map<TopicPartition, Long> endOffsets(final Collection<TopicPartition> partitions) { + time.sleep(100L); + numberOfCalls.incrementAndGet(); + throw new TimeoutException("KABOOM!"); + } + }; + initializeConsumer(0, 0, t1, t2, t3, t4); + + streamsConfig = new StreamsConfig(new Properties() { + { + put(StreamsConfig.APPLICATION_ID_CONFIG, "appId"); + put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "dummy:1234"); + put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getPath()); + put(StreamsConfig.TASK_TIMEOUT_MS_CONFIG, 1000L); + } + }); + + stateManager = new GlobalStateManagerImpl( + new LogContext("mock"), + time, + topology, + consumer, + stateDirectory, + stateRestoreListener, + streamsConfig + ); + processorContext.setStateManger(stateManager); + stateManager.setGlobalProcessorContext(processorContext); + + final TimeoutException expected = assertThrows( + TimeoutException.class, + () -> stateManager.initialize() + ); + assertThat(expected.getMessage(), equalTo("Global task did not make progress to restore state within 1100 ms. Adjust `task.timeout.ms` if needed.")); + + assertEquals(numberOfCalls.get(), 12); + } + + @Test + public void shouldNotFailOnSlowProgressWhenEndOffsetsThrowsTimeoutException() { + final AtomicInteger numberOfCalls = new AtomicInteger(0); + consumer = new MockConsumer<byte[], byte[]>(OffsetResetStrategy.EARLIEST) { + @Override + public synchronized Map<TopicPartition, Long> endOffsets(final Collection<TopicPartition> partitions) { + time.sleep(1L); + if (numberOfCalls.incrementAndGet() % 3 == 0) { + return super.endOffsets(partitions); + } + throw new TimeoutException("KABOOM!"); + } + + @Override + public synchronized long position(final TopicPartition partition) { + return numberOfCalls.incrementAndGet(); + } + }; + initializeConsumer(0, 0, t1, t2, t3, t4); + + streamsConfig = new StreamsConfig(new Properties() { + { + put(StreamsConfig.APPLICATION_ID_CONFIG, "appId"); + put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "dummy:1234"); + put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getPath()); + put(StreamsConfig.TASK_TIMEOUT_MS_CONFIG, 1L); + } + }); + + stateManager = new GlobalStateManagerImpl( + new LogContext("mock"), + time, + topology, + consumer, + stateDirectory, + stateRestoreListener, + streamsConfig + ); + processorContext.setStateManger(stateManager); + stateManager.setGlobalProcessorContext(processorContext); + + stateManager.initialize(); + } + + @Test + public void shouldNotRetryWhenPartitionsForThrowsTimeoutExceptionAndTaskTimeoutIsZero() { + final AtomicInteger numberOfCalls = new AtomicInteger(0); + consumer = new MockConsumer<byte[], byte[]>(OffsetResetStrategy.EARLIEST) { + @Override + public List<PartitionInfo> partitionsFor(final String topic) { + numberOfCalls.incrementAndGet(); + throw new TimeoutException("KABOOM!"); + } + }; + initializeConsumer(0, 0, t1, t2, t3, t4); + + streamsConfig = new StreamsConfig(new Properties() { + { + put(StreamsConfig.APPLICATION_ID_CONFIG, "appId"); + put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "dummy:1234"); + put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getPath()); + put(StreamsConfig.TASK_TIMEOUT_MS_CONFIG, 0L); + } + }); + + stateManager = new GlobalStateManagerImpl( + new LogContext("mock"), + time, + topology, + consumer, + stateDirectory, + stateRestoreListener, + streamsConfig + ); + processorContext.setStateManger(stateManager); + stateManager.setGlobalProcessorContext(processorContext); + + final StreamsException expected = assertThrows( + StreamsException.class, + () -> stateManager.initialize() + ); + final Throwable cause = expected.getCause(); + assertThat(cause, instanceOf(TimeoutException.class)); + assertThat(cause.getMessage(), equalTo("KABOOM!")); + + assertEquals(numberOfCalls.get(), 1); + } + + @Test + public void shouldRetryAtLeastOnceWhenPartitionsForThrowsTimeoutException() { + final AtomicInteger numberOfCalls = new AtomicInteger(0); + consumer = new MockConsumer<byte[], byte[]>(OffsetResetStrategy.EARLIEST) { + @Override + public List<PartitionInfo> partitionsFor(final String topic) { + time.sleep(100L); + numberOfCalls.incrementAndGet(); + throw new TimeoutException("KABOOM!"); + } + }; + initializeConsumer(0, 0, t1, t2, t3, t4); + + streamsConfig = new StreamsConfig(new Properties() { + { + put(StreamsConfig.APPLICATION_ID_CONFIG, "appId"); + put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "dummy:1234"); + put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getPath()); + put(StreamsConfig.TASK_TIMEOUT_MS_CONFIG, 1L); + } + }); + + stateManager = new GlobalStateManagerImpl( + new LogContext("mock"), + time, + topology, + consumer, + stateDirectory, + stateRestoreListener, + streamsConfig + ); + processorContext.setStateManger(stateManager); + stateManager.setGlobalProcessorContext(processorContext); + + final TimeoutException expected = assertThrows( + TimeoutException.class, + () -> stateManager.initialize() + ); + assertThat(expected.getMessage(), equalTo("Global task did not make progress to restore state within 100 ms. Adjust `task.timeout.ms` if needed.")); + + assertEquals(numberOfCalls.get(), 2); + } + + @Test + public void shouldRetryWhenPartitionsForThrowsTimeoutExceptionUntilTaskTimeoutExpires() { + final AtomicInteger numberOfCalls = new AtomicInteger(0); + consumer = new MockConsumer<byte[], byte[]>(OffsetResetStrategy.EARLIEST) { + @Override + public List<PartitionInfo> partitionsFor(final String topic) { + time.sleep(100L); + numberOfCalls.incrementAndGet(); + throw new TimeoutException("KABOOM!"); + } + }; + initializeConsumer(0, 0, t1, t2, t3, t4); + + streamsConfig = new StreamsConfig(new Properties() { + { + put(StreamsConfig.APPLICATION_ID_CONFIG, "appId"); + put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "dummy:1234"); + put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getPath()); + put(StreamsConfig.TASK_TIMEOUT_MS_CONFIG, 1000L); + } + }); + + stateManager = new GlobalStateManagerImpl( + new LogContext("mock"), + time, + topology, + consumer, + stateDirectory, + stateRestoreListener, + streamsConfig + ); + processorContext.setStateManger(stateManager); + stateManager.setGlobalProcessorContext(processorContext); + + final TimeoutException expected = assertThrows( + TimeoutException.class, + () -> stateManager.initialize() + ); + assertThat(expected.getMessage(), equalTo("Global task did not make progress to restore state within 1100 ms. Adjust `task.timeout.ms` if needed.")); + + assertEquals(numberOfCalls.get(), 12); + } + + @Test + public void shouldNotFailOnSlowProgressWhenPartitionForThrowsTimeoutException() { + final AtomicInteger numberOfCalls = new AtomicInteger(0); + consumer = new MockConsumer<byte[], byte[]>(OffsetResetStrategy.EARLIEST) { + @Override + public List<PartitionInfo> partitionsFor(final String topic) { + time.sleep(1L); + if (numberOfCalls.incrementAndGet() % 3 == 0) { + return super.partitionsFor(topic); + } + throw new TimeoutException("KABOOM!"); + } + + @Override + public synchronized long position(final TopicPartition partition) { + return numberOfCalls.incrementAndGet(); + } + }; + initializeConsumer(0, 0, t1, t2, t3, t4); + + streamsConfig = new StreamsConfig(new Properties() { + { + put(StreamsConfig.APPLICATION_ID_CONFIG, "appId"); + put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "dummy:1234"); + put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getPath()); + put(StreamsConfig.TASK_TIMEOUT_MS_CONFIG, 1L); + } + }); + + stateManager = new GlobalStateManagerImpl( + new LogContext("mock"), + time, + topology, + consumer, + stateDirectory, + stateRestoreListener, + streamsConfig + ); + processorContext.setStateManger(stateManager); + stateManager.setGlobalProcessorContext(processorContext); + + stateManager.initialize(); + } + + @Test + public void shouldNotRetryWhenPositionThrowsTimeoutExceptionAndTaskTimeoutIsZero() { Review comment: Replicated the tests from above (`endOffset` and `partitionFor`) for the `position` call. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org