satishd commented on a change in pull request #10579: URL: https://github.com/apache/kafka/pull/10579#discussion_r663808257
########## File path: storage/src/main/java/org/apache/kafka/server/log/remote/metadata/storage/ConsumerManager.java ########## @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.server.log.remote.metadata.storage; + +import org.apache.kafka.clients.consumer.KafkaConsumer; +import org.apache.kafka.clients.producer.RecordMetadata; +import org.apache.kafka.common.KafkaException; +import org.apache.kafka.common.TopicIdPartition; +import org.apache.kafka.common.utils.KafkaThread; +import org.apache.kafka.common.utils.Time; +import org.apache.kafka.common.utils.Utils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Closeable; +import java.io.IOException; +import java.util.Optional; +import java.util.Set; + +/** + * This class manages the consumer thread viz {@link ConsumerTask} that polls messages from the assigned metadata topic partitions. + * It also provides a way to wait until the given record is received by the consumer before it is timed out with an interval of + * {@link TopicBasedRemoteLogMetadataManagerConfig#consumeWaitMs()}. + */ +public class ConsumerManager implements Closeable { + + private static final Logger log = LoggerFactory.getLogger(ConsumerManager.class); + private static final long CONSUME_RECHECK_INTERVAL_MS = 50L; + + private final TopicBasedRemoteLogMetadataManagerConfig rlmmConfig; + private final Time time; + private final ConsumerTask consumerTask; + private final Thread consumerTaskThread; + + public ConsumerManager(TopicBasedRemoteLogMetadataManagerConfig rlmmConfig, + RemotePartitionMetadataEventHandler remotePartitionMetadataEventHandler, + RemoteLogMetadataTopicPartitioner rlmmTopicPartitioner, + Time time) { + this.rlmmConfig = rlmmConfig; + this.time = time; + + //Create a task to consume messages and submit the respective events to RemotePartitionMetadataEventHandler. + KafkaConsumer<byte[], byte[]> consumer = new KafkaConsumer<>(rlmmConfig.consumerProperties()); + consumerTask = new ConsumerTask(consumer, remotePartitionMetadataEventHandler, rlmmTopicPartitioner); + consumerTaskThread = KafkaThread.daemon("RLMMConsumerTask", consumerTask); Review comment: Good catch, updated it. ########## File path: storage/src/test/java/org/apache/kafka/server/log/remote/metadata/storage/RemoteLogSegmentLifecycleManager.java ########## @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.server.log.remote.metadata.storage; + +import org.apache.kafka.common.TopicIdPartition; +import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentMetadata; +import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentMetadataUpdate; +import org.apache.kafka.server.log.remote.storage.RemoteStorageException; + +import java.io.Closeable; +import java.io.IOException; +import java.util.Iterator; +import java.util.Optional; + +/** + * This interface defines the lifecycle methods for {@code RemoteLogSegmentMetadata}. {@link RemoteLogSegmentLifecycleTest} tests + * different implementations of this interface. This is responsible for managing all the segments for a given {@code topicIdPartition} + * registered with {@link #initialize(TopicIdPartition)}. + */ +public interface RemoteLogSegmentLifecycleManager extends Closeable { Review comment: There are two implementations of this interface: `RemoteLogMetadataCache` and `TopicBasedRemoteLogMetadataManager`. There are common tests in `RemoteLogSegmentLifecycleTest` that we want to run for both of them. ########## File path: storage/src/main/java/org/apache/kafka/server/log/remote/metadata/storage/TopicBasedRemoteLogMetadataManager.java ########## @@ -0,0 +1,360 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.apache.kafka.server.log.remote.metadata.storage; + +import org.apache.kafka.clients.producer.RecordMetadata; +import org.apache.kafka.common.KafkaException; +import org.apache.kafka.common.TopicIdPartition; +import org.apache.kafka.common.errors.RetriableException; +import org.apache.kafka.common.utils.KafkaThread; +import org.apache.kafka.common.utils.Time; +import org.apache.kafka.common.utils.Utils; +import org.apache.kafka.server.log.remote.storage.RemoteLogMetadata; +import org.apache.kafka.server.log.remote.storage.RemoteLogMetadataManager; +import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentMetadata; +import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentMetadataUpdate; +import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentState; +import org.apache.kafka.server.log.remote.storage.RemotePartitionDeleteMetadata; +import org.apache.kafka.server.log.remote.storage.RemoteStorageException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.locks.ReentrantReadWriteLock; + +/** + * This is the {@link RemoteLogMetadataManager} implementation with storage as an internal topic with name {@link TopicBasedRemoteLogMetadataManagerConfig#REMOTE_LOG_METADATA_TOPIC_NAME}. + * This is used to publish and fetch {@link RemoteLogMetadata} for the registered user topic partitions with + * {@link #onPartitionLeadershipChanges(Set, Set)}. Each broker will have an instance of this class and it subscribes + * to metadata updates for the registered user topic partitions. + */ +public class TopicBasedRemoteLogMetadataManager implements RemoteLogMetadataManager { + private static final Logger log = LoggerFactory.getLogger(TopicBasedRemoteLogMetadataManager.class); + + private volatile boolean configured = false; + + // It indicates whether the close process of this instance is started or not via #close() method. + // Using AtomicBoolean instead of volatile as it may encounter http://findbugs.sourceforge.net/bugDescriptions.html#SP_SPIN_ON_FIELD + // if the field is read but not updated in a spin loop like in #initializeResources() method. + private final AtomicBoolean closing = new AtomicBoolean(false); + private final AtomicBoolean initialized = new AtomicBoolean(false); + + private Thread initializationThread; + private Time time = Time.SYSTEM; + private volatile ProducerManager producerManager; + private volatile ConsumerManager consumerManager; + + // This allows to gracefully close this instance using {@link #close()} method while there are some pending or new + // requests calling different methods which use the resources like producer/consumer managers. + private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); + + private final RemotePartitionMetadataStore remotePartitionMetadataStore = new RemotePartitionMetadataStore(); + private volatile TopicBasedRemoteLogMetadataManagerConfig rlmmConfig; + private RemoteLogMetadataTopicPartitioner rlmmTopicPartitioner; + + public TopicBasedRemoteLogMetadataManager() { + } + + // Visible for testing. 
+ public TopicBasedRemoteLogMetadataManager(Time time) { + this.time = time; + } + + @Override + public void addRemoteLogSegmentMetadata(RemoteLogSegmentMetadata remoteLogSegmentMetadata) + throws RemoteStorageException { + Objects.requireNonNull(remoteLogSegmentMetadata, "remoteLogSegmentMetadata can not be null"); + + // This allows gracefully rejecting the requests while closing of this instance is in progress, which triggers + // closing the producer/consumer manager instances. + lock.readLock().lock(); + try { + ensureInitializedAndNotClosed(); + + // This method is allowed only to add remote log segment with the initial state(which is RemoteLogSegmentState.COPY_SEGMENT_STARTED) + // but not to update the existing remote log segment metadata. + if (remoteLogSegmentMetadata.state() != RemoteLogSegmentState.COPY_SEGMENT_STARTED) { + throw new IllegalArgumentException( + "Given remoteLogSegmentMetadata should have state as " + RemoteLogSegmentState.COPY_SEGMENT_STARTED + + " but it contains state as: " + remoteLogSegmentMetadata.state()); + } + + // Publish the message to the topic. + doPublishMetadata(remoteLogSegmentMetadata.remoteLogSegmentId().topicIdPartition(), + remoteLogSegmentMetadata); + } finally { + lock.readLock().unlock(); + } + } + + @Override + public void updateRemoteLogSegmentMetadata(RemoteLogSegmentMetadataUpdate segmentMetadataUpdate) + throws RemoteStorageException { + Objects.requireNonNull(segmentMetadataUpdate, "segmentMetadataUpdate can not be null"); + + lock.readLock().lock(); + try { + ensureInitializedAndNotClosed(); + + // Callers should use addRemoteLogSegmentMetadata to add RemoteLogSegmentMetadata with state as + // RemoteLogSegmentState.COPY_SEGMENT_STARTED. + if (segmentMetadataUpdate.state() == RemoteLogSegmentState.COPY_SEGMENT_STARTED) { + throw new IllegalArgumentException("Given remoteLogSegmentMetadata should not have the state as: " + + RemoteLogSegmentState.COPY_SEGMENT_STARTED); + } + + // Publish the message to the topic. + doPublishMetadata(segmentMetadataUpdate.remoteLogSegmentId().topicIdPartition(), segmentMetadataUpdate); + } finally { + lock.readLock().unlock(); + } + } + + @Override + public void putRemotePartitionDeleteMetadata(RemotePartitionDeleteMetadata remotePartitionDeleteMetadata) + throws RemoteStorageException { + Objects.requireNonNull(remotePartitionDeleteMetadata, "remotePartitionDeleteMetadata can not be null"); + + lock.readLock().lock(); + try { + ensureInitializedAndNotClosed(); + + doPublishMetadata(remotePartitionDeleteMetadata.topicIdPartition(), remotePartitionDeleteMetadata); + } finally { + lock.readLock().unlock(); + } + } + + private void doPublishMetadata(TopicIdPartition topicIdPartition, RemoteLogMetadata remoteLogMetadata) + throws RemoteStorageException { + log.debug("Publishing metadata for partition: [{}] with context: [{}]", topicIdPartition, remoteLogMetadata); + + try { + // Publish the message to the topic. + RecordMetadata recordMetadata = producerManager.publishMessage( + remoteLogMetadata); + // Wait until the consumer catches up with this offset. This will ensure read-after-write consistency + // semantics. 
+ consumerManager.waitTillConsumptionCatchesUp(recordMetadata); + } catch (KafkaException e) { + if (e instanceof RetriableException) { + throw e; + } else { + throw new RemoteStorageException(e); + } + } + } + + @Override + public Optional<RemoteLogSegmentMetadata> remoteLogSegmentMetadata(TopicIdPartition topicIdPartition, + int epochForOffset, + long offset) + throws RemoteStorageException { + lock.readLock().lock(); + try { + ensureInitializedAndNotClosed(); + + return remotePartitionMetadataStore.remoteLogSegmentMetadata(topicIdPartition, offset, epochForOffset); + } finally { + lock.readLock().unlock(); + } + } + + @Override + public Optional<Long> highestOffsetForEpoch(TopicIdPartition topicIdPartition, + int leaderEpoch) + throws RemoteStorageException { + lock.readLock().lock(); + try { + + ensureInitializedAndNotClosed(); + + return remotePartitionMetadataStore.highestLogOffset(topicIdPartition, leaderEpoch); + } finally { + lock.readLock().unlock(); + } + + } + + @Override + public Iterator<RemoteLogSegmentMetadata> listRemoteLogSegments(TopicIdPartition topicIdPartition) + throws RemoteStorageException { + Objects.requireNonNull(topicIdPartition, "topicIdPartition can not be null"); + + lock.readLock().lock(); + try { + ensureInitializedAndNotClosed(); + + return remotePartitionMetadataStore.listRemoteLogSegments(topicIdPartition); + } finally { + lock.readLock().unlock(); + } + } + + @Override + public Iterator<RemoteLogSegmentMetadata> listRemoteLogSegments(TopicIdPartition topicIdPartition, int leaderEpoch) + throws RemoteStorageException { + Objects.requireNonNull(topicIdPartition, "topicIdPartition can not be null"); + + lock.readLock().lock(); + try { + ensureInitializedAndNotClosed(); + + return remotePartitionMetadataStore.listRemoteLogSegments(topicIdPartition, leaderEpoch); + } finally { + lock.readLock().unlock(); + } + } + + public int metadataPartition(TopicIdPartition topicIdPartition) { + return rlmmTopicPartitioner.metadataPartition(topicIdPartition); + } + + // Visible For Testing + public Optional<Long> receivedOffsetForPartition(int metadataPartition) { + return consumerManager.receivedOffsetForPartition(metadataPartition); + } + + @Override + public void onPartitionLeadershipChanges(Set<TopicIdPartition> leaderPartitions, + Set<TopicIdPartition> followerPartitions) { + Objects.requireNonNull(leaderPartitions, "leaderPartitions can not be null"); + Objects.requireNonNull(followerPartitions, "followerPartitions can not be null"); + + lock.readLock().lock(); + try { + ensureInitializedAndNotClosed(); + + log.info("Received leadership notifications with leader partitions {} and follower partitions {}", + leaderPartitions, followerPartitions); + + HashSet<TopicIdPartition> allPartitions = new HashSet<>(leaderPartitions); + allPartitions.addAll(followerPartitions); + consumerManager.addAssignmentsForPartitions(allPartitions); + } finally { + lock.readLock().unlock(); + } + } + + @Override + public void onStopPartitions(Set<TopicIdPartition> partitions) { + lock.readLock().lock(); + try { + ensureInitializedAndNotClosed(); + consumerManager.removeAssignmentsForPartitions(partitions); + } finally { + lock.readLock().unlock(); + } + } + + @Override + public void configure(Map<String, ?> configs) { + Objects.requireNonNull(configs, "configs can not be null."); + + lock.writeLock().lock(); + try { + if (configured) { + log.info("Skipping configure as it is already configured."); + return; + } + + log.info("Started initializing with configs: {}", configs); + + 
rlmmConfig = new TopicBasedRemoteLogMetadataManagerConfig(configs); + rlmmTopicPartitioner = new RemoteLogMetadataTopicPartitioner(rlmmConfig.metadataTopicPartitionsCount()); + configured = true; + log.info("Successfully initialized with rlmmConfig: {}", rlmmConfig); + + // Scheduling the initialization producer/consumer managers in a separate thread. Required resources may + // not yet be available now. This thread makes sure that it is retried at regular intervals until it is + // successful. + initializationThread = KafkaThread.daemon("RLMMInitializationThread", () -> initializeResources()); Review comment: It was required to be retried until the topic is successfully created. I added the logic to check for topic creation too. ########## File path: storage/src/test/java/org/apache/kafka/server/log/remote/metadata/storage/TopicBasedRemoteLogMetadataManagerHarness.java ########## @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.server.log.remote.metadata.storage; + +import kafka.api.IntegrationTestHarness; +import kafka.utils.TestUtils; +import org.apache.kafka.clients.CommonClientConfigs; +import org.apache.kafka.common.TopicIdPartition; +import org.apache.kafka.common.config.TopicConfig; +import org.apache.kafka.common.utils.MockTime; +import org.apache.kafka.common.utils.Time; +import org.apache.kafka.common.utils.Utils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; +import java.util.Set; +import java.util.concurrent.TimeoutException; + +import static org.apache.kafka.server.log.remote.metadata.storage.TopicBasedRemoteLogMetadataManagerConfig.REMOTE_LOG_METADATA_TOPIC_PARTITIONS_PROP; +import static org.apache.kafka.server.log.remote.metadata.storage.TopicBasedRemoteLogMetadataManagerConfig.REMOTE_LOG_METADATA_TOPIC_REPLICATION_FACTOR_PROP; +import static org.apache.kafka.server.log.remote.metadata.storage.TopicBasedRemoteLogMetadataManagerConfig.REMOTE_LOG_METADATA_TOPIC_RETENTION_MILLIS_PROP; + +public class TopicBasedRemoteLogMetadataManagerHarness extends IntegrationTestHarness { + private static final Logger log = LoggerFactory.getLogger(TopicBasedRemoteLogMetadataManagerHarness.class); + + protected static final int METADATA_TOPIC_PARTITIONS_COUNT = 3; + protected static final int METADATA_TOPIC_REPLICATION_FACTOR = 2; + protected static final long METADATA_TOPIC_RETENTION_MS = 24 * 60 * 60 * 1000L; + + private final Time time = new MockTime(1); + private TopicBasedRemoteLogMetadataManager topicBasedRemoteLogMetadataManager; + + protected Map<String, Object> overrideRemoteLogMetadataManagerProps() { + return Collections.emptyMap(); + } 
+ + public void initialize(Set<TopicIdPartition> topicIdPartitions) { + // Call setup to start the cluster. + super.setUp(); + + // Make sure the remote log metadata topic is created before it is used. + createMetadataTopic(); + + topicBasedRemoteLogMetadataManager = new TopicBasedRemoteLogMetadataManager(time); + + // Initialize TopicBasedRemoteLogMetadataManager. + Map<String, Object> configs = new HashMap<>(); + configs.put(CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG, brokerList()); + configs.put("broker.id", 0); + configs.put(REMOTE_LOG_METADATA_TOPIC_PARTITIONS_PROP, METADATA_TOPIC_PARTITIONS_COUNT); + configs.put(REMOTE_LOG_METADATA_TOPIC_REPLICATION_FACTOR_PROP, METADATA_TOPIC_REPLICATION_FACTOR); + configs.put(REMOTE_LOG_METADATA_TOPIC_RETENTION_MILLIS_PROP, METADATA_TOPIC_RETENTION_MS); + + log.debug("TopicBasedRemoteLogMetadataManager configs before adding overridden properties: {}", configs); + // Add override properties. + configs.putAll(overrideRemoteLogMetadataManagerProps()); + log.debug("TopicBasedRemoteLogMetadataManager configs after adding overridden properties: {}", configs); + + topicBasedRemoteLogMetadataManager.configure(configs); + try { + waitUntilInitialized(120_000); + } catch (TimeoutException e) { + throw new RuntimeException(e); + } + + topicBasedRemoteLogMetadataManager.onPartitionLeadershipChanges(topicIdPartitions, Collections.emptySet()); + } + + // Visible for testing. + public void waitUntilInitialized(long waitTimeMs) throws TimeoutException { + long startMs = System.currentTimeMillis(); + while (!topicBasedRemoteLogMetadataManager.isInitialized()) { + long currentTimeMs = System.currentTimeMillis(); + if (currentTimeMs > startMs + waitTimeMs) { + throw new TimeoutException("Time out reached before it is initialized successfully"); + } + + Utils.sleep(1000); + } + } + + @Override + public int brokerCount() { + return 3; + } + + protected TopicBasedRemoteLogMetadataManager topicBasedRlmm() { + return topicBasedRemoteLogMetadataManager; + } + + private void createMetadataTopic() { Review comment: I was planning to add that later, as mentioned earlier. I updated with the required changes in the latest commit. ########## File path: storage/src/test/java/org/apache/kafka/server/log/remote/metadata/storage/TopicBasedRemoteLogMetadataManagerTest.java ########## @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.apache.kafka.server.log.remote.metadata.storage; + +import org.apache.kafka.common.TopicIdPartition; +import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.common.Uuid; +import org.apache.kafka.common.utils.MockTime; +import org.apache.kafka.common.utils.Time; +import org.apache.kafka.common.utils.Utils; +import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentId; +import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentMetadata; +import org.apache.kafka.server.log.remote.storage.RemoteStorageException; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.Collections; +import java.util.Map; +import java.util.concurrent.TimeoutException; + +import static org.apache.kafka.server.log.remote.metadata.storage.TopicBasedRemoteLogMetadataManagerConfig.REMOTE_LOG_METADATA_CONSUME_WAIT_MS_PROP; + +public class TopicBasedRemoteLogMetadataManagerTest { + private static final Logger log = LoggerFactory.getLogger(TopicBasedRemoteLogMetadataManagerTest.class); + + private static final int SEG_SIZE = 1024 * 1024; + + private final Time time = new MockTime(1); + private final TopicBasedRemoteLogMetadataManagerHarness remoteLogMetadataManagerHarness = new TopicBasedRemoteLogMetadataManagerHarness() { + @Override + protected Map<String, Object> overrideRemoteLogMetadataManagerProps() { + return Collections.singletonMap(REMOTE_LOG_METADATA_CONSUME_WAIT_MS_PROP, 5000L); + } + }; + + @BeforeEach + public void setup() { + // Start the cluster and initialize TopicBasedRemoteLogMetadataManager. + remoteLogMetadataManagerHarness.initialize(Collections.emptySet()); + } + + @AfterEach + public void teardown() throws IOException { + remoteLogMetadataManagerHarness.close(); + } + + public TopicBasedRemoteLogMetadataManager topicBasedRlmm() { + return remoteLogMetadataManagerHarness.topicBasedRlmm(); + } + + @Test + public void testNewPartitionUpdates() throws Exception { + final TopicIdPartition newLeaderTopicIdPartition = new TopicIdPartition(Uuid.randomUuid(), new TopicPartition("new-leader", 0)); + final TopicIdPartition newFollowerTopicIdPartition = new TopicIdPartition(Uuid.randomUuid(), new TopicPartition("new-follower", 0)); + + // Add segments for these partitions but they are not available as they have not yet been subscribed. + RemoteLogSegmentMetadata leaderSegmentMetadata = new RemoteLogSegmentMetadata(new RemoteLogSegmentId(newLeaderTopicIdPartition, Uuid.randomUuid()), + 0, 100, -1L, 0, + time.milliseconds(), SEG_SIZE, Collections.singletonMap(0, 0L)); + topicBasedRlmm().addRemoteLogSegmentMetadata(leaderSegmentMetadata); + + RemoteLogSegmentMetadata followerSegmentMetadata = new RemoteLogSegmentMetadata(new RemoteLogSegmentId(newFollowerTopicIdPartition, Uuid.randomUuid()), + 0, 100, -1L, 0, + time.milliseconds(), SEG_SIZE, Collections.singletonMap(0, 0L)); + topicBasedRlmm().addRemoteLogSegmentMetadata(followerSegmentMetadata); + + // `listRemoteLogSegments` will receive an exception as these topic partitions are not yet registered. 
+ Assertions.assertThrows(RemoteStorageException.class, () -> topicBasedRlmm().listRemoteLogSegments(newLeaderTopicIdPartition)); + Assertions.assertThrows(RemoteStorageException.class, () -> topicBasedRlmm().listRemoteLogSegments(newFollowerTopicIdPartition)); + + topicBasedRlmm().onPartitionLeadershipChanges(Collections.singleton(newLeaderTopicIdPartition), + Collections.singleton(newFollowerTopicIdPartition)); + + waitUntilConsumerCatchesup(newLeaderTopicIdPartition, newFollowerTopicIdPartition, 30000L); + + Assertions.assertTrue(topicBasedRlmm().listRemoteLogSegments(newLeaderTopicIdPartition).hasNext()); + Assertions.assertTrue(topicBasedRlmm().listRemoteLogSegments(newFollowerTopicIdPartition).hasNext()); + } + + private void waitUntilConsumerCatchesup(TopicIdPartition newLeaderTopicIdPartition, + TopicIdPartition newFollowerTopicIdPartition, + long timeoutMs) throws TimeoutException { + int leaderMetadataPartition = topicBasedRlmm().metadataPartition(newLeaderTopicIdPartition); + int followerMetadataPartition = topicBasedRlmm().metadataPartition(newFollowerTopicIdPartition); + + log.debug("Metadata partition for newLeaderTopicIdPartition: [{}], is: [{}]", newLeaderTopicIdPartition, leaderMetadataPartition); + log.debug("Metadata partition for newFollowerTopicIdPartition: [{}], is: [{}]", newFollowerTopicIdPartition, followerMetadataPartition); + + long sleepMs = 100L; + long time = System.currentTimeMillis(); + + while (true) { + if (System.currentTimeMillis() - time > timeoutMs) { + throw new TimeoutException("Timed out after " + timeoutMs + "ms "); + } + + if (leaderMetadataPartition == followerMetadataPartition) { + if (topicBasedRlmm().receivedOffsetForPartition(leaderMetadataPartition).orElse(-1L) > 0) { + break; + } + } else { + if (topicBasedRlmm().receivedOffsetForPartition(leaderMetadataPartition).orElse(-1L) > -1 || Review comment: Updated with a comment in the code. ########## File path: storage/src/main/java/org/apache/kafka/server/log/remote/metadata/storage/RemotePartitionMetadataStore.java ########## @@ -0,0 +1,140 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.kafka.server.log.remote.metadata.storage; + +import org.apache.kafka.common.TopicIdPartition; +import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentId; +import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentMetadata; +import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentMetadataUpdate; +import org.apache.kafka.server.log.remote.storage.RemotePartitionDeleteMetadata; +import org.apache.kafka.server.log.remote.storage.RemotePartitionDeleteState; +import org.apache.kafka.server.log.remote.storage.RemoteResourceNotFoundException; +import org.apache.kafka.server.log.remote.storage.RemoteStorageException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Closeable; +import java.io.IOException; +import java.util.Collections; +import java.util.Iterator; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.concurrent.ConcurrentHashMap; + +public class RemotePartitionMetadataStore extends RemotePartitionMetadataEventHandler implements Closeable { Review comment: Updated with a comment. ########## File path: storage/src/test/java/org/apache/kafka/server/log/remote/metadata/storage/TopicBasedRemoteLogMetadataManagerHarness.java ########## @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.kafka.server.log.remote.metadata.storage; + +import kafka.api.IntegrationTestHarness; +import kafka.utils.TestUtils; +import org.apache.kafka.clients.CommonClientConfigs; +import org.apache.kafka.common.TopicIdPartition; +import org.apache.kafka.common.config.TopicConfig; +import org.apache.kafka.common.utils.MockTime; +import org.apache.kafka.common.utils.Time; +import org.apache.kafka.common.utils.Utils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; +import java.util.Set; +import java.util.concurrent.TimeoutException; + +import static org.apache.kafka.server.log.remote.metadata.storage.TopicBasedRemoteLogMetadataManagerConfig.REMOTE_LOG_METADATA_TOPIC_PARTITIONS_PROP; +import static org.apache.kafka.server.log.remote.metadata.storage.TopicBasedRemoteLogMetadataManagerConfig.REMOTE_LOG_METADATA_TOPIC_REPLICATION_FACTOR_PROP; +import static org.apache.kafka.server.log.remote.metadata.storage.TopicBasedRemoteLogMetadataManagerConfig.REMOTE_LOG_METADATA_TOPIC_RETENTION_MILLIS_PROP; + +public class TopicBasedRemoteLogMetadataManagerHarness extends IntegrationTestHarness { + private static final Logger log = LoggerFactory.getLogger(TopicBasedRemoteLogMetadataManagerHarness.class); + + protected static final int METADATA_TOPIC_PARTITIONS_COUNT = 3; + protected static final int METADATA_TOPIC_REPLICATION_FACTOR = 2; + protected static final long METADATA_TOPIC_RETENTION_MS = 24 * 60 * 60 * 1000L; + + private final Time time = new MockTime(1); + private TopicBasedRemoteLogMetadataManager topicBasedRemoteLogMetadataManager; + + protected Map<String, Object> overrideRemoteLogMetadataManagerProps() { + return Collections.emptyMap(); + } + + public void initialize(Set<TopicIdPartition> topicIdPartitions) { + // Call setup to start the cluster. + super.setUp(); + + // Make sure the remote log metadata topic is created before it is used. + createMetadataTopic(); + + topicBasedRemoteLogMetadataManager = new TopicBasedRemoteLogMetadataManager(time); + + // Initialize TopicBasedRemoteLogMetadataManager. + Map<String, Object> configs = new HashMap<>(); + configs.put(CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG, brokerList()); + configs.put("broker.id", 0); + configs.put(REMOTE_LOG_METADATA_TOPIC_PARTITIONS_PROP, METADATA_TOPIC_PARTITIONS_COUNT); + configs.put(REMOTE_LOG_METADATA_TOPIC_REPLICATION_FACTOR_PROP, METADATA_TOPIC_REPLICATION_FACTOR); + configs.put(REMOTE_LOG_METADATA_TOPIC_RETENTION_MILLIS_PROP, METADATA_TOPIC_RETENTION_MS); + + log.debug("TopicBasedRemoteLogMetadataManager configs before adding overridden properties: {}", configs); + // Add override properties. + configs.putAll(overrideRemoteLogMetadataManagerProps()); + log.debug("TopicBasedRemoteLogMetadataManager configs after adding overridden properties: {}", configs); + + topicBasedRemoteLogMetadataManager.configure(configs); + try { + waitUntilInitialized(120_000); Review comment: I guess 60s may be sufficient, updated with that. ########## File path: storage/src/main/java/org/apache/kafka/server/log/remote/metadata/storage/TopicBasedRemoteLogMetadataManager.java ########## @@ -0,0 +1,360 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.server.log.remote.metadata.storage; + +import org.apache.kafka.clients.producer.RecordMetadata; +import org.apache.kafka.common.KafkaException; +import org.apache.kafka.common.TopicIdPartition; +import org.apache.kafka.common.errors.RetriableException; +import org.apache.kafka.common.utils.KafkaThread; +import org.apache.kafka.common.utils.Time; +import org.apache.kafka.common.utils.Utils; +import org.apache.kafka.server.log.remote.storage.RemoteLogMetadata; +import org.apache.kafka.server.log.remote.storage.RemoteLogMetadataManager; +import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentMetadata; +import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentMetadataUpdate; +import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentState; +import org.apache.kafka.server.log.remote.storage.RemotePartitionDeleteMetadata; +import org.apache.kafka.server.log.remote.storage.RemoteStorageException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.locks.ReentrantReadWriteLock; + +/** + * This is the {@link RemoteLogMetadataManager} implementation with storage as an internal topic with name {@link TopicBasedRemoteLogMetadataManagerConfig#REMOTE_LOG_METADATA_TOPIC_NAME}. + * This is used to publish and fetch {@link RemoteLogMetadata} for the registered user topic partitions with + * {@link #onPartitionLeadershipChanges(Set, Set)}. Each broker will have an instance of this class and it subscribes + * to metadata updates for the registered user topic partitions. + */ +public class TopicBasedRemoteLogMetadataManager implements RemoteLogMetadataManager { + private static final Logger log = LoggerFactory.getLogger(TopicBasedRemoteLogMetadataManager.class); + + private volatile boolean configured = false; + + // It indicates whether the close process of this instance is started or not via #close() method. + // Using AtomicBoolean instead of volatile as it may encounter http://findbugs.sourceforge.net/bugDescriptions.html#SP_SPIN_ON_FIELD + // if the field is read but not updated in a spin loop like in #initializeResources() method. + private final AtomicBoolean closing = new AtomicBoolean(false); Review comment: `closing` is accessed in `initializeResources` and we do not need to take a lock there. I would like to keep this as `AtomicBoolean` which addresses that and it is easy to understand the semantics. ########## File path: storage/src/main/java/org/apache/kafka/server/log/remote/metadata/storage/TopicBasedRemoteLogMetadataManager.java ########## @@ -0,0 +1,402 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.server.log.remote.metadata.storage; + +import org.apache.kafka.clients.admin.AdminClient; +import org.apache.kafka.clients.admin.NewTopic; +import org.apache.kafka.clients.producer.RecordMetadata; +import org.apache.kafka.common.KafkaException; +import org.apache.kafka.common.TopicIdPartition; +import org.apache.kafka.common.config.TopicConfig; +import org.apache.kafka.common.errors.RetriableException; +import org.apache.kafka.common.errors.TopicExistsException; +import org.apache.kafka.common.utils.KafkaThread; +import org.apache.kafka.common.utils.Time; +import org.apache.kafka.common.utils.Utils; +import org.apache.kafka.server.log.remote.storage.RemoteLogMetadata; +import org.apache.kafka.server.log.remote.storage.RemoteLogMetadataManager; +import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentMetadata; +import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentMetadataUpdate; +import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentState; +import org.apache.kafka.server.log.remote.storage.RemotePartitionDeleteMetadata; +import org.apache.kafka.server.log.remote.storage.RemoteStorageException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.time.Duration; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.locks.ReentrantReadWriteLock; + +/** + * This is the {@link RemoteLogMetadataManager} implementation with storage as an internal topic with name {@link TopicBasedRemoteLogMetadataManagerConfig#REMOTE_LOG_METADATA_TOPIC_NAME}. + * This is used to publish and fetch {@link RemoteLogMetadata} for the registered user topic partitions with + * {@link #onPartitionLeadershipChanges(Set, Set)}. Each broker will have an instance of this class and it subscribes + * to metadata updates for the registered user topic partitions. + */ +public class TopicBasedRemoteLogMetadataManager implements RemoteLogMetadataManager { + private static final Logger log = LoggerFactory.getLogger(TopicBasedRemoteLogMetadataManager.class); + private static final long INITIALIZATION_RETRY_INTERVAL_MS = 30000L; + + private volatile boolean configured = false; + + // It indicates whether the close process of this instance is started or not via #close() method. + // Using AtomicBoolean instead of volatile as it may encounter http://findbugs.sourceforge.net/bugDescriptions.html#SP_SPIN_ON_FIELD + // if the field is read but not updated in a spin loop like in #initializeResources() method. 
+ private final AtomicBoolean closing = new AtomicBoolean(false); + private final AtomicBoolean initialized = new AtomicBoolean(false); + private final Time time = Time.SYSTEM; + + private Thread initializationThread; + private volatile ProducerManager producerManager; + private volatile ConsumerManager consumerManager; + + // This allows to gracefully close this instance using {@link #close()} method while there are some pending or new + // requests calling different methods which use the resources like producer/consumer managers. + private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); + + private final RemotePartitionMetadataStore remotePartitionMetadataStore = new RemotePartitionMetadataStore(); + private volatile TopicBasedRemoteLogMetadataManagerConfig rlmmConfig; + private volatile RemoteLogMetadataTopicPartitioner rlmmTopicPartitioner; + + @Override + public void addRemoteLogSegmentMetadata(RemoteLogSegmentMetadata remoteLogSegmentMetadata) + throws RemoteStorageException { + Objects.requireNonNull(remoteLogSegmentMetadata, "remoteLogSegmentMetadata can not be null"); + + // This allows gracefully rejecting the requests while closing of this instance is in progress, which triggers + // closing the producer/consumer manager instances. + lock.readLock().lock(); + try { + ensureInitializedAndNotClosed(); + + // This method is allowed only to add remote log segment with the initial state(which is RemoteLogSegmentState.COPY_SEGMENT_STARTED) + // but not to update the existing remote log segment metadata. + if (remoteLogSegmentMetadata.state() != RemoteLogSegmentState.COPY_SEGMENT_STARTED) { + throw new IllegalArgumentException( + "Given remoteLogSegmentMetadata should have state as " + RemoteLogSegmentState.COPY_SEGMENT_STARTED + + " but it contains state as: " + remoteLogSegmentMetadata.state()); + } + + // Publish the message to the topic. + doPublishMetadata(remoteLogSegmentMetadata.remoteLogSegmentId().topicIdPartition(), + remoteLogSegmentMetadata); + } finally { + lock.readLock().unlock(); + } + } + + @Override + public void updateRemoteLogSegmentMetadata(RemoteLogSegmentMetadataUpdate segmentMetadataUpdate) + throws RemoteStorageException { + Objects.requireNonNull(segmentMetadataUpdate, "segmentMetadataUpdate can not be null"); + + lock.readLock().lock(); + try { + ensureInitializedAndNotClosed(); + + // Callers should use addRemoteLogSegmentMetadata to add RemoteLogSegmentMetadata with state as + // RemoteLogSegmentState.COPY_SEGMENT_STARTED. + if (segmentMetadataUpdate.state() == RemoteLogSegmentState.COPY_SEGMENT_STARTED) { + throw new IllegalArgumentException("Given remoteLogSegmentMetadata should not have the state as: " + + RemoteLogSegmentState.COPY_SEGMENT_STARTED); + } + + // Publish the message to the topic. 
+ doPublishMetadata(segmentMetadataUpdate.remoteLogSegmentId().topicIdPartition(), segmentMetadataUpdate); + } finally { + lock.readLock().unlock(); + } + } + + @Override + public void putRemotePartitionDeleteMetadata(RemotePartitionDeleteMetadata remotePartitionDeleteMetadata) + throws RemoteStorageException { + Objects.requireNonNull(remotePartitionDeleteMetadata, "remotePartitionDeleteMetadata can not be null"); + + lock.readLock().lock(); + try { + ensureInitializedAndNotClosed(); + + doPublishMetadata(remotePartitionDeleteMetadata.topicIdPartition(), remotePartitionDeleteMetadata); + } finally { + lock.readLock().unlock(); + } + } + + private void doPublishMetadata(TopicIdPartition topicIdPartition, RemoteLogMetadata remoteLogMetadata) + throws RemoteStorageException { + log.debug("Publishing metadata for partition: [{}] with context: [{}]", topicIdPartition, remoteLogMetadata); + + try { + // Publish the message to the topic. + RecordMetadata recordMetadata = producerManager.publishMessage(remoteLogMetadata); + // Wait until the consumer catches up with this offset. This will ensure read-after-write consistency + // semantics. + consumerManager.waitTillConsumptionCatchesUp(recordMetadata); + } catch (KafkaException e) { + if (e instanceof RetriableException) { + throw e; + } else { + throw new RemoteStorageException(e); + } + } + } + + @Override + public Optional<RemoteLogSegmentMetadata> remoteLogSegmentMetadata(TopicIdPartition topicIdPartition, + int epochForOffset, + long offset) + throws RemoteStorageException { + lock.readLock().lock(); + try { + ensureInitializedAndNotClosed(); + + return remotePartitionMetadataStore.remoteLogSegmentMetadata(topicIdPartition, offset, epochForOffset); + } finally { + lock.readLock().unlock(); + } + } + + @Override + public Optional<Long> highestOffsetForEpoch(TopicIdPartition topicIdPartition, + int leaderEpoch) + throws RemoteStorageException { + lock.readLock().lock(); + try { + + ensureInitializedAndNotClosed(); + + return remotePartitionMetadataStore.highestLogOffset(topicIdPartition, leaderEpoch); + } finally { + lock.readLock().unlock(); + } + + } + + @Override + public Iterator<RemoteLogSegmentMetadata> listRemoteLogSegments(TopicIdPartition topicIdPartition) + throws RemoteStorageException { + Objects.requireNonNull(topicIdPartition, "topicIdPartition can not be null"); + + lock.readLock().lock(); + try { + ensureInitializedAndNotClosed(); + + return remotePartitionMetadataStore.listRemoteLogSegments(topicIdPartition); + } finally { + lock.readLock().unlock(); + } + } + + @Override + public Iterator<RemoteLogSegmentMetadata> listRemoteLogSegments(TopicIdPartition topicIdPartition, int leaderEpoch) + throws RemoteStorageException { + Objects.requireNonNull(topicIdPartition, "topicIdPartition can not be null"); + + lock.readLock().lock(); + try { + ensureInitializedAndNotClosed(); + + return remotePartitionMetadataStore.listRemoteLogSegments(topicIdPartition, leaderEpoch); + } finally { + lock.readLock().unlock(); + } + } + + public int metadataPartition(TopicIdPartition topicIdPartition) { + return rlmmTopicPartitioner.metadataPartition(topicIdPartition); + } + + // Visible For Testing + public Optional<Long> receivedOffsetForPartition(int metadataPartition) { + return consumerManager.receivedOffsetForPartition(metadataPartition); + } + + @Override + public void onPartitionLeadershipChanges(Set<TopicIdPartition> leaderPartitions, Review comment: There can be a race here between this method and initializeResources() which I plan to 
address in a follow-up PR. I am working on a separate consumer that handles catching up on the assigned partitions; that race is addressed with those changes. The same applies to the `onStopPartitions` method.
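For context on the race discussed above, here is a minimal sketch of one way to defer assignments until `initializeResources()` has finished: buffer leadership notifications while initialization is in flight, then flush them to the consumer once it is ready. The `DeferredAssignments` class and its method names are hypothetical illustrations, not code from this PR:

```java
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;

// Hypothetical sketch: buffer partition assignments that arrive before
// initialization completes, and flush them once the consumer is ready.
public class DeferredAssignments<P> {
    private final Set<P> pending = new HashSet<>();
    private boolean initialized = false;

    // Called from onPartitionLeadershipChanges(): returns the partitions that
    // are safe to assign right away; buffers them while still initializing.
    public synchronized Set<P> assignOrDefer(Set<P> partitions) {
        if (!initialized) {
            pending.addAll(partitions);
            return Collections.emptySet();
        }
        return partitions;
    }

    // Called at the end of initializeResources(): marks the manager as ready
    // and hands back everything that was buffered in the meantime.
    public synchronized Set<P> markInitialized() {
        initialized = true;
        Set<P> toAssign = new HashSet<>(pending);
        pending.clear();
        return toAssign;
    }
}
```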
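Separately, the read-after-write consistency scheme that `doPublishMetadata` relies on (publish a metadata record, then block until the consumer side has applied the produced offset) boils down to a bounded recheck loop. A self-contained sketch under stated assumptions: the `receivedOffset` supplier stands in for `ConsumerTask`'s per-partition progress (compare `receivedOffsetForPartition`), and the class itself is illustrative, not part of the PR:

```java
import java.util.Optional;
import java.util.concurrent.TimeoutException;
import java.util.function.Supplier;

// Sketch of blocking until the consumer has applied a just-produced offset,
// rechecking at a fixed interval until a configured timeout elapses.
public final class ConsumptionWaiter {
    // Mirrors CONSUME_RECHECK_INTERVAL_MS from the PR's ConsumerManager.
    private static final long RECHECK_INTERVAL_MS = 50L;

    public static void waitUntilCaughtUp(long producedOffset,
                                         Supplier<Optional<Long>> receivedOffset, // assumed progress source
                                         long timeoutMs) throws TimeoutException, InterruptedException {
        long deadlineMs = System.currentTimeMillis() + timeoutMs;
        while (receivedOffset.get().orElse(-1L) < producedOffset) {
            long remainingMs = deadlineMs - System.currentTimeMillis();
            if (remainingMs <= 0) {
                throw new TimeoutException("Consumer did not catch up to offset " + producedOffset
                        + " within " + timeoutMs + " ms");
            }
            Thread.sleep(Math.min(RECHECK_INTERVAL_MS, remainingMs));
        }
    }
}
```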
-- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: jira-unsubscr...@kafka.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org