satishd commented on a change in pull request #10579:
URL: https://github.com/apache/kafka/pull/10579#discussion_r642095239
##########
File path: storage/src/main/java/org/apache/kafka/server/log/remote/metadata/storage/TopicBasedRemoteLogMetadataManager.java
##########
@@ -0,0 +1,346 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.server.log.remote.metadata.storage;
+
+import org.apache.kafka.clients.producer.RecordMetadata;
+import org.apache.kafka.common.KafkaException;
+import org.apache.kafka.common.TopicIdPartition;
+import org.apache.kafka.common.errors.RetriableException;
+import org.apache.kafka.common.utils.KafkaThread;
+import org.apache.kafka.common.utils.Time;
+import org.apache.kafka.common.utils.Utils;
+import org.apache.kafka.server.log.remote.storage.RemoteLogMetadata;
+import org.apache.kafka.server.log.remote.storage.RemoteLogMetadataManager;
+import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentMetadata;
+import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentMetadataUpdate;
+import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentState;
+import org.apache.kafka.server.log.remote.storage.RemotePartitionDeleteMetadata;
+import org.apache.kafka.server.log.remote.storage.RemoteStorageException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
+
+/**
+ * This is the {@link RemoteLogMetadataManager} implementation with storage as an internal topic with name {@link TopicBasedRemoteLogMetadataManagerConfig#REMOTE_LOG_METADATA_TOPIC_NAME}.
+ * This is used to publish and fetch {@link RemoteLogMetadata} for the registered user topic partitions with
+ * {@link #onPartitionLeadershipChanges(Set, Set)}. Each broker will have an instance of this class and it subscribes
+ * to metadata updates for the registered user topic partitions.
+ */
+public class TopicBasedRemoteLogMetadataManager implements RemoteLogMetadataManager {
+    private static final Logger log = LoggerFactory.getLogger(TopicBasedRemoteLogMetadataManager.class);
+
+    // Take these as configs with the respective default values.
+    private static final long INITIALIZATION_RETRY_INTERVAL_MS = 30_000L;
+
+    private volatile boolean configured = false;
+
+    // Using AtomicBoolean instead of volatile as it may encounter http://findbugs.sourceforge.net/bugDescriptions.html#SP_SPIN_ON_FIELD
+    // if the field is read but not updated in a spin loop like in #initializeResources() method.
+    private final AtomicBoolean close = new AtomicBoolean(false);
+    private final AtomicBoolean initialized = new AtomicBoolean(false);
+
+    private Thread initializationThread;
+    private Time time = Time.SYSTEM;
+    private ProducerManager producerManager;
+    private ConsumerManager consumerManager;
+
+    // This allows to gracefully close this instance using {@link #close()} method while there are some pending or new
+    // requests calling different methods which use the resources like producer/consumer managers.
+    private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
+
+    private final RemotePartitionMetadataStore remotePartitionMetadataStore = new RemotePartitionMetadataStore();
+    private volatile TopicBasedRemoteLogMetadataManagerConfig rlmmConfig;
+    private RemoteLogMetadataTopicPartitioner rlmmTopicPartitioner;
+
+    @Override
+    public void addRemoteLogSegmentMetadata(RemoteLogSegmentMetadata remoteLogSegmentMetadata)
+            throws RemoteStorageException {
+        Objects.requireNonNull(remoteLogSegmentMetadata, "remoteLogSegmentMetadata can not be null");
+
+        ensureInitializedAndNotClosed();
+
+        // This allows gracefully rejecting the requests while closing of this instance is in progress, which triggers
+        // closing the producer/consumer manager instances.
+        lock.readLock().lock();
+        try {
+
+            // This method is allowed only to add remote log segment with the initial state(which is RemoteLogSegmentState.COPY_SEGMENT_STARTED)
+            // but not to update the existing remote log segment metadata.
+            if (remoteLogSegmentMetadata.state() != RemoteLogSegmentState.COPY_SEGMENT_STARTED) {
+                throw new IllegalArgumentException(
+                        "Given remoteLogSegmentMetadata should have state as " + RemoteLogSegmentState.COPY_SEGMENT_STARTED
+                                + " but it contains state as: " + remoteLogSegmentMetadata.state());
+            }
+
+            // Publish the message to the topic.
+            doPublishMetadata(remoteLogSegmentMetadata.remoteLogSegmentId().topicIdPartition(),
+                    remoteLogSegmentMetadata);
+        } finally {
+            lock.readLock().unlock();
+        }
+    }
+
+    @Override
+    public void updateRemoteLogSegmentMetadata(RemoteLogSegmentMetadataUpdate segmentMetadataUpdate)
+            throws RemoteStorageException {
+        Objects.requireNonNull(segmentMetadataUpdate, "segmentMetadataUpdate can not be null");
+
+        ensureInitializedAndNotClosed();
+
+        lock.readLock().lock();
+        try {
+            // Callers should use addRemoteLogSegmentMetadata to add RemoteLogSegmentMetadata with state as
+            // RemoteLogSegmentState.COPY_SEGMENT_STARTED.
+            if (segmentMetadataUpdate.state() == RemoteLogSegmentState.COPY_SEGMENT_STARTED) {
+                throw new IllegalArgumentException("Given remoteLogSegmentMetadata should not have the state as: "
+                        + RemoteLogSegmentState.COPY_SEGMENT_STARTED);
+            }
+
+            // Publish the message to the topic.
+            doPublishMetadata(segmentMetadataUpdate.remoteLogSegmentId().topicIdPartition(), segmentMetadataUpdate);
+        } finally {
+            lock.readLock().unlock();
+        }
+    }
+
+    @Override
+    public void putRemotePartitionDeleteMetadata(RemotePartitionDeleteMetadata remotePartitionDeleteMetadata)
+            throws RemoteStorageException {
+        Objects.requireNonNull(remotePartitionDeleteMetadata, "remotePartitionDeleteMetadata can not be null");
+
+        ensureInitializedAndNotClosed();
+
+        lock.readLock().lock();
+        try {
+
+            doPublishMetadata(remotePartitionDeleteMetadata.topicIdPartition(), remotePartitionDeleteMetadata);
+        } finally {
+            lock.readLock().unlock();
+        }
+    }
+
+    private void doPublishMetadata(TopicIdPartition topicIdPartition, RemoteLogMetadata remoteLogMetadata)
+            throws RemoteStorageException {
+        log.debug("Publishing metadata for partition: [{}] with context: [{}]", topicIdPartition, remoteLogMetadata);
+        ensureInitializedAndNotClosed();

Review comment:
   Removed the check as you suggested.
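For context on the lifecycle fields in the hunk above: the SP_SPIN_ON_FIELD note refers to a spin loop during initialization, and the ReentrantReadWriteLock is what lets close() drain in-flight requests. Below is a minimal, self-contained sketch of that combined pattern; trySetup() and the method bodies are illustrative assumptions, not the PR's actual initializeResources()/close() implementations.

    import java.util.concurrent.atomic.AtomicBoolean;
    import java.util.concurrent.locks.ReentrantReadWriteLock;

    // Lifecycle sketch: spin-initialize with AtomicBoolean flags, serve requests under
    // the read lock, and close under the write lock so in-flight requests drain first.
    public class LifecycleSketch {
        private static final long INITIALIZATION_RETRY_INTERVAL_MS = 30_000L;

        private final AtomicBoolean close = new AtomicBoolean(false);
        private final AtomicBoolean initialized = new AtomicBoolean(false);
        private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();

        // Resembles #initializeResources(): retry setup until it succeeds or close()
        // is called. Reading the flags via AtomicBoolean.get() keeps the cross-thread
        // read explicit in the spin loop, which is what the findbugs note is about.
        void initializeResources() {
            while (!initialized.get() && !close.get()) {
                try {
                    trySetup();               // hypothetical setup step
                    initialized.set(true);
                } catch (Exception e) {
                    sleepQuietly(INITIALIZATION_RETRY_INTERVAL_MS);
                }
            }
        }

        // Request paths take the read lock, so many can run concurrently.
        void publish() {
            lock.readLock().lock();
            try {
                if (close.get()) {
                    throw new IllegalStateException("Already closed");
                }
                // ... produce and wait for consumption ...
            } finally {
                lock.readLock().unlock();
            }
        }

        // close() takes the write lock, which waits for in-flight readers to finish
        // before resources such as producer/consumer managers are torn down.
        public void close() {
            if (close.compareAndSet(false, true)) {
                lock.writeLock().lock();
                try {
                    // ... close producer/consumer managers ...
                } finally {
                    lock.writeLock().unlock();
                }
            }
        }

        private void trySetup() throws Exception { }

        private static void sleepQuietly(long ms) {
            try {
                Thread.sleep(ms);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
            }
        }
    }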
##########
File path: storage/src/main/java/org/apache/kafka/server/log/remote/metadata/storage/TopicBasedRemoteLogMetadataManager.java
##########
@@ -0,0 +1,346 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.server.log.remote.metadata.storage;
+
+import org.apache.kafka.clients.producer.RecordMetadata;
+import org.apache.kafka.common.KafkaException;
+import org.apache.kafka.common.TopicIdPartition;
+import org.apache.kafka.common.errors.RetriableException;
+import org.apache.kafka.common.utils.KafkaThread;
+import org.apache.kafka.common.utils.Time;
+import org.apache.kafka.common.utils.Utils;
+import org.apache.kafka.server.log.remote.storage.RemoteLogMetadata;
+import org.apache.kafka.server.log.remote.storage.RemoteLogMetadataManager;
+import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentMetadata;
+import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentMetadataUpdate;
+import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentState;
+import org.apache.kafka.server.log.remote.storage.RemotePartitionDeleteMetadata;
+import org.apache.kafka.server.log.remote.storage.RemoteStorageException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
+
+/**
+ * This is the {@link RemoteLogMetadataManager} implementation with storage as an internal topic with name {@link TopicBasedRemoteLogMetadataManagerConfig#REMOTE_LOG_METADATA_TOPIC_NAME}.
+ * This is used to publish and fetch {@link RemoteLogMetadata} for the registered user topic partitions with
+ * {@link #onPartitionLeadershipChanges(Set, Set)}. Each broker will have an instance of this class and it subscribes
+ * to metadata updates for the registered user topic partitions.
+ */
+public class TopicBasedRemoteLogMetadataManager implements RemoteLogMetadataManager {
+    private static final Logger log = LoggerFactory.getLogger(TopicBasedRemoteLogMetadataManager.class);
+
+    // Take these as configs with the respective default values.
+    private static final long INITIALIZATION_RETRY_INTERVAL_MS = 30_000L;
+
+    private volatile boolean configured = false;
+
+    // Using AtomicBoolean instead of volatile as it may encounter http://findbugs.sourceforge.net/bugDescriptions.html#SP_SPIN_ON_FIELD
+    // if the field is read but not updated in a spin loop like in #initializeResources() method.
+    private final AtomicBoolean close = new AtomicBoolean(false);
+    private final AtomicBoolean initialized = new AtomicBoolean(false);
+
+    private Thread initializationThread;
+    private Time time = Time.SYSTEM;
+    private ProducerManager producerManager;
+    private ConsumerManager consumerManager;
+
+    // This allows to gracefully close this instance using {@link #close()} method while there are some pending or new
+    // requests calling different methods which use the resources like producer/consumer managers.
+    private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
+
+    private final RemotePartitionMetadataStore remotePartitionMetadataStore = new RemotePartitionMetadataStore();
+    private volatile TopicBasedRemoteLogMetadataManagerConfig rlmmConfig;
+    private RemoteLogMetadataTopicPartitioner rlmmTopicPartitioner;
+
+    @Override
+    public void addRemoteLogSegmentMetadata(RemoteLogSegmentMetadata remoteLogSegmentMetadata)
+            throws RemoteStorageException {
+        Objects.requireNonNull(remoteLogSegmentMetadata, "remoteLogSegmentMetadata can not be null");
+
+        ensureInitializedAndNotClosed();
+
+        // This allows gracefully rejecting the requests while closing of this instance is in progress, which triggers
+        // closing the producer/consumer manager instances.
+        lock.readLock().lock();
+        try {
+
+            // This method is allowed only to add remote log segment with the initial state(which is RemoteLogSegmentState.COPY_SEGMENT_STARTED)
+            // but not to update the existing remote log segment metadata.
+            if (remoteLogSegmentMetadata.state() != RemoteLogSegmentState.COPY_SEGMENT_STARTED) {
+                throw new IllegalArgumentException(
+                        "Given remoteLogSegmentMetadata should have state as " + RemoteLogSegmentState.COPY_SEGMENT_STARTED
+                                + " but it contains state as: " + remoteLogSegmentMetadata.state());
+            }
+
+            // Publish the message to the topic.
+            doPublishMetadata(remoteLogSegmentMetadata.remoteLogSegmentId().topicIdPartition(),
+                    remoteLogSegmentMetadata);
+        } finally {
+            lock.readLock().unlock();
+        }
+    }
+
+    @Override
+    public void updateRemoteLogSegmentMetadata(RemoteLogSegmentMetadataUpdate segmentMetadataUpdate)
+            throws RemoteStorageException {
+        Objects.requireNonNull(segmentMetadataUpdate, "segmentMetadataUpdate can not be null");
+
+        ensureInitializedAndNotClosed();
+
+        lock.readLock().lock();
+        try {
+            // Callers should use addRemoteLogSegmentMetadata to add RemoteLogSegmentMetadata with state as
+            // RemoteLogSegmentState.COPY_SEGMENT_STARTED.
+            if (segmentMetadataUpdate.state() == RemoteLogSegmentState.COPY_SEGMENT_STARTED) {
+                throw new IllegalArgumentException("Given remoteLogSegmentMetadata should not have the state as: "
+                        + RemoteLogSegmentState.COPY_SEGMENT_STARTED);
+            }
+
+            // Publish the message to the topic.
+            doPublishMetadata(segmentMetadataUpdate.remoteLogSegmentId().topicIdPartition(), segmentMetadataUpdate);
+        } finally {
+            lock.readLock().unlock();
+        }
+    }
+
+    @Override
+    public void putRemotePartitionDeleteMetadata(RemotePartitionDeleteMetadata remotePartitionDeleteMetadata)
+            throws RemoteStorageException {
+        Objects.requireNonNull(remotePartitionDeleteMetadata, "remotePartitionDeleteMetadata can not be null");
+
+        ensureInitializedAndNotClosed();
+
+        lock.readLock().lock();
+        try {
+
+            doPublishMetadata(remotePartitionDeleteMetadata.topicIdPartition(), remotePartitionDeleteMetadata);
+        } finally {
+            lock.readLock().unlock();
+        }
+    }
+
+    private void doPublishMetadata(TopicIdPartition topicIdPartition, RemoteLogMetadata remoteLogMetadata)
+            throws RemoteStorageException {
+        log.debug("Publishing metadata for partition: [{}] with context: [{}]", topicIdPartition, remoteLogMetadata);
+        ensureInitializedAndNotClosed();
+
+        try {
+            // Publish the message to the topic.
+            RecordMetadata recordMetadata = producerManager.publishMessage(topicIdPartition,
+                    remoteLogMetadata);
+            // Wait until the consumer catches up with this offset. This will ensure read-after-write consistency
+            // semantics.
+            consumerManager.waitTillConsumptionCatchesUp(recordMetadata);

Review comment:
   This method is invoked from multiple threads for different topic partitions. It will not be a bottleneck, as each partition's segments are uploaded sequentially, so this blocking wait only serializes publishes within a single partition.
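To make the read-after-write guarantee concrete: the write is only acknowledged once the consumer has applied the just-produced offset to the local metadata store. Here is a minimal sketch of one way such a wait can be implemented. The class and method signatures (CatchUpWaiter, markConsumed, explicit partition/offset parameters) are illustrative assumptions; the PR's ConsumerManager takes a RecordMetadata instead.

    import java.util.HashMap;
    import java.util.Map;
    import java.util.concurrent.TimeoutException;

    // Sketch of blocking publishers until the consumer catches up to a produced offset.
    public class CatchUpWaiter {
        // Latest offset applied to the local store per metadata topic partition.
        // Guarded by "this".
        private final Map<Integer, Long> consumedOffsets = new HashMap<>();

        // Called from the consumer loop after a record has been applied to the store.
        public synchronized void markConsumed(int metadataPartition, long offset) {
            consumedOffsets.put(metadataPartition, offset);
            notifyAll();
        }

        // Called by a publisher after producer.send(...).get() returns the record's
        // RecordMetadata; blocks until the consumer has applied that offset.
        public synchronized void waitTillConsumptionCatchesUp(int metadataPartition,
                                                              long producedOffset,
                                                              long timeoutMs)
                throws InterruptedException, TimeoutException {
            long deadlineMs = System.currentTimeMillis() + timeoutMs;
            while (consumedOffsets.getOrDefault(metadataPartition, -1L) < producedOffset) {
                long remainingMs = deadlineMs - System.currentTimeMillis();
                if (remainingMs <= 0) {
                    throw new TimeoutException("Consumer did not catch up to offset " + producedOffset);
                }
                wait(remainingMs);
            }
        }
    }

Since waiters for different metadata partitions share one monitor here, a wakeup for one partition re-checks (and re-sleeps) the others; a per-partition condition would avoid that at the cost of more bookkeeping.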
##########
File path: storage/src/main/java/org/apache/kafka/server/log/remote/metadata/storage/ProducerManager.java
##########
@@ -0,0 +1,125 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.server.log.remote.metadata.storage;
+
+import org.apache.kafka.clients.producer.Callback;
+import org.apache.kafka.clients.producer.KafkaProducer;
+import org.apache.kafka.clients.producer.ProducerRecord;
+import org.apache.kafka.clients.producer.RecordMetadata;
+import org.apache.kafka.common.KafkaException;
+import org.apache.kafka.common.TopicIdPartition;
+import org.apache.kafka.server.log.remote.metadata.storage.serialization.RemoteLogMetadataSerde;
+import org.apache.kafka.server.log.remote.storage.RemoteLogMetadata;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.Closeable;
+import java.time.Duration;
+
+/**
+ * This class is responsible for publishing messages into the remote log metadata topic partitions.
+ */
+public class ProducerManager implements Closeable {
+    private static final Logger log = LoggerFactory.getLogger(ProducerManager.class);
+
+    private final RemoteLogMetadataSerde serde = new RemoteLogMetadataSerde();
+    private final KafkaProducer<byte[], byte[]> producer;
+    private final RemoteLogMetadataTopicPartitioner topicPartitioner;
+    private final TopicBasedRemoteLogMetadataManagerConfig rlmmConfig;
+
+    private volatile boolean close = false;
+
+    public ProducerManager(TopicBasedRemoteLogMetadataManagerConfig rlmmConfig,
+                           RemoteLogMetadataTopicPartitioner rlmmTopicPartitioner) {
+        this.rlmmConfig = rlmmConfig;
+        this.producer = new KafkaProducer<>(rlmmConfig.producerProperties());
+        topicPartitioner = rlmmTopicPartitioner;
+    }
+
+    public RecordMetadata publishMessage(TopicIdPartition topicIdPartition,
+                                         RemoteLogMetadata remoteLogMetadataContext) throws KafkaException {
+        ensureNotClosed();
+
+        int metadataPartitionNo = topicPartitioner.metadataPartition(topicIdPartition);
+        log.debug("Publishing metadata message of partition:[{}] into metadata topic partition:[{}] with payload: [{}]",
+                topicIdPartition, metadataPartitionNo, remoteLogMetadataContext);
+
+        ProducerCallback callback = new ProducerCallback();
+        try {
+            if (metadataPartitionNo >= rlmmConfig.metadataTopicPartitionsCount()) {
+                // This should never occur as long as metadata partitions always remain the same.
+                throw new KafkaException("Chosen partition no " + metadataPartitionNo
+                        + " is more than the partition count: " + rlmmConfig.metadataTopicPartitionsCount());
+            }
+            producer.send(new ProducerRecord<>(rlmmConfig.remoteLogMetadataTopicName(), metadataPartitionNo, null,
+                    serde.serialize(remoteLogMetadataContext)), callback).get();
+        } catch (KafkaException e) {
+            throw e;
+        } catch (Exception e) {
+            throw new KafkaException("Exception occurred while publishing message for topicIdPartition: " + topicIdPartition, e);
+        }
+
+        if (callback.exception() != null) {
+            Exception ex = callback.exception();
+            if (ex instanceof KafkaException) {
+                throw (KafkaException) ex;

Review comment:
   It is needed as the method is declared with throws KafkaException. I am also fine with removing that declaration, as KafkaException is a RuntimeException (unchecked), so the throws clause is informational only.
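A small standalone sketch of the rethrow pattern under discussion (RethrowSketch is illustrative, not PR code): KafkaException extends RuntimeException, so the throws clause documents behavior rather than forcing callers to handle it, and the instanceof check simply avoids double-wrapping an exception that is already a KafkaException.

    import org.apache.kafka.common.KafkaException;

    public final class RethrowSketch {
        // "throws KafkaException" is informational only: it is an unchecked exception,
        // so callers are not required to catch it.
        static void rethrow(Exception ex) throws KafkaException {
            if (ex instanceof KafkaException) {
                throw (KafkaException) ex;  // propagate as-is instead of double-wrapping
            }
            throw new KafkaException("Error while publishing remote log metadata", ex);
        }
    }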
##########
File path: storage/src/main/java/org/apache/kafka/server/log/remote/metadata/storage/ProducerManager.java
##########
@@ -0,0 +1,125 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.server.log.remote.metadata.storage;
+
+import org.apache.kafka.clients.producer.Callback;
+import org.apache.kafka.clients.producer.KafkaProducer;
+import org.apache.kafka.clients.producer.ProducerRecord;
+import org.apache.kafka.clients.producer.RecordMetadata;
+import org.apache.kafka.common.KafkaException;
+import org.apache.kafka.common.TopicIdPartition;
+import org.apache.kafka.server.log.remote.metadata.storage.serialization.RemoteLogMetadataSerde;
+import org.apache.kafka.server.log.remote.storage.RemoteLogMetadata;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.Closeable;
+import java.time.Duration;
+
+/**
+ * This class is responsible for publishing messages into the remote log metadata topic partitions.
+ */
+public class ProducerManager implements Closeable {
+    private static final Logger log = LoggerFactory.getLogger(ProducerManager.class);
+
+    private final RemoteLogMetadataSerde serde = new RemoteLogMetadataSerde();
+    private final KafkaProducer<byte[], byte[]> producer;
+    private final RemoteLogMetadataTopicPartitioner topicPartitioner;
+    private final TopicBasedRemoteLogMetadataManagerConfig rlmmConfig;
+
+    private volatile boolean close = false;
+
+    public ProducerManager(TopicBasedRemoteLogMetadataManagerConfig rlmmConfig,
+                           RemoteLogMetadataTopicPartitioner rlmmTopicPartitioner) {
+        this.rlmmConfig = rlmmConfig;
+        this.producer = new KafkaProducer<>(rlmmConfig.producerProperties());
+        topicPartitioner = rlmmTopicPartitioner;
+    }
+
+    public RecordMetadata publishMessage(TopicIdPartition topicIdPartition,
+                                         RemoteLogMetadata remoteLogMetadataContext) throws KafkaException {
+        ensureNotClosed();
+
+        int metadataPartitionNo = topicPartitioner.metadataPartition(topicIdPartition);
+        log.debug("Publishing metadata message of partition:[{}] into metadata topic partition:[{}] with payload: [{}]",
+                topicIdPartition, metadataPartitionNo, remoteLogMetadataContext);
+
+        ProducerCallback callback = new ProducerCallback();
+        try {
+            if (metadataPartitionNo >= rlmmConfig.metadataTopicPartitionsCount()) {
+                // This should never occur as long as metadata partitions always remain the same.
+                throw new KafkaException("Chosen partition no " + metadataPartitionNo
+                        + " is more than the partition count: " + rlmmConfig.metadataTopicPartitionsCount());
+            }
+            producer.send(new ProducerRecord<>(rlmmConfig.remoteLogMetadataTopicName(), metadataPartitionNo, null,
+                    serde.serialize(remoteLogMetadataContext)), callback).get();

Review comment:
   Multiple events are published by multiple threads, so batching will still occur in the producer even though each individual call blocks on get().
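To illustrate that batching point, here is a minimal standalone sketch. The bootstrap address, thread counts, payloads, and config values are illustrative assumptions, and __remote_log_metadata is used as the topic name for the example. Each thread blocks on its own get(), yet because all threads share one producer, records accumulate in the producer's per-partition buffers and are flushed in batches governed by batch.size and linger.ms.

    import java.util.Properties;
    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.TimeUnit;

    import org.apache.kafka.clients.producer.KafkaProducer;
    import org.apache.kafka.clients.producer.ProducerConfig;
    import org.apache.kafka.clients.producer.ProducerRecord;
    import org.apache.kafka.common.serialization.ByteArraySerializer;

    // Sketch: many threads doing blocking sends through one shared producer.
    public class SharedProducerSketch {
        public static void main(String[] args) throws InterruptedException {
            Properties props = new Properties();
            props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); // assumption
            props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class);
            props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class);
            props.put(ProducerConfig.BATCH_SIZE_CONFIG, 16384); // bytes per partition batch
            props.put(ProducerConfig.LINGER_MS_CONFIG, 5);      // small wait to let batches fill

            try (KafkaProducer<byte[], byte[]> producer = new KafkaProducer<>(props)) {
                ExecutorService pool = Executors.newFixedThreadPool(8);
                for (int i = 0; i < 100; i++) {
                    byte[] payload = ("metadata-" + i).getBytes();
                    pool.submit(() -> {
                        try {
                            // Each thread blocks on its own send, but records from all
                            // threads are accumulated and sent in shared batches.
                            producer.send(new ProducerRecord<>("__remote_log_metadata", payload)).get();
                        } catch (Exception e) {
                            e.printStackTrace();
                        }
                    });
                }
                pool.shutdown();
                pool.awaitTermination(1, TimeUnit.MINUTES);
            }
        }
    }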