artemlivshits commented on code in PR #14774:
URL: https://github.com/apache/kafka/pull/14774#discussion_r1414599997
##########
core/src/main/scala/kafka/server/KafkaApis.scala:
##########
@@ -708,23 +708,40 @@ class KafkaApis(val requestChannel: RequestChannel,
}
}
- if (authorizedRequestInfo.isEmpty)
- sendResponseCallback(Map.empty)
- else {
- val internalTopicsAllowed = request.header.clientId ==
AdminUtils.ADMIN_CLIENT_ID
+ val internalTopicsAllowed = request.header.clientId ==
AdminUtils.ADMIN_CLIENT_ID
+ val transactionVerificationEntries = new
ReplicaManager.TransactionVerificationEntries
- // call the replica manager to append messages to the replicas
+ def postVerificationCallback(newRequestLocal: RequestLocal)
Review Comment:
Do we need to change this file? I think we could preserve the abstraction
and keep the verification guts encapsulated in the async call (as it is now),
and only let the group coordinator use the explicit stages.
I.e. replicaManager.appendRecords would do what it does now (under the
covers call the stages), then we don't have to bring the complexity into the
code that works well with the abstraction.
##########
core/src/main/scala/kafka/coordinator/group/GroupMetadataManager.scala:
##########
@@ -349,146 +468,68 @@ class GroupMetadataManager(brokerId: Int,
consumerId: String,
offsetMetadata: immutable.Map[TopicIdPartition,
OffsetAndMetadata],
responseCallback: immutable.Map[TopicIdPartition, Errors]
=> Unit,
- transactionalId: String = null,
producerId: Long = RecordBatch.NO_PRODUCER_ID,
producerEpoch: Short = RecordBatch.NO_PRODUCER_EPOCH,
requestLocal: RequestLocal = RequestLocal.NoCaching): Unit
= {
- // first filter out partitions with offset metadata size exceeding limit
- val filteredOffsetMetadata = offsetMetadata.filter { case (_,
offsetAndMetadata) =>
- validateOffsetMetadataLength(offsetAndMetadata.metadata)
- }
-
group.inLock {
if (!group.hasReceivedConsistentOffsetCommits)
warn(s"group: ${group.groupId} with leader: ${group.leaderOrNull} has
received offset commits from consumers as well " +
s"as transactional producers. Mixing both types of offset commits
will generally result in surprises and " +
s"should be avoided.")
}
- val isTxnOffsetCommit = producerId != RecordBatch.NO_PRODUCER_ID
- // construct the message set to append
+ val filteredOffsetMetadata = offsetMetadata.filter { case (_,
offsetAndMetadata) =>
+ validateOffsetMetadataLength(offsetAndMetadata.metadata)
+ }
if (filteredOffsetMetadata.isEmpty) {
// compute the final error codes for the commit response
val commitStatus = offsetMetadata.map { case (k, _) => k ->
Errors.OFFSET_METADATA_TOO_LARGE }
responseCallback(commitStatus)
- } else {
- getMagic(partitionFor(group.groupId)) match {
- case Some(magicValue) =>
- // We always use CREATE_TIME, like the producer. The conversion to
LOG_APPEND_TIME (if necessary) happens automatically.
- val timestampType = TimestampType.CREATE_TIME
- val timestamp = time.milliseconds()
-
- val records = filteredOffsetMetadata.map { case (topicIdPartition,
offsetAndMetadata) =>
- val key = GroupMetadataManager.offsetCommitKey(group.groupId,
topicIdPartition.topicPartition)
- val value =
GroupMetadataManager.offsetCommitValue(offsetAndMetadata,
interBrokerProtocolVersion)
- new SimpleRecord(timestamp, key, value)
- }
- val offsetTopicPartition = new
TopicPartition(Topic.GROUP_METADATA_TOPIC_NAME, partitionFor(group.groupId))
- val buffer =
ByteBuffer.allocate(AbstractRecords.estimateSizeInBytes(magicValue,
compressionType, records.asJava))
-
- if (isTxnOffsetCommit && magicValue < RecordBatch.MAGIC_VALUE_V2)
- throw Errors.UNSUPPORTED_FOR_MESSAGE_FORMAT.exception("Attempting
to make a transaction offset commit with an invalid magic: " + magicValue)
-
- val builder = MemoryRecords.builder(buffer, magicValue,
compressionType, timestampType, 0L, time.milliseconds(),
- producerId, producerEpoch, 0, isTxnOffsetCommit,
RecordBatch.NO_PARTITION_LEADER_EPOCH)
-
- records.foreach(builder.append)
- val entries = Map(offsetTopicPartition -> builder.build())
-
- // set the callback function to insert offsets into cache after log
append completed
- def putCacheCallback(responseStatus: Map[TopicPartition,
PartitionResponse]): Unit = {
- // the append response should only contain the topics partition
- if (responseStatus.size != 1 ||
!responseStatus.contains(offsetTopicPartition))
- throw new IllegalStateException("Append status %s should only
have one partition %s"
- .format(responseStatus, offsetTopicPartition))
-
- // construct the commit response status and insert
- // the offset and metadata to cache if the append status has no
error
- val status = responseStatus(offsetTopicPartition)
-
- val responseError = group.inLock {
- if (status.error == Errors.NONE) {
- if (!group.is(Dead)) {
- filteredOffsetMetadata.forKeyValue { (topicIdPartition,
offsetAndMetadata) =>
- if (isTxnOffsetCommit)
- group.onTxnOffsetCommitAppend(producerId,
topicIdPartition, CommitRecordMetadataAndOffset(Some(status.baseOffset),
offsetAndMetadata))
- else
- group.onOffsetCommitAppend(topicIdPartition,
CommitRecordMetadataAndOffset(Some(status.baseOffset), offsetAndMetadata))
- }
- }
-
- // Record the number of offsets committed to the log
- offsetCommitsSensor.record(records.size)
-
- Errors.NONE
- } else {
- if (!group.is(Dead)) {
- if (!group.hasPendingOffsetCommitsFromProducer(producerId))
- removeProducerGroup(producerId, group.groupId)
- filteredOffsetMetadata.forKeyValue { (topicIdPartition,
offsetAndMetadata) =>
- if (isTxnOffsetCommit)
- group.failPendingTxnOffsetCommit(producerId,
topicIdPartition)
- else
- group.failPendingOffsetWrite(topicIdPartition,
offsetAndMetadata)
- }
- }
-
- debug(s"Offset commit $filteredOffsetMetadata from group
${group.groupId}, consumer $consumerId " +
- s"with generation ${group.generationId} failed when
appending to log due to ${status.error.exceptionName}")
-
- // transform the log append error code to the corresponding
the commit status error code
- status.error match {
- case Errors.UNKNOWN_TOPIC_OR_PARTITION
- | Errors.NOT_ENOUGH_REPLICAS
- | Errors.NOT_ENOUGH_REPLICAS_AFTER_APPEND =>
- Errors.COORDINATOR_NOT_AVAILABLE
-
- case Errors.NOT_LEADER_OR_FOLLOWER
- | Errors.KAFKA_STORAGE_ERROR =>
- Errors.NOT_COORDINATOR
-
- case Errors.MESSAGE_TOO_LARGE
- | Errors.RECORD_LIST_TOO_LARGE
- | Errors.INVALID_FETCH_SIZE =>
- Errors.INVALID_COMMIT_OFFSET_SIZE
+ return
+ }
- case other => other
- }
- }
- }
+ val magicOpt = getMagic(partitionFor(group.groupId))
+ if (magicOpt.isEmpty) {
+ val commitStatus = offsetMetadata.map { case (topicIdPartition, _) =>
+ (topicIdPartition, Errors.NOT_COORDINATOR)
+ }
+ responseCallback(commitStatus)
+ return
+ }
- // compute the final error codes for the commit response
- val commitStatus = offsetMetadata.map { case (topicIdPartition,
offsetAndMetadata) =>
- if (validateOffsetMetadataLength(offsetAndMetadata.metadata))
- (topicIdPartition, responseError)
- else
- (topicIdPartition, Errors.OFFSET_METADATA_TOO_LARGE)
- }
+ val isTxnOffsetCommit = producerId != RecordBatch.NO_PRODUCER_ID
+ val records = generateOffsetRecords(magicOpt.get, isTxnOffsetCommit,
group.groupId, filteredOffsetMetadata, producerId, producerEpoch)
+ val putCacheCallback = createPutCacheCallback(isTxnOffsetCommit, group,
consumerId, offsetMetadata, filteredOffsetMetadata, responseCallback,
producerId, records)
- // finally trigger the callback logic passed from the API layer
- responseCallback(commitStatus)
- }
+ group.inLock {
+ group.prepareOffsetCommit(offsetMetadata)
+ }
- if (isTxnOffsetCommit) {
- group.inLock {
- addProducerGroup(producerId, group.groupId)
- group.prepareTxnOffsetCommit(producerId, offsetMetadata)
- }
- } else {
- group.inLock {
- group.prepareOffsetCommit(offsetMetadata)
- }
- }
+ appendForGroup(group, records, requestLocal, putCacheCallback)
+ }
- appendForGroup(group, entries, requestLocal, putCacheCallback,
transactionalId)
+ def storeOffsetsAfterVerification(group: GroupMetadata,
+ verifiedOffsetMetadata:
immutable.Map[TopicIdPartition, OffsetAndMetadata],
+ records: Map[TopicPartition,
MemoryRecords],
+ putCacheCallback: Map[TopicPartition,
PartitionResponse] => Unit,
+ producerId: Long,
+ transactionVerificationEntries:
TransactionVerificationEntries,
+ errorResults: Map[TopicPartition,
LogAppendResult],
+ requestLocal: RequestLocal =
RequestLocal.NoCaching): Unit = {
+ group.inLock {
Review Comment:
Wouldn't this whole function have to be called under group lock in order for
append to happen partially under the lock (sort of the reason for this change)?
If so let's add a comment that explains the precondition and remove extra lock
taking. Re-taking the locks when not necessary makes it hard to track lock
scopes.
##########
core/src/main/scala/kafka/server/ReplicaManager.scala:
##########
@@ -941,39 +857,129 @@ class ReplicaManager(val config: KafkaConfig,
}
}
- private def partitionEntriesForVerification(verificationGuards:
mutable.Map[TopicPartition, VerificationGuard],
- entriesPerPartition:
Map[TopicPartition, MemoryRecords],
- verifiedEntries:
mutable.Map[TopicPartition, MemoryRecords],
- unverifiedEntries:
mutable.Map[TopicPartition, MemoryRecords],
- errorEntries:
mutable.Map[TopicPartition, Errors]): Unit= {
+ private def sendInvalidRequiredAcksResponse(entries: Map[TopicPartition,
MemoryRecords],
+ responseCallback:
Map[TopicPartition, PartitionResponse] => Unit): Unit = {
+ // If required.acks is outside accepted range, something is wrong with the
client
+ // Just return an error and don't handle the request at all
+ val responseStatus = entries.map { case (topicPartition, _) =>
+ topicPartition -> new PartitionResponse(
+ Errors.INVALID_REQUIRED_ACKS,
+ LogAppendInfo.UNKNOWN_LOG_APPEND_INFO.firstOffset,
+ RecordBatch.NO_TIMESTAMP,
+ LogAppendInfo.UNKNOWN_LOG_APPEND_INFO.logStartOffset
+ )
+ }
+ responseCallback(responseStatus)
+ }
+
+ /**
+ * Apply the postVerificationCallback asynchronously only after verifying
the partitions have been added to the transaction.
+ * The postVerificationCallback takes the arguments of the requestLocal for
the thread that will be doing the append as
+ * well as a mapping of topic partitions to LogAppendResult for the
partitions that saw errors when verifying.
+ *
+ * This method will start the verification process for all the
topicPartitions in entriesPerPartition and supply the
+ * postVerificationCallback to be run on a request handler thread when the
response is received.
+ *
+ * @param entriesPerPartition the records per partition to be
appended and therefore need verification
+ * @param transactionVerificationEntries the object that will store the
entries to verify, the errors, and the verification guards
+ * @param transactionalId the id for the transaction
+ * @param requestLocal container for the stateful
instances scoped to this request -- this must correspond to the
+ * thread calling this method
+ * @param postVerificationCallback the method to be called when
verification completes and the verification errors
+ * and the thread's RequestLocal are
supplied
+ */
+ def appendRecordsWithTransactionVerification(entriesPerPartition:
Map[TopicPartition, MemoryRecords],
+ transactionVerificationEntries:
TransactionVerificationEntries,
+ transactionalId: String,
+ requestLocal: RequestLocal,
+ postVerificationCallback:
RequestLocal => Map[TopicPartition, LogAppendResult] => Unit): Unit = {
+ if (transactionalId != null &&
config.transactionPartitionVerificationEnable &&
addPartitionsToTxnManager.isDefined)
+ partitionEntriesForVerification(transactionVerificationEntries,
entriesPerPartition)
+
+ val onVerificationComplete: (RequestLocal, Map[TopicPartition, Errors]) =>
Unit =
+ executePostVerificationCallback(
+ transactionVerificationEntries,
+ postVerificationCallback,
+ )
+
+ if (transactionVerificationEntries.unverified.isEmpty) {
+ onVerificationComplete(requestLocal,
transactionVerificationEntries.errors.toMap)
+ } else {
+ // For unverified entries, send a request to verify. When verified, the
append process will proceed via the callback.
+ // We verify above that all partitions use the same producer ID.
+ val batchInfo =
transactionVerificationEntries.unverified.head._2.firstBatch()
+ addPartitionsToTxnManager.foreach(_.verifyTransaction(
+ transactionalId = transactionalId,
+ producerId = batchInfo.producerId,
+ producerEpoch = batchInfo.producerEpoch,
+ topicPartitions =
transactionVerificationEntries.unverified.keySet.toSeq,
+ callback =
KafkaRequestHandler.wrapAsyncCallback(onVerificationComplete, requestLocal)
+ ))
+ }
+ }
+
+ /**
+ * A helper method to compile the results from the transaction verification
and call the postVerificationCallback.
+ *
+ * @param transactionVerificationEntries the object that will store the
entries to verify, the errors, and the verification guards
+ * @param postVerificationCallback the method to be called when
verification completes and the verification errors
+ * and the thread's RequestLocal are
supplied
+ * @param requestLocal container for the stateful
instances scoped to this request -- this must correspond to the
+ * thread calling this method
+ *
+ */
+ private def executePostVerificationCallback(transactionVerificationEntries:
TransactionVerificationEntries,
+
postVerificationCallback: RequestLocal => Map[TopicPartition, LogAppendResult]
=> Unit)
+ (requestLocal:
RequestLocal, unverifiedEntries: Map[TopicPartition, Errors]): Unit = {
+ val errorResults = (unverifiedEntries ++
transactionVerificationEntries.errors).map {
+ case (topicPartition, error) =>
+ // translate transaction coordinator errors to known producer response
errors
+ val customException =
+ error match {
+ case Errors.INVALID_TXN_STATE => Some(error.exception("Partition
was not added to the transaction"))
+ case Errors.CONCURRENT_TRANSACTIONS |
+ Errors.COORDINATOR_LOAD_IN_PROGRESS |
+ Errors.COORDINATOR_NOT_AVAILABLE |
+ Errors.NOT_COORDINATOR => Some(new NotEnoughReplicasException(
+ s"Unable to verify the partition has been added to the
transaction. Underlying error: ${error.toString}"))
+ case _ => None
+ }
+ topicPartition -> LogAppendResult(
+ LogAppendInfo.UNKNOWN_LOG_APPEND_INFO,
+ Some(customException.getOrElse(error.exception)),
+ hasCustomErrorMessage = customException.isDefined
+ )
+ }
+ postVerificationCallback(requestLocal)(errorResults)
+ }
+
+ private def partitionEntriesForVerification(transactionVerificationEntries:
TransactionVerificationEntries, entriesPerPartition: Map[TopicPartition,
MemoryRecords]): TransactionVerificationEntries = {
Review Comment:
Do we use the result of this function?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]