hachikuji commented on a change in pull request #8850:
URL: https://github.com/apache/kafka/pull/8850#discussion_r438559177



##########
File path: core/src/main/scala/kafka/log/Log.scala
##########
@@ -1784,8 +1784,18 @@ class Log(@volatile private var _dir: File,
   private def deleteRetentionMsBreachedSegments(): Int = {
     if (config.retentionMs < 0) return 0
     val startMs = time.milliseconds
-    deleteOldSegments((segment, _) => startMs - segment.largestTimestamp > 
config.retentionMs,
-      reason = s"retention time ${config.retentionMs}ms breach")
+
+    def shouldDelete(segment: LogSegment, nextSegmentOpt: Option[LogSegment]) 
= {
+      if (startMs - segment.largestTimestamp > config.retentionMs) {
+        info(s"Segment with base offset ${segment.baseOffset} will be deleted 
due to" +

Review comment:
       `LogSegment.largestTimestamp` may refer to either the largest record 
timestamp for newer formats or the last modified time of the segment for older 
formats. I think it would be helpful if the log message indicated which case it 
is. Perhaps we could add a method like this to `LogSegment`?
   ```scala
   def largestRecordTimestamp: Option[Long]
   ```

##########
File path: core/src/main/scala/kafka/log/Log.scala
##########
@@ -1804,8 +1816,15 @@ class Log(@volatile private var _dir: File,
   }
 
   private def deleteLogStartOffsetBreachedSegments(): Int = {
-    def shouldDelete(segment: LogSegment, nextSegmentOpt: Option[LogSegment]) =
-      nextSegmentOpt.exists(_.baseOffset <= logStartOffset)
+    def shouldDelete(segment: LogSegment, nextSegmentOpt: Option[LogSegment]) 
= {
+      if (nextSegmentOpt.exists(_.baseOffset <= logStartOffset)) {
+        info (s"Segment with base offset ${segment.baseOffset} will be deleted 
due to" +

Review comment:
       nit: remove the extra space after `info` (i.e. `info(` rather than `info (`)

##########
File path: core/src/main/scala/kafka/log/Log.scala
##########
@@ -1784,8 +1784,18 @@ class Log(@volatile private var _dir: File,
   private def deleteRetentionMsBreachedSegments(): Int = {
     if (config.retentionMs < 0) return 0
     val startMs = time.milliseconds
-    deleteOldSegments((segment, _) => startMs - segment.largestTimestamp > 
config.retentionMs,
-      reason = s"retention time ${config.retentionMs}ms breach")
+
+    def shouldDelete(segment: LogSegment, nextSegmentOpt: Option[LogSegment]) 
= {
+      if (startMs - segment.largestTimestamp > config.retentionMs) {
+        info(s"Segment with base offset ${segment.baseOffset} will be deleted 
due to" +
+          s" retentionMs breach. Largest timestamp of segment is 
${segment.largestTimestamp}")
+        true
+      } else {
+        false
+      }
+    }
+
+    deleteOldSegments(shouldDelete, reason = s"retention time 
${config.retentionMs}ms breach")

Review comment:
       With the logging we have above, do you think we still need the message 
in `deleteOldSegments`? Perhaps we could at least make it more concise. Maybe 
just mention the number of segments to be deleted, for example.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to