errose28 commented on code in PR #7732: URL: https://github.com/apache/ozone/pull/7732#discussion_r1927649216
########## hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestReconcileContainerCommandHandler.java: ########## @@ -141,7 +144,8 @@ public void testReconcileContainerCommandReports(ContainerLayoutVersion layout) */ @ContainerLayoutTestInfo.ContainerTest public void testReconcileContainerCommandMetrics(ContainerLayoutVersion layout) throws Exception { - init(layout, c -> { }); + init(layout, c -> { Review Comment: nit. we can remove the whitespace change here and on line 95 ########## hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestReconcileContainerCommandHandler.java: ########## @@ -150,7 +154,17 @@ public void testReconcileContainerCommandMetrics(ContainerLayoutVersion layout) ReconcileContainerCommand cmd = new ReconcileContainerCommand(id, Collections.emptySet()); subject.handle(cmd, ozoneContainer, context, null); Review Comment: I can't comment on the line directly but 152 has a comment from an older implementation we can remove here. I forgot to remove it in #7076 when we initially moved the timing handling to the supervisor ########## hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/replication/TestReplicationSupervisor.java: ########## @@ -513,6 +522,54 @@ public void testMultipleReplication(ContainerLayoutVersion layout, } } + @ContainerLayoutTestInfo.ContainerTest + public void testReconciliationTaskMetrics(ContainerLayoutVersion layout) throws IOException { + this.layoutVersion = layout; + // GIVEN + ReplicationSupervisor replicationSupervisor = + supervisorWithReplicator(FakeReplicator::new); + ReplicationSupervisorMetrics replicationMetrics = + ReplicationSupervisorMetrics.create(replicationSupervisor); + + try { + //WHEN + replicationSupervisor.addTask(createReconciliationTask(1L)); + replicationSupervisor.addTask(createReconciliationTask(2L)); + + ReconcileContainerTask reconciliationTask = createReconciliationTask(6L); + clock.fastForward(15000); + replicationSupervisor.addTask(reconciliationTask); + doThrow(IOException.class).when(mockController).reconcileContainer(any(), anyLong(), any()); + replicationSupervisor.addTask(createReconciliationTask(7L)); + + //THEN + assertEquals(2, replicationSupervisor.getReplicationSuccessCount()); + + assertEquals(2, replicationSupervisor.getReplicationSuccessCount( + reconciliationTask.getMetricName())); + assertEquals(1, replicationSupervisor.getReplicationFailureCount()); + assertEquals(1, replicationSupervisor.getReplicationFailureCount( + reconciliationTask.getMetricName())); + assertEquals(1, replicationSupervisor.getReplicationTimeoutCount()); + assertEquals(1, replicationSupervisor.getReplicationTimeoutCount( + reconciliationTask.getMetricName())); + assertEquals(4, replicationSupervisor.getReplicationRequestCount()); + assertEquals(4, replicationSupervisor.getReplicationRequestCount( + reconciliationTask.getMetricName())); + + + assertTrue(replicationSupervisor.getReplicationRequestTotalTime( Review Comment: Let's check `getReplicationRequestAvgTime` here too ########## hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ReconcileContainerCommandHandler.java: ########## @@ -28,28 +28,28 @@ import org.apache.hadoop.ozone.protocol.commands.ReconcileContainerCommand; import org.apache.hadoop.ozone.protocol.commands.SCMCommand; -import java.util.concurrent.atomic.AtomicLong; - /** * Handles commands from SCM to reconcile a container replica on this datanode with the replicas on its peers. */ public class ReconcileContainerCommandHandler implements CommandHandler { private final ReplicationSupervisor supervisor; - private final AtomicLong invocationCount; private final DNContainerOperationClient dnClient; + private String metricsName; public ReconcileContainerCommandHandler(ReplicationSupervisor supervisor, DNContainerOperationClient dnClient) { this.supervisor = supervisor; this.dnClient = dnClient; - this.invocationCount = new AtomicLong(0); } @Override public void handle(SCMCommand command, OzoneContainer container, StateContext context, SCMConnectionManager connectionManager) { - invocationCount.incrementAndGet(); ReconcileContainerCommand reconcileCommand = (ReconcileContainerCommand) command; - supervisor.addTask(new ReconcileContainerTask(container.getController(), dnClient, reconcileCommand)); + ReconcileContainerTask task = new ReconcileContainerTask(container.getController(), dnClient, reconcileCommand); + if (metricsName == null) { Review Comment: Just a note that I filed HDDS-12133 to improve this in general for commands going through the replication supervisor. For now we can leave it as is since it is consistent with other replication type commands. ########## hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java: ########## @@ -1449,6 +1449,10 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container<?> Set<DatanodeDetails> peers) throws IOException { // TODO Just a deterministic placeholder hash for testing until actual implementation is finished. ContainerData data = container.getContainerData(); + if (container.getContainerState() == State.DELETED) { Review Comment: I think this was intended for #7474 ########## hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerLogger.java: ########## @@ -146,6 +146,15 @@ public static void logRecovered(ContainerData containerData) { LOG.info(getMessage(containerData)); } + /** + * Logged when a container is reconciled. + * + * @param containerData The container that was reconciled on this datanode. + */ + public static void logContainerReconciled(ContainerData containerData) { + LOG.info(getMessage(containerData, "Container reconciled")); + } Review Comment: In `getMessage`, let's start logging the data checksum for all containers. For the reconcile message, let's note the old and new checksum before and after reconciliation. These can passed as parameters to `logReconciled` ```suggestion public static void logReconciled(ContainerData containerData, long oldDataChecksum, long newDataChecksum) { LOG.info(getMessage(containerData, "Container reconciled. Old checksum is " ...)); // Format longs as hex strings and concatenate to the message. We want to make sure this format matches what comes out of `ozone admin container info` } ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@ozone.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@ozone.apache.org For additional commands, e-mail: issues-h...@ozone.apache.org