ashishkumar50 commented on code in PR #8264:
URL: https://github.com/apache/ozone/pull/8264#discussion_r2053803258
##########
hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/containerlog/parser/ContainerDatanodeDatabase.java:
##########
@@ -233,5 +239,184 @@ private void dropTable(String tableName, Statement stmt)
throws SQLException {
stmt.executeUpdate(dropTableSQL);
}
+ /**
+ * Displays detailed information about a container based on its ID,
including its state, BCSID,
+ * timestamp, message, and index value. It also checks for issues such as
UNHEALTHY
+ * replicas, under-replication, over-replication, OPEN_UNHEALTHY,
OUASI_CLOSED_STUCK, mismatched replication
+ * and duplicate open.
+ *
+ * @param containerID The ID of the container to display details for.
+ */
+
+ public void showContainerDetails(Long containerID) throws SQLException {
+
+ try (Connection connection = getConnection()) {
+ List<DatanodeContainerInfo> logEntries =
getContainerLogData(containerID, connection);
+
+ if (logEntries.isEmpty()) {
+ System.out.println("Missing container with ID: " + containerID);
+ return;
+ }
+
+ System.out.printf("%-25s | %-15s | %-35s | %-20s | %-10s | %-30s |
%-12s%n",
+ "Timestamp", "Container ID", "Datanode ID", "Container State",
"BCSID", "Message", "Index Value");
+
System.out.println("-----------------------------------------------------------------------------------"
+
+
"-------------------------------------------------------------------------------------------------");
+
+ for (DatanodeContainerInfo entry : logEntries) {
+ System.out.printf("%-25s | %-15d | %-35s | %-20s | %-10d | %-30s |
%-12d%n",
+ entry.getTimestamp(),
+ entry.getContainerId(),
+ entry.getDatanodeId(),
+ entry.getState(),
+ entry.getBcsid(),
+ entry.getErrorMessage(),
+ entry.getIndexValue());
+ }
+
+
logEntries.sort(Comparator.comparing(DatanodeContainerInfo::getTimestamp));
+
+ if (checkForMultipleOpenStates(logEntries)) {
+ System.out.println("Container " + containerID + " might have duplicate
OPEN state.");
+ return;
+ }
+
+ Map<String, DatanodeContainerInfo> latestPerDatanode = new HashMap<>();
+ for (DatanodeContainerInfo entry : logEntries) {
+ String datanodeId = entry.getDatanodeId();
+ DatanodeContainerInfo existing = latestPerDatanode.get(datanodeId);
+ if (existing == null ||
entry.getTimestamp().compareTo(existing.getTimestamp()) > 0) {
+ latestPerDatanode.put(datanodeId, entry);
+ }
+ }
+
+ analyzeContainerHealth(containerID, latestPerDatanode);
+
+ } catch (SQLException e) {
+ throw e;
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ private void analyzeContainerHealth(Long containerID,
+ Map<String, DatanodeContainerInfo>
latestPerDatanode) {
+
+ Set<String> unhealthyReplicas = new HashSet<>();
+ Set<String> closedReplicas = new HashSet<>();
+ Set<String> openReplicas = new HashSet<>();
+ Set<String> quasiclosedReplicas = new HashSet<>();
+ Set<String> deletedReplicas = new HashSet<>();
+ Set<Long> bcsids = new HashSet<>();
+ Set<String> datanodeIds = new HashSet<>();
+
+ for (DatanodeContainerInfo entry : latestPerDatanode.values()) {
+ String datanodeId = entry.getDatanodeId();
+ String state = entry.getState();
+ long bcsid = entry.getBcsid();
+
+ datanodeIds.add(datanodeId);
+
+ switch (state.toUpperCase()) {
+ case "UNHEALTHY": unhealthyReplicas.add(datanodeId); break;
+ case "CLOSED": closedReplicas.add(datanodeId); bcsids.add(bcsid); break;
+ case "OPEN": openReplicas.add(datanodeId); break;
+ case "QUASI_CLOSED": quasiclosedReplicas.add(datanodeId); break;
+ case "DELETED": deletedReplicas.add(datanodeId); bcsids.add(bcsid);
break;
+ default:
+ break;
+ }
+ }
+
+ int unhealthyCount = unhealthyReplicas.size();
+ int replicaCount = datanodeIds.size();
+ int openReplicasCount = openReplicas.size();
+ int closedReplicasCount = closedReplicas.size();
+
+ if (bcsids.size() > 1) {
+ System.out.println("Container " + containerID + " has MISMATCHED
REPLICATION.");
+ } else if (unhealthyCount == replicaCount && replicaCount >=
DEFAULT_REPLICATION_FACTOR) {
+ System.out.println("Container " + containerID + " is UNHEALTHY across
all datanodes.");
+ } else if (unhealthyCount >= 2 && closedReplicasCount == replicaCount -
unhealthyCount) {
+ System.out.println("Container " + containerID + " is both UNHEALTHY and
UNDER-REPLICATED.");
+ } else if (unhealthyCount == 1 && closedReplicasCount == replicaCount -
unhealthyCount) {
+ System.out.println("Container " + containerID + " is UNDER-REPLICATED.");
Review Comment:
If there are CLOSED replicas but the number of CLOSED replicas is less than
REPLICATION_FACTOR, we can just mark the container as UNDER-REPLICATED.
If there are any UNHEALTHY containers, but the number of CLOSED replicas is
equal to REPLICATION_FACTOR and those CLOSED replicas' timestamps are later
than the UNHEALTHY container's timestamp, we can assume the container is normal.
##########
hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/containerlog/parser/ContainerDatanodeDatabase.java:
##########
@@ -42,6 +47,7 @@
public class ContainerDatanodeDatabase {
private static Map<String, String> queries;
+ private static final int DEFAULT_REPLICATION_FACTOR = 3;
Review Comment:
Instead of hardcoding it, use the conf to get the replication factor.
##########
hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/containerlog/parser/ContainerDatanodeDatabase.java:
##########
@@ -233,5 +239,184 @@ private void dropTable(String tableName, Statement stmt)
throws SQLException {
stmt.executeUpdate(dropTableSQL);
}
+ /**
+ * Displays detailed information about a container based on its ID,
including its state, BCSID,
+ * timestamp, message, and index value. It also checks for issues such as
UNHEALTHY
+ * replicas, under-replication, over-replication, OPEN_UNHEALTHY,
OUASI_CLOSED_STUCK, mismatched replication
+ * and duplicate open.
+ *
+ * @param containerID The ID of the container to display details for.
+ */
+
+ public void showContainerDetails(Long containerID) throws SQLException {
+
+ try (Connection connection = getConnection()) {
+ List<DatanodeContainerInfo> logEntries =
getContainerLogData(containerID, connection);
+
+ if (logEntries.isEmpty()) {
+ System.out.println("Missing container with ID: " + containerID);
+ return;
+ }
+
+ System.out.printf("%-25s | %-15s | %-35s | %-20s | %-10s | %-30s |
%-12s%n",
+ "Timestamp", "Container ID", "Datanode ID", "Container State",
"BCSID", "Message", "Index Value");
+
System.out.println("-----------------------------------------------------------------------------------"
+
+
"-------------------------------------------------------------------------------------------------");
+
+ for (DatanodeContainerInfo entry : logEntries) {
+ System.out.printf("%-25s | %-15d | %-35s | %-20s | %-10d | %-30s |
%-12d%n",
+ entry.getTimestamp(),
+ entry.getContainerId(),
+ entry.getDatanodeId(),
+ entry.getState(),
+ entry.getBcsid(),
+ entry.getErrorMessage(),
+ entry.getIndexValue());
+ }
+
+
logEntries.sort(Comparator.comparing(DatanodeContainerInfo::getTimestamp));
+
+ if (checkForMultipleOpenStates(logEntries)) {
+ System.out.println("Container " + containerID + " might have duplicate
OPEN state.");
+ return;
+ }
+
+ Map<String, DatanodeContainerInfo> latestPerDatanode = new HashMap<>();
+ for (DatanodeContainerInfo entry : logEntries) {
+ String datanodeId = entry.getDatanodeId();
+ DatanodeContainerInfo existing = latestPerDatanode.get(datanodeId);
+ if (existing == null ||
entry.getTimestamp().compareTo(existing.getTimestamp()) > 0) {
+ latestPerDatanode.put(datanodeId, entry);
+ }
+ }
+
+ analyzeContainerHealth(containerID, latestPerDatanode);
+
+ } catch (SQLException e) {
+ throw e;
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ private void analyzeContainerHealth(Long containerID,
+ Map<String, DatanodeContainerInfo>
latestPerDatanode) {
+
+ Set<String> unhealthyReplicas = new HashSet<>();
+ Set<String> closedReplicas = new HashSet<>();
+ Set<String> openReplicas = new HashSet<>();
+ Set<String> quasiclosedReplicas = new HashSet<>();
+ Set<String> deletedReplicas = new HashSet<>();
+ Set<Long> bcsids = new HashSet<>();
+ Set<String> datanodeIds = new HashSet<>();
+
+ for (DatanodeContainerInfo entry : latestPerDatanode.values()) {
+ String datanodeId = entry.getDatanodeId();
+ String state = entry.getState();
+ long bcsid = entry.getBcsid();
+
+ datanodeIds.add(datanodeId);
+
+ switch (state.toUpperCase()) {
+ case "UNHEALTHY": unhealthyReplicas.add(datanodeId); break;
+ case "CLOSED": closedReplicas.add(datanodeId); bcsids.add(bcsid); break;
+ case "OPEN": openReplicas.add(datanodeId); break;
+ case "QUASI_CLOSED": quasiclosedReplicas.add(datanodeId); break;
+ case "DELETED": deletedReplicas.add(datanodeId); bcsids.add(bcsid);
break;
+ default:
+ break;
+ }
+ }
+
+ int unhealthyCount = unhealthyReplicas.size();
+ int replicaCount = datanodeIds.size();
+ int openReplicasCount = openReplicas.size();
+ int closedReplicasCount = closedReplicas.size();
+
+ if (bcsids.size() > 1) {
+ System.out.println("Container " + containerID + " has MISMATCHED
REPLICATION.");
Review Comment:
Give more details in the message, such as: "There are multiple CLOSED
containers with different BCSIDs."
##########
hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/containerlog/parser/ContainerDatanodeDatabase.java:
##########
@@ -233,5 +239,184 @@ private void dropTable(String tableName, Statement stmt)
throws SQLException {
stmt.executeUpdate(dropTableSQL);
}
+ /**
+ * Displays detailed information about a container based on its ID,
including its state, BCSID,
+ * timestamp, message, and index value. It also checks for issues such as
UNHEALTHY
+ * replicas, under-replication, over-replication, OPEN_UNHEALTHY,
OUASI_CLOSED_STUCK, mismatched replication
+ * and duplicate open.
+ *
+ * @param containerID The ID of the container to display details for.
+ */
+
+ public void showContainerDetails(Long containerID) throws SQLException {
+
+ try (Connection connection = getConnection()) {
+ List<DatanodeContainerInfo> logEntries =
getContainerLogData(containerID, connection);
+
+ if (logEntries.isEmpty()) {
+ System.out.println("Missing container with ID: " + containerID);
+ return;
+ }
+
+ System.out.printf("%-25s | %-15s | %-35s | %-20s | %-10s | %-30s |
%-12s%n",
+ "Timestamp", "Container ID", "Datanode ID", "Container State",
"BCSID", "Message", "Index Value");
+
System.out.println("-----------------------------------------------------------------------------------"
+
+
"-------------------------------------------------------------------------------------------------");
+
+ for (DatanodeContainerInfo entry : logEntries) {
+ System.out.printf("%-25s | %-15d | %-35s | %-20s | %-10d | %-30s |
%-12d%n",
+ entry.getTimestamp(),
+ entry.getContainerId(),
+ entry.getDatanodeId(),
+ entry.getState(),
+ entry.getBcsid(),
+ entry.getErrorMessage(),
+ entry.getIndexValue());
+ }
+
+
logEntries.sort(Comparator.comparing(DatanodeContainerInfo::getTimestamp));
+
+ if (checkForMultipleOpenStates(logEntries)) {
+ System.out.println("Container " + containerID + " might have duplicate
OPEN state.");
+ return;
+ }
+
+ Map<String, DatanodeContainerInfo> latestPerDatanode = new HashMap<>();
+ for (DatanodeContainerInfo entry : logEntries) {
+ String datanodeId = entry.getDatanodeId();
+ DatanodeContainerInfo existing = latestPerDatanode.get(datanodeId);
+ if (existing == null ||
entry.getTimestamp().compareTo(existing.getTimestamp()) > 0) {
+ latestPerDatanode.put(datanodeId, entry);
+ }
+ }
+
+ analyzeContainerHealth(containerID, latestPerDatanode);
+
+ } catch (SQLException e) {
+ throw e;
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ private void analyzeContainerHealth(Long containerID,
+ Map<String, DatanodeContainerInfo>
latestPerDatanode) {
+
+ Set<String> unhealthyReplicas = new HashSet<>();
+ Set<String> closedReplicas = new HashSet<>();
+ Set<String> openReplicas = new HashSet<>();
+ Set<String> quasiclosedReplicas = new HashSet<>();
+ Set<String> deletedReplicas = new HashSet<>();
+ Set<Long> bcsids = new HashSet<>();
+ Set<String> datanodeIds = new HashSet<>();
+
+ for (DatanodeContainerInfo entry : latestPerDatanode.values()) {
+ String datanodeId = entry.getDatanodeId();
+ String state = entry.getState();
+ long bcsid = entry.getBcsid();
+
+ datanodeIds.add(datanodeId);
+
+ switch (state.toUpperCase()) {
+ case "UNHEALTHY": unhealthyReplicas.add(datanodeId); break;
Review Comment:
Use container health/state enums instead of hardcoding.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]