errose28 commented on code in PR #8154: URL: https://github.com/apache/ozone/pull/8154#discussion_r2042979337
########## hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/OMRatisLogRepair.java: ########## @@ -0,0 +1,300 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.repair.om; + +import static org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.Type.EchoRPC; + +import java.io.File; +import java.io.IOException; +import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; +import java.nio.ByteBuffer; +import java.nio.file.Files; +import java.nio.file.StandardCopyOption; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.apache.hadoop.hdds.cli.HddsVersionProvider; +import org.apache.hadoop.ozone.om.helpers.OMRatisHelper; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; +import org.apache.hadoop.ozone.repair.RepairTool; +import org.apache.ratis.proto.RaftProtos; +import org.apache.ratis.server.metrics.SegmentedRaftLogMetrics; +import org.apache.ratis.server.raftlog.segmented.LogSegmentPath; +import org.apache.ratis.server.raftlog.segmented.LogSegmentStartEnd; +import org.apache.ratis.server.raftlog.segmented.SegmentedRaftLogInputStream; +import 
org.apache.ratis.server.raftlog.segmented.SegmentedRaftLogOutputStream; +import org.apache.ratis.util.Preconditions; +import org.apache.ratis.util.SizeInBytes; +import picocli.CommandLine; + + +/** + * Tool to omit a raft log in a ratis segment file. + */ [email protected]( + name = "skip-ratis-transaction", + aliases = "srt", + description = "CLI to omit a raft log in a ratis segment file. The raft log at the index specified " + + "is replaced with an EchoOM command (which is a dummy command). It is an offline command " + + "i.e., doesn't require OM to be running. " + + "The command should be run for the same transaction on all 3 OMs only when they all OMs are crashing " + + "while applying the same transaction. If there is only one OM that is crashing and " + + "other OMs have executed the log successfully, then the DB should manually copied from one of the good OMs " + + "to the crashing OM instead.", + mixinStandardHelpOptions = true, + versionProvider = HddsVersionProvider.class +) +public class OMRatisLogRepair extends RepairTool { + + @CommandLine.Option(names = {"-s", "--segment-path", "--segmentPath"}, + description = "Path of the input segment file") + private File segmentFile; + + @CommandLine.Option(names = {"-d", "--ratis-log-dir", "--ratisLogDir"}, + description = "Path of the ratis log directory") + private File logDir; + + @CommandLine.Option(names = {"-b", "--backup"}, + required = true, + description = "Path to put the backup of the original repaired segment file") + private File backupPath; + + @CommandLine.Option(names = {"--index"}, + required = true, + description = "Index of the failing transaction that should be removed") + private long index; + + @Override + public void execute() throws Exception { + + if (segmentFile == null && logDir == null) { + throw new IllegalArgumentException("Path to either a segment-file or ratis-log-dir must be provided."); + } + if (segmentFile == null) { + segmentFile = findSegmentFileContainingIndex(); + } + + 
if (segmentFile.toPath().equals(backupPath.toPath())) { + throw new IOException("Backup path cannot be same as segment file path."); + } + + LogSegmentPath pi = LogSegmentPath.matchLogSegment(this.segmentFile.toPath()); + if (pi == null) { + throw new IOException("Invalid Segment File"); + } + + if (!segmentFile.exists()) { + throw new IOException("Error: Source segment file \"" + segmentFile + "\" does not exist."); + } + if (backupPath.exists()) { + throw new IOException("Error: Backup file for segment file \"" + backupPath + "\" already exists."); + } + try { + info("Taking back up of Raft Log file: " + this.segmentFile.getAbsolutePath() + " to location: " + backupPath); + if (!isDryRun()) { + Files.copy(segmentFile.toPath(), backupPath.toPath()); + } + info("File backed-up successfully!"); + } catch (IOException ex) { + throw new IOException("Error: Failed to take backup of the file. Exception: " + ex, ex); + } + + String tempOutput = segmentFile.getAbsolutePath() + ".skr.output"; + File outputFile = createOutputFile(tempOutput); + + info("Processing Raft Log file: " + this.segmentFile.getAbsolutePath() + " size:" + this.segmentFile.length()); + SegmentedRaftLogOutputStream outputStream = null; + SegmentedRaftLogInputStream logInputStream = null; + + try { + logInputStream = getInputStream(pi); + if (!isDryRun()) { + outputStream = new SegmentedRaftLogOutputStream(outputFile, false, + 1024, 1024, ByteBuffer.allocateDirect(SizeInBytes.valueOf("8MB").getSizeInt())); + } + + RaftProtos.LogEntryProto next; + for (RaftProtos.LogEntryProto prev = null; (next = logInputStream.nextEntry()) != null; prev = next) { + if (prev != null) { + Preconditions.assertTrue(next.getIndex() == prev.getIndex() + 1L, + "gap between entry %s and entry %s", prev, next); + } + + if (next.getIndex() != index && !isDryRun()) { + // all other logs will be written as it is + outputStream.write(next); + outputStream.flush(); + info("Copied raft log for index (" + next.getIndex() + ")."); + } 
else { + // replace the transaction with a dummy OmEcho operation + OzoneManagerProtocolProtos.OMRequest oldRequest = OMRatisHelper + .convertByteStringToOMRequest(next.getStateMachineLogEntry().getLogData()); + OzoneManagerProtocolProtos.OMRequest.Builder newRequest = OzoneManagerProtocolProtos.OMRequest.newBuilder() + .setCmdType(EchoRPC) + .setClientId(oldRequest.getClientId()) + .setEchoRPCRequest(OzoneManagerProtocolProtos.EchoRPCRequest.newBuilder().build()); + + if (oldRequest.hasUserInfo()) { + newRequest.setUserInfo(oldRequest.getUserInfo()); + } + if (oldRequest.hasTraceID()) { + newRequest.setTraceID(oldRequest.getTraceID()); + } + if (oldRequest.hasLayoutVersion()) { + newRequest.setLayoutVersion(oldRequest.getLayoutVersion()); + } + if (oldRequest.hasVersion()) { + newRequest.setVersion(oldRequest.getVersion()); + } Review Comment: Do we need to set these? ########## hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/OMRatisLogRepair.java: ########## @@ -0,0 +1,300 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.ozone.repair.om; + +import static org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.Type.EchoRPC; + +import java.io.File; +import java.io.IOException; +import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; +import java.nio.ByteBuffer; +import java.nio.file.Files; +import java.nio.file.StandardCopyOption; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.apache.hadoop.hdds.cli.HddsVersionProvider; +import org.apache.hadoop.ozone.om.helpers.OMRatisHelper; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; +import org.apache.hadoop.ozone.repair.RepairTool; +import org.apache.ratis.proto.RaftProtos; +import org.apache.ratis.server.metrics.SegmentedRaftLogMetrics; +import org.apache.ratis.server.raftlog.segmented.LogSegmentPath; +import org.apache.ratis.server.raftlog.segmented.LogSegmentStartEnd; +import org.apache.ratis.server.raftlog.segmented.SegmentedRaftLogInputStream; +import org.apache.ratis.server.raftlog.segmented.SegmentedRaftLogOutputStream; +import org.apache.ratis.util.Preconditions; +import org.apache.ratis.util.SizeInBytes; +import picocli.CommandLine; + + +/** + * Tool to omit a raft log in a ratis segment file. + */ [email protected]( + name = "skip-ratis-transaction", + aliases = "srt", + description = "CLI to omit a raft log in a ratis segment file. The raft log at the index specified " + + "is replaced with an EchoOM command (which is a dummy command). It is an offline command " + + "i.e., doesn't require OM to be running. " + + "The command should be run for the same transaction on all 3 OMs only when they all OMs are crashing " + + "while applying the same transaction. 
If there is only one OM that is crashing and " + + "other OMs have executed the log successfully, then the DB should manually copied from one of the good OMs " + + "to the crashing OM instead.", + mixinStandardHelpOptions = true, + versionProvider = HddsVersionProvider.class +) +public class OMRatisLogRepair extends RepairTool { + + @CommandLine.Option(names = {"-s", "--segment-path", "--segmentPath"}, + description = "Path of the input segment file") + private File segmentFile; + + @CommandLine.Option(names = {"-d", "--ratis-log-dir", "--ratisLogDir"}, + description = "Path of the ratis log directory") + private File logDir; + + @CommandLine.Option(names = {"-b", "--backup"}, + required = true, + description = "Path to put the backup of the original repaired segment file") Review Comment: The implementation is currently taking a file name but I think it's easier to pass a directory here and let the command copy the file with the same name. We should still check that the final destination file doesn't exist before proceeding, like the command currently does. ########## hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/OMRatisLogRepair.java: ########## @@ -0,0 +1,300 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.repair.om; + +import static org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.Type.EchoRPC; + +import java.io.File; +import java.io.IOException; +import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; +import java.nio.ByteBuffer; +import java.nio.file.Files; +import java.nio.file.StandardCopyOption; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.apache.hadoop.hdds.cli.HddsVersionProvider; +import org.apache.hadoop.ozone.om.helpers.OMRatisHelper; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; +import org.apache.hadoop.ozone.repair.RepairTool; +import org.apache.ratis.proto.RaftProtos; +import org.apache.ratis.server.metrics.SegmentedRaftLogMetrics; +import org.apache.ratis.server.raftlog.segmented.LogSegmentPath; +import org.apache.ratis.server.raftlog.segmented.LogSegmentStartEnd; +import org.apache.ratis.server.raftlog.segmented.SegmentedRaftLogInputStream; +import org.apache.ratis.server.raftlog.segmented.SegmentedRaftLogOutputStream; +import org.apache.ratis.util.Preconditions; +import org.apache.ratis.util.SizeInBytes; +import picocli.CommandLine; + + +/** + * Tool to omit a raft log in a ratis segment file. + */ [email protected]( + name = "skip-ratis-transaction", + aliases = "srt", + description = "CLI to omit a raft log in a ratis segment file. The raft log at the index specified " + + "is replaced with an EchoOM command (which is a dummy command). It is an offline command " + + "i.e., doesn't require OM to be running. " + + "The command should be run for the same transaction on all 3 OMs only when they all OMs are crashing " + + "while applying the same transaction. 
If there is only one OM that is crashing and " + + "other OMs have executed the log successfully, then the DB should manually copied from one of the good OMs " + + "to the crashing OM instead.", + mixinStandardHelpOptions = true, + versionProvider = HddsVersionProvider.class +) +public class OMRatisLogRepair extends RepairTool { + + @CommandLine.Option(names = {"-s", "--segment-path", "--segmentPath"}, + description = "Path of the input segment file") + private File segmentFile; + + @CommandLine.Option(names = {"-d", "--ratis-log-dir", "--ratisLogDir"}, + description = "Path of the ratis log directory") + private File logDir; + + @CommandLine.Option(names = {"-b", "--backup"}, + required = true, + description = "Path to put the backup of the original repaired segment file") + private File backupPath; + + @CommandLine.Option(names = {"--index"}, + required = true, + description = "Index of the failing transaction that should be removed") + private long index; + + @Override + public void execute() throws Exception { + + if (segmentFile == null && logDir == null) { Review Comment: We should create an [argument group](https://picocli.info/#_mutually_exclusive_options) for these to make them mutually exclusive. ########## hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/OMRatisLogRepair.java: ########## @@ -0,0 +1,300 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.repair.om; + +import static org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.Type.EchoRPC; + +import java.io.File; +import java.io.IOException; +import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; +import java.nio.ByteBuffer; +import java.nio.file.Files; +import java.nio.file.StandardCopyOption; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.apache.hadoop.hdds.cli.HddsVersionProvider; +import org.apache.hadoop.ozone.om.helpers.OMRatisHelper; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; +import org.apache.hadoop.ozone.repair.RepairTool; +import org.apache.ratis.proto.RaftProtos; +import org.apache.ratis.server.metrics.SegmentedRaftLogMetrics; +import org.apache.ratis.server.raftlog.segmented.LogSegmentPath; +import org.apache.ratis.server.raftlog.segmented.LogSegmentStartEnd; +import org.apache.ratis.server.raftlog.segmented.SegmentedRaftLogInputStream; +import org.apache.ratis.server.raftlog.segmented.SegmentedRaftLogOutputStream; +import org.apache.ratis.util.Preconditions; +import org.apache.ratis.util.SizeInBytes; +import picocli.CommandLine; + + +/** + * Tool to omit a raft log in a ratis segment file. + */ [email protected]( + name = "skip-ratis-transaction", + aliases = "srt", + description = "CLI to omit a raft log in a ratis segment file. The raft log at the index specified " + + "is replaced with an EchoOM command (which is a dummy command). 
It is an offline command " + + "i.e., doesn't require OM to be running. " + + "The command should be run for the same transaction on all 3 OMs only when they all OMs are crashing " + + "while applying the same transaction. If there is only one OM that is crashing and " + + "other OMs have executed the log successfully, then the DB should manually copied from one of the good OMs " + + "to the crashing OM instead.", + mixinStandardHelpOptions = true, + versionProvider = HddsVersionProvider.class +) +public class OMRatisLogRepair extends RepairTool { + + @CommandLine.Option(names = {"-s", "--segment-path", "--segmentPath"}, Review Comment: I don't think we need camel case for new command line flags. Usually we use this pattern only for backwards compat with commands that were camel case and we want kebab case. ########## hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/OMRatisLogRepair.java: ########## @@ -0,0 +1,300 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.ozone.repair.om; + +import static org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.Type.EchoRPC; + +import java.io.File; +import java.io.IOException; +import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; +import java.nio.ByteBuffer; +import java.nio.file.Files; +import java.nio.file.StandardCopyOption; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.apache.hadoop.hdds.cli.HddsVersionProvider; +import org.apache.hadoop.ozone.om.helpers.OMRatisHelper; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; +import org.apache.hadoop.ozone.repair.RepairTool; +import org.apache.ratis.proto.RaftProtos; +import org.apache.ratis.server.metrics.SegmentedRaftLogMetrics; +import org.apache.ratis.server.raftlog.segmented.LogSegmentPath; +import org.apache.ratis.server.raftlog.segmented.LogSegmentStartEnd; +import org.apache.ratis.server.raftlog.segmented.SegmentedRaftLogInputStream; +import org.apache.ratis.server.raftlog.segmented.SegmentedRaftLogOutputStream; +import org.apache.ratis.util.Preconditions; +import org.apache.ratis.util.SizeInBytes; +import picocli.CommandLine; + + +/** + * Tool to omit a raft log in a ratis segment file. + */ [email protected]( + name = "skip-ratis-transaction", + aliases = "srt", + description = "CLI to omit a raft log in a ratis segment file. The raft log at the index specified " + + "is replaced with an EchoOM command (which is a dummy command). It is an offline command " + + "i.e., doesn't require OM to be running. " + + "The command should be run for the same transaction on all 3 OMs only when they all OMs are crashing " + + "while applying the same transaction. 
If there is only one OM that is crashing and " + + "other OMs have executed the log successfully, then the DB should manually copied from one of the good OMs " + + "to the crashing OM instead.", + mixinStandardHelpOptions = true, + versionProvider = HddsVersionProvider.class +) +public class OMRatisLogRepair extends RepairTool { + + @CommandLine.Option(names = {"-s", "--segment-path", "--segmentPath"}, + description = "Path of the input segment file") + private File segmentFile; + + @CommandLine.Option(names = {"-d", "--ratis-log-dir", "--ratisLogDir"}, + description = "Path of the ratis log directory") + private File logDir; + + @CommandLine.Option(names = {"-b", "--backup"}, + required = true, + description = "Path to put the backup of the original repaired segment file") Review Comment: ```suggestion description = "Directory to put the backup of the original segment file before the repair") ``` ########## hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/OMRatisLogRepair.java: ########## @@ -0,0 +1,300 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.ozone.repair.om; + +import static org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.Type.EchoRPC; + +import java.io.File; +import java.io.IOException; +import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; +import java.nio.ByteBuffer; +import java.nio.file.Files; +import java.nio.file.StandardCopyOption; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.apache.hadoop.hdds.cli.HddsVersionProvider; +import org.apache.hadoop.ozone.om.helpers.OMRatisHelper; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; +import org.apache.hadoop.ozone.repair.RepairTool; +import org.apache.ratis.proto.RaftProtos; +import org.apache.ratis.server.metrics.SegmentedRaftLogMetrics; +import org.apache.ratis.server.raftlog.segmented.LogSegmentPath; +import org.apache.ratis.server.raftlog.segmented.LogSegmentStartEnd; +import org.apache.ratis.server.raftlog.segmented.SegmentedRaftLogInputStream; +import org.apache.ratis.server.raftlog.segmented.SegmentedRaftLogOutputStream; +import org.apache.ratis.util.Preconditions; +import org.apache.ratis.util.SizeInBytes; +import picocli.CommandLine; + + +/** + * Tool to omit a raft log in a ratis segment file. + */ [email protected]( + name = "skip-ratis-transaction", + aliases = "srt", + description = "CLI to omit a raft log in a ratis segment file. The raft log at the index specified " + + "is replaced with an EchoOM command (which is a dummy command). It is an offline command " + + "i.e., doesn't require OM to be running. " + + "The command should be run for the same transaction on all 3 OMs only when they all OMs are crashing " + + "while applying the same transaction. 
If there is only one OM that is crashing and " + + "other OMs have executed the log successfully, then the DB should manually copied from one of the good OMs " + + "to the crashing OM instead.", + mixinStandardHelpOptions = true, + versionProvider = HddsVersionProvider.class +) +public class OMRatisLogRepair extends RepairTool { + + @CommandLine.Option(names = {"-s", "--segment-path", "--segmentPath"}, + description = "Path of the input segment file") + private File segmentFile; + + @CommandLine.Option(names = {"-d", "--ratis-log-dir", "--ratisLogDir"}, + description = "Path of the ratis log directory") + private File logDir; + + @CommandLine.Option(names = {"-b", "--backup"}, + required = true, + description = "Path to put the backup of the original repaired segment file") + private File backupPath; + + @CommandLine.Option(names = {"--index"}, + required = true, + description = "Index of the failing transaction that should be removed") + private long index; + + @Override + public void execute() throws Exception { + + if (segmentFile == null && logDir == null) { + throw new IllegalArgumentException("Path to either a segment-file or ratis-log-dir must be provided."); + } + if (segmentFile == null) { + segmentFile = findSegmentFileContainingIndex(); + } + + if (segmentFile.toPath().equals(backupPath.toPath())) { + throw new IOException("Backup path cannot be same as segment file path."); + } + + LogSegmentPath pi = LogSegmentPath.matchLogSegment(this.segmentFile.toPath()); + if (pi == null) { + throw new IOException("Invalid Segment File"); + } + + if (!segmentFile.exists()) { + throw new IOException("Error: Source segment file \"" + segmentFile + "\" does not exist."); + } + if (backupPath.exists()) { + throw new IOException("Error: Backup file for segment file \"" + backupPath + "\" already exists."); + } + try { + info("Taking back up of Raft Log file: " + this.segmentFile.getAbsolutePath() + " to location: " + backupPath); + if (!isDryRun()) { + 
Files.copy(segmentFile.toPath(), backupPath.toPath()); + } + info("File backed-up successfully!"); + } catch (IOException ex) { + throw new IOException("Error: Failed to take backup of the file. Exception: " + ex, ex); + } + + String tempOutput = segmentFile.getAbsolutePath() + ".skr.output"; + File outputFile = createOutputFile(tempOutput); + + info("Processing Raft Log file: " + this.segmentFile.getAbsolutePath() + " size:" + this.segmentFile.length()); + SegmentedRaftLogOutputStream outputStream = null; + SegmentedRaftLogInputStream logInputStream = null; + + try { + logInputStream = getInputStream(pi); + if (!isDryRun()) { + outputStream = new SegmentedRaftLogOutputStream(outputFile, false, + 1024, 1024, ByteBuffer.allocateDirect(SizeInBytes.valueOf("8MB").getSizeInt())); + } + + RaftProtos.LogEntryProto next; + for (RaftProtos.LogEntryProto prev = null; (next = logInputStream.nextEntry()) != null; prev = next) { + if (prev != null) { + Preconditions.assertTrue(next.getIndex() == prev.getIndex() + 1L, + "gap between entry %s and entry %s", prev, next); + } + + if (next.getIndex() != index && !isDryRun()) { + // all other logs will be written as it is + outputStream.write(next); + outputStream.flush(); + info("Copied raft log for index (" + next.getIndex() + ")."); + } else { + // replace the transaction with a dummy OmEcho operation + OzoneManagerProtocolProtos.OMRequest oldRequest = OMRatisHelper + .convertByteStringToOMRequest(next.getStateMachineLogEntry().getLogData()); + OzoneManagerProtocolProtos.OMRequest.Builder newRequest = OzoneManagerProtocolProtos.OMRequest.newBuilder() + .setCmdType(EchoRPC) + .setClientId(oldRequest.getClientId()) + .setEchoRPCRequest(OzoneManagerProtocolProtos.EchoRPCRequest.newBuilder().build()); + + if (oldRequest.hasUserInfo()) { + newRequest.setUserInfo(oldRequest.getUserInfo()); + } + if (oldRequest.hasTraceID()) { + newRequest.setTraceID(oldRequest.getTraceID()); + } + if (oldRequest.hasLayoutVersion()) { + 
newRequest.setLayoutVersion(oldRequest.getLayoutVersion()); + } + if (oldRequest.hasVersion()) { + newRequest.setVersion(oldRequest.getVersion()); + } + + RaftProtos.StateMachineLogEntryProto oldEntry = next.getStateMachineLogEntry(); + RaftProtos.StateMachineLogEntryProto.Builder newEntry = + RaftProtos.StateMachineLogEntryProto.newBuilder() + .setCallId(oldEntry.getCallId()) + .setClientId(oldEntry.getClientId()) + .setType(oldEntry.getType()) + .setLogData(OMRatisHelper.convertRequestToByteString(newRequest.build())); + if (oldEntry.hasStateMachineEntry()) { + newEntry.setStateMachineEntry(oldEntry.getStateMachineEntry()); + } + + RaftProtos.LogEntryProto newLogEntry = RaftProtos.LogEntryProto.newBuilder() + .setTerm(next.getTerm()) + .setIndex(next.getIndex()) + .setStateMachineLogEntry(newEntry) + .build(); + + if (!isDryRun()) { + outputStream.write(newLogEntry); + outputStream.flush(); + } + info("Replaced {" + oldRequest + "} with EchoRPC command at index " + + next.getIndex()); + } + } + + if (!isDryRun()) { + outputStream.flush(); + outputStream.close(); + Files.move(outputFile.toPath(), segmentFile.toPath(), StandardCopyOption.REPLACE_EXISTING); + info("Moved temporary output file to correct raft log location : " + segmentFile.toPath()); + } + + } catch (Exception ex) { + error("Exception: " + ex); + } finally { + if (logInputStream != null) { + logInputStream.close(); + } + if (outputStream != null) { + outputStream.flush(); + outputStream.close(); + } + if (isDryRun()) { + boolean success = outputFile.delete(); + if (!success) { + error("Error: Could not delete temporary output file \"" + outputFile + "\"."); + } + } + } + } + + private File createOutputFile(String name) throws IOException { + File temp = new File(name); + try { + if (temp.exists()) { + error("Warning: Temporary output file already exists - " + temp.getAbsolutePath() + + ". 
Trying to delete it and create a new one."); + boolean success = temp.delete(); + if (!success) { + throw new IOException("Unable to delete old temporary file."); + } + } + boolean success = temp.createNewFile(); + if (success) { + info("Temporary output file created successfully: " + temp.getAbsolutePath()); + } else { + throw new IOException("createNewFile() failed."); + } + } catch (Exception e) { + throw new IOException("Error: Failed to create temporary output file - " + temp.getAbsolutePath(), e); + } + return temp; + } + + private SegmentedRaftLogInputStream getInputStream(LogSegmentPath pi) { + try { + Class<?> logInputStreamClass = + Class.forName("org.apache.ratis.server.raftlog.segmented.SegmentedRaftLogInputStream"); + Constructor<?> constructor = logInputStreamClass.getDeclaredConstructor(File.class, LogSegmentStartEnd.class, + SizeInBytes.class, SegmentedRaftLogMetrics.class); + constructor.setAccessible(true); + SegmentedRaftLogInputStream inputStream = + (SegmentedRaftLogInputStream) constructor.newInstance(segmentFile, pi.getStartEnd(), + SizeInBytes.valueOf("32MB"), null); + if (inputStream == null) { + throw new RuntimeException("logInputStream is null. Constructor might have failed."); + } + return inputStream; + + } catch (ClassNotFoundException | NoSuchMethodException | SecurityException | + InvocationTargetException | InstantiationException | IllegalAccessException ex) { + error("Exception while trying to get input stream for segment file : " + ex); + throw new RuntimeException(ex); Review Comment: I think we only want to throw the exception, otherwise the error gets printed twice. ########## hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/OMRatisLogRepair.java: ########## @@ -0,0 +1,300 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.repair.om; + +import static org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.Type.EchoRPC; + +import java.io.File; +import java.io.IOException; +import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; +import java.nio.ByteBuffer; +import java.nio.file.Files; +import java.nio.file.StandardCopyOption; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.apache.hadoop.hdds.cli.HddsVersionProvider; +import org.apache.hadoop.ozone.om.helpers.OMRatisHelper; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; +import org.apache.hadoop.ozone.repair.RepairTool; +import org.apache.ratis.proto.RaftProtos; +import org.apache.ratis.server.metrics.SegmentedRaftLogMetrics; +import org.apache.ratis.server.raftlog.segmented.LogSegmentPath; +import org.apache.ratis.server.raftlog.segmented.LogSegmentStartEnd; +import org.apache.ratis.server.raftlog.segmented.SegmentedRaftLogInputStream; +import org.apache.ratis.server.raftlog.segmented.SegmentedRaftLogOutputStream; +import org.apache.ratis.util.Preconditions; +import org.apache.ratis.util.SizeInBytes; +import picocli.CommandLine; + + +/** + * Tool to omit a raft log in a ratis segment file. 
+ */ [email protected]( + name = "skip-ratis-transaction", + aliases = "srt", + description = "CLI to omit a raft log in a ratis segment file. The raft log at the index specified " + + "is replaced with an EchoOM command (which is a dummy command). It is an offline command " + + "i.e., doesn't require OM to be running. " + + "The command should be run for the same transaction on all 3 OMs only when they all OMs are crashing " + + "while applying the same transaction. If there is only one OM that is crashing and " + + "other OMs have executed the log successfully, then the DB should manually copied from one of the good OMs " + + "to the crashing OM instead.", + mixinStandardHelpOptions = true, + versionProvider = HddsVersionProvider.class +) +public class OMRatisLogRepair extends RepairTool { + + @CommandLine.Option(names = {"-s", "--segment-path", "--segmentPath"}, + description = "Path of the input segment file") + private File segmentFile; + + @CommandLine.Option(names = {"-d", "--ratis-log-dir", "--ratisLogDir"}, + description = "Path of the ratis log directory") + private File logDir; + + @CommandLine.Option(names = {"-b", "--backup"}, + required = true, + description = "Path to put the backup of the original repaired segment file") + private File backupPath; + + @CommandLine.Option(names = {"--index"}, + required = true, + description = "Index of the failing transaction that should be removed") + private long index; + + @Override + public void execute() throws Exception { + + if (segmentFile == null && logDir == null) { + throw new IllegalArgumentException("Path to either a segment-file or ratis-log-dir must be provided."); + } + if (segmentFile == null) { + segmentFile = findSegmentFileContainingIndex(); + } + + if (segmentFile.toPath().equals(backupPath.toPath())) { + throw new IOException("Backup path cannot be same as segment file path."); + } + + LogSegmentPath pi = LogSegmentPath.matchLogSegment(this.segmentFile.toPath()); + if (pi == null) { + throw new 
IOException("Invalid Segment File"); + } + + if (!segmentFile.exists()) { + throw new IOException("Error: Source segment file \"" + segmentFile + "\" does not exist."); + } + if (backupPath.exists()) { + throw new IOException("Error: Backup file for segment file \"" + backupPath + "\" already exists."); + } + try { + info("Taking back up of Raft Log file: " + this.segmentFile.getAbsolutePath() + " to location: " + backupPath); + if (!isDryRun()) { + Files.copy(segmentFile.toPath(), backupPath.toPath()); + } + info("File backed-up successfully!"); + } catch (IOException ex) { + throw new IOException("Error: Failed to take backup of the file. Exception: " + ex, ex); + } + + String tempOutput = segmentFile.getAbsolutePath() + ".skr.output"; + File outputFile = createOutputFile(tempOutput); + + info("Processing Raft Log file: " + this.segmentFile.getAbsolutePath() + " size:" + this.segmentFile.length()); + SegmentedRaftLogOutputStream outputStream = null; + SegmentedRaftLogInputStream logInputStream = null; + + try { + logInputStream = getInputStream(pi); + if (!isDryRun()) { + outputStream = new SegmentedRaftLogOutputStream(outputFile, false, + 1024, 1024, ByteBuffer.allocateDirect(SizeInBytes.valueOf("8MB").getSizeInt())); + } + + RaftProtos.LogEntryProto next; + for (RaftProtos.LogEntryProto prev = null; (next = logInputStream.nextEntry()) != null; prev = next) { + if (prev != null) { + Preconditions.assertTrue(next.getIndex() == prev.getIndex() + 1L, + "gap between entry %s and entry %s", prev, next); + } + + if (next.getIndex() != index && !isDryRun()) { + // all other logs will be written as it is + outputStream.write(next); + outputStream.flush(); + info("Copied raft log for index (" + next.getIndex() + ")."); + } else { + // replace the transaction with a dummy OmEcho operation + OzoneManagerProtocolProtos.OMRequest oldRequest = OMRatisHelper + .convertByteStringToOMRequest(next.getStateMachineLogEntry().getLogData()); + 
OzoneManagerProtocolProtos.OMRequest.Builder newRequest = OzoneManagerProtocolProtos.OMRequest.newBuilder() + .setCmdType(EchoRPC) + .setClientId(oldRequest.getClientId()) + .setEchoRPCRequest(OzoneManagerProtocolProtos.EchoRPCRequest.newBuilder().build()); + + if (oldRequest.hasUserInfo()) { + newRequest.setUserInfo(oldRequest.getUserInfo()); + } + if (oldRequest.hasTraceID()) { + newRequest.setTraceID(oldRequest.getTraceID()); + } + if (oldRequest.hasLayoutVersion()) { + newRequest.setLayoutVersion(oldRequest.getLayoutVersion()); + } + if (oldRequest.hasVersion()) { + newRequest.setVersion(oldRequest.getVersion()); + } + + RaftProtos.StateMachineLogEntryProto oldEntry = next.getStateMachineLogEntry(); + RaftProtos.StateMachineLogEntryProto.Builder newEntry = + RaftProtos.StateMachineLogEntryProto.newBuilder() + .setCallId(oldEntry.getCallId()) + .setClientId(oldEntry.getClientId()) + .setType(oldEntry.getType()) + .setLogData(OMRatisHelper.convertRequestToByteString(newRequest.build())); + if (oldEntry.hasStateMachineEntry()) { + newEntry.setStateMachineEntry(oldEntry.getStateMachineEntry()); + } + + RaftProtos.LogEntryProto newLogEntry = RaftProtos.LogEntryProto.newBuilder() + .setTerm(next.getTerm()) + .setIndex(next.getIndex()) + .setStateMachineLogEntry(newEntry) + .build(); + + if (!isDryRun()) { + outputStream.write(newLogEntry); + outputStream.flush(); + } + info("Replaced {" + oldRequest + "} with EchoRPC command at index " + + next.getIndex()); + } + } + + if (!isDryRun()) { + outputStream.flush(); + outputStream.close(); + Files.move(outputFile.toPath(), segmentFile.toPath(), StandardCopyOption.REPLACE_EXISTING); + info("Moved temporary output file to correct raft log location : " + segmentFile.toPath()); + } + + } catch (Exception ex) { + error("Exception: " + ex); + } finally { + if (logInputStream != null) { + logInputStream.close(); + } + if (outputStream != null) { + outputStream.flush(); + outputStream.close(); + } + if (isDryRun()) { + boolean 
success = outputFile.delete(); + if (!success) { + error("Error: Could not delete temporary output file \"" + outputFile + "\"."); + } + } + } + } + + private File createOutputFile(String name) throws IOException { + File temp = new File(name); + try { + if (temp.exists()) { + error("Warning: Temporary output file already exists - " + temp.getAbsolutePath() + + ". Trying to delete it and create a new one."); + boolean success = temp.delete(); + if (!success) { + throw new IOException("Unable to delete old temporary file."); + } + } + boolean success = temp.createNewFile(); + if (success) { + info("Temporary output file created successfully: " + temp.getAbsolutePath()); + } else { + throw new IOException("createNewFile() failed."); + } + } catch (Exception e) { + throw new IOException("Error: Failed to create temporary output file - " + temp.getAbsolutePath(), e); + } + return temp; + } + + private SegmentedRaftLogInputStream getInputStream(LogSegmentPath pi) { + try { + Class<?> logInputStreamClass = + Class.forName("org.apache.ratis.server.raftlog.segmented.SegmentedRaftLogInputStream"); + Constructor<?> constructor = logInputStreamClass.getDeclaredConstructor(File.class, LogSegmentStartEnd.class, + SizeInBytes.class, SegmentedRaftLogMetrics.class); + constructor.setAccessible(true); + SegmentedRaftLogInputStream inputStream = + (SegmentedRaftLogInputStream) constructor.newInstance(segmentFile, pi.getStartEnd(), + SizeInBytes.valueOf("32MB"), null); + if (inputStream == null) { + throw new RuntimeException("logInputStream is null. 
Constructor might have failed."); + } + return inputStream; + + } catch (ClassNotFoundException | NoSuchMethodException | SecurityException | + InvocationTargetException | InstantiationException | IllegalAccessException ex) { + error("Exception while trying to get input stream for segment file : " + ex); + throw new RuntimeException(ex); + } + } + + private File findSegmentFileContainingIndex() { Review Comment: This method is fine for now, but let's file a Ratis Jira to see if we can make the getters for log start and end index in `LogSegmentStartEnd` public, so we could use `LogSegmentPath#getStartEnd` instead. ########## hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/OMRatisLogRepair.java: ########## @@ -0,0 +1,300 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.ozone.repair.om; + +import static org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.Type.EchoRPC; + +import java.io.File; +import java.io.IOException; +import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; +import java.nio.ByteBuffer; +import java.nio.file.Files; +import java.nio.file.StandardCopyOption; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.apache.hadoop.hdds.cli.HddsVersionProvider; +import org.apache.hadoop.ozone.om.helpers.OMRatisHelper; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; +import org.apache.hadoop.ozone.repair.RepairTool; +import org.apache.ratis.proto.RaftProtos; +import org.apache.ratis.server.metrics.SegmentedRaftLogMetrics; +import org.apache.ratis.server.raftlog.segmented.LogSegmentPath; +import org.apache.ratis.server.raftlog.segmented.LogSegmentStartEnd; +import org.apache.ratis.server.raftlog.segmented.SegmentedRaftLogInputStream; +import org.apache.ratis.server.raftlog.segmented.SegmentedRaftLogOutputStream; +import org.apache.ratis.util.Preconditions; +import org.apache.ratis.util.SizeInBytes; +import picocli.CommandLine; + + +/** + * Tool to omit a raft log in a ratis segment file. + */ [email protected]( + name = "skip-ratis-transaction", + aliases = "srt", + description = "CLI to omit a raft log in a ratis segment file. The raft log at the index specified " + + "is replaced with an EchoOM command (which is a dummy command). It is an offline command " + + "i.e., doesn't require OM to be running. " + + "The command should be run for the same transaction on all 3 OMs only when they all OMs are crashing " + + "while applying the same transaction. 
If there is only one OM that is crashing and " + + "other OMs have executed the log successfully, then the DB should manually copied from one of the good OMs " + + "to the crashing OM instead.", + mixinStandardHelpOptions = true, + versionProvider = HddsVersionProvider.class +) +public class OMRatisLogRepair extends RepairTool { + + @CommandLine.Option(names = {"-s", "--segment-path", "--segmentPath"}, + description = "Path of the input segment file") + private File segmentFile; + + @CommandLine.Option(names = {"-d", "--ratis-log-dir", "--ratisLogDir"}, + description = "Path of the ratis log directory") + private File logDir; + + @CommandLine.Option(names = {"-b", "--backup"}, + required = true, + description = "Path to put the backup of the original repaired segment file") + private File backupPath; + + @CommandLine.Option(names = {"--index"}, + required = true, + description = "Index of the failing transaction that should be removed") + private long index; + + @Override + public void execute() throws Exception { + + if (segmentFile == null && logDir == null) { + throw new IllegalArgumentException("Path to either a segment-file or ratis-log-dir must be provided."); + } + if (segmentFile == null) { + segmentFile = findSegmentFileContainingIndex(); + } + + if (segmentFile.toPath().equals(backupPath.toPath())) { + throw new IOException("Backup path cannot be same as segment file path."); + } + + LogSegmentPath pi = LogSegmentPath.matchLogSegment(this.segmentFile.toPath()); + if (pi == null) { + throw new IOException("Invalid Segment File"); + } + + if (!segmentFile.exists()) { + throw new IOException("Error: Source segment file \"" + segmentFile + "\" does not exist."); + } + if (backupPath.exists()) { + throw new IOException("Error: Backup file for segment file \"" + backupPath + "\" already exists."); + } + try { + info("Taking back up of Raft Log file: " + this.segmentFile.getAbsolutePath() + " to location: " + backupPath); + if (!isDryRun()) { + 
Files.copy(segmentFile.toPath(), backupPath.toPath()); + } + info("File backed-up successfully!"); + } catch (IOException ex) { + throw new IOException("Error: Failed to take backup of the file. Exception: " + ex, ex); + } + + String tempOutput = segmentFile.getAbsolutePath() + ".skr.output"; + File outputFile = createOutputFile(tempOutput); + + info("Processing Raft Log file: " + this.segmentFile.getAbsolutePath() + " size:" + this.segmentFile.length()); + SegmentedRaftLogOutputStream outputStream = null; + SegmentedRaftLogInputStream logInputStream = null; + + try { + logInputStream = getInputStream(pi); + if (!isDryRun()) { + outputStream = new SegmentedRaftLogOutputStream(outputFile, false, + 1024, 1024, ByteBuffer.allocateDirect(SizeInBytes.valueOf("8MB").getSizeInt())); + } + + RaftProtos.LogEntryProto next; + for (RaftProtos.LogEntryProto prev = null; (next = logInputStream.nextEntry()) != null; prev = next) { + if (prev != null) { + Preconditions.assertTrue(next.getIndex() == prev.getIndex() + 1L, + "gap between entry %s and entry %s", prev, next); + } + + if (next.getIndex() != index && !isDryRun()) { + // all other logs will be written as it is + outputStream.write(next); + outputStream.flush(); + info("Copied raft log for index (" + next.getIndex() + ")."); + } else { + // replace the transaction with a dummy OmEcho operation + OzoneManagerProtocolProtos.OMRequest oldRequest = OMRatisHelper + .convertByteStringToOMRequest(next.getStateMachineLogEntry().getLogData()); + OzoneManagerProtocolProtos.OMRequest.Builder newRequest = OzoneManagerProtocolProtos.OMRequest.newBuilder() + .setCmdType(EchoRPC) + .setClientId(oldRequest.getClientId()) + .setEchoRPCRequest(OzoneManagerProtocolProtos.EchoRPCRequest.newBuilder().build()); + + if (oldRequest.hasUserInfo()) { + newRequest.setUserInfo(oldRequest.getUserInfo()); + } + if (oldRequest.hasTraceID()) { + newRequest.setTraceID(oldRequest.getTraceID()); + } + if (oldRequest.hasLayoutVersion()) { + 
newRequest.setLayoutVersion(oldRequest.getLayoutVersion()); + } + if (oldRequest.hasVersion()) { + newRequest.setVersion(oldRequest.getVersion()); + } + + RaftProtos.StateMachineLogEntryProto oldEntry = next.getStateMachineLogEntry(); + RaftProtos.StateMachineLogEntryProto.Builder newEntry = + RaftProtos.StateMachineLogEntryProto.newBuilder() + .setCallId(oldEntry.getCallId()) + .setClientId(oldEntry.getClientId()) + .setType(oldEntry.getType()) + .setLogData(OMRatisHelper.convertRequestToByteString(newRequest.build())); + if (oldEntry.hasStateMachineEntry()) { + newEntry.setStateMachineEntry(oldEntry.getStateMachineEntry()); + } + + RaftProtos.LogEntryProto newLogEntry = RaftProtos.LogEntryProto.newBuilder() + .setTerm(next.getTerm()) + .setIndex(next.getIndex()) + .setStateMachineLogEntry(newEntry) + .build(); + + if (!isDryRun()) { + outputStream.write(newLogEntry); + outputStream.flush(); + } + info("Replaced {" + oldRequest + "} with EchoRPC command at index " + + next.getIndex()); + } + } + + if (!isDryRun()) { + outputStream.flush(); + outputStream.close(); + Files.move(outputFile.toPath(), segmentFile.toPath(), StandardCopyOption.REPLACE_EXISTING); + info("Moved temporary output file to correct raft log location : " + segmentFile.toPath()); + } + + } catch (Exception ex) { + error("Exception: " + ex); + } finally { + if (logInputStream != null) { + logInputStream.close(); + } + if (outputStream != null) { + outputStream.flush(); + outputStream.close(); + } + if (isDryRun()) { + boolean success = outputFile.delete(); + if (!success) { + error("Error: Could not delete temporary output file \"" + outputFile + "\"."); + } + } + } + } + + private File createOutputFile(String name) throws IOException { + File temp = new File(name); + try { + if (temp.exists()) { + error("Warning: Temporary output file already exists - " + temp.getAbsolutePath() + + ". 
Trying to delete it and create a new one."); + boolean success = temp.delete(); + if (!success) { + throw new IOException("Unable to delete old temporary file."); + } + } + boolean success = temp.createNewFile(); + if (success) { + info("Temporary output file created successfully: " + temp.getAbsolutePath()); + } else { + throw new IOException("createNewFile() failed."); + } + } catch (Exception e) { + throw new IOException("Error: Failed to create temporary output file - " + temp.getAbsolutePath(), e); + } + return temp; + } + + private SegmentedRaftLogInputStream getInputStream(LogSegmentPath pi) { Review Comment: Can we use the public [LogSegment#readSegmentFile](https://github.com/apache/ratis/blob/688bbaee7b7b388c0e1eaf49b210a6a3877e4bd4/ratis-server/src/main/java/org/apache/ratis/server/raftlog/segmented/LogSegment.java#L170) instead of this? We would provide a consumer of `LogEntryProto` to read the entries to process. ########## hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/OMRatisLogRepair.java: ########## @@ -0,0 +1,300 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.ozone.repair.om; + +import static org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.Type.EchoRPC; + +import java.io.File; +import java.io.IOException; +import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; +import java.nio.ByteBuffer; +import java.nio.file.Files; +import java.nio.file.StandardCopyOption; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.apache.hadoop.hdds.cli.HddsVersionProvider; +import org.apache.hadoop.ozone.om.helpers.OMRatisHelper; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; +import org.apache.hadoop.ozone.repair.RepairTool; +import org.apache.ratis.proto.RaftProtos; +import org.apache.ratis.server.metrics.SegmentedRaftLogMetrics; +import org.apache.ratis.server.raftlog.segmented.LogSegmentPath; +import org.apache.ratis.server.raftlog.segmented.LogSegmentStartEnd; +import org.apache.ratis.server.raftlog.segmented.SegmentedRaftLogInputStream; +import org.apache.ratis.server.raftlog.segmented.SegmentedRaftLogOutputStream; +import org.apache.ratis.util.Preconditions; +import org.apache.ratis.util.SizeInBytes; +import picocli.CommandLine; + + +/** + * Tool to omit a raft log in a ratis segment file. + */ [email protected]( + name = "skip-ratis-transaction", + aliases = "srt", + description = "CLI to omit a raft log in a ratis segment file. The raft log at the index specified " + + "is replaced with an EchoOM command (which is a dummy command). It is an offline command " + + "i.e., doesn't require OM to be running. " + + "The command should be run for the same transaction on all 3 OMs only when they all OMs are crashing " + + "while applying the same transaction. 
If there is only one OM that is crashing and " + + "other OMs have executed the log successfully, then the DB should manually copied from one of the good OMs " + + "to the crashing OM instead.", + mixinStandardHelpOptions = true, + versionProvider = HddsVersionProvider.class +) +public class OMRatisLogRepair extends RepairTool { + + @CommandLine.Option(names = {"-s", "--segment-path", "--segmentPath"}, + description = "Path of the input segment file") + private File segmentFile; + + @CommandLine.Option(names = {"-d", "--ratis-log-dir", "--ratisLogDir"}, + description = "Path of the ratis log directory") + private File logDir; + + @CommandLine.Option(names = {"-b", "--backup"}, + required = true, + description = "Path to put the backup of the original repaired segment file") + private File backupPath; + + @CommandLine.Option(names = {"--index"}, + required = true, + description = "Index of the failing transaction that should be removed") + private long index; + + @Override + public void execute() throws Exception { + + if (segmentFile == null && logDir == null) { + throw new IllegalArgumentException("Path to either a segment-file or ratis-log-dir must be provided."); + } + if (segmentFile == null) { + segmentFile = findSegmentFileContainingIndex(); + } + + if (segmentFile.toPath().equals(backupPath.toPath())) { + throw new IOException("Backup path cannot be same as segment file path."); + } + + LogSegmentPath pi = LogSegmentPath.matchLogSegment(this.segmentFile.toPath()); + if (pi == null) { + throw new IOException("Invalid Segment File"); + } + + if (!segmentFile.exists()) { + throw new IOException("Error: Source segment file \"" + segmentFile + "\" does not exist."); + } + if (backupPath.exists()) { + throw new IOException("Error: Backup file for segment file \"" + backupPath + "\" already exists."); + } + try { + info("Taking back up of Raft Log file: " + this.segmentFile.getAbsolutePath() + " to location: " + backupPath); + if (!isDryRun()) { + 
Files.copy(segmentFile.toPath(), backupPath.toPath()); + } + info("File backed-up successfully!"); + } catch (IOException ex) { + throw new IOException("Error: Failed to take backup of the file. Exception: " + ex, ex); + } + + String tempOutput = segmentFile.getAbsolutePath() + ".skr.output"; + File outputFile = createOutputFile(tempOutput); + + info("Processing Raft Log file: " + this.segmentFile.getAbsolutePath() + " size:" + this.segmentFile.length()); + SegmentedRaftLogOutputStream outputStream = null; + SegmentedRaftLogInputStream logInputStream = null; + + try { + logInputStream = getInputStream(pi); + if (!isDryRun()) { + outputStream = new SegmentedRaftLogOutputStream(outputFile, false, + 1024, 1024, ByteBuffer.allocateDirect(SizeInBytes.valueOf("8MB").getSizeInt())); + } + + RaftProtos.LogEntryProto next; + for (RaftProtos.LogEntryProto prev = null; (next = logInputStream.nextEntry()) != null; prev = next) { + if (prev != null) { + Preconditions.assertTrue(next.getIndex() == prev.getIndex() + 1L, + "gap between entry %s and entry %s", prev, next); + } + + if (next.getIndex() != index && !isDryRun()) { + // all other logs will be written as it is + outputStream.write(next); + outputStream.flush(); + info("Copied raft log for index (" + next.getIndex() + ")."); + } else { + // replace the transaction with a dummy OmEcho operation + OzoneManagerProtocolProtos.OMRequest oldRequest = OMRatisHelper + .convertByteStringToOMRequest(next.getStateMachineLogEntry().getLogData()); + OzoneManagerProtocolProtos.OMRequest.Builder newRequest = OzoneManagerProtocolProtos.OMRequest.newBuilder() + .setCmdType(EchoRPC) + .setClientId(oldRequest.getClientId()) + .setEchoRPCRequest(OzoneManagerProtocolProtos.EchoRPCRequest.newBuilder().build()); + + if (oldRequest.hasUserInfo()) { + newRequest.setUserInfo(oldRequest.getUserInfo()); + } + if (oldRequest.hasTraceID()) { + newRequest.setTraceID(oldRequest.getTraceID()); + } + if (oldRequest.hasLayoutVersion()) { + 
newRequest.setLayoutVersion(oldRequest.getLayoutVersion()); + } + if (oldRequest.hasVersion()) { + newRequest.setVersion(oldRequest.getVersion()); + } + + RaftProtos.StateMachineLogEntryProto oldEntry = next.getStateMachineLogEntry(); + RaftProtos.StateMachineLogEntryProto.Builder newEntry = + RaftProtos.StateMachineLogEntryProto.newBuilder() + .setCallId(oldEntry.getCallId()) + .setClientId(oldEntry.getClientId()) + .setType(oldEntry.getType()) + .setLogData(OMRatisHelper.convertRequestToByteString(newRequest.build())); + if (oldEntry.hasStateMachineEntry()) { + newEntry.setStateMachineEntry(oldEntry.getStateMachineEntry()); + } + + RaftProtos.LogEntryProto newLogEntry = RaftProtos.LogEntryProto.newBuilder() + .setTerm(next.getTerm()) + .setIndex(next.getIndex()) + .setStateMachineLogEntry(newEntry) + .build(); + + if (!isDryRun()) { + outputStream.write(newLogEntry); + outputStream.flush(); + } + info("Replaced {" + oldRequest + "} with EchoRPC command at index " + + next.getIndex()); + } + } + + if (!isDryRun()) { + outputStream.flush(); + outputStream.close(); + Files.move(outputFile.toPath(), segmentFile.toPath(), StandardCopyOption.REPLACE_EXISTING); + info("Moved temporary output file to correct raft log location : " + segmentFile.toPath()); + } + + } catch (Exception ex) { + error("Exception: " + ex); + } finally { + if (logInputStream != null) { + logInputStream.close(); + } + if (outputStream != null) { + outputStream.flush(); + outputStream.close(); + } + if (isDryRun()) { + boolean success = outputFile.delete(); Review Comment: Let's not write any files to the disk in dry-run mode. Also let's use the methods from the `Files` class to throw exceptions on error, so PicoCLI will display the exception message and exit non-zero. It should automatically filter out the stack trace. 
########## hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/OMRatisLogRepair.java: ########## @@ -0,0 +1,300 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.repair.om; + +import static org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.Type.EchoRPC; + +import java.io.File; +import java.io.IOException; +import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; +import java.nio.ByteBuffer; +import java.nio.file.Files; +import java.nio.file.StandardCopyOption; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.apache.hadoop.hdds.cli.HddsVersionProvider; +import org.apache.hadoop.ozone.om.helpers.OMRatisHelper; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; +import org.apache.hadoop.ozone.repair.RepairTool; +import org.apache.ratis.proto.RaftProtos; +import org.apache.ratis.server.metrics.SegmentedRaftLogMetrics; +import org.apache.ratis.server.raftlog.segmented.LogSegmentPath; +import org.apache.ratis.server.raftlog.segmented.LogSegmentStartEnd; +import org.apache.ratis.server.raftlog.segmented.SegmentedRaftLogInputStream; +import 
org.apache.ratis.server.raftlog.segmented.SegmentedRaftLogOutputStream; +import org.apache.ratis.util.Preconditions; +import org.apache.ratis.util.SizeInBytes; +import picocli.CommandLine; + + +/** + * Tool to omit a raft log in a ratis segment file. + */ [email protected]( + name = "skip-ratis-transaction", + aliases = "srt", + description = "CLI to omit a raft log in a ratis segment file. The raft log at the index specified " + + "is replaced with an EchoOM command (which is a dummy command). It is an offline command " + + "i.e., doesn't require OM to be running. " + + "The command should be run for the same transaction on all 3 OMs only when they all OMs are crashing " + + "while applying the same transaction. If there is only one OM that is crashing and " + + "other OMs have executed the log successfully, then the DB should manually copied from one of the good OMs " + + "to the crashing OM instead.", + mixinStandardHelpOptions = true, + versionProvider = HddsVersionProvider.class +) +public class OMRatisLogRepair extends RepairTool { + + @CommandLine.Option(names = {"-s", "--segment-path", "--segmentPath"}, + description = "Path of the input segment file") + private File segmentFile; + + @CommandLine.Option(names = {"-d", "--ratis-log-dir", "--ratisLogDir"}, + description = "Path of the ratis log directory") + private File logDir; + + @CommandLine.Option(names = {"-b", "--backup"}, + required = true, + description = "Path to put the backup of the original repaired segment file") + private File backupPath; + + @CommandLine.Option(names = {"--index"}, + required = true, + description = "Index of the failing transaction that should be removed") + private long index; + + @Override + public void execute() throws Exception { + + if (segmentFile == null && logDir == null) { + throw new IllegalArgumentException("Path to either a segment-file or ratis-log-dir must be provided."); + } + if (segmentFile == null) { + segmentFile = findSegmentFileContainingIndex(); + } + + 
if (segmentFile.toPath().equals(backupPath.toPath())) { + throw new IOException("Backup path cannot be same as segment file path."); + } + + LogSegmentPath pi = LogSegmentPath.matchLogSegment(this.segmentFile.toPath()); + if (pi == null) { + throw new IOException("Invalid Segment File"); + } + + if (!segmentFile.exists()) { + throw new IOException("Error: Source segment file \"" + segmentFile + "\" does not exist."); + } + if (backupPath.exists()) { + throw new IOException("Error: Backup file for segment file \"" + backupPath + "\" already exists."); + } + try { + info("Taking back up of Raft Log file: " + this.segmentFile.getAbsolutePath() + " to location: " + backupPath); + if (!isDryRun()) { + Files.copy(segmentFile.toPath(), backupPath.toPath()); + } + info("File backed-up successfully!"); + } catch (IOException ex) { + throw new IOException("Error: Failed to take backup of the file. Exception: " + ex, ex); + } + + String tempOutput = segmentFile.getAbsolutePath() + ".skr.output"; + File outputFile = createOutputFile(tempOutput); + + info("Processing Raft Log file: " + this.segmentFile.getAbsolutePath() + " size:" + this.segmentFile.length()); + SegmentedRaftLogOutputStream outputStream = null; + SegmentedRaftLogInputStream logInputStream = null; + + try { + logInputStream = getInputStream(pi); + if (!isDryRun()) { + outputStream = new SegmentedRaftLogOutputStream(outputFile, false, + 1024, 1024, ByteBuffer.allocateDirect(SizeInBytes.valueOf("8MB").getSizeInt())); + } + + RaftProtos.LogEntryProto next; + for (RaftProtos.LogEntryProto prev = null; (next = logInputStream.nextEntry()) != null; prev = next) { + if (prev != null) { + Preconditions.assertTrue(next.getIndex() == prev.getIndex() + 1L, + "gap between entry %s and entry %s", prev, next); + } + + if (next.getIndex() != index && !isDryRun()) { + // all other logs will be written as it is + outputStream.write(next); + outputStream.flush(); + info("Copied raft log for index (" + next.getIndex() + ")."); + } 
else { + // replace the transaction with a dummy OmEcho operation + OzoneManagerProtocolProtos.OMRequest oldRequest = OMRatisHelper + .convertByteStringToOMRequest(next.getStateMachineLogEntry().getLogData()); + OzoneManagerProtocolProtos.OMRequest.Builder newRequest = OzoneManagerProtocolProtos.OMRequest.newBuilder() + .setCmdType(EchoRPC) + .setClientId(oldRequest.getClientId()) + .setEchoRPCRequest(OzoneManagerProtocolProtos.EchoRPCRequest.newBuilder().build()); + + if (oldRequest.hasUserInfo()) { + newRequest.setUserInfo(oldRequest.getUserInfo()); + } + if (oldRequest.hasTraceID()) { + newRequest.setTraceID(oldRequest.getTraceID()); + } + if (oldRequest.hasLayoutVersion()) { + newRequest.setLayoutVersion(oldRequest.getLayoutVersion()); + } + if (oldRequest.hasVersion()) { + newRequest.setVersion(oldRequest.getVersion()); + } + + RaftProtos.StateMachineLogEntryProto oldEntry = next.getStateMachineLogEntry(); + RaftProtos.StateMachineLogEntryProto.Builder newEntry = + RaftProtos.StateMachineLogEntryProto.newBuilder() + .setCallId(oldEntry.getCallId()) + .setClientId(oldEntry.getClientId()) + .setType(oldEntry.getType()) + .setLogData(OMRatisHelper.convertRequestToByteString(newRequest.build())); + if (oldEntry.hasStateMachineEntry()) { + newEntry.setStateMachineEntry(oldEntry.getStateMachineEntry()); + } + + RaftProtos.LogEntryProto newLogEntry = RaftProtos.LogEntryProto.newBuilder() + .setTerm(next.getTerm()) + .setIndex(next.getIndex()) + .setStateMachineLogEntry(newEntry) + .build(); + + if (!isDryRun()) { + outputStream.write(newLogEntry); + outputStream.flush(); + } + info("Replaced {" + oldRequest + "} with EchoRPC command at index " + + next.getIndex()); + } + } + + if (!isDryRun()) { + outputStream.flush(); + outputStream.close(); + Files.move(outputFile.toPath(), segmentFile.toPath(), StandardCopyOption.REPLACE_EXISTING); Review Comment: ```suggestion Files.move(outputFile.toPath(), segmentFile.toPath(), StandardCopyOption.REPLACE_EXISTING, 
StandardCopyOption.ATOMIC_MOVE); ``` ########## hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/OMRatisLogRepair.java: ########## @@ -0,0 +1,300 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.repair.om; + +import static org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.Type.EchoRPC; + +import java.io.File; +import java.io.IOException; +import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; +import java.nio.ByteBuffer; +import java.nio.file.Files; +import java.nio.file.StandardCopyOption; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.apache.hadoop.hdds.cli.HddsVersionProvider; +import org.apache.hadoop.ozone.om.helpers.OMRatisHelper; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; +import org.apache.hadoop.ozone.repair.RepairTool; +import org.apache.ratis.proto.RaftProtos; +import org.apache.ratis.server.metrics.SegmentedRaftLogMetrics; +import org.apache.ratis.server.raftlog.segmented.LogSegmentPath; +import org.apache.ratis.server.raftlog.segmented.LogSegmentStartEnd; +import org.apache.ratis.server.raftlog.segmented.SegmentedRaftLogInputStream; 
+import org.apache.ratis.server.raftlog.segmented.SegmentedRaftLogOutputStream; +import org.apache.ratis.util.Preconditions; +import org.apache.ratis.util.SizeInBytes; +import picocli.CommandLine; + + +/** + * Tool to omit a raft log in a ratis segment file. + */ [email protected]( + name = "skip-ratis-transaction", + aliases = "srt", + description = "CLI to omit a raft log in a ratis segment file. The raft log at the index specified " + + "is replaced with an EchoOM command (which is a dummy command). It is an offline command " + + "i.e., doesn't require OM to be running. " + + "The command should be run for the same transaction on all 3 OMs only when they all OMs are crashing " + + "while applying the same transaction. If there is only one OM that is crashing and " + + "other OMs have executed the log successfully, then the DB should manually copied from one of the good OMs " + + "to the crashing OM instead.", + mixinStandardHelpOptions = true, + versionProvider = HddsVersionProvider.class +) +public class OMRatisLogRepair extends RepairTool { + + @CommandLine.Option(names = {"-s", "--segment-path", "--segmentPath"}, + description = "Path of the input segment file") + private File segmentFile; + + @CommandLine.Option(names = {"-d", "--ratis-log-dir", "--ratisLogDir"}, + description = "Path of the ratis log directory") + private File logDir; + + @CommandLine.Option(names = {"-b", "--backup"}, + required = true, + description = "Path to put the backup of the original repaired segment file") + private File backupPath; + + @CommandLine.Option(names = {"--index"}, + required = true, + description = "Index of the failing transaction that should be removed") + private long index; + + @Override + public void execute() throws Exception { + + if (segmentFile == null && logDir == null) { + throw new IllegalArgumentException("Path to either a segment-file or ratis-log-dir must be provided."); + } + if (segmentFile == null) { + segmentFile = findSegmentFileContainingIndex(); 
+ } + + if (segmentFile.toPath().equals(backupPath.toPath())) { + throw new IOException("Backup path cannot be same as segment file path."); + } + + LogSegmentPath pi = LogSegmentPath.matchLogSegment(this.segmentFile.toPath()); + if (pi == null) { + throw new IOException("Invalid Segment File"); + } + + if (!segmentFile.exists()) { + throw new IOException("Error: Source segment file \"" + segmentFile + "\" does not exist."); + } + if (backupPath.exists()) { + throw new IOException("Error: Backup file for segment file \"" + backupPath + "\" already exists."); + } + try { + info("Taking back up of Raft Log file: " + this.segmentFile.getAbsolutePath() + " to location: " + backupPath); + if (!isDryRun()) { + Files.copy(segmentFile.toPath(), backupPath.toPath()); + } + info("File backed-up successfully!"); + } catch (IOException ex) { + throw new IOException("Error: Failed to take backup of the file. Exception: " + ex, ex); + } + + String tempOutput = segmentFile.getAbsolutePath() + ".skr.output"; + File outputFile = createOutputFile(tempOutput); + + info("Processing Raft Log file: " + this.segmentFile.getAbsolutePath() + " size:" + this.segmentFile.length()); + SegmentedRaftLogOutputStream outputStream = null; + SegmentedRaftLogInputStream logInputStream = null; + + try { + logInputStream = getInputStream(pi); + if (!isDryRun()) { + outputStream = new SegmentedRaftLogOutputStream(outputFile, false, + 1024, 1024, ByteBuffer.allocateDirect(SizeInBytes.valueOf("8MB").getSizeInt())); + } + + RaftProtos.LogEntryProto next; + for (RaftProtos.LogEntryProto prev = null; (next = logInputStream.nextEntry()) != null; prev = next) { + if (prev != null) { + Preconditions.assertTrue(next.getIndex() == prev.getIndex() + 1L, + "gap between entry %s and entry %s", prev, next); + } + + if (next.getIndex() != index && !isDryRun()) { + // all other logs will be written as it is + outputStream.write(next); + outputStream.flush(); + info("Copied raft log for index (" + next.getIndex() + 
")."); + } else { + // replace the transaction with a dummy OmEcho operation + OzoneManagerProtocolProtos.OMRequest oldRequest = OMRatisHelper + .convertByteStringToOMRequest(next.getStateMachineLogEntry().getLogData()); + OzoneManagerProtocolProtos.OMRequest.Builder newRequest = OzoneManagerProtocolProtos.OMRequest.newBuilder() + .setCmdType(EchoRPC) + .setClientId(oldRequest.getClientId()) + .setEchoRPCRequest(OzoneManagerProtocolProtos.EchoRPCRequest.newBuilder().build()); + + if (oldRequest.hasUserInfo()) { + newRequest.setUserInfo(oldRequest.getUserInfo()); + } + if (oldRequest.hasTraceID()) { + newRequest.setTraceID(oldRequest.getTraceID()); + } + if (oldRequest.hasLayoutVersion()) { + newRequest.setLayoutVersion(oldRequest.getLayoutVersion()); + } + if (oldRequest.hasVersion()) { + newRequest.setVersion(oldRequest.getVersion()); + } + + RaftProtos.StateMachineLogEntryProto oldEntry = next.getStateMachineLogEntry(); + RaftProtos.StateMachineLogEntryProto.Builder newEntry = + RaftProtos.StateMachineLogEntryProto.newBuilder() + .setCallId(oldEntry.getCallId()) + .setClientId(oldEntry.getClientId()) + .setType(oldEntry.getType()) + .setLogData(OMRatisHelper.convertRequestToByteString(newRequest.build())); + if (oldEntry.hasStateMachineEntry()) { + newEntry.setStateMachineEntry(oldEntry.getStateMachineEntry()); + } + + RaftProtos.LogEntryProto newLogEntry = RaftProtos.LogEntryProto.newBuilder() + .setTerm(next.getTerm()) + .setIndex(next.getIndex()) + .setStateMachineLogEntry(newEntry) + .build(); + + if (!isDryRun()) { + outputStream.write(newLogEntry); + outputStream.flush(); + } + info("Replaced {" + oldRequest + "} with EchoRPC command at index " + + next.getIndex()); + } + } + + if (!isDryRun()) { + outputStream.flush(); + outputStream.close(); + Files.move(outputFile.toPath(), segmentFile.toPath(), StandardCopyOption.REPLACE_EXISTING); + info("Moved temporary output file to correct raft log location : " + segmentFile.toPath()); + } + + } catch (Exception 
ex) { + error("Exception: " + ex); + } finally { + if (logInputStream != null) { + logInputStream.close(); + } + if (outputStream != null) { + outputStream.flush(); + outputStream.close(); + } + if (isDryRun()) { + boolean success = outputFile.delete(); + if (!success) { + error("Error: Could not delete temporary output file \"" + outputFile + "\"."); + } + } + } + } + + private File createOutputFile(String name) throws IOException { Review Comment: I think we can replace this method with `Files#createTempFile`. We can do best-effort cleanup on exit; otherwise the file would need to be manually deleted. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
