malliaridis commented on code in PR #2381: URL: https://github.com/apache/solr/pull/2381#discussion_r1705590402
########## solr/modules/hdfs/bin/prepare-snapshot-export.sh: ########## @@ -0,0 +1,176 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -e + +usage() { + echo "This script is to support the specific use case of using Hadoop specific libraries for managing Solr snapshot." + echo "--prepare-snapshot-export <arg> This command will prepare" + echo " copylistings for the specified" + echo " snapshot. This command should only" + echo " be used only if Solr is deployed" + echo " with Hadoop and collection index" + echo " files are stored on a shared" + echo " file-system e.g. HDFS" + echo "--export <arg> This command will create a backup" + echo " for the specified snapshot." +} + +distcp_warning() { + echo "SOLR_USE_DISTCP environment variable is not set. \ + Do you want to use hadoop distcp tool for exporting Solr collection snapshot ?" +} + +use_bin_solr_warning() { + echo "Please use the bin/solr script to execute this command." +} + +parse_options() { + OPTIND=3 + while getopts ":c:d:s:" o ; do + case "${o}" in + d) + destPath=${OPTARG} + ;; + s) + sourcePath=${OPTARG} + ;; + c) + collectionName=${OPTARG} + ;; + *) + echo "Unknown option ${OPTARG}" + usage 1>&2 + exit 1 + ;; + esac + done +} + +prepare_snapshot_export() { + #Make sure to cleanup the temporary files. + scratch=$(mktemp -d -t solrsnaps.XXXXXXXXXX) + function finish { + rm -rf "${scratch}" + } + trap finish EXIT + + if hdfs dfs -test -d "${destPath}" ; then + run_solr_snapshot_tool --prepare-snapshot-export "$@" -t "${scratch}" + + hdfs dfs -mkdir -p "${copyListingDirPath}" > /dev/null + find "${scratch}" -type f -printf "%f\n" | while read shardId; do + echo "Copying the copy-listing for $shardId" + hdfs dfs -copyFromLocal "${scratch}/${shardId}" "${copyListingDirPath}" > /dev/null + done + else + echo "Directory ${destPath} does not exist." + exit 1 + fi +} + +copy_snapshot_files() { + copylisting_dir_path="$1" + + if hdfs dfs -test -d "${copylisting_dir_path}" ; then + for shardId in $(hdfs dfs -stat "%n" "${copylisting_dir_path}/*"); do + oPath="${destPath}/${snapshotName}/snapshot.${shardId}" + echo "Copying the index files for ${shardId} to ${oPath}" + ${distCpCmd} -f "${copylisting_dir_path}/${shardId}" "${oPath}" > /dev/null + done + else + echo "Directory ${copylisting_dir_path} does not exist." + exit 1 + fi +} + +collectionName="" +destPath="" +sourcePath="" +cmd="$1" +snapshotName="$2" +copyListingDirPath="" +distCpCmd="${SOLR_DISTCP_CMD:-hadoop distcp}" +scriptDir=$(dirname "$0") +solrLibPath="${SOLR_LIB_PATH:-${scriptDir}/../../../server/solr-webapp/webapp/WEB-INF/lib/*:${scriptDir}/../../../server/lib/ext/*:${scriptDir}/../lib/*}" +case "${cmd}" in + --create) + use_bin_solr_warning + ;; + --delete) + use_bin_solr_warning + ;; + --list) + use_bin_solr_warning + ;; + --describe) + use_bin_solr_warning + ;; + --prepare-snapshot-export) + : "${SOLR_USE_DISTCP:? $(distcp_warning)}" + + parse_options "$@" + + : "${destPath:? Please specify destination directory using -d option}" + + copyListingDirPath="${destPath}/copylistings" + prepare_snapshot_export "${@:2}" + echo "Done. GoodBye!" + ;; + --export) + if [ -z "${SOLR_USE_DISTCP}" ]; then + echo "Not using the distcp command" + use_bin_solr_warning + echo "Done. GoodBye!" + exit 0 + fi + + parse_options "$@" + + : "${snapshotName:? Please specify the name of the snapshot}" + : "${destPath:? Please specify destination directory using -d option}" + + if [ -n "${collectionName}" ] && [ -n "${sourcePath}" ]; then + echo "The -c and -s options can not be specified together" + exit 1 + fi + + if [ -z "${collectionName}" ] && [ -z "${sourcePath}" ]; then + echo "At least one of options (-c or -s) must be specified" + exit 1 + fi + + if [ -n "${collectionName}" ]; then + copyListingDirPath="${destPath}/${snapshotName}/copylistings" + prepare_snapshot_export "${@:2}" + copy_snapshot_files "${destPath}/${snapshotName}/copylistings" + hdfs dfs -rm -r -f -skipTrash "${destPath}/${snapshotName}/copylistings" > /dev/null + else + copy_snapshot_files "${sourcePath}/copylistings" + echo "Copying the collection meta-data to ${destPath}/${snapshotName}" + ${distCpCmd} "${sourcePath}/${snapshotName}/*" "${destPath}/${snapshotName}/" > /dev/null + fi + + echo "Done. GoodBye!" + ;; + --help) Review Comment: I would use `-h|--help)` for consistency here. ########## solr/core/src/java/org/apache/solr/cli/SolrCLI.java: ########## @@ -310,6 +310,12 @@ private static Tool newTool(String toolType) throws Exception { else if ("post".equals(toolType)) return new PostTool(); else if ("postlogs".equals(toolType)) return new PostLogsTool(); else if ("version".equals(toolType)) return new VersionTool(); + else if ("snapshot-create".equals(toolType)) return new SnapshotCreateTool(); + else if ("snapshot-delete".equals(toolType)) return new SnapshotDeleteTool(); + else if ("snapshot-list".equals(toolType)) return new SnapshotListTool(); + else if ("snapshot-describe".equals(toolType)) return new SnapshotDescribeTool(); + else if ("snapshot-prepare-export".equals(toolType)) return new SnapshotPrepareExportTool(); + else if ("snapshot-export".equals(toolType)) return new SnapshotExportTool(); Review Comment: Is there a specific reason you decided to use separate CLI classes for individual snapshot actions? Because this breaks "visually" the consistency (see `zk` command). I would personally prefer single command `snapshot` or `snapshots` and preferably a single entry point that handles all snapshot related actions and argument parsing. This way we would also avoid some code duplication. Individual classes may still be used for handling individual actions, but in a more simplified manner, as the arguments are already parsed and prepared for the action. And a more general "help" output with "--help" that first lists all options available for snapshoting, and eventually provides information for HDFS limitations / requirements for specific arguments could be printed. ########## solr/core/src/java/org/apache/solr/cli/SolrCLI.java: ########## @@ -310,6 +310,12 @@ private static Tool newTool(String toolType) throws Exception { else if ("post".equals(toolType)) return new PostTool(); else if ("postlogs".equals(toolType)) return new PostLogsTool(); else if ("version".equals(toolType)) return new VersionTool(); + else if ("snapshot-create".equals(toolType)) return new SnapshotCreateTool(); + else if ("snapshot-delete".equals(toolType)) return new SnapshotDeleteTool(); + else if ("snapshot-list".equals(toolType)) return new SnapshotListTool(); + else if ("snapshot-describe".equals(toolType)) return new SnapshotDescribeTool(); + else if ("snapshot-prepare-export".equals(toolType)) return new SnapshotPrepareExportTool(); + else if ("snapshot-export".equals(toolType)) return new SnapshotExportTool(); Review Comment: Additionally, with the `OptionGroup` it is possible to allow only one of many options (`--create`, `--list` etc.). And don't get me wrong, I like the short classes more than an overpowered class. So my preference is kind of mixed feelings here. ########## solr/modules/hdfs/src/java/org/apache/solr/hdfs/snapshots/PrepareSnapshotExportCLI.java: ########## @@ -0,0 +1,342 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.hdfs.snapshots; + +import java.io.Closeable; +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.apache.hadoop.fs.Path; +import org.apache.solr.cli.CLIO; +import org.apache.solr.cli.SolrCLI; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.impl.CloudSolrClient; +import org.apache.solr.client.solrj.request.CollectionAdminRequest; +import org.apache.solr.client.solrj.response.CollectionAdminResponse; +import org.apache.solr.common.cloud.DocCollection; +import org.apache.solr.common.cloud.Replica; +import org.apache.solr.common.cloud.Slice; +import org.apache.solr.common.params.CollectionAdminParams; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.core.snapshots.CollectionSnapshotMetaData; +import org.apache.solr.core.snapshots.CollectionSnapshotMetaData.CoreSnapshotMetaData; +import org.apache.solr.core.snapshots.SolrSnapshotManager; + +/** + * This class provides the commandline to prepare a snapshot export with HDFS. Other snapshot + * related commands live in org.apache.solr.cli package. + */ +public class PrepareSnapshotExportCLI implements Closeable, CLIO { + + private static final String COLLECTION = "c"; + private static final String TEMP_DIR = "t"; + private static final String DEST_DIR = "d"; + private static final String HDFS_PATH_PREFIX = "p"; + private static final String BACKUP_REPO_NAME = "r"; + private static final String ASYNC_REQ_ID = "i"; Review Comment: Would also be nice to support the long form of the options, so that it is consistent with the other scripts. May also be a separate ticket though. ########## solr/core/src/java/org/apache/solr/cli/SnapshotPrepareExportTool.java: ########## @@ -0,0 +1,322 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.cli; + +import java.io.IOException; +import java.io.PrintStream; +import java.net.URI; +import java.net.URISyntaxException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.Option; +import org.apache.solr.client.solrj.SolrClient; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.impl.CloudSolrClient; +import org.apache.solr.client.solrj.request.CollectionAdminRequest; +import org.apache.solr.client.solrj.response.CollectionAdminResponse; +import org.apache.solr.common.cloud.DocCollection; +import org.apache.solr.common.cloud.Replica; +import org.apache.solr.common.cloud.Slice; +import org.apache.solr.common.params.CollectionAdminParams; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.core.snapshots.CollectionSnapshotMetaData; +import org.apache.solr.core.snapshots.SolrSnapshotManager; + +/** + * Supports prepare-snapshot-export command in the bin/solr script. This command should only be used + * only if Solr is deployed with Hadoop and collection index files are stored on a shared + * file-system e.g. HDFS This class should probably be in the hdfs module. + */ +public class SnapshotPrepareExportTool extends ToolBase { + + public SnapshotPrepareExportTool() { + this(CLIO.getOutStream()); + } + + public SnapshotPrepareExportTool(PrintStream stdout) { + super(stdout); + } + + @Override + public String getName() { + return "snapshot-prepare-export"; + } + + @Override + public List<Option> getOptions() { Review Comment: Options have differences to `PrepareSnapshotExportCLI`. Should they maybe be synced? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org For additional commands, e-mail: issues-h...@solr.apache.org