On Tue, Feb 19, 2013 at 01:33:35PM -0600, Domsch, Matt wrote:
> On Tue, Feb 19, 2013 at 12:27:22PM -0600, John Sauter wrote:
> > In July-August of 2012 there was a discussion of the new meaning of the
> > --exclude qualifier. I was concerned that the new meaning meant that I
> > could no longer conveniently use S3cmd to back up the *.gpg files in a
> > wide and deep directory tree. I suggested that the new function of
> > --exclude should be moved to a new qualifier for compatibility.
> >
> > The URL for the archive of the discussion is
> > <http://sourceforge.net/mailarchive/message.php?msg_id=29617785>.
> >
> > Has my concern been addressed?
> >     John Sauter (john_sau...@systemeyescomputerstore.com)
>
> It has not.
>
> The equivalent rsync command:
>
> $ rsync -a --exclude='*' --include='*.gpg' src dst
>
> copies nothing, as 'src' matches '*'. So the new behavior of s3cmd
> sync matches that of rsync, which I believe is what most users would
> expect.
>
> I believe what you are looking for is the rsync equivalent of:
>
> $ pushd src
> $ find . -name \*.gpg > ../files-from.txt
> $ popd
> $ rsync --files-from=files-from.txt src dst
>
> which copies only the files specified in files-from.txt, a list that was
> created before the rsync runs. Correct?
>
> If so, I think this could be added to
> S3/FileLists.py:_get_filelist_local() in a fairly straightforward
> manner. But it would still require you to change your script. Before
> heading down this path, I want to be sure my understanding is
> correct. Please advise.
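
The idea, then, is to give s3cmd sync an rsync-style --files-from option so that this
workflow keeps working against S3. Intended usage would mirror the rsync sequence
above; a sketch only, with a placeholder bucket name and paths:

$ cd src
$ find . -name '*.gpg' > ../files-from.txt
$ s3cmd sync --files-from=../files-from.txt ./ s3://BUCKET/backups/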
Something like the patch below, found at:
https://github.com/mdomsch/s3cmd/tree/files-from/

-- 
Matt Domsch
Technology Strategist
Dell | Office of the CTO


From 3ce5e98914497274defe459a57ea617b9368db65 Mon Sep 17 00:00:00 2001
From: Matt Domsch <matt_dom...@dell.com>
Date: Tue, 19 Feb 2013 16:08:15 -0600
Subject: [PATCH] add --files-from=FILE to allow transfer of select files only

This solves the change of behavior introduced by processing
excludes/includes during os.walk(), where previously:
s3cmd sync --exclude='*' --include='*.gpg'
would walk the whole tree and transfer only the files named *.gpg.
Since the change to os.walk(), the exclude '*' matches everything, and
nothing is transferred.

This patch introduces --files-from=FILE to match rsync behaviour,
where the list of files to transfer (local to remote) is taken not
from an os.walk(), but from the explicit list in FILE.

The equivalent for remote to local, and remote to remote, is not yet
implemented.
---
 S3/Config.py    |  1 +
 S3/FileLists.py | 41 +++++++++++++++++++++++++++++++++++++----
 s3cmd           |  3 +++
 3 files changed, 41 insertions(+), 4 deletions(-)

diff --git a/S3/Config.py b/S3/Config.py
index c8770ca..aac6b09 100644
--- a/S3/Config.py
+++ b/S3/Config.py
@@ -92,6 +92,7 @@ class Config(object):
     website_error = ""
     website_endpoint = "http://%(bucket)s.s3-website-%(location)s.amazonaws.com/"
     additional_destinations = []
+    files_from = []
     cache_file = ""
     add_headers = ""
 
diff --git a/S3/FileLists.py b/S3/FileLists.py
index 2bf7ed9..fae9004 100644
--- a/S3/FileLists.py
+++ b/S3/FileLists.py
@@ -140,6 +140,35 @@ def handle_exclude_include_walk(root, dirs, files):
         else:
             debug(u"PASS: %r" % (file))
 
+
+def _get_filelist_from_file(cfg, local_path):
+    def _append(d, key, value):
+        if key not in d:
+            d[key] = [value]
+        else:
+            d[key].append(value)
+
+    filelist = {}
+    for fname in cfg.files_from:
+        f = open(fname, 'r')
+        for line in f:
+            line = line.strip()
+            line = os.path.normpath(os.path.join(local_path, line))
+            dirname = os.path.dirname(line)
+            basename = os.path.basename(line)
+            _append(filelist, dirname, basename)
+        f.close()
+
+    # reformat to match os.walk()
+    result = []
+    keys = filelist.keys()
+    keys.sort()
+    for key in keys:
+        values = filelist[key]
+        values.sort()
+        result.append((key, [], values))
+    return result
+
 def fetch_local_list(args, recursive = None):
     def _get_filelist_local(loc_list, local_uri, cache):
         info(u"Compiling list of local files...")
@@ -156,11 +185,15 @@ def fetch_local_list(args, recursive = None):
         if local_uri.isdir():
             local_base = deunicodise(local_uri.basename())
             local_path = deunicodise(local_uri.path())
-            if cfg.follow_symlinks:
-                filelist = _fswalk_follow_symlinks(local_path)
+            if len(cfg.files_from):
+                filelist = _get_filelist_from_file(cfg, local_path)
+                single_file = False
             else:
-                filelist = _fswalk_no_symlinks(local_path)
-            single_file = False
+                if cfg.follow_symlinks:
+                    filelist = _fswalk_follow_symlinks(local_path)
+                else:
+                    filelist = _fswalk_no_symlinks(local_path)
+                single_file = False
         else:
             local_base = ""
             local_path = deunicodise(local_uri.dirname())
diff --git a/s3cmd b/s3cmd
index 1aa31ae..c1a1a28 100755
--- a/s3cmd
+++ b/s3cmd
@@ -1738,6 +1738,7 @@ def main():
     optparser.add_option(      "--rinclude", dest="rinclude", action="append", metavar="REGEXP", help="Same as --include but uses REGEXP (regular expression) instead of GLOB")
     optparser.add_option(      "--rinclude-from", dest="rinclude_from", action="append", metavar="FILE", help="Read --rinclude REGEXPs from FILE")
+    optparser.add_option(      "--files-from", dest="files_from", action="append", metavar="FILE", help="Read list of source-file names from FILE")
     optparser.add_option(      "--bucket-location", dest="bucket_location", help="Datacentre to create bucket in. As of now the datacenters are: US (default), EU, ap-northeast-1, ap-southeast-1, sa-east-1, us-west-1 and us-west-2")
     optparser.add_option(      "--reduced-redundancy", "--rr", dest="reduced_redundancy", action="store_true", help="Store object with 'Reduced redundancy'. Lower per-GB price. [put, cp, mv]")
@@ -1910,6 +1911,8 @@ def main():
     if options.additional_destinations:
         cfg.additional_destinations = options.additional_destinations
+    if options.files_from:
+        cfg.files_from = options.files_from
 
     ## Set output and filesystem encoding for printing out filenames.
     sys.stdout = codecs.getwriter(cfg.encoding)(sys.stdout, "replace")
-- 
1.8.1.2