On Tue, 2013-02-19 at 16:16 -0600, Matt Domsch wrote: > On Tue, Feb 19, 2013 at 01:33:35PM -0600, Domsch, Matt wrote: > > On Tue, Feb 19, 2013 at 12:27:22PM -0600, John Sauter wrote: > > > In July-August of 2012 there was a discussion of the new meaning of the > > > --exclude qualifier. I was concerned that the new meaning meant that I > > > could no longer conveniently use S3cmd to backup the *.gpg files in a > > > wide and deep directory tree. I suggested that the new function of > > > --exclude should be moved to a new qualifier for compatibility. > > > > > > The URL for archive of the discussion is > > > <http://sourceforge.net/mailarchive/message.php?msg_id=29617785>. > > > > > > Has my concern been addressed? > > > John Sauter (john_sau...@systemeyescomputerstore.com) > > > > It has not. > > > > The equivalent rsync command: > > > > $ rsync -a --exclude='*' --include='*.gpg' src dst > > > > copies nothing, as 'src' matches '*'. So the new behavior of s3cmd > > sync matches that of rsync, which I believe is what most users would > > expect. > > > > I believe what you are looking for is the rsync equivalent of: > > > > $ pushd src > > $ find . -name \*.gpg > ../files-from.txt > > $ popd > > $ rsync --files-from=files-from.txt src dst > > > > which copies only the files specified in files-from.txt which was > > created a priori to the rsync. Correct? > > > > If so, I think this could be added into > > S3/FileLists.py:_get_filelist_local() in a fairly straightforward > > manner. But it would still require you to change your script. Before > > heading down this path, I want to be sure my understanding is > > correct. Please advise. 
> > > Something like the patch below, found at: > https://github.com/mdomsch/s3cmd/tree/files-from/ > > -- > Matt Domsch > Technology Strategist > Dell | Office of the CTO > > >From 3ce5e98914497274defe459a57ea617b9368db65 Mon Sep 17 00:00:00 2001 > From: Matt Domsch <matt_dom...@dell.com> > Date: Tue, 19 Feb 2013 16:08:15 -0600 > Subject: [PATCH] add --files-from=FILE to allow transfer of select files only > > This solves the change of behavior introduced by processing > excludes/includes during os.walk(), where previously: > > s3cmd sync --exclude='*' --include='*.gpg' > > would walk the whole tree and transfer only the files named *.gpg. > > Since the change to os.walk(), the exclude '*' matches everything, and > nothing is transferred. > > This patch introduces --files-from=FILE to match rsync behaviour, > where the list of files to transfer (local to remote) is taken not > from an os.walk(), but from the explicit list in FILE. > > The equivalent for remote to local, and remote to remote, is not yet > implemented. 
> --- > S3/Config.py | 1 + > S3/FileLists.py | 41 +++++++++++++++++++++++++++++++++++++---- > s3cmd | 3 +++ > 3 files changed, 41 insertions(+), 4 deletions(-) > > diff --git a/S3/Config.py b/S3/Config.py > index c8770ca..aac6b09 100644 > --- a/S3/Config.py > +++ b/S3/Config.py > @@ -92,6 +92,7 @@ class Config(object): > website_error = "" > website_endpoint = > "http://%(bucket)s.s3-website-%(location)s.amazonaws.com/" > additional_destinations = [] > + files_from = [] > cache_file = "" > add_headers = "" > > diff --git a/S3/FileLists.py b/S3/FileLists.py > index 2bf7ed9..fae9004 100644 > --- a/S3/FileLists.py > +++ b/S3/FileLists.py > @@ -140,6 +140,35 @@ def handle_exclude_include_walk(root, dirs, files): > else: > debug(u"PASS: %r" % (file)) > > + > +def _get_filelist_from_file(cfg, local_path): > + def _append(d, key, value): > + if key not in d: > + d[key] = [value] > + else: > + d[key].append(value) > + > + filelist = {} > + for fname in cfg.files_from: > + f = open(fname, 'r') > + for line in f: > + line = line.strip() > + line = os.path.normpath(os.path.join(local_path, line)) > + dirname = os.path.dirname(line) > + basename = os.path.basename(line) > + _append(filelist, dirname, basename) > + f.close() > + > + # reformat to match os.walk() > + result = [] > + keys = filelist.keys() > + keys.sort() > + for key in keys: > + values = filelist[key] > + values.sort() > + result.append((key, [], values)) > + return result > + > def fetch_local_list(args, recursive = None): > def _get_filelist_local(loc_list, local_uri, cache): > info(u"Compiling list of local files...") > @@ -156,11 +185,15 @@ def fetch_local_list(args, recursive = None): > if local_uri.isdir(): > local_base = deunicodise(local_uri.basename()) > local_path = deunicodise(local_uri.path()) > - if cfg.follow_symlinks: > - filelist = _fswalk_follow_symlinks(local_path) > + if len(cfg.files_from): > + filelist = _get_filelist_from_file(cfg, local_path) > + single_file = False > else: > - filelist = 
_fswalk_no_symlinks(local_path) > - single_file = False > + if cfg.follow_symlinks: > + filelist = _fswalk_follow_symlinks(local_path) > + else: > + filelist = _fswalk_no_symlinks(local_path) > + single_file = False > else: > local_base = "" > local_path = deunicodise(local_uri.dirname()) > diff --git a/s3cmd b/s3cmd > index 1aa31ae..c1a1a28 100755 > --- a/s3cmd > +++ b/s3cmd > @@ -1738,6 +1738,7 @@ def main(): > optparser.add_option( "--rinclude", dest="rinclude", > action="append", metavar="REGEXP", help="Same as --include but uses REGEXP > (regular expression) instead of GLOB") > optparser.add_option( "--rinclude-from", dest="rinclude_from", > action="append", metavar="FILE", help="Read --rinclude REGEXPs from FILE") > > + optparser.add_option( "--files-from", dest="files_from", > action="append", metavar="FILE", help="Read list of source-file names from > FILE") > optparser.add_option( "--bucket-location", dest="bucket_location", > help="Datacentre to create bucket in. As of now the datacenters are: US > (default), EU, ap-northeast-1, ap-southeast-1, sa-east-1, us-west-1 and > us-west-2") > optparser.add_option( "--reduced-redundancy", "--rr", > dest="reduced_redundancy", action="store_true", help="Store object with > 'Reduced redundancy'. Lower per-GB price. [put, cp, mv]") > > @@ -1910,6 +1911,8 @@ def main(): > > if options.additional_destinations: > cfg.additional_destinations = options.additional_destinations > + if options.files_from: > + cfg.files_from = options.files_from > > ## Set output and filesystem encoding for printing out filenames. > sys.stdout = codecs.getwriter(cfg.encoding)(sys.stdout, "replace")
This sounds like it will solve my problem. Please let me know when you have a version of S3cmd with --files-from implemented for sync, and I'll give it a try. John Sauter (john_sau...@systemeyescomputerstore.com)
signature.asc
Description: This is a digitally signed message part
------------------------------------------------------------------------------ Everyone hates slow websites. So do we. Make your web apps faster with AppDynamics Download AppDynamics Lite for free today: http://p.sf.net/sfu/appdyn_d2d_feb
_______________________________________________ S3tools-general mailing list S3tools-general@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/s3tools-general