Hello Joey, I finally found the time to wrap up the gendelta extension that checks the local tree for completeness. After testing it against 10,000 tar balls from the debian archive I am reasonably confident that it works correctly.
Please take a look. Best regards -- Muharem Hrnjadovic <[email protected]> Public key id : B2BBFCFC Key fingerprint : A5A3 CC67 2B87 D641 103F 5602 219F 6B60 B2BB FCFC
=== added file 'Ptutils.pm' --- Ptutils.pm 1970-01-01 00:00:00 +0000 +++ Ptutils.pm 2009-10-14 08:08:35 +0000 @@ -0,0 +1,283 @@ +#!/usr/bin/perl +=head1 NAME + +ptutils - pristine-tar utilities + +=head1 DESCRIPTION + +Various utility functions used by the pristine-tar tool. + +=head1 FUNCTIONS + +=over 4 + +=item checkmanifest + +Find all paths that are listed in the gendelta manifest but do *not* exist +in the local tree. + +When the optional "localtree" argument is passed to gendelta it will check +whether the files required are all present in the local tree and abort if this +is not the case. + +=back + +=head1 LIMITATIONS + +The gentar gendelta "check local tree" extension was tested extensively +against the debian tar ball archive but may still occasionally stumble over +file names (in the tar-generated manifest) that are encoded in ways that +pristine-tar does not understand, but tar does. + +=head1 AUTHOR + +Muharem Hrnjadovic <[email protected]> + +Licensed under the GPL, version 2 or above. + +=cut + +package Ptutils; + +use Exporter; +...@isa = ("Exporter"); +...@export = qw(&checkmanifest); + +use strict; +use warnings; +use Cwd qw(realpath); +use Data::Dumper; +use File::Find; +use File::Spec::Functions qw(abs2rel canonpath catdir splitdir); +use List::MoreUtils qw(zip); + +my $debug=0; + +sub debug { + message(@_) if $debug; +} + +sub message { + print STDERR "pristine-tar utils: @_\n"; +} + +sub checkpathsets { + # Check whether all files listed in the manifest are available locally for + # agiven pair of prefixes. + my $dir_paths=shift; # local paths + my $tar_paths=shift; # path listed in manifest + my $prefixes=shift; # local/manifest path prefixes + + # Strip off the prefixes. + normalizepaths($dir_paths, $tar_paths, $prefixes); + + # Now let's see whether all paths in the manifest can be found locally. + + # Strip off trailing slashes. + map { s,/*$,,; } @$prefixes; + map { s,/*$,,; } @$tar_paths; + debug(sprintf "!!! PFX '%s'", join("', '", @$prefixes)); + + # Compute path difference. + my %seen; + @seen {...@$tar_paths} = (); + delete @seen {...@$dir_paths}; + delete @seen {...@$prefixes}; + + my @missing_in_tree = sort(keys %seen); + return \...@missing_in_tree; +} + +sub checkmanifest { + # Find all paths that are listed in the manifest but do *not* exist in + # the local tree. + my $manifest=shift; + my $localtree_path=shift; + + my @manifest_entries = (); + my @paths_in_tree = (); + my @missing_in_tree; + + (-d $localtree_path) || die "!! No such directory: '$localtree_path', wrong path?"; + + find sub { push(@paths_in_tree, canonpath($File::Find::name)) }, abs2rel(realpath($localtree_path)); + + open(IN, "<", $manifest) || die "$!"; + while (<IN>) { + chomp $_; + # Condense multiple slashes to one. + s,//+,/,; + # Unicode code points (e.g. \201) are read as text i.e. as four + # characters and *not* as a single byte. We convert them to bytes + # here. + s/\\(\d{3})/"chr(0$1)"/eeg; + push(@manifest_entries, $_); + } + close IN; + + my @lps = @paths_in_tree; + my @tps = @manifest_entries; + + # Figure out what the prefixes for the respective path sets might be. + my $dir_prefix = compute_representative_path_prefix(\...@lps); + my $tar_prefix = compute_representative_path_prefix(\...@tps); + + return if !defined($dir_prefix) || !defined($tar_prefix); + my @prefixes; + if ($dir_prefix eq $tar_prefix) { + @prefixes = ('', ''); + } + else { + @prefixes = ($dir_prefix, $tar_prefix); + } + + my ($missing, $missing_in_2nd_attempt, $missing_in_3rd_attempt); + + # Check whether we have all the paths required given the prefixes guessed. + $missing = checkpathsets(\...@lps, \...@tps, \...@prefixes); + debug(sprintf "-- 1st*AT: files missing, %s", scalar(@$missing)); + + if ((scalar(@$missing) > 0) && checkprefixes(\...@prefixes)) { + # This is in essence guesswork: we did not find all the paths required + # based on the prefixes in the first round. + # In this 'educated guess' attempt we try to find all that's needed w/o + # any prefixes whatsoever. + @lps = @paths_in_tree; + @tps = @manifest_entries; + $missing_in_2nd_attempt = checkpathsets(\...@lps, \...@tps, ['', '']); + debug(sprintf "-- 2nd*AT: files missing, %s", scalar(@$missing_in_2nd_attempt)); + + # All paths found? Great! Let the caller know. + return $missing_in_2nd_attempt if (scalar(@$missing_in_2nd_attempt) < scalar(@$missing)); + } + if (scalar(@$missing) > 0) { + # A last ditch attempt: use the prefix found for the files in the tar + # manifest also for the files unpacked on the local file system. + switch_to_manifest_prefix(\...@prefixes); + @lps = @paths_in_tree; + @tps = @manifest_entries; + $missing_in_3rd_attempt = checkpathsets(\...@lps, \...@tps, \...@prefixes); + debug(sprintf "-- 3rd*AT: files missing, %s", scalar(@$missing_in_3rd_attempt)); + + # All paths found? Great! Let the caller know. + return $missing_in_3rd_attempt if (scalar(@$missing_in_3rd_attempt) < scalar(@$missing)); + } + return $missing; +} + +sub switch_to_manifest_prefix { + my $prefixes = shift; + my @ldirs = splitdir($$prefixes[0]); + my @tdirs = splitdir($$prefixes[1]); + $$prefixes[0] = catdir(($ldirs[0], @tdirs[1..$#tdirs])); + debug(sprintf "!!! 3rd*AT -> PFX '%s'", join("', '", @$prefixes)); +} + +sub checkprefixes { + my $prefixes = shift; + my ($a, $b) = @$prefixes; + + my $a_is_prefix_of_b = ($a eq substr($b, 0, length $a)); + my $b_is_prefix_of_a = ($b eq substr($a, 0, length $b)); + + return ($a_is_prefix_of_b || $b_is_prefix_of_a); +} + +sub pick2paths { + # Pick 2 out of a list of paths so we can compute the common path prefix. + my $paths = shift; + + return unless scalar(@$paths) > 1; + + @$paths = sort @$paths; + + # Return the first and the last path for reasons of diversity. This gives + # us the highest chance of getting the prefix right. + my @picks = ($$paths[0], $$paths[-1]); + + debug(sprintf "/// SOR: %s", Dumper(\...@picks)); + return \...@picks; +} + +sub compute_representative_path_prefix { + # Given a set of paths, find the prefixes of a "representative" pair of + # paths. If the set consists of a sole path just return it. + my $paths = shift; + + if (scalar(@$paths) == 1) { + return $$paths[0]; + } + + my $picks = pick2paths($paths); + + return if !defined($picks); + + my ($a, $b) = @$picks; + + my @as = split(//, $a); + my @bs = split(//, $b); + my @zippedchars = zip @as, @bs; + my $counter = 0; + my $last_slash_seen_at = -1; + my ($achar, $bchar); + local *next_pair_of_chars = sub { @zippedchars[2*$counter..2*$counter+1] }; + local *check_counter = sub { (defined($achar) && ($achar eq '/')) || (defined($bchar) && ($bchar eq '/')) }; + + while (($achar, $bchar) = next_pair_of_chars()) { + $last_slash_seen_at = $counter if check_counter(); + last if !defined($achar) || !defined($bchar) || $achar ne $bchar; + $counter += 1; + } + ($achar, $bchar) = next_pair_of_chars(); + $last_slash_seen_at = $counter if check_counter(); + + # Make sure the prefix ends on path segment boundary. + my $result; + if ($last_slash_seen_at == -1) { + $result = ''; + } + else { + $result = substr($a, 0, $counter); + } + $result =~ s,/*$,,; + debug(sprintf "... RPP: %s", Dumper($result)); + + return $result; +} + +sub normalizepaths { + # This procedure + # + # - figures out what kind of path prefixes the files + # in the manifest and in the local tree use respectively + # - strips off these prefixes as needed + # + # so it becomes possible to find out which files in the manifest + # are *not* in the local tree. + + # References to the lists that hold the local tree and manifest paths + # respectively. + my $paths_in_tree = shift; + my $manifest_entries = shift; + my $prefixes = shift; + + # Any path prefixes found? An empty list indicates a bug or error. + my @pathsets = ($paths_in_tree, $manifest_entries); + my %pdata = zip @$prefixes, @pathsets; + while (my ($prefix, $paths) = each(%pdata)) { + # Do we need to strip off prefixes for this set of paths? + if (length($prefix) > 0) { + # Escape regex meta chars in path prefixes (example: + # aptoncd-0.1.98+bzr112) + $prefix =~ s![+?*]!\\$&!g; + # Pre-compile regex for prefix. + my $pre = qr(^$prefix/?); + map { $_ = canonpath($_); s,$pre,,; $_ } @$paths; + } + } + + return; +} + +1; + === modified file 'pristine-tar' --- pristine-tar 2009-04-14 21:23:22 +0000 +++ pristine-tar 2009-10-14 07:36:17 +0000 @@ -8,7 +8,7 @@ B<pristine-tar> [-vdk] gentar delta tarball -B<pristine-tar> [-vdk] gendelta tarball delta +B<pristine-tar> [-vdk] [-l localtree] gendelta tarball delta B<pristine-tar> [-vdk] [-m message] commit tarball [upstream] @@ -39,6 +39,10 @@ If the delta filename is "-", it is written to standard output. +If the optional "localtree" argument is passed gendelta will check whether +the files required are all present in the local tree and abort if this is +not the case. + =item pristine-tar gentar This takes the specified delta file, and the files in the current @@ -122,12 +126,17 @@ use warnings; use strict; +use File::Basename; +use File::Path; use File::Temp; -use File::Path; -use File::Basename; use Getopt::Long; use Cwd qw{getcwd abs_path}; +use FindBin; +use lib $FindBin::Bin; + +use Ptutils; + # magic identification use constant GZIP_ID1 => 0x1F; use constant GZIP_ID2 => 0x8B; @@ -144,6 +153,7 @@ my $debug=0; my $keep=0; my $message; +my $localtree; # Force locale to C since tar may output utf-8 filenames differently # depending on the locale. @@ -151,7 +161,7 @@ sub usage { print STDERR "Usage: pristine-tar [-vdk] gentar delta tarball\n"; - print STDERR " pristine-tar [-vdk] gendelta tarball delta\n"; + print STDERR " pristine-tar [-vdk] [-l localtree] gendelta tarball delta\n"; print STDERR " pristine-tar [-vdk] [-m message] commit tarball [upstream]\n"; print STDERR " pristine-tar [-vdk] checkout tarball\n"; exit 1; @@ -195,13 +205,17 @@ my $source=shift; my %optio...@_; - my @manifest; - open (IN, "$tempdir/manifest") || die "$tempdir/manifest: $!"; - while (<IN>) { - chomp; - push @manifest, $_; - } - close IN; + my @manifest; + open (IN, "$tempdir/manifest") || die "$tempdir/manifest: $!"; + while (<IN>) { + chomp; + # Unicode code points (e.g. \201) are read as text i.e. as four + # characters and *not* as a single byte. We convert them to bytes + # here. + s/\\(\d{3})/"chr(0$1)"/eeg; + push @manifest, $_; + } + close IN; # The manifest and source should have the same filenames, # but the manifest probably has all the files under a common @@ -399,6 +413,20 @@ my $tempdir=tempdir(); + genmanifest($tarball, "$tempdir/manifest"); + + if (defined $localtree) { + # Check whether all paths in the manifest are also present in the + # local tree. + my $missing_in_tree = Ptutils::checkmanifest("$tempdir/manifest", $localtree); + + if ($#$missing_in_tree >= 0) { + # Abort here since we don't have all the files required for + # generating a pristine tar in the local tree. + error("Files missing in local tree: @$missing_in_tree"); + } + } + my $stdout=0; if ($delta eq "-") { $stdout=1; @@ -449,7 +477,6 @@ $tarball="$tempdir/origtarball"; } - genmanifest($tarball, "$tempdir/manifest"); my $recreatetarball; if (! exists $opts{recreatetarball}) { my $sourcedir="$tempdir/tmp"; @@ -746,6 +773,7 @@ Getopt::Long::Configure("bundling"); if (! GetOptions( + "l|localtree=s" => \$localtree, "m|message=s" => \$message, "v|verbose!" => \$verbose, "d|debug!" => \$debug,
signature.asc
Description: OpenPGP digital signature

