On Tue, Jun 27, 2017 at 05:41:12AM +0200, Cyril Brulebois wrote: > Jonathan Wiltshire <j...@debian.org> (2016-11-20): > > On Wed, Feb 24, 2016 at 07:32:30AM +0000, Colin Watson wrote: > > > As the new debmirror maintainer, I'd like to fix a couple of problems > > > with the version in stable. One is serious and causes debmirror to sit > > > in an infinite loop when run against the current Debian archive > > > (#808216, #815149). While the other doesn't have quite such dire > > > consequences, a current desktop system with the appstream package > > > installed needs to have the DEP-11 metadata files present in order for > > > "apt update" to succeed (#814416). > > > > Is this still desired or should the bug be closed? > > Ping?
Hmm, wow, I left this one for a while, didn't I? In the meantime I've accumulated some more fixes in my jessie branch that are needed to do a more reasonable job of mirroring current repositories. Can I get an updated approval for this patch? diff --git a/debian/changelog b/debian/changelog index 98a5953..e840339 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,15 @@ +debmirror (1:2.16+deb8u1) UNRELEASED; urgency=medium + + * Tolerate unknown lines in *.diff/Index (closes: #808216, #815149). + * Mirror DEP-11 metadata files (closes: #814416). + * Prefer xz over gz, and cope with either being missing as long as we can + get some version of the index file in question. + * Use check_lists to check Translation files rather than a similar custom + function; this allows use of stronger hashes. + * Mirror and validate InRelease files (closes: #619188). + + -- Colin Watson <cjwat...@debian.org> Wed, 24 Feb 2016 07:26:17 +0000 + debmirror (1:2.16) unstable; urgency=low * Fix confusing output with --precleanup. diff --git a/debmirror b/debmirror index 0c2543c..88768a5 100755 --- a/debmirror +++ b/debmirror @@ -671,6 +671,10 @@ my %file_lists; # info. Files also get registered in %files. my %i18n_get; +# Hash to record which DEP-11 metadata files need to be downloaded. Files +# also get registered in %files. +my %dep11_get; + # Separate hash for files belonging to Debian Installer images. # This data is not cached. my %di_files; @@ -967,8 +971,18 @@ foreach my $dist (@dists) { $files{"dists/$codename$dist_sdir/Release.gpg"}=1; $files{$tempdir."/"."dists/$codename$dist_sdir/Release.gpg"}=1; if ($debmarshal) { - link_release_into_snapshot($mirrordir,$dist,$next,$tempdir, - $codename,$dist_sdir); + link_release_into_snapshot($mirrordir,$dist,$next,$tempdir, + $codename,$dist_sdir,"Release.gpg"); + } + } + if (-f "$tdir/InRelease") { + rename("$tdir/InRelease", "$tempdir/dists/$codename$dist_sdir/InRelease") + or die "Error while moving $tdir/InRelease: $!\n"; + $files{"dists/$codename$dist_sdir/InRelease"}=1; + $files{$tempdir."/"."dists/$codename$dist_sdir/InRelease"}=1; + if ($debmarshal) { + link_release_into_snapshot($mirrordir,$dist,$next,$tempdir, + $codename,$dist_sdir,"InRelease"); } } } @@ -1004,7 +1018,7 @@ if ($num_errors != 0 && $ignore_missing_release) { if ($num_errors != 0) { print "Errors:\n ".join(" ",@errlog) if (@errlog); - die "Failed to download some Release or Release.gpg files!\n"; + die "Failed to download some Release, Release.gpg or InRelease files!\n"; } # Enable caching again for http @@ -1092,7 +1106,7 @@ if ($num_errors != 0) { # for the ftp method. $do_dry_run = $dry_run; -# Determine size of Contents and Translation files to get. +# Determine size of Contents, Translation, and DEP-11 files to get. if ($getcontents) { # Updates of Contents files using diffs are done here; only full downloads # are delayed. @@ -1126,6 +1140,7 @@ foreach my $dist (keys %distset) { next unless exists $distset{$dist}{mirror}; foreach my $section (@sections) { i18n_from_release($dist,"$section/i18n"); + dep11_from_release($dist,"$section/dep11"); } } @@ -1296,16 +1311,17 @@ SOURCE: } } -# With pre-mirror cleanup Contents and Translation files need to be +# With pre-mirror cleanup Contents, Translation, and DEP-11 files need to be # downloaded before the cleanup as otherwise they would be deleted # because they haven't been registered yet. # With post-mirror cleanup it's more neat to do all downloads together. # This could be simplified if we could register the files earlier. -# Download Contents and Translation files. +# Download Contents, Translation, and DEP-11 files. init_connection(); get_contents_files() if ($getcontents); get_i18n_files(); +get_dep11_files(); # Pre-mirror cleanup if ($pre_cleanup) { @@ -1626,21 +1642,6 @@ sub check_file { return 0; } -# Always checks both file size and sha1 as the files get updated (this is -# similar to what is done in check_lists, which forces verify_checksums). -sub check_i18n { - my ($filename, $size, $sha1)=@_; - my $digest = Digest::SHA->new(1); - my $ret = 0; - - if (-f "$filename" and ($size == -s _)) { - open HANDLE, $filename or die "$filename: $!"; - $digest->addfile(*HANDLE); - $ret = ($sha1 eq $digest->hexdigest); - } - return $ret; -} - # Check uncompressed diff content against sha1sum from Index file. sub check_diff { my ($filename, $size, $sha1) = @_; @@ -1959,12 +1960,12 @@ sub update_latest_links { } sub link_release_into_snapshot { - my ($mirrordir,$dist,$next,$tempdir,$codename,$dist_sdir) = @_; - - unlink("$mirrordir/dists/$dist/$next/Release.gpg"); - link("$tempdir/dists/$codename$dist_sdir/Release.gpg", - "$mirrordir/dists/$dist/$next/Release.gpg") - or die "Error while linking $tempdir/dists/$codename$dist_sdir/Release.gpg: $!\n"; + my ($mirrordir,$dist,$next,$tempdir,$codename,$dist_sdir,$base) = @_; + + unlink("$mirrordir/dists/$dist/$next/$base"); + link("$tempdir/dists/$codename$dist_sdir/$base", + "$mirrordir/dists/$dist/$next/$base") + or die "Error while linking $tempdir/dists/$codename$dist_sdir/$base: $!\n"; } sub link_contents_into_snapshot { @@ -1982,7 +1983,7 @@ sub link_contents_into_snapshot { } } -sub link_translation_into_snapshot { +sub link_auxfile_into_snapshot { my ($file,$dist,$distpath,$filename,$mirrordir,$tempdir) = @_; my $next = get_next_snapshot($dist); my $target_path = "$mirrordir/dists/$dist/$next/$distpath"; @@ -1993,6 +1994,76 @@ sub link_translation_into_snapshot { or die "Error while linking $tempdir/$file: $!"; } +sub gpg_verify { + my (@files) = @_; + # Check for gpg + if (system("gpgv --version >/dev/null 2>/dev/null")) { + say("gpgv failed: gpgv binary missing?"); + push (@errlog,"gpgv failed: gpgv binary missing?\n"); + $num_errors++; + } else { + # Verify Release signature + my $gpgv_res = 0; + my $outp = IO::Pipe->new; + my $errp = IO::Pipe->new; + my $gpgvout = ""; + my $gpgverr = ""; + if (my $child = fork) { + $outp->reader; + $errp->reader; + my $sel = IO::Select->new; + $sel->add($outp, $errp); + while (my @ready = $sel->can_read) { + for (@ready) { + my $buf = ""; + my $bytesread = $_->read($buf, 1024); + if (!defined($bytesread)) { + die "read error: $!\n"; + } elsif ($bytesread == 0) { + $sel->remove($_); + $_->close; + } else { + if ($_ == $outp) { + $gpgvout .= $buf; + } + if ($_ == $errp) { + $gpgverr .= $buf; + } + } + } + } + + waitpid($child, 0) == -1 + and die "was pid $child automatically reaped?\n"; + $gpgv_res = not $?; + } + else { + $outp->writer; + $errp->writer; + STDOUT->fdopen(fileno($outp), "w") or die; + STDERR->fdopen(fileno($errp), "w") or die; + my @gpgv = qw(gpgv --status-fd 1); + push @gpgv, (map { ('--keyring' => $_) } @keyrings); + push @gpgv, @files; + exec(@gpgv) or die "exec: $gpgv[0]: $!\n"; + } + + # In debug or verbose mode, display the gpg error message on stdout. + if (! $gpgv_res || $debug) { + print $gpgvout; + print $gpgverr; + } + if ($verbose && ! $debug) { + print $gpgverr; + } + if (! $gpgv_res) { + say("$files[0] signature does not verify."); + push (@errlog,"$files[0] signature does not verify\n"); + $num_errors++; + } + } +} + sub get_release { my ($tdir, $dist) = @_; @@ -2003,82 +2074,36 @@ sub get_release { # is set; needed because remote_get() can register errors my @t_errlog = @errlog; my $t_errors = $num_errors; + remote_get("dists/$dist/InRelease", "$tempdir/.tmp"); + + my @inr_t_errlog = @errlog; + my $inr_t_errors = $num_errors; remote_get("dists/$dist/Release.gpg", "$tempdir/.tmp"); + # We only need one of InRelease and Release.gpg. + if ($num_errors == $t_errors || $num_errors == $inr_t_errors) { + @errlog = @t_errlog; + $num_errors = $t_errors; + } + if (! $check_gpg) { # Nothing to do. } - elsif (-f "$tdir/Release" && -f "$tdir/Release.gpg") { - # Check for gpg - if (system("gpgv --version >/dev/null 2>/dev/null")) { - say("gpgv failed: gpgv binary missing?"); - push (@errlog,"gpgv failed: gpgv binary missing?\n"); + else { + my $got_gpg = 0; + if (-f "$tdir/Release" && -f "$tdir/Release.gpg") { + $got_gpg = 1; + gpg_verify("$tdir/Release.gpg", "$tdir/Release"); + } + if (-f "$tdir/InRelease") { + $got_gpg = 1; + gpg_verify("$tdir/InRelease"); + } + if (! $got_gpg) { + say("Release gpg signature does not verify, file missing."); + push (@errlog,"Release gpg signature does not verify\n"); $num_errors++; - } else { - # Verify Release signature - my $gpgv_res = 0; - my $outp = IO::Pipe->new; - my $errp = IO::Pipe->new; - my $gpgvout = ""; - my $gpgverr = ""; - if (my $child = fork) { - $outp->reader; - $errp->reader; - my $sel = IO::Select->new; - $sel->add($outp, $errp); - while (my @ready = $sel->can_read) { - for (@ready) { - my $buf = ""; - my $bytesread = $_->read($buf, 1024); - if (!defined($bytesread)) { - die "read error: $!\n"; - } elsif ($bytesread == 0) { - $sel->remove($_); - $_->close; - } else { - if ($_ == $outp) { - $gpgvout .= $buf; - } - if ($_ == $errp) { - $gpgverr .= $buf; - } - } - } - } - - waitpid($child, 0) == -1 - and die "was pid $child automatically reaped?\n"; - $gpgv_res = not $?; - } - else { - $outp->writer; - $errp->writer; - STDOUT->fdopen(fileno($outp), "w") or die; - STDERR->fdopen(fileno($errp), "w") or die; - my @gpgv = qw(gpgv --status-fd 1); - push @gpgv, (map { ('--keyring' => $_) } @keyrings); - push @gpgv, "$tdir/Release.gpg", "$tdir/Release"; - exec(@gpgv) or die "exec: $gpgv[0]: $!\n"; - } - - # In debug or verbose mode, display the gpg error message on stdout. - if (! $gpgv_res || $debug) { - print $gpgvout; - print $gpgverr; - } - if ($verbose && ! $debug) { - print $gpgverr; - } - if (! $gpgv_res) { - say("Release gpg signature does not verify."); - push (@errlog,"Release gpg signature does not verify\n"); - $num_errors++; - } } - } else { - say("Release gpg signature does not verify, file missing."); - push (@errlog,"Release gpg signature does not verify\n"); - $num_errors++; } if ($ignore_release_gpg) { @@ -2198,55 +2223,83 @@ sub get_index { $files{"$tempdir/$subdir/$file.diff/Index"}=1; } + my $got_any_file=0; + if (exists $file_lists{"$tempdir/$subdir/$file.xz"}{size}) { + my $got_xz=0; + if (!check_lists("$tempdir/$subdir/$file.xz")) { + if (remote_get("$subdir/$file.xz")) { + $got_xz=1; + } else { + push (@errlog,"$subdir/$file.xz failed checksum verification\n"); + $num_errors++; + } + } else { + $bytes_gotten += $file_lists{"$tempdir/$subdir/$file.xz"}{size}; + $got_xz=1; + } + if ($got_xz) { + if (!check_lists("$tempdir/$subdir/$file")) { + system_redirect_io("xz -d", "$tempdir/$subdir/$file.xz", "$tempdir/$subdir/$file"); + } + if (!check_lists("$tempdir/$subdir/$file.gz") && ! $slow_cpu) { + system_redirect_io("gzip $gzip_options", "$tempdir/$subdir/$file", "$tempdir/$subdir/$file.gz"); + } + $files{"$subdir/$file.xz"}=1; + $files{"$tempdir/$subdir/$file.xz"}=1; + $got_any_file=1; + } + } if (exists $file_lists{"$tempdir/$subdir/$file.gz"}{size}) { + my $got_gz=0; if (!check_lists("$tempdir/$subdir/$file.gz")) { if (remote_get("$subdir/$file.gz")) { - system_redirect_io("gzip -d", "$tempdir/$subdir/$file.gz", "$tempdir/$subdir/$file"); - if (! $slow_cpu) { - system_redirect_io("bzip2", "$tempdir/$subdir/$file", "$tempdir/$subdir/$file.bz2"); - } + $got_gz=1; } else { push (@errlog,"$subdir/$file.gz failed checksum verification\n"); $num_errors++; } } else { $bytes_gotten += $file_lists{"$tempdir/$subdir/$file.gz"}{size}; + $got_gz=1; } - } elsif ($ignore_missing_release) { - say("Ignoring missing Release file for $subdir/$file.gz"); - push (@errlog,"Ignoring missing Release file for $subdir/$file.gz\n"); - if (remote_get("$subdir/$file.gz")) { - system_redirect_io("gzip -d", "$tempdir/$subdir/$file.gz", "$tempdir/$subdir/$file"); - } - } else { - if (-f "$subdir/$file.gz") { - say("$subdir/$file.gz exists locally but not in Release"); - die "Won't mirror without $subdir/$file.gz signature in Release"; - } else { - say("$subdir/$file.gz does not exist locally or in Release, skipping.") if ($debug); + if ($got_gz) { + if (!check_lists("$tempdir/$subdir/$file")) { + system_redirect_io("gzip -d", "$tempdir/$subdir/$file.gz", "$tempdir/$subdir/$file"); + } + if (!check_lists("$tempdir/$subdir/$file.xz") && ! $slow_cpu) { + system_redirect_io("xz", "$tempdir/$subdir/$file", "$tempdir/$subdir/$file.xz"); + } + $files{"$subdir/$file.gz"}=1; + $files{"$tempdir/$subdir/$file.gz"}=1; + $got_any_file=1; } } if (exists $file_lists{"$tempdir/$subdir/$file"}) { if (!check_lists("$tempdir/$subdir/$file")) { if (remote_get("$subdir/$file")) { - if (! $slow_cpu) { - system_redirect_io("bzip2", "$tempdir/$subdir/$file", "$tempdir/$subdir/$file.bz2"); - } + $got_any_file=1; } else { push (@errlog,"$subdir/$file failed checksum verification\n"); $num_errors++; } } else { $bytes_gotten += $file_lists{"$tempdir/$subdir/$file"}{size}; + $got_any_file=1; } } - if (exists $file_lists{"$tempdir/$subdir/$file.bz2"}) { - if (!check_lists("$tempdir/$subdir/$file.bz2")) { - if (!remote_get("$subdir/$file.bz2")) { - push (@errlog,"$subdir/$file.bz2 failed checksum verification, removing\n"); - } + if ($got_any_file) { + } elsif ($ignore_missing_release) { + say("Ignoring missing Release file for $subdir/$file.gz"); + push (@errlog,"Ignoring missing Release file for $subdir/$file.gz\n"); + if (remote_get("$subdir/$file.gz")) { + system_redirect_io("gzip -d", "$tempdir/$subdir/$file.gz", "$tempdir/$subdir/$file"); + } + } else { + if (-f "$subdir/$file.gz") { + say("$subdir/$file.gz exists locally but not in Release"); + die "Won't mirror without $subdir/$file.gz signature in Release"; } else { - $bytes_gotten += $file_lists{"$tempdir/$subdir/$file.bz2"}{size}; + say("$subdir/$file.gz does not exist locally or in Release, skipping.") if ($debug); } } if (exists $file_lists{"$tempdir/$subdir/Release"}) { @@ -2265,13 +2318,9 @@ sub get_index { } else { die "get_index called with unknown type $file\n"; } - $files{"$subdir/$file.gz"}=1; - $files{"$subdir/$file.bz2"}=1; # Uncompressed files are no longer kept on the mirrors - $files{"$subdir/$file"}=1 unless exists $file_lists{"$tempdir/$subdir/$file.gz"}; + $files{"$subdir/$file"}=1 unless exists $file_lists{"$tempdir/$subdir/$file.xz"} or exists $file_lists{"$tempdir/$subdir/$file.gz"}; $files{"$subdir/Release"}=1; - $files{"$tempdir/$subdir/$file.gz"}=1; - $files{"$tempdir/$subdir/$file.bz2"}=1; $files{"$tempdir/$subdir/$file"}=1; $files{"$tempdir/$subdir/Release"}=1; } @@ -2396,7 +2445,6 @@ sub i18n_from_release { my $filename = substr($path, length($compdir)+1, length($path)-length($compdir)-1); next if $filename !~ /bz2$/; - my ($sha1, $size) = ($file_lists{$path}{SHA1}, $file_lists{$path}{size}); if(!(defined($include) && ($subdir."/".$filename)=~/$include/o)) { next if (defined($exclude) && ($subdir."/".$filename)=~/$exclude/o); } @@ -2404,10 +2452,8 @@ sub i18n_from_release { $files{"$subdir/$filename"}=1; $files{$tempdir."/"."$subdir/$filename"}=1; - if (! check_i18n("$tempdir/$subdir/$filename", $size, $sha1)) { - $bytes_to_get += $size; - $i18n_get{"$subdir/$filename"}{sha1} = $sha1; - $i18n_get{"$subdir/$filename"}{size} = $size; + if (! check_lists($path)) { + $bytes_to_get += $file_lists{$path}{size}; $i18n_get{"$subdir/$filename"}{dist} = $dist; $i18n_get{"$subdir/$filename"}{distpath} = $distpath; $i18n_get{"$subdir/$filename"}{filename} = $filename; @@ -2418,15 +2464,70 @@ sub i18n_from_release { sub get_i18n_files { say("Get Translation files ..."); foreach my $file (sort keys %i18n_get) { - if (! check_i18n("$tempdir/$file", $i18n_get{$file}{size}, $i18n_get{$file}{sha1})) { + if (! check_lists("$tempdir/$file")) { remote_get("$file"); if ($debmarshal) { - link_translation_into_snapshot($file, - $i18n_get{$file}{dist}, - $i18n_get{$file}{distpath}, - $i18n_get{$file}{filename}, - $mirrordir, - $tempdir); + link_auxfile_into_snapshot($file, + $i18n_get{$file}{dist}, + $i18n_get{$file}{distpath}, + $i18n_get{$file}{filename}, + $mirrordir, + $tempdir); + } + } + } +} + +sub dep11_from_release { + my ($dist,$distpath) = @_; + my $subdir = "dists/$dist/$distpath"; + my $compdir = $tempdir."/".$subdir; + my ($size, $filename); + my $exclude = "(".join("|", @excludes).")" if @excludes; + my $include = "(".join("|", @includes).")" if @includes; + + # Create dep11 directories + make_dir($subdir); + make_dir($compdir); + + # Search for DEP-11 files in file_lists + foreach my $path (keys %file_lists) { + next if length($compdir)+1>length($path); # the +1 stands for the slash after $compdir + next if substr($path, 0, length($compdir)) ne $compdir; + + my $filename = substr($path, length($compdir)+1, length($path)-length($compdir)-1); + next if $filename !~ /\.(?:gz|bz2|xz)$/; + my $all_arches = "(".join("|", map(quotemeta, @arches)).")"; + next if $filename =~ /^Components-/ and $filename !~ /^Components-$all_arches\./; + + my $size = $file_lists{$path}{size}; + if(!(defined($include) && ($subdir."/".$filename)=~/$include/o)) { + next if (defined($exclude) && ($subdir."/".$filename)=~/$exclude/o); + } + + $files{"$subdir/$filename"}=1; + $files{$tempdir."/"."$subdir/$filename"}=1; + if (!check_lists("$tempdir/$subdir/$filename")) { + $bytes_to_get += $size; + $dep11_get{"$subdir/$filename"}{dist} = $dist; + $dep11_get{"$subdir/$filename"}{distpath} = $distpath; + $dep11_get{"$subdir/$filename"}{filename} = $filename; + } + } +} + +sub get_dep11_files { + say("Get DEP-11 metadata files ..."); + foreach my $file (sort keys %dep11_get) { + if (!check_lists("$tempdir/$file")) { + remote_get($file); + if ($debmarshal) { + link_auxfile_into_snapshot($file, + $dep11_get{$file}{dist}, + $dep11_get{$file}{distpath}, + $dep11_get{$file}{filename}, + $mirrordir, + $tempdir); } } } @@ -2463,6 +2564,9 @@ sub fetch_and_apply_diffs { $diff_size{$file} = $size; } } + else { + $_ = <INDEX>; + } } close(INDEX); @@ -2608,6 +2712,7 @@ sub di_check_dists { name_release("d-i", $tdir, $di_dist); unlink "$tdir/Release"; unlink "$tdir/Release.gpg"; + unlink "$tdir/InRelease"; } } } Thanks, -- Colin Watson [cjwat...@debian.org]