#!/usr/bin/perl -w

# TODO: I'd like to be able to tell it to get some extra files, by name.
# I'm thinking Contents files. It would be really nice if it could pull
# a whole directory -- think project/trace, or disks-i386...
# TODO: It would probably be cleaner and easier to learn if it took
# apt-style lines to tell where to mirror from and what portions to use.

=head1 NAME

debmirror - Debian partial mirror script, with ftp, http, hftp or
rsync and package pool support

=head1 SYNOPSIS

debmirror [options] mirrordir

=head1 DESCRIPTION

This program downloads and maintains a partial local Debian mirror. It can
mirror any combination of architectures, distributions, and sections. Files
are transferred by ftp, and package pools are fully supported. It also does
locking and updates trace files.

To support package pools, this program mirrors in three steps.

=over 4

=item 1. download Packages and Sources files

First it downloads all Packages and Sources files for the subset of Debian it
was instructed to get.

=item 2. clean up unknown files

Any files and directories on the local mirror that are not in the list are
removed.

=item 3. download everything else

The Packages and Sources files are scanned, to build up a list of all the
files they refer to. A few other miscellaneous files are added to the list.
Then the program makes sure that each file in the list is present on the 
local mirror and is up-to-date, using file size (and optionally md5sum) checks.
Any necessary files are downloaded.

=back

=cut

# Print an optional error message followed by the usage summary on
# stderr, then terminate with exit status 1. Any arguments passed in
# are joined with spaces and emitted as the error line.
sub usage {
  if (@_) {
    warn join(" ", @_) . "\n";
  }
  warn <<EOF;
Usage: $0 [--debug] [--progress] [--verbose] [--source|--nosource]
          [--md5sums] [--passive] [--host=remotehost]
          [--user=remoteusername] [--passwd=remoteuserpassword]
          [--method=ftp|hftp|http|rsync]
          [--timeout=seconds] [--root=directory]
          [--dist=foo[,bar,..] ...] [--section=foo[,bar,..] ...]
          [--arch=foo[,bar,..] ...] [--skippackages] [--getcontents]
          [--postcleanup|--cleanup|--nocleanup] [--adddir=directory]
          [--ignore=regex] [--exclude=regex] [--include=regex]
          [--exclude-deb-section=regex] [--max-batch=number]
          [--rsync-batch=number] [--ignore-missing-release]
          [--ignore-release-gpg]
          [--limit-priority=regex] [--dry-run]
          [--proxy=http://user:pass\@url:port/] [--help]
          [--rsync-options=options] [--ignore-small-errors]
          [--pdiff=mirror|use|none]
          mirrordir

For details, see man page.
EOF
  exit(1);
}

=head1 OPTIONS

=over 4

=item mirrordir

This required parameter specifies where the local mirror directory is. If the
directory does not exist, it will be created. Be careful; telling this 
program that your home directory is the mirrordir is guaranteed to replace
your home directory with a Debian mirror!

=item --debug

Enables verbose debug output, including ftp protocol dump.

=item --progress -p

Displays progress bars as files are downloaded.

=item --verbose -v

Displays progress between file downloads.

=item --source

Include source in the mirror (default).

=item --nosource

Do not include source.

=item --md5sums -m

Use md5sums to determine if files on the local mirror that are the correct
size actually have the correct content. Not enabled by default, because
it is too paranoid, and too slow.

=item --passive

Download in passive mode.

=item --host=remotehost -h

Specify the remote host to mirror from. Defaults to ftp.debian.org,
you are strongly encouraged to find a closer mirror.

=item --user=remoteusername -u

Specify the remote user name to use to log in to the remote host. Helpful when
dealing with brain damaged proxy servers. Defaults to anonymous.

=item --passwd=remoteuserpassword

Specify the remote user password to use to log into the remote ftp host.
It is used with --user and defaults to anonymous@.

=item --method=ftp|hftp|http|rsync -e

Specify the method to download files. Currently, supported methods are
ftp, hftp (ftp over http proxy), http or rsync. To connect a rsync
server, you need to put ':' prefix in the root directory
(i.e. ":debian", which means host::debian).

=item --proxy=http://user:pass@url:port/

Specifies the http proxy (like Squid) to use for http and hftp method.

=item --timeout=seconds -t

Specifies the timeout to use for network operations (either FTP or rsync).
Set this to a higher value if you experience failed downloads. Defaults
to 300 seconds.

=item --root=directory -r directory

Specifies the directory on the remote host that is the root of the Debian
archive. Defaults to "/debian", which will work for most mirrors. The root
directory has a dists/ subdirectory.

=item --dist=foo[,bar,..] -d foo

Specify the distribution (woody, sarge, sid) of Debian to mirror. This
switch may be used multiple times, and multiple distributions may be
specified at once, separated by commas. Using the links (stable,
testing, unstable) does not have the expected results but you may add
those links manually. Defaults to mirroring sid.

=item --section=foo[,bar,..] -s foo

Specify the section of Debian to mirror. Defaults to
main,contrib,non-free,main/debian-installer.

=item --arch=foo[,bar,..] -a foo

Specify the architectures to mirror. The default is --arch=i386.
Specifying --arch=none will mirror no archs.

=item --postcleanup

Clean up the local mirror but only after mirroring is complete and
only if there was no error. This is the default.

=item --cleanup

Do clean up any unknown files and directories on the local mirror (see
step 2 above).

=item --nocleanup

Do not clean up the local mirror after mirroring is complete.

=item --ignore=regex

Never delete any files whose filenames match the regex. May be used multiple times.

=item --exclude=regex

Never download any files whose filenames match the regex. May be used multiple times.

=item --exclude-deb-section=regex

Never download any files whose Debian Section (games, doc, oldlibs,
science, ...) match the regex.  May be used multiple times.

=item --limit-priority=regex

Limit download to files whose Debian Priority (required, extra,
optional, ...) match the regex.  May be used multiple times.

=item --include=regex

Don't exclude any files whose filenames match the regex. May be used multiple times.

=item --skippackages

Don't re-download Packages and Sources files. Useful if you know they are
up-to-date.

=item --adddir directory

Also download Packages and Sources files from the specified directory
on the remote host (the directory is relative to the root of the
Debian archive). If you used this option for "--adddir
dists/proposed-updates" in the past use "--dist
<dist>-proposed-updates" now. This feature is now obsolete and will be
removed soon.

=item --getcontents

Download Contents.arch.gz files.

=item --max-batch=number

Download at most max-batch number of files (and ignore rest).

=item --rsync-batch=number

Download at most number of files with each rsync call and then loop.

=item --ignore-missing-release

Don't fail if the Release file is missing.

=item --ignore-release-gpg

Don't fail if the Release.gpg file is missing.

=item --dry-run

Simulate a mirror run. This will still download the meta files to .temp
but won't replace the old meta files, won't download debs and source
files and only simulates cleanup.

=item --rsync-options=options

Specify alternative rsync options to be used. Default options are
"-aIL --partial". Care must be taken when specifying alternative
options not to disrupt operations, it's best to only add to those
options.

The most likely option to add is "--bwlimit=x" to avoid saturating the
bandwidth of your link.

=item --ignore-small-errors

Normally debmirror will report an error if any deb files or sources
fail to download and refuse to update the meta data to an inconsistent
mirror. Normally this is a good thing as it indicates something went
wrong during download and should be retried. But sometimes the
upstream mirror actually is broken. Specifying --ignore-small-errors
causes debmirror to ignore missing or broken deb and source files but
still be pedantic about checking meta files.

=item --pdiff=mirror|use|none

If the Release file contains entries for pdiff files then it will
mirror them and attempt to update the Packages files with them. This is
the default 'mirror' behaviour. Specifying --pdiff=use will use the
pdiff files to update the index files but will not keep them in the
mirror. Specifying --pdiff=none will completely ignore pdiff files.

=item --help

Display a usage summary.

=back

=head1 ARCHIVE SIZE

Mirror size for a single arch and binary only (in MiB):

         | sarge | etch |  sid  |  all
---------+-------+------+-------+-------
main     | 8816  | 9126 | 10777 | 20577
contrib  |  126  |  118 |   291 |   363
non-free |  282  |  345 |   464 |   666
d-i      |   44  |   28 |    31 |    78
all      | 9187  | 9536 | 11476 | 21502

Mirror size per arch (in MiB):

         | sarge | etch |  sid  |  all
---------+-------+------+-------+-------
source   |  9339 | 9419 | 11495 | 17521
all      |  4478 | 5047 |  6160 | 10459
alpha    |  4256 | 3906 |  4732 |  9708
amd64    |  3644 | 3635 |  4877 |  9152
arm      |  3445 | 3193 |  3933 |  7845
hppa     |  4112 | 3713 |  4541 |  9167
i386     |  4422 | 3979 |  5005 | 10477
ia64     |  4709 | 4489 |  5316 | 11043
m68k     |  3372 | 3072 |  3664 |  7139
mips     |  3631 | 3364 |  4099 |  8237
mipsel   |  3560 | 3319 |  4049 |  8106
powerpc  |  4208 | 3967 |  4742 |  9915
s390     |  3673 | 3452 |  4144 |  8489
sparc    |  3761 | 3585 |  4390 |  8893

All numbers reflect the state of 2005 Dec 9th and do not include any
filesystem overhead (which adds 1-2GB for ext2/3).

=head1 EXAMPLES

 debmirror /mirror/debian

Simply make a mirror in /mirror/debian, using all defaults.

 debmirror /mirror/debian --ignore=non-US/
 debmirror /mirror/debian/non-US -h non-us.debian.org -r /debian-non-US \
           -d sid/non-US -s main,contrib,non-free

Make one full mirror, and supplement it with a mirror of non-US, in a
directory inside.

 debmirror -a i386,sparc -s main -h ftp.kernel.org \
           -d sid -d sarge /home/me/debian/mirror --nosource \
           --progress

Make a mirror of i386 and sparc binaries, main only, and include both unstable
and testing versions of Debian. Download from ftp.kernel.org.

 debmirror -e rsync -r :debian /home/me/debian/mirror

Make a mirror using rsync. rsync server is ftp.debian.org::debian.

=head1 FILES

  ~/.gnupg/pubring.gpg

    Debmirror uses gpg to verify Release and Release.gpg using the
    default keyring ~/.gnupg/pubring.gpg. This can be changed by
    exporting GNUPGHOME resulting in $GNUPGHOME/pubring.gpg being used.

    To add the right key to this keyring you can import it from the
    debian keyring (in case of the debian archive) using:

      gpg --keyring /usr/share/keyrings/debian-role-keys.gpg --export \
          "Debian Archive Automatic Signing Key (2005)" | gpg --import

    or download the key from a keyserver:

      gpg --keyserver keyring.debian.org --recv-keys <key ID>

    <key ID> for debian and 2005 is 4F368D5D and can be found in the
    gpg error message from debmirror -v output.

=cut

use strict;
use Cwd;
use Net::FTP;
use Getopt::Long;
use File::Temp qw/ tempfile /;
use LockFile::Simple;
use Compress::Zlib;
use Digest::MD5;
use Digest::SHA1;
use LWP::UserAgent;

# Yeah, I use too many global variables in this program.
# Runtime flags and option accumulators. They are set from the command
# line below, and may be pre-seeded by the config files loaded further down.
our ($debug, $progress, $verbose, $passive, $skippackages, $getcontents);
our ($ua, $proxy);
our (@dists, @sections, @arches, @extra_dirs, @ignores, @excludes, @includes);
our (@excludes_deb_section, @limit_priority);
our $check_md5sums = 0;
our $check_downloads = 0;
# Cleanup mode defaults: clean up only after a successful mirror run.
our $cleanup=0;
our $post_cleanup=1;
our $no_cleanup=0;
our $do_source=1;
our $host="ftp.debian.org";
our $user="anonymous";
our $passwd="anonymous@";
our $remoteroot="/debian";
our $download_method="ftp";
our $timeout=300;
our $max_batch=0;
our $rsync_batch=200;
our $num_errors=0;
# Byte counters for progress reporting and the final speed summary.
our $bytes_to_get=0;
our $bytes_gotten=0;
our $ignore_release=0;
our $ignore_release_gpg=0;
our $start_time = time;
our $dry_run=0;
our $dry_run_var=0;
our $rsync_options="-aIL --partial";
our $ignore_small_errors=0;
# NOTE(review): the POD above documents 'mirror' as the default pdiff
# behaviour, but the code default here is "use" -- confirm which is intended.
our $pdiff_mode="use";
my @errlog;
my $HOME;
($HOME = $ENV{'HOME'}) or die "HOME not defined in environment!\n";

# Load in config files
require "/etc/debmirror.conf" if -r "/etc/debmirror.conf";
require "$HOME/.debmirror.conf" if -r "$HOME/.debmirror.conf";

# This hash holds all the files we know about, If the hash key is false,
# the file already exists in the mirror (or is locally created) and does not
# need to be downloaded, if it is true the file needs to be downloaded.
# Filenames should be relative to $mirrordir.
my %files;

my $help;
# Parse the command line; any unknown option prints usage and exits.
# NOTE(review): --nomd5sums sets $check_downloads (size-only checking of
# downloads) rather than negating $check_md5sums -- confirm intended.
GetOptions('debug'        => \$debug,
	   'progress|p'   => \$progress,
	   'verbose|v'    => \$verbose,
	   'source!'      => \$do_source,
	   'md5sums|m'    => \$check_md5sums,
	   'nomd5sums'    => \$check_downloads,
	   'passive!'     => \$passive,
	   'host|h=s'     => \$host,
	   'user|u=s'     => \$user,
	   'passwd=s'     => \$passwd,
	   'root|r=s'     => \$remoteroot,
	   'dist|d=s'     => \@dists,
	   'section|s=s'  => \@sections,
	   'arch|a=s'     => \@arches,
	   'adddir=s'     => \@extra_dirs,
	   'cleanup'      => \$cleanup,
	   'postcleanup'  => \$post_cleanup,
	   'nocleanup'    => \$no_cleanup,
	   'ignore=s'     => \@ignores,
	   'exclude=s'    => \@excludes,
	   'exclude-deb-section=s' => \@excludes_deb_section,
	   'limit-priority=s' => \@limit_priority,
	   'include=s'    => \@includes,
	   'skippackages' => \$skippackages,
	   'getcontents'  => \$getcontents,
	   'method|e=s'   => \$download_method,
	   'timeout|t=s'  => \$timeout,
	   'max-batch=s'  => \$max_batch,
	   'rsync-batch=s'  => \$rsync_batch,
	   'ignore-missing-release' => \$ignore_release,
	   'ignore-release-gpg' => \$ignore_release_gpg,
	   'dry-run'      => \$dry_run_var,
	   'proxy=s'      => \$proxy,
	   'rsync-options=s' => \$rsync_options,
	   'ignore-small-errors' => \$ignore_small_errors,
	   'pdiff=s'      => \$pdiff_mode,
	   'help'         => \$help,
) or usage;
usage if $help;

# This parameter is so important that it is the only required parameter.
my $mirrordir=shift or usage("mirrordir not specified");

# Post-process arrays. Allow commas to separate values the user entered.
# If the user entered nothing, provide defaults.
@dists=split(/,/,join(',',@dists));
@dists=qw(sid) unless @dists;
@sections=split(/,/,join(',',@sections));
@sections=qw(main contrib non-free main/debian-installer) unless @sections;
@arches=split(/,/,join(',',@arches));
@arches=qw(i386) unless @arches;
@arches=() if (join(',',@arches) eq "none");
# Resolve conflicting cleanup options: --nocleanup disables both modes,
# and an explicit --cleanup replaces the default post-mirror cleanup.
$cleanup=0 if ($no_cleanup);
$post_cleanup=0 if ($no_cleanup);
$post_cleanup=0 if ($cleanup);

# Display configuration.
# Unbuffer stdout when debugging so output interleaves correctly.
$|=1 if $debug;
if ($passwd eq "anonymous@") {
  if ($download_method eq "http") {
    say("Mirroring to $mirrordir from $download_method://$host/$remoteroot/");
  } else {
    say("Mirroring to $mirrordir from $download_method://$user\@$host/$remoteroot/");
  }
} else {
  # Never echo a real password; mask it as XXX.
  say("Mirroring to $mirrordir from $download_method://$user:XXX\@$host/$remoteroot/");
}
say("Arches: ".join(",", @arches));
say("Dists: ".join(",", @dists));
say("Sections: ".join(",", @sections));
say("Including source.") if $do_source;
say("Passive mode on.") if $passive;
say("Checking md5sums.") if $check_md5sums;
if ($post_cleanup) {
  say("Will clean up AFTER mirroring.");
} else {
  say("Will NOT clean up.") unless $cleanup;
}
say("Proxy: $proxy.") if $proxy;
say("Download at most $max_batch files.") if ($max_batch > 0);
say("Download at most $rsync_batch files per rsync call.") if ($download_method eq "rsync");
say("Pdiff mode: $pdiff_mode.");
say("Dry run.") if $dry_run_var;

# Shared MD5 context, reused for every file checked in check_file().
my $md5;
$md5=Digest::MD5->new;

# Set up mirror directory and resolve $mirrordir to a full path for
# locking and rsync
make_dir($mirrordir) if ( ! -d $mirrordir);
die "You need write permissions on $mirrordir" if (!-w $mirrordir);
chdir($mirrordir) or die "chdir $mirrordir: $!";
$mirrordir = cwd();

# Handle the lock file. This is the same method used by official
# Debian push mirrors.
my $hostname=`hostname -f 2>/dev/null || hostname`;
chomp $hostname;
my $lockfile="Archive-Update-in-Progress-$hostname";
# Register the lock file so the cleanup pass never deletes it.
$files{$lockfile}=1;
say("Attempting to get lock, this might take 2 minutes before it fails.");
# 12 attempts at 10-second intervals = the "2 minutes" mentioned above.
my $lockmgr = LockFile::Simple->make(-format => "%f/$lockfile", -max => 12,
				     -delay => 10, -nfs => 1, -autoclean => 1,
				     -warn => 1, -stale => 1, -hold => 0);
my $lock = $lockmgr->lock("$mirrordir")
  or die "$lockfile exists or you lack proper permissions; aborting";
# Release the lock if we are interrupted or terminated.
$SIG{INT}=sub { $lock->release; exit 1 };
$SIG{TERM}=sub { $lock->release; exit 1 };

# Register the trace file.
my $tracefile="project/trace/$hostname";
$files{$tracefile}=1;

# Create tempdir if missing. Meta files are downloaded here first and
# only moved into place once the whole run succeeds.
my $tempdir=".temp";
make_dir($tempdir) if ( ! -d $tempdir);
die "You need write permissions on $tempdir" if (!-w $tempdir);

# Start up ftp.
my $ftp;
my %opts = (Debug => $debug, Passive => $passive, Timeout => $timeout);

# Temp file used to feed include lists to rsync; removed at exit.
my $rsynctempfile;
END { unlink $rsynctempfile if $rsynctempfile }

# Initialize the connection for the configured $download_method.
# For hftp/http this creates the LWP::UserAgent and configures the
# proxy; for ftp it connects, logs in and cwd's to $remoteroot; rsync
# needs no persistent connection. Dies on connection/login failure or
# if hftp is requested without a proxy; unknown methods print usage.
# Fixes vs. original: indirect object syntax (new LWP::UserAgent) is
# replaced with LWP::UserAgent->new, and $_ is no longer clobbered
# (callers that relied on $_ reassign it themselves after calling us).
sub init_connection {
  my $method = $download_method;

  if ($method eq 'hftp') {
    # ftp-over-http-proxy: a proxy is mandatory for this method.
    $ua = LWP::UserAgent->new;
    if ($proxy) {
      $ua->proxy('ftp', $proxy);
    } elsif ($ENV{ftp_proxy}) {
      $ua->proxy('ftp', $ENV{ftp_proxy});
    } else {
      die("hftp method needs a proxy.");
    }
    return;
  }

  if ($method eq 'http') {
    $ua = LWP::UserAgent->new;
    # An explicit --proxy wins over the http_proxy environment variable.
    $ua->proxy('http', $ENV{http_proxy}) if ($ENV{http_proxy});
    $ua->proxy('http', $proxy) if ($proxy);
    return;
  }

  if ($method eq 'ftp') {
    $ftp=Net::FTP->new($host, %opts) or die "$@\n";
    $ftp->login($user, $passwd) or die "login failed"; # anonymous
    $ftp->binary or die "could not set binary mode";
    $ftp->cwd($remoteroot) or die "cwd to $remoteroot failed";
    # Print hash marks to stdout every 100 KiB while downloading.
    $ftp->hash(\*STDOUT,102400) if $progress;
    return;
  }

  # rsync spawns a fresh process per batch, so nothing to set up here.
  return if $method eq 'rsync';

  usage("unknown download method: $method");
}
init_connection;

# fix remoteroot if --method=rsync
# Turn "/debian" (or ":debian" for rsync modules) into the
# "host:path/" form that the rsync command line expects.
if ($download_method eq "rsync") {
  $remoteroot = "$host:$remoteroot/";
  if (! ($user eq 'anonymous')) {
    # Non-anonymous rsync: prepend user@ to the target.
    $remoteroot = "$user\@$remoteroot";
  }
};

say("Get Release files.");
# Get Release files without caching for http
$ua->default_header( "Cache-Control" => "max-age=0" ) if ($ua);
# Expected md5sums and sizes of meta files, keyed by path under $tempdir.
my (%file_lists_md5, %file_lists_size);
foreach my $dist (@dists) {
  make_dir ("dists/$dist");
  make_dir ("$tempdir/dists/$dist");
  remote_get("dists/$dist/Release");
  $files{"dists/$dist/Release"}=1;
  $files{$tempdir."/"."dists/$dist/Release"}=1;
  # Remember the error count so gpg-related errors can be discarded
  # below when --ignore-release-gpg is in effect.
  my $t = $num_errors;
  remote_get("dists/$dist/Release.gpg");
  $files{"dists/$dist/Release.gpg"}=1;
  $files{$tempdir."/"."dists/$dist/Release.gpg"}=1;
  # Check for gpg
  if (!$ignore_release_gpg) {
    if (system("gpg --version >/dev/null 2>/dev/null")) {
      say("gpg failed: --ignore-release-gpg or gpg binary missing?");
      push (@errlog,"gpg failed: --ignore-release-gpg or gpg binary missing?\n");
      $num_errors++;
    }
    # Verify Release signature
    my $GPG="gpg --no-tty -q";
    if (!$verbose) {
      $GPG = $GPG." >/dev/null 2>/dev/null";
    }
    if (!-f "$tempdir/dists/$dist/Release.gpg" || !-f "$tempdir/dists/$dist/Release" || system("$GPG --verify $tempdir/dists/$dist/Release.gpg $tempdir/dists/$dist/Release")) {
      if ($verbose) {
	say("Release signature does not verify.");
	push (@errlog,"Release signature does not verify.\n");
      } else {
	# Re-run verification without output redirection so the user
	# gets to see gpg's reason for the failure.
	system("gpg --no-tty -q --verify $tempdir/dists/$dist/Release.gpg $tempdir/dists/$dist/Release");
	say("Release signature does not verify.");
	push (@errlog,"Release signature does not verify.\n");
      }
      $num_errors++;
    }
  }
  $num_errors=$t if ($ignore_release_gpg);

  # Parse the Release
  # Read the "MD5Sum:" section (lines of "md5 size filename") into the
  # lookup tables used later for meta-file verification and sizing.
  if (open RELEASE, "<$tempdir/dists/$dist/Release") {
    while (<RELEASE>) {
	last if /^MD5Sum:/;
    }
    $_ = <RELEASE>;
    while (defined $_ && $_ =~ /^ /) {
      my ($md5sum, $size, $filename) =
	(/ ([a-z0-9]+) +(\d+) +(.*)$/);
      $file_lists_md5{"$tempdir/dists/$dist/$filename"} = $md5sum;
      $file_lists_size{"$tempdir/dists/$dist/$filename"} = $size;
      $_ = <RELEASE>;
    }
    close RELEASE;
  }
}

if ($num_errors != 0 && $ignore_release) {
  say("Ignoring failed Release files.");
  push (@errlog,"Ignoring failed Release files\n");
  $num_errors = 0;
}

if ($num_errors != 0) {
  print "Errors:\n ".join(" ",@errlog) if (@errlog);
  die "Failed to download some Release or Release.gpg files!\n";
}

# Enable caching again for http
init_connection if ($ua);

# Calculate expected downloads for meta files
$bytes_to_get = $bytes_gotten;
# Add the size a meta file is expected to have (according to the
# Release file parsed above) to the running download total used for
# progress reporting. Unknown files contribute nothing.
sub add_bytes {
  my $name=shift;
  $bytes_to_get +=  $file_lists_size{"$tempdir/$name"} if (exists $file_lists_size{"$tempdir/$name"});
}
foreach my $dist (@dists) {
  foreach my $section (@sections) {
    foreach my $arch (@arches) {
      add_bytes("dists/$dist/$section/binary-$arch/Packages");
      add_bytes("dists/$dist/$section/binary-$arch/Packages.gz");
      add_bytes("dists/$dist/$section/binary-$arch/Packages.bz2");
      add_bytes("dists/$dist/$section/binary-$arch/Release");
      add_bytes("dists/$dist/$section/binary-$arch/Packages.diff/Index") unless ($pdiff_mode eq "none");
    }
    if ($do_source) {
      add_bytes("dists/$dist/$section/source/Sources");
      add_bytes("dists/$dist/$section/source/Sources.gz");
      add_bytes("dists/$dist/$section/source/Sources.bz2");
      add_bytes("dists/$dist/$section/source/Release");
      add_bytes("dists/$dist/$section/source/Sources.diff/Index") unless ($pdiff_mode eq "none");
    }
  }
}
foreach (@extra_dirs) {
  add_bytes("$_/Packages");
  add_bytes("$_/Packages.gz");
  add_bytes("$_/Packages.bz2");
  add_bytes("$_/Release");
  add_bytes("$_/Packages.diff/Index");
  if ($do_source) {
    add_bytes("$_/Sources");
    add_bytes("$_/Sources.gz");
    add_bytes("$_/Sources.bz2");
    add_bytes("$_/Sources.diff/Index");
  }
}
if ($getcontents) {
  # Contents files exist only for real binary dists; experimental,
  # proposed-updates and "source" have none.
  foreach my $dist (@dists) {
    foreach my $arch (@arches) {
      next if $dist=~/experimental/;
      next if $dist=~/.*-proposed-updates/;
      next if $arch=~/source/;
      add_bytes("dists/$dist/Contents-$arch.gz");
    }
  }
}


say("Get Packages and Sources files and other miscellany.");
# Get Packages and Sources files and other miscellany.
my (@package_files, @source_files);
foreach my $dist (@dists) {
  foreach my $section (@sections) {
    # no d-i in woody
    # Skip dists that never carry a debian-installer section.
    next if ($section =~ /debian-installer/ && $dist eq "woody");
    next if ($section =~ /debian-installer/ && $dist eq "experimental");
    next if ($section =~ /debian-installer/ && $dist =~ /.*-proposed-updates/);
    next if ($section =~ /debian-installer/ && $dist =~ /.*breezy-updates/ );
    next if ($section =~ /debian-installer/ && $dist eq "breezy-security" );
    foreach my $arch (@arches) {
      get_index("dists/$dist/$section/binary-$arch", "Packages");
    }
    get_index("dists/$dist/$section/source", "Sources") if ($do_source);
  }
}
foreach (@extra_dirs) {
  get_packages($_, "Packages");
  get_sources($_, "Sources") if ($do_source);
}

# Sanity check. I once nuked a mirror because of this..
# With no index files at all, the later cleanup pass would consider
# every file on the mirror unknown and delete it.
if (@arches && ! @package_files) {
  print "Errors:\n ".join(" ",@errlog) if (@errlog);
  die "Failed to download any Packages files!\n";
}
if ($do_source && ! @source_files) {
  print "Errors:\n ".join(" ",@errlog) if (@errlog);
  die "Failed to download any Sources files!\n";
}

if ($num_errors != 0) {
  print "Errors:\n ".join(" ",@errlog) if (@errlog);
  die "Failed to download some Package, Sources or Release files!\n";
}

# Really set dry-run option now if it was given. This delay is needed
# for the ftp method.
$dry_run = $dry_run_var;

if ($getcontents) {
  say("Get Contents files.");
  foreach my $dist (@dists) {
    foreach my $arch (@arches) {
      # Same skip rules as in the size-estimation pass above.
      next if $dist=~/experimental/;
      next if $dist=~/.*-proposed-updates/;
      next if $arch=~/source/;
      remote_get("dists/$dist/Contents-$arch.gz");
      $files{"dists/$dist/Contents-$arch.gz"}=1;
      $files{$tempdir."/"."dists/$dist/Contents-$arch.gz"}=1;
    }
  }
}

# close ftp connection to avoid timeouts, will reopen later
if ($download_method eq 'ftp') { $ftp->quit; }

say("Parse Packages and Sources files and add to the file list everything therein.");
{
  # Paragraph mode: each read below naturally groups one index stanza.
  local $/="\n\n";
  my ($filename, $size, $md5sum, $directory, $exclude, $include,
      $architecture, $exclude_deb_section, $limit_priority, $deb_section,
      $deb_priority);

  # Architectures we accept; "Architecture: all" belongs to every arch.
  my %arches = map { $_ => 1 } (@arches, "all");

  # Collapse each user-supplied regex list into one alternation pattern.
  $exclude =  "(".join("|", @excludes).")" if @excludes;
  $exclude_deb_section =
    "(".join("|", @excludes_deb_section).")" if @excludes_deb_section;
  $limit_priority =
    "(".join("|", @limit_priority).")" if @limit_priority;
  $include =  "(".join("|", @includes).")" if @includes;
  foreach my $file (@package_files) {
    next if (!-f $file);
    my $gunzf = gzopen($file, "rb") or die "$file: $!";
    my $line;
    my $res;
    my $loop = 1;
    while ($loop) {
      # Accumulate one stanza (up to a blank line) from the gzipped index.
      my $buf = "";
      while(($res = $gunzf->gzreadline($line) > 0)
	    && !($line =~ /^$/)) {
	$buf = $buf . $line;
      }
      if ($res <= 0) {
	$loop = 0;
	next;
      }
      $_ = $buf;
      ($filename)=m/^Filename:\s+(.*)/im;
      ($deb_section)=m/^Section:\s+(.*)/im;
      ($deb_priority)=m/^Priority:\s+(.*)/im;
      ($architecture)=m/^Architecture:\s+(.*)/im;
      next if (!$arches{$architecture});
      # A matching --include overrides all exclusion filters.
      if(!(defined($include) && ($filename=~/$include/o))) {
	next if (defined($exclude) && $filename=~/$exclude/o);
	next if (defined($exclude_deb_section) && defined($deb_section)
		 && $deb_section=~/$exclude_deb_section/o);
	next if (defined($limit_priority) && defined($deb_priority)
		 && ! ($deb_priority=~/$limit_priority/o));
      }
      next if (exists $files{$filename}); # multiple occurrences
      ($size)=m/^Size:\s+(\d+)/im;
      ($md5sum)=m/^MD5sum:\s+([A-Za-z0-9]+)/im;
      # Mark up-to-date files with 1 (keep), missing/stale ones with 0
      # (download) and record their expected size/md5sum.
      if (check_file($filename, $size, $md5sum)) {
	$files{$filename} = 1;
      } else {
	$files{$filename} = 0;
	$file_lists_md5{$filename} = $md5sum;
	$file_lists_size{$filename} = $size;
	$bytes_to_get += $size;
      }
    }
    $gunzf->gzclose();
  }
  foreach my $file (@source_files) {
    next if (!-f $file);
    my $gunzf = gzopen($file, "rb") or die "$file: $!";
    my $line;
    my $res;
    my $loop = 1;
    while ($loop) {
      my $buf = "";
      while(($res = $gunzf->gzreadline($line) > 0)
	    && !($line =~ /^$/)) {
	$buf = $buf . $line;
      }
      if ($res <= 0) {
	$loop = 0;
	next;
      }
      $_ = $buf;
      ($directory) = m/^Directory:\s+(.*)/im;
      ($deb_section)=m/^Section:\s+(.*)/im;
      ($deb_priority)=m/^Priority:\s+(.*)/im;
      next if (defined($exclude_deb_section) && defined($deb_section)
	       && $deb_section=~/$exclude_deb_section/o);
      next if (defined($limit_priority) && defined($deb_priority)
	       && ! ($deb_priority=~/$limit_priority/o));
      # Each "Files:" entry line holds "md5sum size filename".
      while (m/^ ([A-Za-z0-9]{32} .*)/mg) {
	($md5sum, $size, $filename)=split(' ', $1, 3);
	$filename="$directory/$filename";
	if(!(defined($include) && ($filename=~/$include/o))) {
	  next if (defined($exclude) && $filename=~/$exclude/o);
	}
        next if (exists $files{$filename}); # multiple occurrences
	if (check_file($filename, $size, $md5sum)) {
	  $files{$filename} = 1;
	} else {
	  $files{$filename} = 0;
	  $file_lists_md5{$filename} = $md5sum;
	  $file_lists_size{$filename} = $size;
	  $bytes_to_get += $size;
	}
      }
    }
    $gunzf->gzclose();
  }
}

# Pre mirror cleanup
cleanup_unknown_files() if ($cleanup && ! $post_cleanup);

say("Download all files that we need to get (".int(1+$bytes_to_get/1024/1024)." MiB).");
# Download all files that we need to get.
# %files values: false = still to be fetched, true = already present.
DOWNLOAD: {
  init_connection;
  $_ = $download_method;

  /^hftp$/ && do {
    # LWP stuff
    my $dirname;
    my $i=0;
    foreach my $file (sort keys %files) {
      if (!$files{$file}) {
	if (($dirname) = $file =~ m:(.*)/:) {
	  make_dir($dirname);
	}
	hftp_get($file);
	if ($max_batch > 0 && ++$i >= $max_batch) {
	  push (@errlog,"Batch limit exceeded, mirror run was partial\n");
	  $num_errors++;
	  last;
	}
      }
    }
    last DOWNLOAD;
  };

  /^http$/ && do {
    # LWP stuff
    my $dirname;
    my $i=0;
    foreach my $file (sort keys %files) {
      if (!$files{$file}) {
	if (($dirname) = $file =~ m:(.*)/:) {
	  make_dir($dirname);
	}
	http_get($file);
	if ($max_batch > 0 && ++$i >= $max_batch) {
	  push (@errlog,"Batch limit exceeded, mirror run was partial\n");
	  $num_errors++;
	  last;
	}
      }
    }
    last DOWNLOAD;
  };

  # Ftp method
  /^ftp$/ && do {
    my $dirname;
    my $i=0;
    foreach my $file (sort keys %files) {
      if (!$files{$file}) {
	if (($dirname) = $file =~ m:(.*)/:) {
	  make_dir($dirname);
	}
	ftp_get($file);
	if ($max_batch > 0 && ++$i >= $max_batch) {
	  push (@errlog,"Batch limit exceeded, mirror run was partial\n");
	  $num_errors++;
	  last;
	}
      }
    }
    last DOWNLOAD;
  };

  # Rsync method
  # Files are collected into batches of $rsync_batch; each batch is
  # written to a temp include-file and handed to one rsync invocation.
  /^rsync$/ && do {
    my $opt=$rsync_options;
    my $fh;
    my @result;
    my $i=0;
    my $j=0;
    $opt = "$opt --progress" if $progress;
    $opt = "$opt -v" if $verbose;
    $opt = "$opt -v" if $debug;
    $opt = "$opt -n" if $dry_run;
    foreach my $file (sort keys %files) {
      if (!$files{$file}) {
	my $dirname;
	my @dir;
	($dirname) = $file =~ m:(.*/):;
	@dir= split(/\//, $dirname);
	# Include every parent directory so rsync will recurse into it.
	for (0..$#dir) {
	  push (@result, "" . join('/', @dir[0..$_]) . "/");
	}
	push (@result, "$file");
	if (++$j >= $rsync_batch) {
	  $j = 0;
	  ($fh, $rsynctempfile) = tempfile();
	  if (@result) {
	    # Sort, then strip adjacent duplicates before writing the list.
	    @result = sort(@result);
	    my $prev = "not equal to $result[0]";
	    @result = grep($_ ne $prev && ($prev = $_, 1), @result);
	    for (@result) {
	      print $fh "$_\n";
	    }
	  }
	  system ("rsync --timeout=$timeout $opt $remoteroot --include-from=$rsynctempfile --exclude='*' $mirrordir");
	  close $fh;
	  # Verify what the batch actually delivered.
	  foreach my $dest (@result) {
	    if (-f $dest) {
	      if (!check_lists($dest)) {
		say("$dest failed md5sum check");
		$num_errors++;
	      }
	    } elsif (!-d $dest) {
	      say("$dest missing");
	      $num_errors++;
	    }
	  }
	  @result = ();
	}
	if ($max_batch > 0 && ++$i >= $max_batch) {
	  print "Batch limit exceeded, mirror run will be partial\n";
	  push (@errlog,"Batch limit exceeded, mirror run was partial\n");
	  $num_errors++;
	  last;
	}
      }
    }
    # Flush the final, partial batch.
    ($fh, $rsynctempfile) = tempfile();
    if (@result) {
      @result = sort(@result);
      my $prev = "not equal to $result[0]";
      @result = grep($_ ne $prev && ($prev = $_, 1), @result);
      for (@result) {
	print $fh "$_\n";
      }
      system ("rsync --timeout=$timeout $opt $remoteroot --include-from=$rsynctempfile --exclude='*' $mirrordir");
      close $fh;
      foreach my $dest (@result) {
	if (-f $dest) {
	  if (!check_lists($dest)) {
	    say("$dest failed md5sum check");
	    $num_errors++;
	  }
	} elsif (!-d $dest) {
	  say("$dest missing");
	  $num_errors++;
	}
      }
    }
    last DOWNLOAD;
  };
}

# Finish up. Write out trace file.
if ($download_method eq 'ftp') { $ftp->quit; }
make_dir("project/trace");
open OUT, ">$tracefile" or die "$tracefile: $!";
print OUT `date -u`;
close OUT;
$lock->release;

my $total_time = time - $start_time;
my $avg_speed = 0;
$avg_speed = sprintf("%3.0f",($bytes_gotten / $total_time)) unless ($total_time == 0);
if ($bytes_gotten == 0) {
  say("Downloaded files in ".$total_time."s");
} else {
  say("Downloaded ".int(1+$bytes_gotten/1024/1024)." MiB in ".$total_time."s at ".(int($avg_speed/1024*100)/100)." kiB/s");
}

print "Errors:\n ".join(" ",@errlog) if (@errlog);

if ($num_errors != 0) {
  print "Failed to download files ($num_errors errors)!\n";
  exit 1 if (!$ignore_small_errors);
}

say("Everything OK. Moving meta files.");
chdir($tempdir) or die "unable to chdir($tempdir): $!\n";
# $res stays 0 while every move succeeds; any failure sets it non-zero
# so the final status check further down reports the problem.
# (The old "$res &= ..." accumulation started from 0 and therefore could
# never record a failure — and it also inverted system()'s 0-on-success
# convention.)
my $res=0;
foreach my $file (`find . -type f`) {
  chomp $file;
  $file=~s:^\./::;
  # this skips pdiff files if unwanted
  next if (!exists $files{$file});
  print("Moving $file\n") if ($debug);
  if (! $dry_run) {
    # Remove any existing copy first, to break hard links into the pool.
    # (The old test 'if ($mirrordir."/".$file)' was a plain string and
    # thus always true; a -f file test was intended.)
    if (-f $mirrordir."/".$file) {
      unlink($mirrordir."/".$file) or $res = 1;
    }
    # Make sure the target directory exists, then hard-link the file into
    # place; fall back to a copy when linking fails (e.g. cross-device).
    "$file" =~ m,(^.*)/,;
    make_dir("$mirrordir/$1");
    if (!link($file, $mirrordir."/".$file)) {
      $res = 1 if (system("cp $file $mirrordir/$file") != 0);
    }
  }
}
# Post-mirror cleanup: drop files unknown to the index lists, if requested.
cleanup_unknown_files() if ($post_cleanup);

# Directory cleanup. Done after the main cleanup to avoid racing the pool
# download code, which creates directories. A depth-first find lets rmdir
# remove empty trees bottom-up; non-empty directories are silently kept.
if ($cleanup || $post_cleanup) {
  chdir($mirrordir) or die "chdir $mirrordir: $!";
  unless ($dry_run) {
    system("find . -depth -type d ! -name . ! -name .. -print0 | xargs -0 rmdir 2>/dev/null");
  }
}

# Final status of the meta-file move above.
die("Failed to move some meta files.") unless ($res == 0);
say("All done.");

exit;

# Pass this function a filename, a file size (bytes), and a md5sum (hex).
# Returns true if the file exists with exactly the given size and — when
# md5sum checking is enabled via the global $check_md5sums — its digest
# matches $md5sum as well. With md5 checking off, a size match suffices.
sub check_file {
  my ($filename, $size, $md5sum)=@_;
  # "-s _" reuses the stat buffer filled by the -f test.
  if (-f $filename and $size == -s _) {
    if ($check_md5sums) {
      # Three-arg open with a lexical handle; the previous two-arg
      # bareword open was unsafe for filenames starting with '<', '>'
      # or '|', and the handle was never closed.
      open my $handle, '<', $filename or
	die "$filename: $!";
      $md5->addfile($handle);
      close $handle;
      # hexdigest also resets the shared $md5 object for its next use.
      my $digest = $md5->hexdigest;
      return ($md5sum eq $digest);
    }
    else {
      # Assume it is ok, w/o md5 check.
      return 1;
    }
  }
  return 0;
}

# Check uncompressed pdiff content against sha1sum from Index file.
# Returns true when $filename.gz exists, decompresses to exactly $size
# bytes, and the uncompressed data hashes to $sha1. The uncompressed
# copy is only temporary and is removed again before returning.
sub check_pdiff {
  my ($filename, $size, $sha1) = @_;
  my $digest = Digest::SHA1->new;
  my $ret = 0;

  if (-f "$filename.gz") {
    system_redirect_io("gzip -d", "$filename.gz", "$filename");
    if ($size == -s $filename) {
      # Lexical handle + three-arg open, and close it afterwards; the
      # previous bareword two-arg open leaked the handle.
      open my $handle, '<', $filename or die "$filename: $!";
      $digest->addfile($handle);
      close $handle;
      $ret = ($sha1 eq $digest->hexdigest);
    }
    unlink ($filename);
  }
  return $ret;
}

# Validate a file against the size and md5sum recorded from the Release
# file (%file_lists_size / %file_lists_md5). Files with no Release entry
# are accepted unchecked. Returns true when the check passes.
sub check_lists {
  my $file = shift;
  my $ret = 1;
  # Force md5 verification for this call regardless of the global
  # setting, restoring the caller's preference afterwards.
  my $saved_md5_mode = $check_md5sums;
  $check_md5sums = 1;
  $ret = check_file($file, $file_lists_size{$file}, $file_lists_md5{$file})
    if (exists $file_lists_size{$file});
  $check_md5sums = $saved_md5_mode;
  return $ret;
}

# Fetch a single file from the remote mirror using the configured
# $download_method, then verify it against the Release md5sums.
# Transfers run relative to $tempdir; the cwd is restored to $mirrordir
# before returning. Returns the success status of transfer+verification.
sub remote_get {
  my $file=shift;
  my $status;
  # --skippackages: pretend success without transferring anything.
  say("skipping $file") if ($skippackages);
  return 1 if ($skippackages);
  chdir($tempdir) or die "unable to chdir($tempdir): $!\n";
  if ($download_method eq 'hftp') {
    $status = hftp_get($file) && check_lists($file);
    unless ($status) {
      say("$file failed md5sum check, removing");
      unlink($file) if (-f $file);
    }
  } elsif ($download_method eq 'http') {
    $status = http_get($file) && check_lists($file);
    unless ($status) {
      say("$file failed md5sum check, removing");
      unlink($file) if (-f $file);
    }
  } elsif ($download_method eq 'ftp') {
    $status = ftp_get($file) && check_lists($file);
    unless ($status) {
      say("$file failed md5sum check, removing");
      unlink($file) if (-f $file);
    }
  } elsif ($download_method eq 'rsync') {
    say("remote_get rsync $file");
    $status = rsync_get($file) && check_lists($file);
    unless ($status) {
      say("$file failed md5sum check");
      # FIXME: make sure the size doesn't match so it gets retried
    }
  }
  chdir($mirrordir) or die "unable to chdir($mirrordir): $!\n";
  return $status;
}

# Fetch $file via LWP over ftp ("hftp"), announcing its name first when
# progress/verbose output is on. Failures are logged to @errlog and
# counted in $num_errors. Returns true on success (always true when
# --dry-run is in effect).
sub hftp_get {
  my $file=shift;
  my $saved_autoflush = $|;
  $| = 1;
  my $url="ftp://${host}/${remoteroot}/${file}";
  my $success=1;

  print "$url => " if ($debug);
  print "Getting: $file... " if ($progress || $verbose);
  if ($dry_run) {
    print "ok\n" if ($progress || $verbose);
  } else {
    # Remove any stale copy so $ua->mirror always writes a fresh file.
    unlink($file) if (-f $file);
    my $response = $ua->mirror($url, $file);
    print $response->status_line . "\n" if ($debug);
    if ($response->is_error) {
      warn "$file failed " . $response->status_line . "\n" if ($progress or $verbose);
      push (@errlog,"Download of $file failed: ".$response->status_line);
      $num_errors++;
    } elsif ($progress || $verbose) {
      print "ok\n";
    }
    $success = !$response->is_error;
  }
  $| = $saved_autoflush;
  return $success;
}

# Fetch $file via http, showing the overall progress percentage first if
# progress/verbose output is on. Failures are logged to @errlog and
# counted in $num_errors; the size of whatever ended up on disk is added
# to $bytes_gotten. Returns true on success (always true with --dry-run).
sub http_get {
  my $file=shift;
  my $saved_autoflush = $|;
  $| = 1;
  my $url="http://${host}/${remoteroot}/${file}";
  my $success=1;
  my $percent = 0;
  $percent = sprintf("%3.0f",(($bytes_gotten/$bytes_to_get)*100)) unless($bytes_to_get == 0);

  print "$url => " if ($debug);
  print "[$percent%] Getting: $file... " if ($progress || $verbose);
  if ($dry_run) {
    print "ok\n" if ($progress || $verbose);
  } else {
    # Remove any stale copy so $ua->mirror always writes a fresh file.
    unlink($file) if (-f $file);
    my $response = $ua->mirror($url, $file);
    print $response->status_line . "\n" if ($debug);
    if ($response->is_error) {
      warn "$file failed " . $response->status_line . "\n" if ($progress or $verbose);
      push (@errlog,"Download of $file failed: ".$response->status_line);
      $num_errors++;
    } elsif ($progress || $verbose) {
      print "ok\n";
    }
    $success = !$response->is_error;
  }
  # Account for actual bytes gotten
  my @stat = stat $file;
  $bytes_gotten += $stat[7] if (@stat);

  $| = $saved_autoflush;
  return $success;
}

# Get a file via ftp, first displaying its filename if progress is on.
# I should just be able to subclass Net::Ftp and override the get method,
# but it's late.
#
# Skips the transfer when the local copy already matches the remote size
# and mtime. On a successful download the remote timestamp is copied onto
# the local file so that check can succeed on the next run. Adds the
# transferred size to $bytes_gotten; failures go to @errlog/$num_errors.
# Returns true on success.
sub ftp_get {
  my $oldautoflush = $|;
  $| = 1;
  my $file=shift;
  my $percent = 0;
  $percent = sprintf("%3.0f",(($bytes_gotten/$bytes_to_get)*100)) unless($bytes_to_get == 0);

  # (An unused outer "my $mtime;" was removed here — it was shadowed by
  # fresh declarations in both branches below and never read.)
  my @stat = stat $file;
  if (@stat) { # already have the file?
    my $size = $ftp->size($file);
    my $mtime = $ftp->mdtm($file);
    if ($mtime && $size
	&& $size == $stat[7]
	&& $mtime == $stat[9]) { # size and time match
      print "[$percent%] Keeping: $file\n" if ($progress || $verbose);
      $bytes_gotten += $size;
      return 1;
    }
  }
  if ($progress) {
    print "[$percent%] Getting: $file\t #";
  } elsif ($verbose) {
    print "[$percent%] Getting: $file";
  }
  my $ret=1;
  if (! $dry_run) {
    unlink($file) if (-f $file);
    $ret = $ftp->get($file, $file);
    if ($ret) {
      # Mirror the remote timestamp locally for the freshness check above.
      my $mtime=$ftp->mdtm($file);
      utime($mtime, $mtime, $file) if defined $mtime;
    } else {
      if ($progress or $verbose) {
	warn " failed:".$ftp->message;
      }
      push (@errlog,"Download of $file failed: ".$ftp->message);
      $num_errors++;
    }
  }
  my $size=$ftp->size($file);
  $bytes_gotten += $size if $size;
  $| = $oldautoflush;
  print "\n" if (($verbose and not $progress) or ($dry_run and $progress));
  return $ret;
}

# Fetch a single file with rsync. Every ancestor directory of $file must
# be explicitly included, or the trailing --exclude='*' would prune the
# tree before the file itself is reached. Returns 1 on success; otherwise
# records the failure in @errlog/$num_errors and returns 0.
sub rsync_get {
  my $file=shift;
  my $opt=$rsync_options;
  (my $dirname) = $file =~ m:(.*/):;
  my @dir= split(/\//, $dirname);
  foreach my $depth (0..$#dir) {
    $opt = "$opt --include=" . join('/', @dir[0..$depth]) . "/";
  }
  $opt = "$opt --progress" if $progress;
  $opt = "$opt -v" if $debug;
  system ("rsync --timeout=$timeout $opt $remoteroot --include=$file --exclude='*' .");
  return 1 if ($? == 0 && -f $file);
  push (@errlog,"Download of $file failed\n");
  $num_errors++;
  return 0;
}

# run system() with stdin and stdout redirected to files
# unlinks stdout target file first to break hard links
sub system_redirect_io {
  my ($command, $fromfile, $tofile) = @_;

  # Delete the destination first: writing through an existing hard link
  # would modify the linked mirror copy in place.
  if (-f $tofile) {
    unlink($tofile) or die "unlink($tofile) failed: $!";
  }
  return system("$command <$fromfile >$tofile");
}

# Get Index file in the passed subdirectory.
# Fetches (or pdiff-updates) the Packages/Sources index named by $file,
# keeps the .gz/.bz2/uncompressed variants consistent with each other,
# and registers every resulting path in %files so the cleanup pass keeps
# them. Relies on globals: $tempdir, $pdiff_mode, %file_lists_size,
# $bytes_gotten, $ignore_release, @errlog, $num_errors, @package_files,
# @source_files.
sub get_index {
  my $subdir=shift;
  my $file=shift;
  make_dir($subdir);
  make_dir("$tempdir/$subdir");

  # Pdiff handling: when the Release file lists a .diff/Index for this
  # index, fetch it (if stale) and try to patch the local index up to
  # date instead of re-downloading it whole.
  if (!($pdiff_mode eq "none") && exists $file_lists_size{"$tempdir/$subdir/$file.diff/Index"}) {
    if (!check_lists ("$tempdir/$subdir/$file.diff/Index")) {
      make_dir("$tempdir/$subdir/$file.diff");
      say("$subdir/$file.diff/Index needs fetch");
      remote_get("$subdir/$file.diff/Index");
      if (!check_lists ("$tempdir/$subdir/$file.diff/Index")) {
	say("$subdir/$file.diff/Index failed md5sum check, removing");
	push (@errlog,"$subdir/$file.diff/Index failed md5sum check, removing\n");
	unlink "$tempdir/$subdir/$file.diff/Index";
      } else {
	fetch_and_apply_pdiffs($subdir, $file);
	# Regenerate the compressed variants from the patched index.
	if (check_lists ("$tempdir/$subdir/$file")) {
	  system_redirect_io("gzip -9 -n", "$tempdir/$subdir/$file", "$tempdir/$subdir/$file.gz");
	  system_redirect_io("bzip2", "$tempdir/$subdir/$file", "$tempdir/$subdir/$file.bz2");
	}
      }
    } else {
      # Diff Index already current locally; just account for its size.
      $bytes_gotten += $file_lists_size{"$tempdir/$subdir/$file.diff/Index"};
      fetch_and_apply_pdiffs($subdir, "$file");
      if (check_lists ("$tempdir/$subdir/$file")) {
	system_redirect_io("gzip -9 -n", "$tempdir/$subdir/$file", "$tempdir/$subdir/$file.gz");
	system_redirect_io("bzip2", "$tempdir/$subdir/$file", "$tempdir/$subdir/$file.bz2");
      }
    }
    $files{"$subdir/$file.diff/Index"}=1 if ($pdiff_mode eq "mirror");
    $files{"$tempdir/$subdir/$file.diff/Index"}=1;
  }

  # The .gz variant is the copy we actually download; the uncompressed
  # and .bz2 variants are derived from it locally.
  if (exists $file_lists_size{"$tempdir/$subdir/$file.gz"}) {
    if (!check_lists ("$tempdir/$subdir/$file.gz")) {
      say("$subdir/$file.gz needs fetch");
      remote_get("$subdir/$file.gz");
      if (check_lists ("$tempdir/$subdir/$file.gz")) {
	system_redirect_io("gzip -d", "$tempdir/$subdir/$file.gz", "$tempdir/$subdir/$file");
	system_redirect_io("bzip2", "$tempdir/$subdir/$file", "$tempdir/$subdir/$file.bz2");
      } else {
	say("$subdir/$file.gz failed md5sum check");
	push (@errlog,"$subdir/$file.gz failed md5sum check\n");
	$num_errors++;
      }
    } else {
      $bytes_gotten += $file_lists_size{"$tempdir/$subdir/$file.gz"};
    }
  } elsif ($ignore_release) {
    # No Release entry, but the user asked to proceed anyway.
    say("Ignoring missing Release file for $subdir/$file.gz");
    push (@errlog,"Ignoring missing Release file for $subdir/$file.gz\n");
    say("$subdir/$file.gz needs fetch");
    remote_get("$subdir/$file.gz");
  } else {
    if (-f "$subdir/$file.gz") {
      # Refuse to mirror an index the (signed) Release does not vouch for.
      say("$subdir/$file.gz exists locally but not in Release");
      die "Won't mirror without $subdir/$file.gz signature in Release";
    } else {
      say("$subdir/$file.gz does not exist locally or in Release, skipping.") if ($debug);
    }
  }
  # Uncompressed index, if listed in the Release file.
  if (exists $file_lists_size{"$tempdir/$subdir/$file"}) {
    if (!check_lists ("$tempdir/$subdir/$file")) {
      say("$subdir/$file needs fetch");
      remote_get("$subdir/$file");
      if (check_lists ("$tempdir/$subdir/$file")) {
	system_redirect_io("bzip2", "$tempdir/$subdir/$file", "$tempdir/$subdir/$file.bz2");
      } else {
	say("$subdir/$file failed md5sum check");
	push (@errlog,"$subdir/$file failed md5sum check\n");
	$num_errors++;
      }
    } else {
      $bytes_gotten += $file_lists_size{"$tempdir/$subdir/$file"};
    }
  }
  # .bz2 index, if listed in the Release file.
  if (exists $file_lists_size{"$tempdir/$subdir/$file.bz2"}) {
    if (!check_lists ("$tempdir/$subdir/$file.bz2")) {
      say("$subdir/$file.bz2 needs fetch");
      remote_get("$subdir/$file.bz2");
      if (!check_lists ("$tempdir/$subdir/$file.bz2")) {
	say("$subdir/$file.bz2 failed md5sum check, removing");
	push (@errlog,"$subdir/$file.bz2 failed md5sum check, removing\n");
	unlink "$tempdir/$subdir/$file.bz2";
      }
    } else {
      $bytes_gotten += $file_lists_size{"$tempdir/$subdir/$file.bz2"};
    }
  }
  # Per-directory Release file.
  if (exists $file_lists_size{"$tempdir/$subdir/Release"}) {
    if (!check_lists ("$tempdir/$subdir/Release")) {
      say("$subdir/Release needs fetch");
      remote_get("$subdir/Release");
      if (!check_lists ("$tempdir/$subdir/Release")) {
	say("$subdir/Release failed md5sum check, removing");
	push (@errlog,"$subdir/Release failed md5sum check, removing\n");
	unlink "$tempdir/$subdir/Release";
      }
    } else {
      $bytes_gotten += $file_lists_size{"$tempdir/$subdir/Release"};
    }
  }
  # Queue the index for the later pool-download parsing step.
  if ($file eq "Packages") {
    push @package_files, "$tempdir/$subdir/$file.gz";
  } else {
    if ($file eq "Sources") {
      push @source_files, "$tempdir/$subdir/$file.gz";
    } else {
      die "get_index called with unknown type $file\n";
    }
  }
  # Register everything touched so the cleanup pass keeps it.
  $files{"$subdir/$file.gz"}=1;
  $files{"$subdir/$file.bz2"}=1;
  $files{"$subdir/$file"}=1;
  $files{"$subdir/Release"}=1;
  $files{"$tempdir/$subdir/$file.gz"}=1;
  $files{"$tempdir/$subdir/$file.bz2"}=1;
  $files{"$tempdir/$subdir/$file"}=1;
  $files{"$tempdir/$subdir/Release"}=1;
}

# Download any pdiffs named in $subdir/$list.diff/Index and patch the
# local $list file up to date with them. When a checksum fails, the
# stale file is removed so the caller falls back to a full fetch.
# Uses globals: $tempdir, $pdiff_mode, %files, $bytes_to_get,
# $bytes_gotten, @errlog.
sub fetch_and_apply_pdiffs {
  my ($subdir, $list) = @_;
  local (*INDEX, *LIST);
  my (%history_sha1, %history_size, %pdiff_sha1, %pdiff_size);
  my ($current_sha1, $current_size, $sha1, $size, $file, $digest, $ret);

  # Parse DiffIndex file
  open(INDEX, "$tempdir/$subdir/$list.diff/Index") or die "$tempdir/$subdir/$list.diff/Index: $!";
  $_ = <INDEX>;
  while (defined($_)) {
    if (m/^SHA1-Current:/m) {
      ($current_sha1, $current_size) = m/^SHA1-Current:\s+([A-Za-z0-9]+)\s+(\d+)/m;
      $_ = <INDEX>;
    }
    elsif (m/^SHA1-History:/m) {
      while (defined($_ = <INDEX>)) {
        last if (!m/^\s/m);
        ($sha1, $size, $file) = m/^\s+([A-Za-z0-9]+)\s+(\d+)\s+(.*)/m;
        $history_sha1{$file} = $sha1;
        $history_size{$file} = $size;
      }
    }
    elsif (m/^SHA1-Patches:/m) {
      while (defined($_ = <INDEX>)) {
        last if (!m/^\s/m);
        ($sha1, $size, $file) = m/^\s+([A-Za-z0-9]+)\s+(\d+)\s+(.*)/m;
        $pdiff_sha1{$file} = $sha1;
        $pdiff_size{$file} = $size;
      }
    }
    else {
      # Skip unrecognized top-level lines (e.g. SHA256-* sections in
      # newer Index files). Without this the loop never advanced the
      # read on such a line and spun forever.
      $_ = <INDEX>;
    }
  }
  close(INDEX);

  # Download pdiff files as necessary
  $ret = 1;
  foreach $file (sort keys %pdiff_sha1) {
    if (!check_pdiff("$tempdir/$subdir/$list.diff/$file", $pdiff_size{$file}, $pdiff_sha1{$file})) {
      say("$subdir/$list.diff/$file.gz needs fetch");
      remote_get("$subdir/$list.diff/$file.gz");
#FIXME: before download
      $bytes_to_get += -s "$tempdir/$subdir/$list.diff/$file.gz";
      if (!check_pdiff("$tempdir/$subdir/$list.diff/$file", $pdiff_size{$file}, $pdiff_sha1{$file})) {
        say("$subdir/$list.diff/$file.gz failed sha1sum check, removing");
        push (@errlog,"$subdir/$list.diff/$file.gz failed sha1sum check, removing\n");
        unlink "$tempdir/$subdir/$list.diff/$file.gz";
        $ret = 0;
      }
    } else {
#FIXME: before download
      $bytes_to_get += -s "$tempdir/$subdir/$list.diff/$file.gz";
      $bytes_gotten += -s "$tempdir/$subdir/$list.diff/$file.gz";
    }
    $files{"$subdir/$list.diff/$file.gz"}=1 if ($pdiff_mode eq "mirror");
    $files{"$tempdir/$subdir/$list.diff/$file.gz"}=1;
  }
  # Give up if any pdiff failed to verify; the caller re-checks the list
  # file and falls back to fetching it whole.
  return unless ($ret);

  # Apply pdiff files
  open(LIST, "$tempdir/$subdir/$list") or return;
  $digest = Digest::SHA1->new;
  $digest->addfile(*LIST);
  $sha1 = $digest->hexdigest;
  $size = -s "$tempdir/$subdir/$list";
  foreach $file (sort keys %history_sha1) {
    # Apply only the patch whose starting checksum matches the current
    # file state; each successful patch advances $sha1/$size to the next.
    next unless ($sha1 eq $history_sha1{$file} && $size eq $history_size{$file});
    if (system("gzip -d < \"$tempdir/$subdir/$list.diff/$file.gz\" | patch --ed \"$tempdir/$subdir/$list\"")) {
      say("Patch $file failed, will fetch $subdir/$list file");
      unlink "$tempdir/$subdir/$list";
      return;
    }
    open(LIST, "$tempdir/$subdir/$list") or return;
    $digest = Digest::SHA1->new;
    $digest->addfile(*LIST);
    $sha1 = $digest->hexdigest;
    $size = -s "$tempdir/$subdir/$list";
    say("$subdir/$list patched with $subdir/$list.diff/$file.gz");
  }
  # The fully patched file must now match SHA1-Current from the Index.
  if (!($sha1 eq $current_sha1 && $size eq $current_size)) {
    say("$subdir/$list failed sha1sum check, removing");
    push (@errlog,"$subdir/$list failed sha1sum check, removing\n");
    unlink "$tempdir/$subdir/$list";
  }
}

# Make a directory including all needed parents.
{
  # Cache of path prefixes already known to exist, so repeated calls
  # don't re-stat or re-create the same directories.
  my %created;

  sub make_dir {
    my $dir=shift;

    my $prefix='';
    foreach my $component (split('/', $dir)) {
      $prefix.="$component/";
      next if ($created{$prefix});
      unless (-d $prefix) {
	mkdir ($prefix, 0755) or die "mkdir failed: $!";
	debug("Created directory: $prefix");
      }
      $created{$prefix}=1;
    }
  }
}

# Mirror cleanup for unknown files that cannot be found in Packages files.
# This subroutine is called on pre- and post-cleanup and takes no arguments.
# It uses global variables: %files, $mirrordir, @ignores, $dry_run.
sub cleanup_unknown_files {
  say("Cleanup mirror.");
  chdir($mirrordir) or die "chdir $mirrordir: $!";
  # Collapse the ignore patterns into a single alternation, if any given.
  my $ignore_re;
  $ignore_re = "(".join("|", @ignores).")" if @ignores;
  # Walk every file under the mirror and delete those that are neither
  # known (%files) nor matched by an ignore pattern.
  foreach my $candidate (`find . -type f`) {
    chomp $candidate;
    $candidate=~s:^\./::;
    next if (exists $files{$candidate});
    next if (defined($ignore_re) && $candidate=~/$ignore_re/o);
    say("deleting $candidate") if ($verbose);
    unless ($dry_run) {
      unlink $candidate or die "unlink $candidate: $!";
    }
  }
}

# Print a status message (arguments joined by spaces) on stdout, but only
# when verbose or progress output is enabled.
sub say {
  return unless ($verbose or $progress);
  print join(' ', @_)."\n";
}

# Print a diagnostic message prefixed with the program name, but only
# when debug output is enabled.
sub debug {
  return unless $debug;
  print "$0: ".join(' ', @_)."\n";
}

=head1 COPYRIGHT

This program is copyright 2001 by Joey Hess <joeyh@debian.org>, under
the terms of the GNU GPL, copyright 2003 by Goswin von Brederlow
<brederlo@informatik.uni-tuebingen.de>.

The author disclaims any responsibility for any mangling of your system,
unexpected bandwidth usage bills, meltdown of the Debian mirror network, 
etc, that this script may cause. See NO WARRANTY section of GPL.

=head1 AUTHOR

Current: Goswin von Brederlow <brederlo@informatik.uni-tuebingen.de>
Previous authors: Joey Hess <joeyh@debian.org>
                  Joerg Wendland <joergland@debian.org>

=head1 MOTTO

Waste bandwidth -- put a partial mirror on your laptop today!

=cut
