#!/usr/bin/perl

# Copyright 2008 Stefan Fritsch <sf@debian.org>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

use strict;
use warnings;
use File::Temp qw{tempdir};
use POSIX qw{WIFEXITED WEXITSTATUS WTERMSIG};
use File::Copy;
use Getopt::Std;

$Getopt::Std::STANDARD_HELP_VERSION=1;
my %opts;
getopts( 'n:qvhdikc:', \%opts ) or usage(3);

usage(0) if $opts{h};
usage(1) if scalar @ARGV != 2;

my $max_patches = defined $opts{n} ? $opts{n}  : 50;
my $debug       = $opts{d};
my $verbose     = -t STDOUT;
if ($opts{q} ) {
	$verbose = 0;
}
if ($opts{v}) {
	$verbose = 1;
}

# handle ssl certificates
my $curl_opts = '';
if ($opts{k}) {
    if ($opts{c}) {
        usage(1);
    }
    else {
        $curl_opts .= ' -k';
    }
}
elsif ($opts{c}) {
    if (-d $opts{c}) {
        $curl_opts .= " --capath $opts{c}";
    }
    elsif (-f $opts{c}) {
        $curl_opts .= " --cacert $opts{c}";
    }
}

my $rred       = "diffindex-rred";
my %decompress = (
    "gz"  => "gzip -dc",
    "bz2" => "bzip2 -dc",
    "lzo" => "lzop -dc",
    ""    => "cat",
);

my %compress = (
    "gz"  => "gzip -c",
    "bz2" => "bzip2 -c",
    "lzo" => "lzop -c",
    ""    => "cat",
);

my $re_sum = qr/[0-9a-f]{40}/i;
my ( $url, $target ) = @ARGV;

$url    or die;
$target or die;

my $basename = $target;
my $baseurl  = $url;
my $algo     = "";
my $url_algo = "";

if ( $basename =~ s{\.(gz|bz2|lzo)}{} ) {
    $algo = $1;
}
if ( $baseurl =~ s{\.(gz|bz2|lzo)}{} ) {
    $url_algo = $1;
}

my $oldindexfile = "$basename.IndexDiff";

if ( !-e $target ) {
    download_full();
    exit 0;
}

my $tmpdir = tempdir(CLEANUP => 1);
my $newindexfile = "$tmpdir/Index";
my $newindex;
my $current_sum;

# try using the diffs to download the new file
# if something goes wrong we throw an exception and download the full file
eval {
    if (!$max_patches) {
        die "Patch download disabled.\n";
    }

    my $index_url = "$baseurl.diff/Index";
    info("Downloading Index $index_url:\n");
    if (url_not_found($index_url)) {
        die "No Index available.\n";
    }
    {
        # don't make curl print the progress bar for the index
        my $verbose;
        download( $index_url, $newindexfile, $oldindexfile );
    }
    if ( ! -e $newindexfile ) {
        info("Index is up-to-date.\n");
        exit 0;
    }

    eval {

        # try to get sha1sum from old Index file
        my $oldindex = parse_index($oldindexfile);
        $current_sum = $oldindex->{Current}->{sum};
    };
    if ($@) {

        # calculate sha1sum if not successful
        info("Calculating old sha1sum...\n");
        my $result = qx/$decompress{$algo} $target | sha1sum/;
        if ( $? == 0 and $result =~ /^($re_sum)\s/ ) {
            $current_sum = lc($1);
        }
        else {
            die "Could not get sha1sum of old file\n";
        }
    }

    $newindex = parse_index($newindexfile);

    if ( $current_sum eq $newindex->{Current}->{sum} ) {
        info("File is up-to-date.\n");
        move( $newindexfile, $oldindexfile ) if !-e $oldindexfile;
        exit 0;
    }

    my @patches;
    my $patch = $newindex->{History}->{$current_sum}
        or die "local file too old\n";

    while ( scalar @patches <= $max_patches ) {
        if ( !defined $newindex->{$patch} ) {
            die "something wrong with index: $patch missing\n";
        }
        push @patches, $patch;

        $patch = $newindex->{$patch}->{next};
        last if !defined $patch;
    }
    die "would require more than $max_patches patches\n" if defined $patch;

    info( "Downloading " . scalar @patches . " patches:\n" );
    my @args = map { ( "$baseurl.diff/$_.gz", "$tmpdir/$_.gz" ) } @patches;
    download(@args);

    info("Applying patches...\n");
    @args = map { "$tmpdir/" . quotemeta($_) . ".gz" } @patches;
    system_or_die(
        "$decompress{$algo} $target | $rred @args | $compress{$algo} > ${target}_new",
        "Failed to apply patches with $rred",
        [ "${target}_new", $oldindexfile ]
    );

    move( "${target}_new", $target );
    move( $newindexfile,   $oldindexfile );
};
if ($@) {

    # something went wrong, download full file
    warn "$@" if $verbose;
    if ( $@ =~ /signal 2/ ) {

        # exit if CTRL-C was pressed
        exit 2;
    }
    download_full();
}

exit 0;
########################## END of main #######################################

# args: url, filename [, url, filename [, ...]] [, oldfile]
# (we want to be able to download multpile files with http keepalive)
# download only if remote is newer than oldfile
sub download {
    my $oldfile;
    $oldfile = pop(@_) if scalar @_ % 2;
    my %urls    = @_;
    my $command = "curl -L -f -g $curl_opts";
    $command .= " -sS" if !$verbose;
    $command .= " -z $oldfile" if ($oldfile && -e $oldfile);

    foreach my $url ( keys %urls ) {
        $command .= " " . quotemeta($url) . " -o " . quotemeta( $urls{$url} );
    }

    system_or_die( $command,
        "Download of " . join( " ", keys %urls ) . " failed",
        [ values %urls ]
    );
}

sub download_full {

    if ( url_not_found($url) ) {
        info("Ignoring source without Contents File:\n  $url\n");
        return;
    }

    info("Downloading complete file $url\n");
    download( $url, "${target}_new", $target );
    if (! -e "${target}_new") {
        info("File is up-to-date.\n");
        exit(0);
    }
    if ( $url_algo ne $algo ) {
        system_or_die(
            "$decompress{$url_algo} ${target}_new | $compress{$algo} > ${target}_new2",
            "Recompression from '$url_algo' to '$algo' failed",
            [ "${target}_new", "${target}_new2" ]
        );
        move( "${target}_new2", $target );
        unlink "${target}_new";
    }
    else {
        move( "${target}_new", $target );
    }
    unlink $oldindexfile if -e $oldindexfile;
}

sub url_not_found {
    my $url = shift;

    my $cmd     = "curl -L -s -I -g $curl_opts" . quotemeta($url);
    my $headers = qx{$cmd};
    debug("'$cmd' ($?):\n$headers");

    if ( WIFEXITED($?) && WEXITSTATUS($?) == 19 ) {

        # FTP file not found
        return 1;
    }
    if ( $headers =~ m{^HTTP/1.. 404}m ) {
        return 1;
    }
    return 0;
}

sub parse_index {
    my $file = shift;

    my $data = {};

    open( my $fh, "<", $file ) or die "could not open $file\n";

    my ( $section, $previous, $line );

    while ( defined( $line = <$fh> ) ) {
        if ( $line =~ m{^SHA1-Current:\s*($re_sum)\s+(\d+)\s*$}m ) {
            if ( $data->{Current} ) {
                die "Invalid Index $file:$.: Multiple SHA1-Current\n";
            }
            $data->{Current}->{sum}  = lc($1);
            $data->{Current}->{size} = $2;
        }
        elsif ( $line =~ m{^SHA1-(History|Patches):\s*$} ) {
            $section = $1;
            if ( $data->{$section} ) {
                die "Invalid Index $file:$.: Multiple SHA1-$1\n";
            }
            $previous = undef;
        }
        elsif ( $line =~ m{^\s+($re_sum)\s+(\d+)\s+(\S+)\s*$} ) {
            my ( $sum, $size, $name ) = ( lc($1), $2, $3 );
            if ( !defined $section ) {
                die "Invalid Index $file:$.: File info without section\n";
            }
            if ( $name =~ /[^-\w.,]/ ) {
                die "Patch name $name contains invalid characters\n";
            }
            $data->{$section}->{$sum} = $name;
            if ( $section eq 'History' ) {
                $data->{$name}->{sum}  = $sum;
                $data->{$name}->{size} = $size;
                if ( defined $previous ) {
                    $data->{$previous}->{next} = $name;
                }
                $previous = $name;
            }
            else {
                $data->{$name}->{patch_sum}  = $sum;
                $data->{$name}->{patch_size} = $size;
            }
        }
        elsif ( $line =~ m{^#} or $line =~ m{^\s*$} ) {
            next;
        }
        else {
            die "Invalid Index $file:$.: $line\n";
        }
    }
    close($fh);

    foreach $section (qw/History Patches Current/) {
        defined $data->{$section}
            or die "Invalid Index: Missing SHA1-$section in $file\n";
    }

    return $data;
}

# call system($command)
# On failure: print $msg, call code or unlink files in $cleanup, then die
sub system_or_die {
    my ( $command, $msg, $cleanup ) = @_;

    $command = '/bin/bash -c "set -o pipefail; "' . quotemeta($command);
    debug("Executing '$command'\n");
    system($command);
    if ( WIFEXITED($?) ) {
        if ( WEXITSTATUS($?) != 0 ) {
            warn "$msg\n" if $msg;
            cleanup($cleanup);
            die "Command exited with code " . WEXITSTATUS($?) . "\n";
        }
        else {
            return;
        }
    }
    else {
        my $sig = WTERMSIG($?);
        warn "$msg\n" if $msg;
        cleanup($cleanup);
        die "Command killed by signal SIG" . signal_number2name($sig) . "\n";
    }
}

sub cleanup {
    my $clean = shift;

    $clean || return;

    if (ref($clean) eq 'ARRAY') {
        unlink(@{$clean});
    }
    elsif (ref($clean) eq 'CODE') {
        eval { $clean->(); };
    }
    elsif (ref($clean) eq '') {
        unlink($clean);
    }
    else {
        die 'invalid argument in cleanup';
    }
}

sub info {
    print @_ if $verbose;
}

sub debug {
    print @_ if $debug;
}

sub usage {
    my $rv = shift;
    print << "EOF";
Usage:
  $0 [-n <max number of diffs>] [-i] [-q] [-d] [-k|-c cacerts] <URL> <filename>

  -n <num>  don't download more than num patches, download whole file instead
            (use large num with slow network / fast CPU and small num with
            fast network / slow CPU)
  -i        don't exit with errror if URL does not exist
  -q        be quiet
  -d        print additional debug messages
  -k        with https: do not verify peer certificate
  -c <file> use file as CA certificate bundle (see --cacert in curl man page)
  -c <dir>  use dir as CA certificate directory (see --capath in curl man page)
EOF
    exit $rv;
}

sub HELP_MESSAGE {
    usage(0);
}

{
    my @signame;

    sub signal_number2name {
        my ($number) = @_;
        if (not @signame) {
            require Config;
            # Doubt this happens for apt-file, but the code might
            # Cargo-cult-copied or copy-wasted into another project.
            # Speaking of which, thanks to
            #  http://www.ccsf.edu/Pub/Perl/perlipc/Signals.html
            defined($Config::Config{sig_name})
              or die "Signals not available\n";
            my $i = 0;
            for my $name (split(' ', $Config::Config{sig_name})) {
                $signame[$i] = $name;
                $i++;
            }
        }
        return $signame[$number];
    }
}

# our style is roughly "perltidy -pbp"
# vim:sts=4:sw=4:expandtab
