#!/usr/bin/perl
#
# Lintian reporting harness -- Create and maintain Lintian reports automatically
#
# Copyright (C) 1998 Christian Schwarz and Richard Braakman
#
# This program is free software.  It is distributed under the terms of
# the GNU General Public License as published by the Free Software
# Foundation; either version 2 of the License, or (at your option) any
# later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, you can find it on the World Wide
# Web at http://www.gnu.org/copyleft/gpl.html, or write to the Free
# Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
# MA 02110-1301, USA.

use strict;
use warnings;

use constant BACKLOG_PROCESSING_GROUP_LIMIT => 1024;
use constant BACKLOG_PROCESSING_TIME_LIMIT => 4 * 3600; # 4hours

use Date::Format qw(time2str);
use Errno qw(ENOENT);
use File::Temp qw(tempfile);
use Getopt::Long;
use POSIX qw(strftime);
use YAML::Any();

sub usage {
    print <<END;
Lintian reporting harness
Create and maintain Lintian reports automatically

Usage: harness [ -i | -f | -r | -c ]

Options:
  -c         clean mode, erase everything and start from scratch (implies -f)
  -f         full mode, blithely overwrite lintian.log
  -i         incremental mode, use old lintian.log data, process changes only
  -r         generate HTML reports only
  --dry-run  pretend to do the actions without actually doing them.  The
             "normal" harness output will go to stdout rather than the
             harness.log.
  --to-stdout
             [For debugging] Have output go to stdout as well as the usual
             log files.  Note, this option has no (extra) effect with --dry-run.
  --schedule-chunk-size N
             Schedule at most N groups in a given run of Lintian.  If more than N
             groups need to be processed, harness will invoke Lintian more than
             once.  If N is 0, schedule all groups in one go.  (Default: 512)

Incremental mode is the default if you have a lintian.log;
otherwise, it's full.

Report bugs to <lintian-maint\@debian.org>.
END
    #'# for cperl-mode
    exit;
}

my $LOG_FD;
my %opt = ('schedule-chunk-size' => 512,);

my %opthash = (
    'i' => \$opt{'incremental-mode'},
    'c' => \$opt{'clean-mode'},
    'f' => \$opt{'full-mode'},
    'r' => \$opt{'reports-only'},
    'dry-run' => \$opt{'dry-run'},
    'schedule-chunk-size=i' => \$opt{'schedule-chunk-size'},
    'to-stdout' => \$opt{'to-stdout'},
    'help|h' => \&usage,
);

# init commandline parser
Getopt::Long::config('bundling', 'no_getopt_compat', 'no_auto_abbrev');

# process commandline options
GetOptions(%opthash)
  or die("error parsing options\n");

# clean implies full - do this as early as possible, so we can just
# check $opt{'full-mode'} rather than a full
#   ($opt{'clean-mode'} || $opt{'full-mode'})
$opt{'full-mode'} = 1 if $opt{'clean-mode'};

die "Cannot use both incremental and full/clean.\n"
  if $opt{'incremental-mode'} && $opt{'full-mode'};
die "Cannot use other modes with reports only.\n"
  if $opt{'reports-only'} && ($opt{'full-mode'} || $opt{'incremental-mode'});

# read configuration
require './config'; ## no critic (Modules::RequireBarewordIncludes)
use vars qw($LINTIAN_ROOT $LINTIAN_LAB $LINTIAN_ARCHIVEDIR $LINTIAN_DIST
  $LINTIAN_ARCH @EXTRA_LINTIAN_OPTIONS
  $LOG_DIR
  $HTML_DIR $HTML_TMP_DIR
  $LINTIAN_AREA $HARNESS_STATE_DIR);

# export Lintian's configuration
$ENV{'LINTIAN_LAB'} = $LINTIAN_LAB;
# delete LINTIAN_{CFG,ROOT} in case they are set.
delete($ENV{'LINTIAN_CFG'});
delete($ENV{'LINTIAN_ROOT'});

my $html_reports_cmd = "$LINTIAN_ROOT/reporting/html_reports";
my ($log_file, $list_file, $lintian_log, $lintian_perf_log,$html_reports_log)
  = map {"$LOG_DIR/$_" }
  qw(harness.log changed-packages.list lintian.log lintian-perf.log html_reports.log);
my $old_lintian_log = $lintian_log . '.old';
my @lintian_cmd = (
    "$LINTIAN_ROOT/frontend/lintian",
    @EXTRA_LINTIAN_OPTIONS,
    qw(--no-cfg -I -E --pedantic -v --show-overrides),
    qw(--exp-output=format=fullewi --packages-from-file -),
    qw(--perf-debug --perf-output),
    "+$lintian_perf_log",
);
my $STATE_DIR = $HARNESS_STATE_DIR;
my $START_TIME = time();
my $state;

# import perl libraries
unshift @INC, "$LINTIAN_ROOT/lib";
require Lintian::Command;
import Lintian::Command qw(spawn safe_qx);
require Lintian::Lab;
require Lintian::Lab::Manifest;
require Lintian::Processable;
require Lintian::Relation::Version;
import Lintian::Relation::Version qw(versions_comparator);
require Lintian::Util;
import Lintian::Util qw(open_gz slurp_entire_file strip visit_dpkg_paragraph
  load_state_cache find_backlog);

my $LINTIAN_VERSION
  = safe_qx("$LINTIAN_ROOT/frontend/lintian",'--print-version');
chomp($LINTIAN_VERSION);

# turn file buffering off
STDOUT->autoflush;

unless ($opt{'dry-run'}) {
    # rotate log files
    my %opt = ('out' => '/dev/null',);
    my @rotate_logs = ($log_file, $html_reports_log, $lintian_perf_log);
    spawn(\%opt, ['savelog', @rotate_logs])
      or die "Cannot rotate log files.\n";

    # create new log file
    open($LOG_FD, '>', $log_file)
      or die "cannot open log file $log_file for writing: $!";
} else {
    $opt{'to-stdout'} = 0;
    open($LOG_FD, '>&', \*STDOUT)
      or die "Cannot open log file <stdout> for writing: $!";
    Log('Running in dry-run mode');
}
# From here on we can use Log() and Die().

if (not defined($STATE_DIR)) {
    Die(q{Missing required config option: $HARNESS_STATE_DIR});
} elsif (not -d $STATE_DIR) {
    system('mkdir', '-p', $STATE_DIR) == 0
      or Die("mkdir -p $STATE_DIR failed");
    Log("Created cache dir: $STATE_DIR");
}

my $LAB = Lintian::Lab->new($LINTIAN_LAB);

unless ($opt{'dry-run'}) {
    # purge the old packages
    $LAB->remove if $opt{'clean-mode'};

    $LAB->create({ 'mode' => 02775}) unless $LAB->exists;

} else {
    if (!$LAB->exists || $opt{'clean-mode'}) {
        # We either do not have a lab or we were asked to clean
        # the existing one.  We solve this by creating a temp
        # lab (which will be empty).  This means that A) the lab
        # will appear to be empty (as expected by clean-mode) and
        # B) that we do not have to do a dry-run check on every
        # "read-only" lab operation (we still have to guard write
        # operations).
        $LAB = Lintian::Lab->new;
        $LAB->create;
    }
}

if (!$opt{'reports-only'} && !$opt{'full-mode'} && !$opt{'incremental-mode'}) {
    # Nothing explicitly chosen, default to -i if the log is present,
    # otherwise -f.
    if (-f $lintian_log) {
        $opt{'incremental-mode'} = 1;
    } else {
        $opt{'full-mode'} = 1;
    }
}

unless ($opt{'reports-only'}) {
    $LAB->open;
    Log('Processing mirror index files');
    my @manifests = local_mirror_manifests(
        $LINTIAN_ARCHIVEDIR, [_trim_split($LINTIAN_DIST)],
        [_trim_split($LINTIAN_AREA)], [_trim_split($LINTIAN_ARCH)]);
    my @diffs = $LAB->generate_diffs(@manifests);
    my (%skip, @worklist, %dirty_groups);
    # Remove old/stale packages from the lab
    foreach my $diff (@diffs) {
        my $type = $diff->type;
        my $old_manifest = $diff->olist;
        my $man = $diff->nlist;
        Log("Removing old or changed $type packages from the lab");
        foreach my $removed (@{ $diff->removed }, @{ $diff->changed }) {
            my ($pkg_name, $pkg_version, $pkg_arch) = @$removed;
            my ($entry, $pkg_src, $pkg_src_version);
            my $sk = "$type:$pkg_name/$pkg_version";
            $sk .= "/$pkg_arch" if $pkg_arch;
            $skip{$sk} = 1; # For log-cleaning (incremental runs)
            if ($opt{'dry-run'}) {
                $entry = Lintian::Processable->new_from_metadata($type,
                    $old_manifest->get(@$removed));
            } else {
                $entry = $LAB->get_package($pkg_name, $type, $pkg_version,
                    $pkg_arch);
            }

            $pkg_src = $entry->pkg_src;
            $pkg_src_version = $entry->pkg_src_version;
            $dirty_groups{$pkg_src}->{$pkg_src_version} = 1;

            # Under dry-run, $entry is not a L::Lab::Entry, so kill it
            # so we don't try to use it as one later.
            $entry = undef if $opt{'dry-run'};
            if ($opt{'dry-run'} || $entry) {
                my $arch = '';
                $arch = " [$pkg_arch]" if $pkg_arch;
                if ($opt{'dry-run'} || $entry->remove) {
                    Log("Removed $type $pkg_name ($pkg_version)$arch");
                } else {
                    Log("Removing $type $pkg_name ($pkg_version)$arch failed."
                    );
                }
            }
        }
        Log("Adding new and changed $type packages to the lab");
        foreach my $added (@{ $diff->added }, @{ $diff->changed }) {
            my ($pkg_name, $pkg_version, $pkg_arch) = @$added;
            my $me = $man->get(@$added);
            my $entry;
            my $proc = Lintian::Processable->new_from_metadata($type, $me);
            my $pkg_src = $proc->pkg_src;
            my $pkg_src_version = $proc->pkg_src_version;

            $dirty_groups{$pkg_src}->{$pkg_src_version} = 1;

            unless ($opt{'dry-run'}) {
                $entry = $LAB->get_package($proc);
            }
            if ($opt{'dry-run'} || $entry) {
                my $ok = 0;
                my $arch = '';
                $arch = " [$pkg_arch]" if $pkg_arch;
                if ($opt{'dry-run'}) {
                    $ok = 1;
                } else {
                    eval {
                        $entry->create;
                        $entry->update_status_file
                          or die "creating status file: $!";
                        $ok = 1;
                    };
                }
                if ($ok) {
                    my $query = "$type:$pkg_name/$pkg_version";
                    $query .= "/$pkg_arch" if $pkg_arch;
                    Log("Added $type $pkg_name ($pkg_version)$arch");
                } else {
                    Log("Adding $type $pkg_name ($pkg_version)$arch failed: $@"
                    );
                }
            }
        }
    }

    # At this point %dirty_groups has queries for all dirty groups for
    # the incremental run.  Basically each group falls into 2 cases:
    #
    #  1. All entries in the group have been added/removed
    #     - Package added to or removed from suite
    #     - New version replacing an older version.
    #  2. Only part of the entries in a group has been added/removed:
    #     - binNMUs etc.
    #
    # If we just "blindly" run the packages from case 1, then binaries
    # from group 2 will not be procesed with their (full) group
    # (particularly without the source package).

    Log('Computing grouping...');
    eval {$state = load_state_cache($STATE_DIR);};
    if ($@) {
        Die($@);
    }

    if ($opt{'full-mode'}) {
        # for full run, just replace %dirty_groups with "<all groups in lab>".
        $LAB->visit_packages(
            sub {
                my ($entry) = @_;
                $dirty_groups{$entry->pkg_src}->{$entry->pkg_src_version} = 1;
            });
    } elsif (not %{$state}) {
        # New/empty state file
        Log('Populating state file... ');
        $LAB->visit_packages(
            sub {
                my ($entry) = @_;
                my $group_id
                  = join('/', $entry->pkg_src, $entry->pkg_src_version);
                $state->{$group_id} = {};
            });
    }

    # Sort so that the worklist is also sorted (makes it easier to review).
    foreach my $gname (sort keys %dirty_groups) {
        my $dver = $dirty_groups{$gname};
        for my $gversion (sort versions_comparator keys %{$dver}) {
            my $group_id = "${gname}/${gversion}";
            my $query = "GROUP:${group_id}";
            my @res = $LAB->lab_query($query);

            if (@res) {
                # Clear the "processed by" flag, so it will be reprocessed if
                # lintian is interrupted for some reason.
                delete($state->{$group_id}{'last-processed-by'});

                # Group is still in the Lab - reprocess it together
                push(@worklist, $group_id);
                foreach my $entry (@res) {
                    # Remove old log entry for all entries in this group
                    # NB: This part is unable to replace the update to %skip
                    # in the "foreach removed-entry in diff" above.  In case
                    # the entire group is removed, we will enter this loop,
                    # but we will see each of the entries removed in the
                    # loop above.
                    $skip{$entry->identifier} = 1;
                }
            } else {
                # No longer present at all, kill the entire entry
                delete($state->{$group_id});
            }
        }
    }
    save_state_cache($STATE_DIR, $state)
      if not $opt{'dry-run'};

    # Flushes the changed manifest to the file system - croaks on
    # error
    # - no need to check dry-run here as nothing changed and it frees
    #   memory to do this.
    # - in the (hopefully unlikely) case that dry-run is *buggy* and
    #   the lab actually was modified, then this will at least keep
    #   the lab metadata consistent with the actual contents.
    $LAB->close;
    # Throw away the lab to avoid it hogging memory.
    $LAB = undef;

    if ($opt{'incremental-mode'}) {
        # Extra work for the incremental run
        # - do this regardless of @worklist, since we could in theory
        #   have an "only removals"-run.  In that case, their entries
        #   should still be removed from the log.

        die "Old Lintian log file $lintian_log not found!\n"
          unless -f $lintian_log;

        # update lintian.log
        Log('Updating lintian.log...');
        rewrite_lintian_log($lintian_log, \%skip);
    }

    Log('');

    # Clear some variables we no longer need, so Perl can reclaim
    # the memory.
    %dirty_groups = %skip = @diffs = @manifests = ();

    if (@worklist) {
        my $scs = $opt{'schedule-chunk-size'};
        if ($opt{'full-mode'}) {
            # truncate in full-mode
            open(my $fd, '>', $lintian_log)
              or Die("open/truncate $lintian_log failed: $!");
            close($fd)
              or Die("close $lintian_log failed: $!");
        }
        $state = process_worklist(\@worklist, $lintian_log, $STATE_DIR, $scs);
    } else {
        Log('Skipping Lintian run - nothing to do...');
    }
}

if (defined($state) and time() <= $START_TIME + BACKLOG_PROCESSING_TIME_LIMIT){
    my $scs = $opt{'schedule-chunk-size'};
    my @worklist = find_backlog($LINTIAN_VERSION, $state);

    if (@worklist > BACKLOG_PROCESSING_GROUP_LIMIT) {
        @worklist = splice(@worklist, 0, BACKLOG_PROCESSING_GROUP_LIMIT);
    }

    Log('Processing backlog...') if @worklist;

    while (@worklist and $state) {
        my (@slice, $filter_set);
        if (time() >= $START_TIME + BACKLOG_PROCESSING_TIME_LIMIT) {
            Log('No more time for processing backlogs');
            last;
        }

        if ($scs > -1 and $scs < @worklist) {
            @slice = splice(@worklist, 0, $scs);
        } else {
            @slice = @worklist;
            undef(@worklist);
        }

        $filter_set = generate_log_filter($LINTIAN_LAB, \@slice);
        rewrite_lintian_log($lintian_log, $filter_set);

        $state = process_worklist(\@slice, $lintian_log, $STATE_DIR, -1);
    }
}

# create html reports
Log('Creating HTML reports...');
Log("Executing $html_reports_cmd $lintian_log");
my %html_reports_opts = (
    'out' => $html_reports_log,
    'err' => '&2',
);
spawn(\%html_reports_opts, [$html_reports_cmd, $lintian_log])
  or
  Log("warning: executing $html_reports_cmd returned " . (($? >> 8) & 0xff));
Log('');

# rotate the statistics file updated by $html_reports_cmd
if (!$opt{'dry-run'} && -f "$HARNESS_STATE_DIR/statistics") {
    my $date = time2str('%Y%m%d', time());
    my $dest = "$LOG_DIR/stats/statistics-${date}";
    system('cp', "$HARNESS_STATE_DIR/statistics", $dest) == 0
      or Log('warning: could not rotate the statistics file');
}

# install new html directory
Log('Installing HTML reports...');
unless ($opt{'dry-run'}) {
    system('rm', '-rf', $HTML_DIR) == 0
      or Die("error removing $HTML_DIR");
    # a tiny bit of race right here
    rename($HTML_TMP_DIR,$HTML_DIR)
      or Die("error renaming $HTML_TMP_DIR into $HTML_DIR");
}
Log('');

# ready!!! :-)
Log('All done.');
exit 0;

# -------------------------------

sub Log {
    my ($msg) = @_;
    my $ts = strftime('[%FT%T]: ', localtime());
    print {$LOG_FD} $ts, $msg,"\n";
    print $ts, $msg, "\n" if $opt{'to-stdout'};
    return;
}

sub Die {
    Log("fatal error: $_[0]");
    exit 1;
}

sub _trim_split {
    my ($val) = @_;
    return () unless $val;
    return split m/\s*+,\s*+/o, strip($val);
}

# local_mirror_manifests ($mirdir, $dists, $areas, $archs)
#
# Returns a list of manifests that represents what is on the local mirror
# at $mirdir.  3 manifests will be returned, one for "source", one for "binary"
# and one for "udeb" packages.  They are populated based on the "Sources" and
# "Packages" files.
#
# $mirdir - the path to the local mirror
# $dists  - listref of dists to consider (i.e. ['unstable'])
# $areas  - listref of areas to consider (i.e. ['main', 'contrib', 'non-free'])
# $archs  - listref of archs to consider (i.e. ['i386', 'amd64'])
#
sub local_mirror_manifests {
    my ($mirdir, $dists, $areas, $archs) = @_;
    my $active_srcs = {};
    my $srcman = Lintian::Lab::Manifest->new('source');
    my $binman = Lintian::Lab::Manifest->new('binary');
    my $udebman = Lintian::Lab::Manifest->new('udeb');
    foreach my $dist (@$dists) {
        foreach my $area (@$areas) {
            my $srcs = "$mirdir/dists/$dist/$area/source/Sources";
            my ($srcfd, $srcsub);
            # Binaries have a "per arch" file.
            # - we check those first and then include the source packages that
            #   are referred to by these binaries.
            foreach my $arch (@$archs) {
                my $pkgs = "$mirdir/dists/$dist/$area/binary-$arch/Packages";
                my $upkgs = "$mirdir/dists/$dist/$area/debian-installer/"
                  ."binary-$arch/Packages";
                my $pkgfd = _open_data_file($pkgs);
                my $binsub = sub {
                    _parse_pkgs_pg($active_srcs, $binman, $mirdir, $area, @_);
                };
                my $upkgfd;
                my $udebsub = sub {
                    _parse_pkgs_pg($active_srcs, $udebman, $mirdir, $area, @_);
                };
                visit_dpkg_paragraph($binsub, $pkgfd);
                close($pkgfd);
                $upkgfd = _open_data_file($upkgs);
                visit_dpkg_paragraph($udebsub, $upkgfd);
                close($upkgfd);
            }
            $srcfd = _open_data_file($srcs);
            $srcsub
              = sub { _parse_srcs_pg($active_srcs, $srcman, $mirdir, $area, @_) };
            visit_dpkg_paragraph($srcsub, $srcfd);
            close $srcfd;
        }
    }
    return ($srcman, $binman, $udebman);
}

# _open_data_file ($file)
#
# Opens $file if it exists, otherwise it tries common extensions (i.e. .gz) and opens
# that instead.  It may pipe the file through a external decompressor, so the returned
# file descriptor cannot be assumed to be a file.
#
# If $file does not exists and no common extensions are found, this dies.  It may also
# die if it finds a file, but is unable to open it.
sub _open_data_file {
    my ($file) = @_;
    if (-e $file) {
        open my $fd, '<', $file or Die "opening $file: $!";
        return $fd;
    }
    if (-e "$file.gz") {
        return open_gz("$file.gz");
    }
    Die "Cannot find $file";
}

# Helper for local_mirror_manifests - it parses a paragraph from Packages file
sub _parse_pkgs_pg {
    my ($active_srcs, $manifest, $mirdir, $area, $data) = @_;
    my $ts = 0;
    my $s;
    unless ($data->{'source'}) {
        $data->{'source'} = $data->{'package'};
    } elsif ($data->{'source'} =~ /^([-+\.\w]+)\s+\((.+)\)$/) {
        $data->{'source'} = $1;
        $data->{'source-version'} = $2;
    } else {
        $data->{'source-version'} = $data->{'version'};
    }
    unless (defined $data->{'source-version'}) {
        $data->{'source-version'} = $data->{'version'};
    }
    $s = $data->{'source'} . '/' . $data->{'source-version'};
    $active_srcs->{$s}++;
    $data->{'file'} = $mirdir . '/' . $data->{'filename'};
    $data->{'area'} = $area;
    # $manifest strips redundant fields for us.  But for clarity and to
    # avoid "hard to debug" cases $manifest renames the fields, we explicitly
    # remove the "filename" field.
    delete $data->{'filename'};

    if (my @stat = stat $data->{'file'}) {
        $ts = $stat[9];
    }
    $data->{'timestamp'} = $ts;

    $manifest->set($data);
    return;
}

sub process_worklist {
    my ($worklist_ref, $output_file, $state_dir, $schedule_chunk_size) = @_;
    my $round = 0;
    my $rounds = 1;
    my @worklist = @{$worklist_ref};
    my $all_ok = 1;
    my $state;

    if ($schedule_chunk_size > 0) {
        # compute the number of rounds needed.
        my $size_up = scalar @worklist + ($schedule_chunk_size - 1);
        $rounds = int($size_up / $schedule_chunk_size);
    }

    Log(
        sprintf(
            'Groups to process %d will take %d round(s) [round limit: %s]',
            scalar @worklist,
            $rounds, $schedule_chunk_size > 0 ? $schedule_chunk_size : 'none'
        ));

    Log('Command line used: ' . join(q{ }, @lintian_cmd));
    while (@worklist) {
        my $len = scalar @worklist;
        my @work_splice;
        $round++;
        # correct bounds to fit chunk size
        if ($schedule_chunk_size > 0 and $len > $schedule_chunk_size) {
            $len = $schedule_chunk_size;
        }

        @work_splice = splice(@worklist, 0, $len);

        Log("Running Lintian (round $round/$rounds) ...");
        if ($len == 1) {
            Log(' - Single group: ' . $work_splice[0]);
        } else {
            Log(    ' - Range: GROUP:'
                  . $work_splice[0]
                  . q{ ... GROUP:}
                  . $work_splice[-1]);
        }

        next if ($opt{'dry-run'});

        my $pid = open(my $lintpipe, '|-') // Die("fork failed: $!");
        if (not $pid) {
            # child => reopen STD{OUT,ERR} and exec lintian
            open(STDOUT, '>>', $lintian_log)
              or die("re-open STDOUT failed: $!");
            open(STDERR, '>&', *STDOUT)
              or die("re-open STDERR failed: $!");
            exec(@lintian_cmd)
              or die("exec @lintian_cmd failed: $!");
        }
        foreach my $group_id (@work_splice) {
            # Technically, we emit these items in reverse order to Lintian,
            # but Lintian sorts it anyway
            print {$lintpipe} "!query: GROUP:$group_id\n";
        }
        $! = 0;
        close $lintpipe;
        Die("Closing pipe failed: $!") if $!;
        if ($?) {
            # exit 1 (policy violations) happens all the time (sadly)
            # exit 2 (broken packages) also happens all the time...
            my $res = ($? >> 8) & 0xff;
            my $sig = $? & 0xff;
            if ($res != 1 and $res != 0) {
                Log("warning: executing lintian returned $res");
            } elsif ($sig) {
                Log("Lintian terminated by signal: $sig");
                # If someone is sending us signals (e.g. SIGINT/Ctrl-C)
                # don't start the next round.
                #  - 35k / 512 => 700 rounds (or 700 SIGINTs to stop...)
                Log(' - skipping the rest of the worklist');
                return;
            }
        } else {
            Log('Lintian finished successfully');
        }
        Log('Updating harness state cache');
        eval {$state = load_state_cache($STATE_DIR);};
        if ($@) {
            Die($@);
        }
        for my $group_id (@work_splice) {
            $state->{$group_id}{'last-processed-by'} = $LINTIAN_VERSION;
        }
        save_state_cache($STATE_DIR, $state);
    }
    return $state;
}

# Helper for local_mirror_manifests - it parses a paragraph from Sources file
sub _parse_srcs_pg {
    my ($active_srcs, $manifest, $mirdir, $area, $data) = @_;
    my $ts = 0;
    my $dir = $data->{'directory'}//'';
    my $s = $data->{'package'} . '/' . $data->{'version'};
    # only include the source if it has any binaries to be checked.
    # - Otherwise we may end up checking a source with no binaries
    #   (happens if the architecture is "behind" in building)
    return unless $active_srcs->{$s};
    $dir .= '/' if $dir;
    foreach my $f (split m/\n/, $data->{'files'}) {
        strip($f);
        next unless $f && $f =~ m/\.dsc$/;
        my (undef, undef, $file) = split m/\s++/, $f;
        # $dir should end with a slash if it is non-empty.
        $data->{'file'} = $mirdir . "/$dir" . $file;
        last;
    }
    $data->{'area'} = $area;
    # Rename a field :)
    $data->{'source'} = $data->{'package'};

    if (my @stat = stat $data->{'file'}) {
        $ts = $stat[9];
    }
    $data->{'timestamp'} = $ts;

    # $manifest strips redundant fields for us.
    $manifest->set($data);
    return;
}

sub save_state_cache {
    my ($state_dir, $state) = @_;
    my $state_file = "$state_dir/state-cache";
    my ($tmp_fd, $tmp_path)= tempfile('state-cache-XXXXXX', DIR => $state_dir);

    # atomic replacement of the state file; not a substitute for
    # proper locking, but it will at least ensure that the file
    # is in a consistent state.
    eval {
        print {$tmp_fd} YAML::Any::Dump($state);

        close($tmp_fd) or die("close $tmp_path: $!");

        # There is no secret in this.  Set it to 0644, so it does not
        # require sudo access on lintian.d.o to read the file.
        chmod(0644, $tmp_path);

        rename($tmp_path, $state_file)
          or die("rename $tmp_path -> $state_file: $!");
    };
    if (my $err = $@) {
        if (-e $tmp_path) {
            unlink($tmp_path)
              or Log("unlink $tmp_path failed: $!");
        }
        Die($@);
    }
    return 1;
}

sub rewrite_lintian_log {
    my ($lintian_log, $filter_set) = @_;
    return if ($opt{'dry-run'});

    rename($lintian_log, $old_lintian_log)
      or Die("cannot rename lintian.log to $old_lintian_log: $!");
    open(my $nfd, '>', $lintian_log)
      or Die("cannot open lintian.log $lintian_log for writing: $!");
    open(my $ofd, '<', $old_lintian_log)
      or Die("cannot open old lintian.log $old_lintian_log for reading: $!");
    my $copy_mode = 1;
    while (<$ofd>) {
        if (
            m/^N: [ ] Processing [ ] (binary|udeb|source) [ ]
                   package [ ] (\S+) [ ] \(version [ ] (\S+), [ ]
                   arch [ ] (\S+)\)[ ]\.\.\./oxsm
          ) {
            my ($type, $pkg, $ver, $arch) = ($1,$2, $3, $4);
            my $k = "$type:$pkg/$ver";
            $k .= "/$arch" if $type ne 'source';
            $copy_mode = 1;
            $copy_mode = 0 if exists $filter_set->{$k};
        }
        if ($copy_mode) {
            print {$nfd} $_;
        }
    }
    print {$nfd} "N: ---end-of-old-lintian-log-file---\n";
    close($nfd) or Die("Close $lintian_log: $!");
    close($ofd); # Ignore (read-only handle)
    return 1;
}

sub generate_log_filter {
    my ($lintian_lab_path, $worklist_ref) = @_;
    my $lab = Lintian::Lab->new($lintian_lab_path);
    my %filter;
    $lab->open;
    for my $group_id (@{$worklist_ref}) {
        my @res = $lab->lab_query("GROUP:${group_id}");
        if (@res) {
            foreach my $entry (@res) {
                # Remove old log entry for all entries in this group
                # NB: This part is unable to replace the update to %filter
                # in the "foreach removed-entry in diff" above.  In case
                # the entire group is removed, we will enter this loop,
                # but we will see each of the entries removed in the
                # loop above.
                $filter{$entry->identifier} = 1;
            }
        }
    }
    $lab->close;
    return \%filter;
}

# Local Variables:
# indent-tabs-mode: nil
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 sr et
