#!/usr/bin/env python
# -*- Mode:Python; indent-tabs-mode:nil; tab-width:4 -*-
#
# duplicity -- Encrypted bandwidth efficient backup
# Version 0.5.06 released January 09, 2009
#
# Copyright (C) 2002 Ben Escoto <bescoto@stanford.edu>
#
# This file is part of duplicity.
#
# Duplicity is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# Duplicity is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with duplicity; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#
# See http://www.nongnu.org/duplicity for more information.
# Please send mail to me or the mailing list if you find bugs or have
# any suggestions.

from __future__ import generators
import getpass, gzip, os, sys, time, types
import traceback

import duplicity.backends
import duplicity.errors

from duplicity import collections, commandline, diffdir, dup_temp, \
     dup_time, file_naming, globals, gpg, log, manifest, patchdir, \
     path, robust, tempdir, asyncscheduler, util

import gettext
gettext.install('duplicity')

# If exit_val is not None, exit with given value at end.
exit_val = None


def get_passphrase(n):
    """Get passphrase from environment or, failing that, from user

    If n=3, a password is requested and verified. If n=2, the current
    password is verified. If n=1, a password is requested without
    verification for the time being."""

    try:
        return os.environ['PASSPHRASE']
    except KeyError:
        pass

    log.Log("PASSPHRASE variable not set, asking user.", 5)
    while True:
        if n == 2:
            pass1 = globals.gpg_profile.passphrase
        else:
            pass1 = getpass.getpass("GnuPG passphrase: ")

        if n == 1:
            pass2 = pass1
        else:
            pass2 = getpass.getpass("Retype passphrase to confirm: ")

        if pass1 != pass2:
            print "First and second passphrases do not match!  Please try again."
            n = 3
            continue
        if not pass1 and not globals.gpg_profile.recipients:
            print "Cannot use empty passphrase with symmetric encryption!  Please try again."
            n = 3
            continue
        return pass1
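
# Illustrative only (never executed): exporting PASSPHRASE makes
# get_passphrase() return immediately, with no interactive prompt.
# A minimal sketch, assuming it runs before get_passphrase() is called:
#
#     os.environ['PASSPHRASE'] = 'my secret'
#     assert get_passphrase(1) == 'my secret'    # no prompt, no verification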


def dummy_backup(tarblock_iter):
    """Fake writing to backend, but do go through all the source paths."""
    try:
        # Just spin our wheels
        while tarblock_iter.next():
            pass
    except StopIteration:
        pass
    log.Progress(None, diffdir.stats.SourceFileSize)
    return 0


def write_multivol(backup_type, tarblock_iter, backend):
    """Encrypt volumes of tarblock_iter and write to backend

    backup_type should be "inc" or "full" and only matters here when
    picking the filenames.  The path_prefix will determine the names
    of the files written to backend.  Also writes manifest file.
    Returns number of bytes written.

    """
    def get_indices(tarblock_iter):
        """Return start_index and end_index of the previous volume"""
        start_index = tarblock_iter.recall_index()
        if start_index is None:
            start_index = ()
        end_index = tarblock_iter.get_previous_index()
        if end_index is None:
            end_index = start_index
        return start_index, end_index

    def put(tdp, dest_filename):
        backend.put(tdp, dest_filename)
        putsize = tdp.getsize()
        tdp.delete()

        return putsize

    at_end = 0
    mf = manifest.Manifest().set_dirinfo()
    bytes_written = 0
    vol_num = 0

    # This assertion must stay until the backends have been made safe
    # for real concurrency.  A concurrency of 1 is fine: actual I/O on
    # the backend remains serialized as usual, but local CPU-intensive
    # work may proceed while that single upload is in flight.  The
    # assert guards against someone accidentally enabling a higher
    # concurrency before the backends support it.
    assert globals.async_concurrency <= 1

    io_scheduler = asyncscheduler.AsyncScheduler(globals.async_concurrency)
    async_waiters = []

    while not at_end:
        # Create volume
        tarblock_iter.remember_next_index() # keep track of start index
        vol_num += 1
        dest_filename = file_naming.get(backup_type, vol_num,
                                        encrypted = globals.encryption,
                                        gzipped = not globals.encryption)
        tdp = dup_temp.new_tempduppath(file_naming.parse(dest_filename))
        if globals.encryption:
            at_end = gpg.GPGWriteFile(tarblock_iter, tdp.name,
                                      globals.gpg_profile, globals.volsize)
        else:
            at_end = gpg.GzipWriteFile(tarblock_iter, tdp.name, globals.volsize)
        tdp.setdata()

        # Add volume information to manifest
        vi = manifest.VolumeInfo()
        start_index, end_index = get_indices(tarblock_iter)
        vi.set_info(vol_num, start_index, end_index)
        vi.set_hash("SHA1", gpg.get_hash("SHA1", tdp))
        mf.add_volume_info(vi)

        async_waiters.append(io_scheduler.schedule_task(put, (tdp, dest_filename)))

        # Log human-readable version as well as raw numbers for machine consumers
        log.Progress(_('Processed volume %d') % vol_num, diffdir.stats.SourceFileSize)

    # Collect byte count from all asynchronous jobs; also implicitly waits
    # for them all to complete.
    for waiter in async_waiters:
        bytes_written += waiter()

    # Upload the collection summary.
    bytes_written += write_manifest(mf, backup_type, backend)

    return bytes_written
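
# A minimal sketch of the AsyncScheduler contract assumed by
# write_multivol() above (hypothetical stand-in, not duplicity's actual
# asyncscheduler module): schedule_task(fn, params) runs fn(*params),
# possibly on a worker, and returns a "waiter" callable that blocks until
# the task finishes and then yields its return value.
#
#     class _SyncScheduler:                      # concurrency 0/1 behavior
#         def schedule_task(self, fn, params):
#             result = fn(*params)               # run inline
#             return lambda: result              # waiter just hands it back
#
#     waiter = _SyncScheduler().schedule_task(lambda a, b: a + b, (1, 2))
#     assert waiter() == 3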


def write_manifest(mf, backup_type, backend):
    """Write manifest to file in archive_dir and encrypted to backend

    Returns number of bytes written

    """
    mf_string = mf.to_string()
    if globals.archive_dir:
        local_mf_name = file_naming.get(backup_type, manifest = 1)
        fin = dup_temp.get_fileobj_duppath(globals.archive_dir, local_mf_name)
        fin.write(mf_string)
        fin.close()

    sizelist = [] # will hold length of file in bytes
    remote_mf_name = file_naming.get(backup_type, manifest = 1,
                                     encrypted = globals.encryption)
    remote_fin = backend.get_fileobj_write(remote_mf_name, sizelist = sizelist)
    remote_fin.write(mf_string)
    remote_fin.close()
    return sizelist[0]
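
# Sketch of the sizelist out-parameter convention relied on above
# (a hypothetical stand-in for the backend's hooked file object): the
# writer appends its final byte count to the supplied list when closed,
# so the caller can read the size back after close().
#
#     class _CountingWriter:
#         def __init__(self, sizelist):
#             self.sizelist = sizelist
#             self.count = 0
#         def write(self, s):
#             self.count += len(s)
#         def close(self):
#             self.sizelist.append(self.count)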


def get_sig_fileobj(sig_type):
    """Return a fileobj opened for writing, save results as signature

    If globals.archive_dir is available, save signatures there
    gzipped.  Save them on the backend encrypted as needed.

    """
    assert sig_type in ("full-sig", "new-sig")

    sig_filename = file_naming.get(sig_type, encrypted = globals.encryption,
                                   gzipped = not globals.encryption)
    fh = globals.backend.get_fileobj_write(sig_filename)

    # Use addfilehandle so that one pass writes both the remote and the
    # local signature files.
    if globals.archive_dir:
        local_sig_filename = file_naming.get(sig_type, gzipped = 1)
        fh.addfilehandle(dup_temp.get_fileobj_duppath(globals.archive_dir,
                                                      local_sig_filename))
    return fh
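
# Sketch of the addfilehandle() tee behavior used above (hypothetical
# stand-in for the hooked file object returned by the backend): every
# write is mirrored to each added handle, which is how one pass over the
# source produces both the remote and the local signature files.
#
#     class _TeeWriter:
#         def __init__(self, primary):
#             self.handles = [primary]
#         def addfilehandle(self, fh):
#             self.handles.append(fh)
#         def write(self, s):
#             for fh in self.handles:
#                 fh.write(s)
#         def close(self):
#             for fh in self.handles:
#                 fh.close()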


def pubkey_only():
    """Return true if encrypting to public keys with no signing key set"""
    return (not globals.gpg_profile.sign_key and
            globals.gpg_profile.recipients and globals.encryption)


def full_backup(col_stats):
    """Do full backup of directory to backend, using archive_dir"""
    if globals.dry_run:
        tarblock_iter = diffdir.DirFull(globals.select)
        bytes_written = dummy_backup(tarblock_iter)
        col_stats.set_values(sig_chain_warning = None)
    else:
        sig_outfp = get_sig_fileobj("full-sig")
        tarblock_iter = diffdir.DirFull_WriteSig(globals.select, sig_outfp)
        bytes_written = write_multivol("full", tarblock_iter, globals.backend)
        sig_outfp.close()
        col_stats.set_values(sig_chain_warning = None).cleanup_signatures()
    print_statistics(diffdir.stats, bytes_written)


def check_sig_chain(col_stats):
    """Get last signature chain for inc backup, or None if none available"""
    if not col_stats.matched_chain_pair:
        if globals.incremental:
            log.FatalError(_("Fatal Error: Unable to start incremental backup.  "
                             "Old signatures not found and incremental specified"),
                           log.ErrorCode.inc_without_sigs)
        else:
            log.Warn(_("No signatures found, switching to full backup."))
        return None
    return col_stats.matched_chain_pair[0]


def print_statistics(stats, bytes_written):
    """If globals.print_statistics, print stats after adding bytes_written"""
    if globals.print_statistics:
        diffdir.stats.TotalDestinationSizeChange = bytes_written
        print diffdir.stats.get_stats_logstring(_("Backup Statistics"))


def incremental_backup(sig_chain):
    """Do incremental backup of directory to backend, using archive_dir"""
    dup_time.setprevtime(sig_chain.end_time)
    if globals.dry_run:
        tarblock_iter = diffdir.DirDelta(globals.select,
                                  sig_chain.get_fileobjs())
        bytes_written = dummy_backup(tarblock_iter)
    else:
        new_sig_outfp = get_sig_fileobj("new-sig")
        tarblock_iter = diffdir.DirDelta_WriteSig(globals.select,
                                  sig_chain.get_fileobjs(), new_sig_outfp)
        bytes_written = write_multivol("inc", tarblock_iter, globals.backend)
        new_sig_outfp.close()
    print_statistics(diffdir.stats, bytes_written)


def list_current(col_stats):
    """List the files current in the archive (examining signature only)"""
    sig_chain = check_sig_chain(col_stats)
    if not sig_chain:
        log.FatalError(_("No signature data found, unable to list files."),
                       log.ErrorCode.no_sigs)
    path_iter = diffdir.get_combined_path_iter(sig_chain.get_fileobjs())
    userlist = []
    loglist = []
    for path in path_iter:
        if path.difftype != "deleted":
            userlist.append("%s %s" % (dup_time.timetopretty(path.getmtime()),
                                       path.get_relative_path()))
            loglist.append("%s %s" % (dup_time.timetostring(path.getmtime()),
                                      util.escape(path.get_relative_path())))
    log.Log('\n'.join(userlist), log.INFO, log.InfoCode.file_list,
            '\n' + '\n'.join(loglist), True)


def restore(col_stats):
    """Restore archive in globals.backend to globals.local_path"""
    if globals.dry_run:
        return
    if not patchdir.Write_ROPaths(globals.local_path,
                                  restore_get_patched_rop_iter(col_stats)):
        if globals.restore_dir:
            log.FatalError(_("%s not found in archive, no files restored.")
                           % (globals.restore_dir,),
                           log.ErrorCode.restore_dir_not_found)
        else:
            log.FatalError(_("No files found in archive - nothing restored."),
                           log.ErrorCode.no_restore_files)


def restore_get_patched_rop_iter(col_stats):
    """Return iterator of patched ROPaths of desired restore data"""
    if globals.restore_dir:
        index = tuple(globals.restore_dir.split("/"))
    else:
        index = ()
    time = globals.restore_time or dup_time.curtime
    backup_chain = col_stats.get_backup_chain_at_time(time)
    assert backup_chain, col_stats.all_backup_chains
    backup_setlist = backup_chain.get_sets_at_time(time)
    num_vols = 0
    for s in backup_setlist:
        num_vols += len(s)
    cur_vol = [0] # one-element list: mutable counter shared with the nested generator

    def get_fileobj_iter(backup_set):
        """Get file object iterator from backup_set contain given index"""
        manifest = backup_set.get_manifest()
        volumes = manifest.get_containing_volumes(index)
        for vol_num in volumes:
            yield restore_get_enc_fileobj(backup_set.backend,
                                          backup_set.volume_name_dict[vol_num],
                                          manifest.volume_info_dict[vol_num])
            cur_vol[0] += 1
            log.Progress(_('Processed volume %d of %d') % (cur_vol[0], num_vols),
                         cur_vol[0], num_vols)

    fileobj_iters = map(get_fileobj_iter, backup_setlist)
    tarfiles = map(patchdir.TarFile_FromFileobjs, fileobj_iters)
    return patchdir.tarfiles2rop_iter(tarfiles, index)


def restore_get_enc_fileobj(backend, filename, volume_info):
    """Return plaintext fileobj from encrypted filename on backend

    If volume_info is set, the hash of the file will be checked,
    assuming some hash is available.  Also, if globals.sign_key is
    set, a fatal error will be raised if file not signed by sign_key.

    """
    parseresults = file_naming.parse(filename)
    tdp = dup_temp.new_tempduppath(parseresults)
    backend.get(filename, tdp)
    restore_check_hash(volume_info, tdp)

    fileobj = tdp.filtered_open_with_delete("rb")
    if parseresults.encrypted and globals.gpg_profile.sign_key:
        restore_add_sig_check(fileobj)
    return fileobj


def restore_check_hash(volume_info, vol_path):
    """Check the hash of vol_path path against data in volume_info"""
    hash_pair = volume_info.get_best_hash()
    if hash_pair:
        calculated_hash = gpg.get_hash(hash_pair[0], vol_path)
        if calculated_hash != hash_pair[1]:
            log.FatalError("%s\n%s\n%s\n" %
                           (_("Invalid data - %s hash mismatch:") % hash_pair[0],
                            _("Calculated hash: %s") % calculated_hash,
                            _("Manifest hash: %s") % hash_pair[1]),
                           log.ErrorCode.mismatched_hash)
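
# For reference, a minimal sketch of computing a SHA1 digest with the
# stdlib instead of gpg.get_hash, assuming the manifest stores plain hex
# digests (which the vi.set_hash("SHA1", ...) call in write_multivol
# suggests); hashlib requires Python 2.5+:
#
#     import hashlib
#     def _sha1_hex(filename):
#         h = hashlib.sha1()
#         f = open(filename, 'rb')
#         while True:
#             block = f.read(65536)
#             if not block:
#                 break
#             h.update(block)
#         f.close()
#         return h.hexdigest()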


def restore_add_sig_check(fileobj):
    """Require signature when closing fileobj matches sig in gpg_profile"""
    assert (isinstance(fileobj, dup_temp.FileobjHooked) and
            isinstance(fileobj.fileobj, gpg.GPGFile)), fileobj
    def check_signature():
        """Thunk run when closing volume file"""
        actual_sig = fileobj.fileobj.get_signature()
        if actual_sig != globals.gpg_profile.sign_key:
            log.FatalError(_("Volume was signed by key %s, not %s") %
                           (actual_sig, globals.gpg_profile.sign_key),
                           log.ErrorCode.unsigned_volume)
    fileobj.addhook(check_signature)
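
# Sketch of the close-hook mechanism assumed above (hypothetical stand-in
# for dup_temp.FileobjHooked): hooks registered with addhook() fire when
# the file object is closed, so the signature check runs only after the
# whole volume has been read.
#
#     class _HookedFile:
#         def __init__(self):
#             self.hooks = []
#         def addhook(self, fn):
#             self.hooks.append(fn)
#         def close(self):
#             for fn in self.hooks:
#                 fn()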


def verify(col_stats):
    """Verify files, logging differences"""
    global exit_val
    collated = diffdir.collate2iters(restore_get_patched_rop_iter(col_stats),
                                     globals.select)
    diff_count = 0
    total_count = 0
    for backup_ropath, current_path in collated:
        if not backup_ropath:
            backup_ropath = path.ROPath(current_path.index)
        if not current_path:
            current_path = path.ROPath(backup_ropath.index)
        if not backup_ropath.compare_verbose(current_path):
            diff_count += 1
        total_count += 1
    # Unfortunately, ngettext doesn't handle multiple number variables, so we
    # split up the string.
    log.Log(_("Verify complete: %s, %s.") %
            (gettext.ngettext("%d file compared",
                              "%d files compared", total_count) % total_count,
             gettext.ngettext("%d difference found",
                              "%d differences found", diff_count) % diff_count),
            3)
    if diff_count >= 1:
        exit_val = 1


def cleanup(col_stats):
    """Delete the extraneous files in the current backend"""
    extraneous = col_stats.get_extraneous()
    if not extraneous:
        log.Warn(_("No extraneous files found, nothing deleted in cleanup."))
        return

    filestr = "\n".join(extraneous)
    if globals.force:
        log.Log(gettext.ngettext("Deleting this file from backend:",
                                 "Deleting these files from backend:",
                                 len(extraneous))+"\n"+filestr, 3)
        if not globals.dry_run:
            col_stats.backend.delete(extraneous)
    else:
        log.Log("%s\n%s\n%s" %
                (gettext.ngettext("Found the following file to delete:",
                                  "Found the following files to delete:",
                                  len(extraneous)),
                 filestr,
                 _("Run duplicity again with the --force option to actually delete.")),
                3)


def remove_all_but_n_full(col_stats):
    "Remove backup files older than the last n full backups."
    assert globals.keep_chains is not None

    globals.remove_time = col_stats.get_nth_last_full_backup_time(globals.keep_chains)

    return remove_old(col_stats)


def remove_old(col_stats):
    """Remove backup files older than globals.remove_time from backend"""
    assert globals.remove_time is not None
    def set_times_str(setlist):
        """Return string listing times of sets in setlist"""
        return "\n".join(map(lambda s: dup_time.timetopretty(s.get_time()),
                             setlist))

    req_list = col_stats.get_older_than_required(globals.remove_time)
    if req_list:
        log.Warn("%s\n%s\n%s" %
                 (_("There are backup set(s) at time(s):"),
                  set_times_str(req_list),
                  _("Which can't be deleted because newer sets depend on them.")))

    if (col_stats.matched_chain_pair and
        col_stats.matched_chain_pair[1].end_time < globals.remove_time):
        log.Warn(_("Current active backup chain is older than specified time.  "
                   "However, it will not be deleted.  To remove all your backups, "
                   "manually purge the repository."))

    setlist = col_stats.get_older_than(globals.remove_time)
    if not setlist:
        log.Warn(_("No old backup sets found, nothing deleted."))
        return
    if globals.force:
        log.Log(gettext.ngettext("Deleting backup set at time:",
                                 "Deleting backup sets at times:",
                                 len(setlist)) +
                "\n" + set_times_str(setlist), 3)
        if globals.dry_run:
            col_stats.set_values(sig_chain_warning = None)
        else:
            setlist.reverse() # save oldest for last
            for bset in setlist:
                bset.delete()
            col_stats.set_values(sig_chain_warning = None).cleanup_signatures()
    else:
        log.Log(gettext.ngettext("Found old backup set at the following time:",
                                 "Found old backup sets at the following times:",
                                 len(setlist)) +
                "\n" + set_times_str(setlist) + "\n" +
                _("Rerun command with --force option to actually delete."), 3)


def check_last_manifest(col_stats):
    """Check consistency and hostname/directory of last manifest"""
    if not col_stats.all_backup_chains:
        return
    last_backup_set = col_stats.all_backup_chains[-1].get_last()
    last_backup_set.check_manifests()


def main():
    """Start/end here"""
    dup_time.setcurtime()
    log.setup()
    action = commandline.ProcessCommandLine(sys.argv[1:])
    col_stats = collections.CollectionsStatus(globals.backend,
                                              globals.archive_dir).set_values()
    log.PrintCollectionStatus(col_stats)

    last_full_time = col_stats.get_last_full_backup_time()
    if last_full_time > 0:
        log.Log(_("Last full backup date:") + " " + dup_time.timetopretty(last_full_time), 4)
    else:
        log.Log(_("Last full backup date: none"), 4)
    if action == "inc" and last_full_time < globals.full_force_time:
        log.Log(_("Last full backup is too old, forcing full backup"), 3)
        action = "full"

    os.umask(077)

    # For public-key encryption (without signing!) no passphrase is required.

    # Cases where we do not need to get a passphrase:
    # - encryption is disabled entirely
    # - full backup with pubkey encryption: doesn't depend on old encrypted info
    # - inc backup with pubkey encryption: needs a manifest, which the
    #   archive dir holds unencrypted
    # - listing files: needs a manifest, but the archive dir has that
    # - collection status: only looks at the repository
    # NOTE: the full and inc cases are refined further below
    if ((not globals.encryption) or
        (action == "list-current" and globals.archive_dir) or
        action in ["full", "inc", "collection-status",
                   "remove-old",
                   "remove-all-but-n-full",
                   ]):
        globals.gpg_profile.passphrase = ""
    else:
        globals.gpg_profile.passphrase = get_passphrase(1)

    if action == "restore":
        restore(col_stats)
    elif action == "verify":
        verify(col_stats)
    elif action == "list-current":
        list_current(col_stats)
    elif action == "collection-status":
        log.PrintCollectionStatus(col_stats, True)
    elif action == "cleanup":
        cleanup(col_stats)
    elif action == "remove-old":
        remove_old(col_stats)
    elif action == "remove-all-but-n-full":
        remove_all_but_n_full(col_stats)
    else:
        assert action in ("inc", "full"), action
        if action == "full":
            if not pubkey_only() and globals.encryption:
                globals.gpg_profile.passphrase = get_passphrase(3)
            full_backup(col_stats)
        else:  # attempt incremental
            if not (pubkey_only() and globals.archive_dir) and globals.encryption:
                globals.gpg_profile.passphrase = get_passphrase(1)
            check_last_manifest(col_stats) # not needed for full backup
            sig_chain = check_sig_chain(col_stats)
            if not sig_chain:
                if not pubkey_only() and globals.encryption:
                    globals.gpg_profile.passphrase = get_passphrase(2)
                full_backup(col_stats)
            else:
                incremental_backup(sig_chain)
    globals.backend.close()
    log.shutdown()
    if exit_val is not None:
        sys.exit(exit_val)

def with_tempdir(fn):
    """Run fn(), cleaning up the default temporary directory afterwards"""
    try:
        fn()
    finally:
        tempdir.default().cleanup()

if __name__ == "__main__":
    try:
        with_tempdir(main)
    except duplicity.errors.UserError, e:
        # For user errors, don't show an ugly stack trace by default,
        # but do include it at sufficient verbosity.
        log.Info(_("User error detail: %s")
                 % (''.join(traceback.format_exception(*sys.exc_info()))))
        log.FatalError("%s: %s" % (e.__class__.__name__, str(e)),
                       log.ErrorCode.user_error,
                       e.__class__.__name__)
    except Exception, e:
        log.FatalError("%s" % (''.join(traceback.format_exception(*sys.exc_info()))),
                       log.ErrorCode.exception,
                       e.__class__.__name__)
