#!/usr/bin/env python

# Copyright (c) 2012 Samuel Bronson
#
# Made available under the "expat" license, which follows:
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

import sys
import tarfile
import gzip
# Requires python-lzma for xz support.
import lzma
import re
import posixpath
from pprint import pprint


# Exactly three positional arguments are expected: the input tarball, the
# package name and the version.  Any other count fails the unpacking below
# and is treated as a usage error.
try:
    infn, pkg, ver = sys.argv[1:]
except ValueError:
    # Report on stderr rather than stdout, and leave through sys.exit()
    # instead of the exit() helper, which only exists when the site module
    # has been loaded.
    sys.stderr.write(
        "usage: %s gcc-X.Y.Z.tar.(gz|bz2) gcc-X.Y-doc X.Y.Z[.mumble]\n"
        % sys.argv[0])
    sys.exit(1)

# Compression used for the output tarball: "xz" or "gz".
outtype = "xz"

# Name of the file to write: <pkg>_<ver>.orig.tar.<outtype>
outfn = '%s_%s.orig.tar.%s' % (pkg, ver, outtype)

# Top-level directory the output tarball extracts to, i.e. the prefix
# placed in front of every member path.
outdir = '-'.join((pkg, ver)) + '.orig'

# Open the input as a forward-only stream; the '*' lets tarfile work out
# the compression by itself.
intar = tarfile.open(name=infn, mode='r|*', format=tarfile.PAX_FORMAT)

# Choose the compressor for the output file.  Anything other than "gz"
# is written as xz (this relies on the python-lzma LZMAFile interface).
if outtype != "gz":
    outtarball = lzma.LZMAFile(outfn, mode='wb', options={'format': 'xz'})
else:
    # A fixed mtime of 0 is stored in the gzip header instead of the
    # current time.
    outtarball = gzip.GzipFile(filename=outfn, mode='wb', mtime=0)

# The tar layer itself is uncompressed; it writes into the compressor.
outtar = tarfile.open(name=None, mode='w:tar', fileobj=outtarball)

# Selects the paths that are copied into the output tarball.  The pattern
# is applied to member paths with their leading directory component
# removed, and is written in verbose mode with one alternative per line:
#   - any file ending in .7, .texi, .rst or .texinfo
#   - gcc/ada/xgnatugn.adb and gcc/ada/ug_words
#   - everything below gcc/ada/doc/
#   - contrib/texi2pod.pl
# Literal dots are escaped so that they match only a real '.'.
pat_s = r"""
  ^.*\.(7|texi|rst|texinfo)$
| ^gcc/ada/(xgnatugn\.adb|ug_words)$
| ^gcc/ada/doc/
| ^contrib/texi2pod\.pl$
"""
pat = re.compile(pat_s, re.VERBOSE)

# Copy every regular file whose path matches pat from the input archive
# into the output archive, re-rooted under outdir.
for member in intar:
    # Directories, links and other special entries are not copied.
    if not member.isreg():
        continue

    # Drop the archive's own top-level directory component.  A regular
    # file sitting at the archive root has no such component, so skip it
    # instead of failing on the unpacking.
    _, sep, path = member.name.partition('/')
    if not sep:
        continue

    if not pat.search(path):
        continue

    outpath = posixpath.join(outdir, path)
    print(outpath)

    # Fetch the member's data before renaming it, then store it in the
    # output archive under the new path with its other metadata unchanged.
    mf = intar.extractfile(member)
    member.name = outpath
    outtar.addfile(member, mf)

# The tar writer must be closed before the compressor it writes into.
intar.close()
outtar.close()
outtarball.close()
