#!/bin/sh
#
# download the AllInOne page of a manual on wiki.debian.org as docbook
# and transform it into proper XML
#
# very loosely based on the moinmoin2pdf script from Petter Reinholdtsen
#
# Author/Copyright:	Holger Levsen
# Licence:		GPL2+
# first edited:		2006-07-06
# last edited:		2009-05-30
# 
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

#set -x

# Print an error message framed by separator lines and abort the script.
# $1 - the message to display
exit_loud_and_clearly() {
	sep="-----------------------------------------------------"
	printf '%s\n\n%s\n\n%s\n' "$sep" "$1" "$sep"
	exit 1
}

# Preflight checks: required tools and environment.
# 'command -v' replaces the deprecated 'which'; the original unquoted
# "[ ! $(which ...) ]" test breaks if the resolved path contains spaces
# and silently misbehaves on some 'which' implementations.
if ! command -v xmllint >/dev/null 2>&1 ; then
	exit_loud_and_clearly "Please install libxml2-utils."
fi

if ! command -v GET >/dev/null 2>&1 ; then
	exit_loud_and_clearly "Please install libwww-perl."
fi

# $name is expected to be exported by the calling Makefile.
if [ -z "$name" ] ; then
	exit_loud_and_clearly "error: missing \$name variable, not exported from Makefile?"
fi
# Scratch file collecting FIXME notes; renamed to fixme-status.txt at the end.
TMPFILE=$(mktemp) || exit_loud_and_clearly "error: mktemp failed"
xmlfile=$name.xml

# Make sure all section IDs are unique.  If file--subsection is not
# unique, use file--section--subsection--subsubsection instead.
unique_section_ids() {
    # Emulate anchor_name_from_text() function from moin
    # Filter (stdin -> stdout): rewrites every <section> open tag so it
    # carries an id= attribute derived from its <title>, escaped the same
    # way moin builds anchor names.  A stack @h of enclosing section names
    # is kept; when the short "$file--$s" id has already been emitted, the
    # id falls back to the full stack joined with "--".
    # NOTE(review): ${ASCIINAME} is interpolated unquoted into the perl
    # source, so perl parses it as a bareword — this only works because
    # the name was ASCII-sanitized in the main loop; confirm it can never
    # contain quotes or spaces.
    perl -MURI::Escape -pe "my \$file = escape(${ASCIINAME}); my @h = \$file;" \
	-e 'my %ids; sub escape {
        my $s = shift;
	$s=~s/ /_/g;
	$s=~s/~/+/g;
	$s=~s/"/.22/g;
	$s=~s/\+/+-/g;
	$s=~s/\//+AC8/g;
	$s=~s/²/+ALI-/g;
	$s=uri_escape($s);
	$s=~s/%/./g;
	$s=~s/\.3A/:/g;
	return $s;
    }; sub anchor{
        my ($pre, $title) = @_;
        if ("</section>" eq $pre) {
            pop @h;
	    return "$pre";
        } else {
            my ($s) = $title =~ m%<title>(.+)</title>%;
            $s = escape($s);
            push(@h, $s);
            my $id = "$file--$s";
            $id = join("--", @h) if (exists $ids{$id});
            my $retval = "<section id=\"$id\">$title";
            $ids{$id} = 1;
#           print STDERR "S: $retval\n";
            return $retval;
        }
    }
    s%(</?section>)(<title>[^<]*?</title>)?%&anchor($1, $2)%eg;'
}

# Fetch the raw AllInOne page (a list of <<Include(...)>> directives, one
# per wiki page) and boil it down to a plain list of page paths, one per
# line, in the file "id":
#   - the first two seds strip the <<Include( and )>> markup
#   - the third drops the <<TableOfContents(1 macro
#   - the fourth removes the common $path1 prefix
#   - 's/.$//' strips the trailing CR from each line (same as dos2unix)
#   - 'head -n -2' chops off the last two lines with the Category:Permalink
#     entry (GNU head extension)
# PERL_LWP_SSL_VERIFY_HOSTNAME=0 disables TLS hostname checking for GET.
PERL_LWP_SSL_VERIFY_HOSTNAME=0 GET -H User-Agent: "${url}AllInOne?action=raw"|sed "s%<<Include(%%g" | sed "s%)>>%%g" | sed 's/<<TableOfContents(1//' | sed "s%$path1%%g" |sed 's/.$//'|head -n -2> id

# Download every page listed in "id" as docbook and post-process it into a
# fragment <NAME>.xml; any remaining FIXMEs are collected into $TMPFILE.
# NOTE(review): the backtick-for word-splits on whitespace, so page paths
# are assumed to contain no spaces — confirm against the wiki naming.
for i in `cat id` ; do
	# keep only the last path component (the page name)
	NAME=`echo "${i}" |sed "s/\(.*\)\/\(.*\)/\2/" `
	# The ø -> oe conversion is a workaround for bug #657511.
	# NOTE(review): tr operates bytewise, so this maps the two UTF-8
	# bytes of "ø" to "o" and "e" — fragile but intentional here.
 	ASCIINAME=$(echo $NAME  | tr "ø" "oe" | iconv -t ASCII//TRANSLIT)
	TARGET=${NAME}.xml
	echo "$TARGET		${url}${i}?action=show&mimetype=text/docbook"
	# download the docbook version of the manual from the wiki and pipe it through sed to
	#   - insert the build date
	#   - convert <code> tag to <computeroutput> as this is understood by docbook (tools)
	#   - provide correct path to the images
	#   - remove the revision history
	#   - remove the Category:Permalink line
	#   - add some linebreaks
	#   - delete the first lines containing the XML declaration
	PERL_LWP_SSL_VERIFY_HOSTNAME=0 GET "${url}${i}?action=show&mimetype=text/docbook" | 
	# replace tags:
	sed "s%code>%computeroutput>%g" |
	sed "s%/htdocs/rightsidebar/img/%./images/%g" |
	# remove initial and final tags:
	perl -pe "s%</?article>%%g" |
	# remove tags and enclosed content:
	sed "s#<articleinfo>\(.*\)</articleinfo>##g" |
	# Comment useless remarks from XML: they just show an ugly drawing in XML
	perl -pe "s%<remark>.*?</remark>%<!-- $& -->%g" |
	# Broken URL: workaround to #656945
	sed "s%<ulink url=\"https://wiki.debian.org/${path1}${i}/%<ulink url=\"https://wiki.debian.org/%g" |
	# Make wiki self links actually local
	sed "s%<link linkend=\"%<link linkend=\"${ASCIINAME}--%g" |
	perl -pe "s%<ulink url=\"https://wiki.debian.org/${path1}/(HowTo/)?(\w+)#\">(.*?)</ulink>%<link linkend='\2'>\3</link>%g" |
	perl -pe "s%<ulink url=\"https://wiki.debian.org/${path1}/(HowTo/)?(\w+)#(.*?)\">(.*?)</ulink>%<link linkend='\2--\3'>\4</link>%g" |
	perl -000 -pe "s%<para><ulink url=\"https://wiki.debian.org/CategoryPermalink#\">CategoryPermalink</ulink>\s*</para>%%" |
	unique_section_ids |
	# introduce line breaks:
	sed "s%<title>%\n<title>%g" |
	sed "s%<\/title>%\n<\/title>%g" |
	sed "s%<section%\n\n<section%g" |
	sed "s%<\/section>%\n<\/section>%g" |
	sed "s%<para>%\n<para>%g" |
	sed "s%<\/para>%\n<\/para>%g" |
	sed "s%FIXME%\nFIXME%g" |
	sed "s%<itemizedlist>%\n<itemizedlist>%" |
	sed "s%<listitem>%\n<listitem>%" |
	# cut off first lines:
	sed '1,4d' > $TARGET
	# record remaining FIXMEs, ignoring the word "FIXMEs", escaped
	# "FIXME&gt;" occurrences and lines marked "status ignore"
	if [ "$(grep -v FIXMEs $TARGET | grep FIXME | grep -v 'FIXME&gt;' | grep -v 'status ignore')" != "" ] ; then
		echo "----------------------------------" >> $TMPFILE
		echo ${url}${i} >> $TMPFILE
		grep -v FIXMEs $TARGET | grep FIXME | grep -v 'FIXME&gt;' | grep -v 'status ignore' >> $TMPFILE
	fi
done

# now only keep the page name (equals section id) without path
# (replace with the second match of the regular expression)
sed -i "s/\(.*\)\/\(.*\)/\2/" id

# add id= to <section>s and a linebreak at the end
# For each page: give the first <section> the page's own name as its id so
# the page itself becomes a link target, and make sure the file ends with a
# newline so the later concatenation keeps tags on separate lines.
for i in `cat id` ; do
	# "0,/re/" limits the substitution to the first match (GNU sed extension)
	sed -i "0,/<section id=\".*\">/ s/<section id=\".*\">/<section id=\"$i\">/" ${i}.xml 
	sed -i "$ s#>#>\n#" ${i}.xml
done

# Concatenate the per-page XML fragments into the final $xmlfile,
# removing each intermediate file (and the page list) afterwards.
rm -f $xmlfile
while read -r page ; do
	cat "${page}.xml" >> "$xmlfile"
	rm "${page}.xml"
done < id
rm id

# get images and modify $xmlfile
# NOTE(review): get_images is a sibling script; presumably it downloads the
# referenced images and rewrites their paths in $xmlfile — verify there.
echo "calling ../scripts/get_images $xmlfile $path1"
../scripts/get_images $xmlfile $path1

# turn links into internal references if appropriate
# this needs to run after ./get_images
#
#  -0\777  read multiple lines
perl -0\777 -pi -e "s/<ulink url=\"$path2(.*)\/(.*)\">(.*)\n<\/ulink>/<link linkend=\"\2\">\3<\/link>/g" $xmlfile

# set DOC_DATE based on current date as get_manual is only called manually when the document is updated from the wiki
DOC_DATE="$(TZ=UTC date +'%Y-%m-%d')"
# make it a docbook article again
# (prepend the XML declaration, the DocBook 4.4 doctype and an <articleinfo>
# with title + date before the first tag of the file, then close </article>
# after the last tag; $DEBIAN_EDU_DOC_TITLE comes from the environment)
sed -i "1,/</ s#<#<?xml version=\"1.0\" encoding=\"UTF-8\"?><!DOCTYPE article PUBLIC \"-//OASIS//DTD DocBook XML V4.4//EN\" \"http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd\"><article lang=\"en\"><articleinfo><title>$DEBIAN_EDU_DOC_TITLE $DOC_DATE</title></articleinfo>\n<#" $xmlfile
sed -i "$ s#>#>\n</article>#" $xmlfile
# remove the first empty lines
sed -i "1,2d" $xmlfile

# clean it further
# run the result through xmllint so malformed XML fails loudly and the
# output is normalised
TMPFILE2=$(mktemp)
xmllint $xmlfile > $TMPFILE2
mv $TMPFILE2 $xmlfile

# motivate
# append a summary count of remaining FIXMEs to the collected status notes
if [ "$(grep -v FIXMEs $xmlfile |grep FIXME |grep -v 'status ignore'|uniq)" != "" ] ; then
	echo "====================" >> $TMPFILE
	echo `grep -v FIXMEs $xmlfile |grep FIXME |grep -v 'status ignore'|uniq|wc -l` FIXMEs left to fix >> $TMPFILE
	echo "====================" >> $TMPFILE
fi
mv $TMPFILE fixme-status.txt
