#! /usr/bin/perl -w

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

# a script to import GNU info manuals
# Copyright (c) 2007, Arabeyes.org by Muayyad Alsadi<alsadi@gmail.com>
# Released under terms of GPL 2

# rm -r info/; th-info-to-th digits PATH/TO/FILE.info.gz
# where digits any number >= 2 (it then suggests the best number)
# then enter info/+/ and create 0 control info file
# run the th-gen-stub-search-cache script inside book
# then run th-pack

# FIXME: what is inside a .tag could be [] which may case problems

# TODO: what does \0\x8[index] mean

# only support GZiped info files
use utf8;
use encoding 'utf8';
use open ':utf8'; # input and output default layer will be UTF-8

my $subtree="info";
my $maxlen=2;
my $digits=shift or die "no digits argument";
my $file=shift or die "no info filename argument";
my $prefix="";
my (@toc,@keys);
my @stack;
my @t_stack;
#push @stack,sprintf("%0${digits}d", 0); # commented to skip the (dir) node
push @t_stack,"(dir)";
if ($file=~/^(.*)\/([^\/]+)$/) {$prefix=$1}

mkdir $subtree or die "could not create subtree [$!]\n";
mkdir "${subtree}/+" or die "could not create subtree";
open E, ">EXCESS";
open TOC, ">${subtree}/+/1";
#open REF, ">${subtree}/+/2";
open KEYS, ">${subtree}/+/3";

my @toc_stack; # file	type	title
my %toc_by_id;
my $is_started=0;
my ($ref1,$ref2, $last_ref1, $last_ref2)= (-1,-1,-1,-1);
my ($txt, $last_fn)=("","");
my $out;

use Compress::Zlib ;
# $dest = compress($source) ;
# $dest = uncompress($source) ;
# $gz = gzopen($filename or filehandle, $mode) ;
# $bytesread = $gz->gzread($buffer [,$size]) ;
# $bytesread = $gz->gzreadline($line) ;
# $byteswritten = $gz->gzwrite($buffer) ;
# $status = $gz->gzflush($flush) ;
# $status = $gz->gzclose() ;
my $mode;
my @hi;
my @files;
my ($page, $node, $next, $prev, $up);

$line="";
$mode=0;
print "Processing file[$file]\n**********************\n";
$gz = gzopen($file, "r") ;
while ($bytesread = $gz->gzreadline($line))
{
  chomp $line;
  consume_line($line);
}
consume_line("\037");
$status = $gz->gzclose() ;
for $file (@files) {
  $line=""; $mode=0;
  print "Processing file[$file]\n**********************\n";
  $gz = gzopen("$prefix/$file", "r") ;
  while ($bytesread = $gz->gzreadline($line))
  {
    chomp $line;
    consume_line($line);
  }
  consume_line("\037");
  $status = $gz->gzclose() ;
}
write_thwab_toc_and_keys();
printf "digits should be %d\n",$maxlen;
close TOC;
close KEYS;
# header
#File: ,  Node: ,  Next: ,  Prev: ,  Up: (dir)\n
#File: ,  Node: ,  Next: ,  Prev: ,  Up: \n

# links
#"* Variadic Functions::" to be "* .@[Variadic Functions]"
# "*note Variadic Functions::" to be ".@[Variadic Functions]"
# "*Note Variadic Functions::" to be ".@[Variadic Functions]"
# partial links having *something\nsomething:: could be fixed by //m
# more than one on a line

# "^\s*\*([^*:]+):\*" to be marked as .!![$1]
# "^ -- ([\p{L} ]+):" to be marked as .*._[$1]

# "START-INFO-DIR-ENTRY"
# "* fchown: (libc)File Owner."
# "END-INFO-DIR-ENTRY"
# means that all "fchown" should be converted to ".*[fchown]"
# Indirect:
# file.info-1: 763
# file.info-2: 300760
# file.info-3: 523871
# \037

# alg:
# look for page header "File: ,  Node: ..." turn on flag
# if (/\037/) {send(page); page="";}
# else {page+=line}
sub consume_line
{
  $line=shift;
  $_=$line;
  if (/\0/) {return}
  if ($mode==0) { # main
    if (/^Indirect:$/) {$mode=1;
    } elsif (/^START-INFO-DIR-ENTRY$/) {$mode=2;
#     } elsif (/^File:[^,]*,\s*Node:\s*([^,]+)\s*,\s*Next:\s*([^,]+)\s*,\s*Prev:\s*([^,]*)\s*,\s*Up:\s*\(dir\)\s*$/) {
#       $mode=3; $page="";
#       ($node,$next,$prev,$up)=($1,$2,$3,'(dir)');
    } elsif (/^File:[^,]*,\s*Node:\s*([^,]+)\s*,\s*Next:\s*([^,]+)\s*,\s*Prev:\s*([^,]*)\s*,\s*Up:\s*([^,]*)\s*$/) {
      $mode=4; $page="";
      ($node,$next,$prev,$up)=($1,$2,$3,$4);    
    } elsif (/^File:[^,]*,\s*Node:\s*([^,]+)\s*,(\s*Next:\s*([^,]+)\s*,)?(\s*Prev:\s*([^,]*)\s*,)?\s*Up:\s*([^,]*)\s*$/) {
      $mode=4; $page="";
      $node=$1;
      $next=$3 or $next="";
      $prev=$5 or $prev="";
      $up=$6;
#     } elsif (/^File:[^,]*,\s*Node:\s*([^,]+)\s*,\s*Prev:\s*([^,]*)\s*,\s*Up:\s*([^,]*)\s*$/) {
#       $mode=4; $page="";
#       ($node,$next,$prev,$up)=($1,"",$2,$3);
#     } elsif (/^File:[^,]*,\s*Node:\s*([^,]+)\s*,\s*Next:\s*([^,]+)\s*,\s*Up:\s*([^,]*)\s*$/) {
#       $mode=4; $page="";
#       ($node,$next,$prev,$up)=($1,$2,"",$3);
    } else {
      print E "$_\n";
    }
  } elsif ($mode==1) { # files
    # file.info-1: 763
    # file.info-2: 300760
    # file.info-3: 523871
    # \037
    if (/^\037/) {$mode=0;
    } elsif (/^([^:]+):/){ push @files, "$1.gz";
    } else { print "unknown [$_] in mode [$mode]\n";
    }  
  } elsif ($mode==2) { # high light terms
    if (/^END-INFO-DIR-ENTRY$/) { $mode=0;
    } elsif (/^\*\s*(\S[^:]*):/) { push @hi, "$1";
    } else { print "unknown [$_] in mode [$mode]\n";
    }
#   } elsif ($mode==3) { # top page
#     if (/^\037/) {
#       $mode=0;
#       save_top_page($page,$node,$next);
#     } else { $page.="$_\n";
#     }
  } elsif ($mode==4) { # page
    if (/^\037/) {
      $mode=0;
      save_page($page,$node,$next,$prev,$up);
    } else { $page.="$_\n";
    }
  }
  
}

sub info_page_to_thts
{
  $p=shift;
  $p=~s/^[ \t]*\n//m; # remove first empty line
  $p=~s/^(.*)\n(\*|=)+\n/$1\n/m; # remove "************" or "========"
  $p=~s/(?<!\.|\d)\.(?!\.|\d)/. /gom; # escape thts dots
  
  $p=~s/^[ \t]*\*([^*:\n]+):\*/.!![$1:]/gom;
  $p=~s/^[ \t]*--[ \t]+([^*:\n]+):/.*._[$1:]/gom;
  #$p=~s/^\s*--\s+([\p{L} ]+):/.*._[$1:]/gom; # safer
  
  # TODO: highlight all members of @hi in $page
  
  # links
  $p=~s/^[ \t]*\*[ \t]+([^*:\n]+)::/* .@[$1]/gom; # "* Something::"
  $p=~s/^([ \t]*\*[ \t]+([^*:\n]+):[ \t]+)([^\n]+)(\.[^\n]+)$/$1.@[$3]$4/gom; # "* alias expansion: "
  $p=~s/\*note\s+([^*:\n]*)\n[ \t]*([^*:\n]+)::/.@[$1 $2]/gomi; # multi line
  $p=~s/\*note\s+([^*:\n]+)::/.@[$1]/gomi; # "*Note Something::"
  return $p;
}
sub save_page
{
  ($page,$node,$next,$prev,$up)=(shift,shift,shift,shift,shift);
#   while($#stack>=$toc_lvl) {pop @stack;}
#   $i=pop @stack; $i+=1; push @stack,sprintf("%0${digits}d", $i);
  print "n=[$node] next=[$next] prev=[$prev] up=[$up]\n";
  #print "** t_stack [". join('/', @t_stack)."]\n";
  #print "up=[$up] node=${t_stack[$#t_stack]}\n";
  if ($up eq $t_stack[$#t_stack]) { # convert last node to parent
    # add 1st child child
    $dir=join '/', @stack; $dir=~s/\/$//;
    #print "[$node][$dir]\n";
    if ($dir) {
      $fn=sprintf("$dir/%0${digits}d",0);
      if ($#stack >= 0) {
        rename "${subtree}/$dir", "${subtree}/${dir}_" or die "could not rename $dir [$!]";
        mkdir "${subtree}/$dir" or die "$!";
        rename "${subtree}/${dir}_", "${subtree}/$fn" or die "could not rename []\n$!";
      }
      $toc_e=pop @toc or $toc_e="$dir\t2\tTop\n"; chomp $toc_e;
      @toc_t=split /\t/,$toc_e;
      $key_e=pop @keys or $key_e="Top\t$dir\n";  chomp $key_e;
      #@key_t=split /\t/,$key_e;
      push @toc, "$dir\t5\t${toc_t[2]}\n";
      push @toc, "$fn\t2\t_\n";
      push @keys, "${toc_t[2]}\t$fn\n";
    }
    push @stack,sprintf("%0${digits}d", 1);
    push @t_stack,$node;
    # save page as child
    $fn=join '/', @stack; $fn=~s/\/$//;
    push @toc, "$fn\t2\t$node\n";
    push @keys, "$node\t$fn\n";
    open I, ">${subtree}/${fn}" or die "could not open [${subtree}/${fn}] for node=[$node]";
    $page=info_page_to_thts($page);
    print I $page;
    close I;
  } else {
    # do a santy check
    #if ($prev and $prev ne $t_stack[$#t_stack]) {die "node [$node] should be after [$prev] not [${t_stack[$#t_stack]}]\n";}
    # find parent
    #print "stack[@stack]\n";
    #print "t_stack [". join('/', @t_stack)."]\n";
    while($#t_stack>=1 and $up ne $t_stack[$#t_stack-1]) {
    pop @stack; pop @t_stack;}
    if ($#stack<0) {
      # the libc info is broken this fix the whole thing
      recover_stacks($up);
    }
    #print "stack[@stack]\n";
    # add child next to last child
    $i=pop @stack; $i+=1;
    if (length($i)>$maxlen) {$maxlen=length($i)}
    push @stack,sprintf("%0${digits}d", $i);
    $i=pop @t_stack;
    push @t_stack,$node;
    
    # save page as child
    $fn=join '/', @stack; $fn=~s/\/$//;
    push @toc, "$fn\t2\t$node\n";
    push @keys, "$node\t$fn\n";
    open I, ">${subtree}/${fn}" or die "could not open [${subtree}/${fn}] for node=[$node]";
    $page=info_page_to_thts($page);
    print I $page;
    close I;
    
  }
  #print "!! t_stack [". join('/', @t_stack)."]\n";
}
sub write_thwab_toc_and_keys
{
  for $i (@toc) { print TOC $i; }
  for $i (@keys) { print KEYS $i; }
}
sub recover_t_stack()
{
  my ($i,$j,$s,$ss);
  $ss="";
  $i=0;
  $j=0;
  for $s (@stack) {
    $ss.=$s;
    for ($i=$j; $i<=$#toc; $i++) {
      if ($toc[$i]=~/^\Q$ss\E\t(\d+)\t([^\n]+)\n$/) {
        push @t_stack, $2;
        $j=$i++; last;
      }
    }
    $ss.='/';
  }
}
sub recover_stacks {
  my $up=shift;
  my $s;
  @stack=();
  @t_stack=();
  push @t_stack,"(dir)";
  for $i (@toc) {
    if ($i=~/^([^\t]+)\t(\d+)\t\Q$up\E\n$/) {
      $s=$1;
      @stack=split /\//,$s;
      recover_t_stack();
      return;
    }
  }
  die "could not find up=[$up]\n";
}
