#! /usr/bin/perl -w

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

# ------------
# Description:
# ------------
#  This script will read  full_wordlist_*.po files from standard input
#  to generate Thwab file tree and related headword index file
#
#  cat full_wordlist_*.po | th-po-to-th
#
# Then chdir to th-dict and edit the +/0 file to fill in the comments,
# copyrights, dictionary name, etc.
# Remove the editor backup file "+/0~" if one was created.
#
# Then, from anywhere inside the th-dict subtree, call the "th-pack" script
#
# Copyright © 2006, Muayyad Saleh Alsadi<alsadi@gmail.com>
# (www.arabeyes.org - under GPL license)

use utf8;
# UTF-8 is the default layer for every handle opened below and, through
# :std, for STDIN/STDOUT/STDERR as well.  This replaces the former
# "use encoding 'utf8'", which is deprecated and refuses to load on
# perl 5.26 and later.
use open qw(:std :utf8);
use POSIX;

# ---- Shared state (also read and updated by add_to_th) ----
my $prefix="th-dict";   # output directory, created in the current directory
my $digits=4;           # zero-padded width of chapter and item names
my $next_ch=1;          # number the next new chapter will receive
my $ch_n=$next_ch;
my $itm_n=0;
my %chaps;              # chapter key => chapter number
my %items;              # chapter key => highest item number used so far
my $ch="";
my $itm="";

# A single optional argument overrides the default width.  It is later
# interpolated into a sprintf format, so reject anything that is not a
# positive decimal integer.
if ($#ARGV==0) {
  ($ARGV[0] =~ /^[0-9]+$/ and $ARGV[0] > 0)
    or die "digits argument must be a positive integer, got [$ARGV[0]]\n";
  $digits=$ARGV[0]+0;
}

# Refuse to overwrite an existing tree; the user has to remove it first.
if (-d "$prefix") {print "type 'rm -R $prefix'\n"; exit -1}
mkdir $prefix or die "Could not create Thwab directory: $!";
mkdir $prefix."/+" or die "Could not create Thwab control + directory: $!";
# Every later path is relative to the new tree, so a failed chdir must stop
# the run instead of scattering files in the wrong directory.
chdir $prefix or die "Could not enter Thwab directory [$prefix]: $!";

open my $info, '>', "+/0" or die "Could not create Thwab info +/0 file: $!";
# IX stays a bareword handle: add_to_th appends to it by that name.
open IX, '>', "+/3" or die "Could not create Thwab key index +/3 file: $!";

# Template header; the user is expected to edit the placeholders by hand.
print {$info} "thwab = dict-SOMETHING-XX-YY\n",
	"version = 1\n",
	"charset = UTF-8\n",
	"format = plain\n",
	"digits = $digits\n",
	"title = Dictionary Title\n",
	"subtitle = Dictionary Subtitle\n",
	"lang = en\n",
	"classification = Lexical:Dictionaries\n",
	"key = Headword\n",
	"author = Author:SHORT:FULL:::2006:MY\@EMAIL.COM:www.MY-WEBSITE.org:Some Country,Translator:SHORT:FULL:::2006:MY\@EMAIL.COM:www.MY-WEBSITE.org:Some Country,Translator:SHORT:FULL:::2006:MY\@EMAIL.COM:www.MY-WEBSITE.org:Some Country\n",
	"computerized = alsadi:Muayyad Saleh Al-Sadi:::2006:alsadi\@gmail.com:www.cltb.net\n",
	"_\nCOMMENTS HERE\n\n",
	"In Thwab viewer, use \"Search for Key (Headword)\"\ntool to look-up.\n\nCopyright © COPYRIGHTS HERE\n\n",
	"Converted to Thwab iTar format using \"th-po-to-th\" script\n",
	"Written by Muayyad Saleh Al-Sadi<alsadi\@gmail.com>\n";
# Buffered write errors only surface at close.
close $info or die "Could not write Thwab info +/0 file: $!";
# add_to_th(ID, STR)
#
# Stores one dictionary entry as a member file and records it in the key
# index.
#
#   ID   headword (the PO msgid)
#   STR  entry body (the PO msgstr)
#
# The chapter is chosen from the first word character of ID, upper-cased;
# if that character is not alphabetic, or ID has no word character, the
# entry goes to chapter "0".  A chapter gets the next free number and its
# own directory the first time it is seen.  Items inside a chapter are
# numbered from 0.  Directory and file names are zero-padded to $digits.
#
# Uses the file-level %chaps, %items, $next_ch and $digits, and appends
# "ID<TAB>path" to the already-open IX handle.  Dies if the chapter
# directory or the member file cannot be written.
sub add_to_th($$){
  my ($id, $str) = @_;

  my $chapter = "0";
  if ($id =~ /\W*(\w)/) {
    my $initial = uc $1;
    $chapter = $initial unless $initial =~ /[^\p{alpha}]/;
  }

  my $is_new = !exists $chaps{$chapter};
  if ($is_new) {
    $chaps{$chapter} = $next_ch;
    $next_ch = $next_ch + 1;
    $items{$chapter} = 0;
  } else {
    $items{$chapter} = $items{$chapter} + 1;
  }

  my $dir = sprintf "%0${digits}d", $chaps{$chapter};
  if ($is_new) {
    mkdir $dir or die "Could not create Thwab chapter directory [$dir]: $!";
    print "new [$chapter] chapter as [$dir]\n";
  }

  my $file = $dir . sprintf("/%0${digits}d", $items{$chapter});
  # No layer is given in the mode, so the file-level "use open" pragma
  # supplies the UTF-8 layer.
  open my $member, '>', $file
    or die "Could not create Thwab member file [$file]: $!";
  printf {$member} "%s\n\n%s\n", $id, $str;
  # Buffered write errors (e.g. a full disk) only surface at close.
  close $member or die "Could not write Thwab member file [$file]: $!";

  # The index is one tab-separated record per line, so newlines and tabs
  # inside the key are replaced by underscores.
  (my $key = $id) =~ tr/\n\t/_/;
  printf IX "%s\t%s\n", $key, $file;
}
# ---- Parse the PO stream from STDIN ----
#
# Recognised lines:
#   msgid "..."    starts a new entry; the pending entry is emitted first
#   msgstr "..."   starts the translation of the current entry
#   "..."          continuation; appended to whichever field came last
#   blank / #...   ignored
# Anything else is reported as "skip [...]" and ignored.
#
# Entries whose complete msgid is empty are not emitted.  That covers the PO
# header of every concatenated input file, not only the first one.  The check
# is made when the entry is emitted, so a multi-line msgid written as
# 'msgid ""' followed by continuation lines is assembled in full first.
my $st=0;      # field that continuation lines extend: 0 = msgid, 1 = msgstr
my $id="";
my $str="";
my $started=0; # becomes 1 once the first msgid line has been seen

while(<STDIN>) {
  chomp;
  if (/^\s*msgid\s*"(.*)"\s*$/) {
    # Save the capture before add_to_th runs its own pattern matches.
    my $new_id=$1;
    if ($started != 0 and length $id) { add_to_th($id,$str) }
    $id=$new_id;
    $str="";   # do not carry the previous translation into this entry
    $st=0;
    $started=1;
  }
  elsif (/^\s*msgstr\s*"(.*)"\s*$/) {
    $str=$1;
    $st=1;
  } elsif (/^\s*"(.*)"\s*$/) {
    if ($st==0) {$id=$id."$1"}
    else {$str=$str."$1"}
  } elsif (/^\s*$/ or /^\s*#/) {
    # blank line or comment: nothing to do
  } else {
    print "skip [$_]\n";
  }
}
# Emit the last entry, which has no following msgid to trigger it.
if ($started != 0 and length $id) { add_to_th($id,$str) }

# ---- Advise on the digits setting ----
# Find the highest item number used in any chapter and the number of decimal
# digits needed to print it; suggest a rerun if that differs from the width
# actually used.
my ($i,$k);
my $j=0;
for $i (values %items) {
  if ($i>$j) {$j=$i}
}
$k=length(sprintf("%d",$j));
if ($digits != $k) {
  printf "digits should be [%d]\nYou may like to rerun it  passing %d as argument\n",$k,$k
}
