#! /usr/bin/perl -w

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

use utf8;
use encoding 'utf8';
use open ':utf8'; # input and output default layer will be UTF-8
use POSIX;
my %words;
sub gen_search_cache($) {
  my $fn=shift;
  my ($i,$ii,$j);
  my @a;
  $j=0;
  open FN, $fn;
  while(<FN>) {
    @a=split;
    $chars_to_keep="\x{27}\x{2019}\x{651}"; # some may need dash or underscore (but it's better not to keep them so that "userfriendely" matches "user-friendely")
    for $i (@a) {
    $i=~s/[^\p{L}\p{N}$chars_to_keep]//go;
    if (not $i or not $i=~/\p{L}/o) {next}
    if (exists($words{$i})) {$words{$i}=[ $words{$i}[0].sprintf(" %s_%d",$fn,$j), $words{$i}[1]+1];}
    else {$words{$i}=[sprintf("%s_%d",$fn,$j),1];}
    $j=$j+1;
    }
  }
  close FN;
}

sub recursive_plus() {
  my $dir=shift;
  my $func= shift;
  my $i;
  my $j;
  my @a;
  &$func ("$dir") unless ($dir eq "." or $dir eq "./");
  opendir D, $dir;
  @a=readdir D;
  @a=grep {
    ($_ ne ".") && ($_ ne "..") && ($_ ne "+")
  } @a;
  @a=sort @a;
  foreach $i (@a) {
	$j=$dir."/".$i;
	$j=~s/\/\//\//; # remove repeated //
	$j=~s/^\.\///; # remove leading ./
	$j=~s/\/$//; # remove tailing /
	if (-d $j) {recursive_plus("$j",$func);}
	else {&$func ("$j");}
  }
  closedir D;
}
sub cd_th() {
  my $pwd=getcwd();
  do {
    if (-d "+") {return $pwd;}
    chdir("..");
    $pwd=getcwd();
  }until ($pwd eq "/");
  return 0;
}

my $prefix;
$prefix=cd_th() or die "Could not found Thwab control + directory";
printf "@<%s>\n",$prefix;
&recursive_plus (".", \&gen_search_cache);
open IX, ">+/4";
@aa=sort (keys %words);
for $i (@aa) {
  printf IX "_\t%s %d %s\n",$i,$words{$i}[1],$words{$i}[0];
}
close IX;
