#!/usr/local/bin/perl -w

=head1 NAME

mkkhash - Juman ե Berkeley DB Ѵ

=head1 SYNOPSIS

mkkhash SOURCE TARGET

=head1 DESCRIPTION

Juman °Ƥ뼭 F<SOURCE> L<Juman::Katuyou> 黲ȤǤ
Ѵ륹ץȡ

=head1 DB FORMAT

줿 Berkeley DB ΥϥåϰʲηǥǡǼƤ롥

    ѷ:ѷ => ѷֹ\t[\tɤ߸]
    ѷ        => ѷ\tѷ\t ...

=head1 COPYRIGHT

Copyright (c) 2001 Satoshi Sato
Copyright (C) 2002 TSUCHIYA Masatoshi

=cut

use DB_File;
use POSIX qw/ O_RDONLY O_CREAT /;
use strict;

&main( @ARGV );

sub main {
    my( $source, $target ) = @_;

    $source ||= '/share/tool/juman/dic/JUMAN.katuyou';
    die "Error: No file $source at: " unless -f $source;

    $target ||= 'JUMAN.katuyou.db';
    die "Error: $target already exists at: " if -f $target;

    &mkkhash( $source, $target );
}

sub mkkhash {
    my( $source, $target ) = @_;
    print STDERR "mkkhash: $source -> $target ...\n";

    # Juman եɤ߹
    open( F, "< $source" ) or die "Can't read file($source): $!\n";
    my @line = <F>;
    close F;

    # Ȥ
    chomp @line;
    @line = grep { s/;.*$//; $_ } @line;

    # ꥹȤɤ߹
    my $list = &read_sexps( [ @line ] );

    # BerkeleyDB եؤν񤭹
    my %hash;
    tie( %hash, 'DB_File', $target, O_CREAT ) or die "Can't create file($target): $!\n";
    my $i;
    for( @$list ){
	$i++;
	my $type = shift @$_;
	my @form = ();
	print STDERR "$i: $type ";
	for( @{$_->[0]} ){
	    print STDERR ".";
	    my $form = shift @$_;
	    push(@form, $form);
	    my $key = "$type:$form";
	    my $value = join("\t", (scalar @form), @$_);
	    $hash{$key} = $value;
	}
	$hash{$type} = join("\t", $i, @form);
	print STDERR ' ', (scalar @form), "ѷ\n";
    }
    untie %hash;
}

# ʣSɤ߹
sub read_sexps {
    my( $buf ) = @_;
    my $list = [];

    while( @$buf ){
	my $x = &read_sexp( $buf );
	last unless $x;
	push( @$list, $x );
    }
    $list;
}

# Sɤ߹
sub read_sexp {
    my( $buf ) = @_;

    while( @$buf ){
	$buf->[0] =~ s/^(\s|)*//;		# 򥹥åפ
	last if $buf->[0];
	shift @$buf;
    }
    return '' unless @$buf;

    if( $buf->[0] =~ s/^\(// ){			# Ƭ
	&read_sexp_list( [], $buf );
    } else {
	$buf->[0] =~ s/^([^\(\)\s]*)//;
	my $x = $1;
	$x;
    }
}

sub read_sexp_list {
    my( $list, $buf ) = @_;

    while (@$buf) {
	while (@$buf) {
	    $buf->[0] =~ s/^(\s|)*//;		# 򥹥åפ
	    last if $buf->[0];
	    shift @$buf;
	}
	die "Syntax error: stopped" unless @$buf;
	return $list if $buf->[0] =~ s/^\)//;	# ƬĤ
	my $y = &read_sexp( $buf );
	die "Syntax error: stopped" unless $y;
	push( @$list, $y );
    }
    die "Syntax error: stopped";
}
