#!/bin/sh
#!perl -w # --*- Perl -*--
eval 'exec perl -x $0 ${1+"$@"}'
    if 0;
#-----------------------------------------------------------------------
#$Author: antanas $
#$Date: 2017-05-24 20:10:55 +0300 (Tr, 24 geg. 2017) $ 
#$Revision: 5322 $
#$URL: svn://www.crystallography.net/cod-tools/tags/v2.3/scripts/cif_split_primitive $
#------------------------------------------------------------------------
#*
#* Split CIF files into separate files with one data_ section each.
#*
#* This is a very naive and primitive version of the splitter, which
#* expects each data_... section to start on a new line. It may fail on
#* some CIF files that do not follow such convention. For splitting of
#* any correctly formatted CIF files, one must do full CIF parsing
#* using CIF grammar and tokenisation of the file.
#*
#* USAGE:
#*    $0 --options input1.cif input*.cif
#**

use strict;
use warnings;
use File::Basename qw( basename );
use COD::SOptions qw( getOptions );
use COD::SUsage qw( usage options );
use COD::ErrorHandler qw( report_message );
use COD::ToolsVersion;

# Per-severity flags; the entry for a message's level is passed as the
# second argument of report_message().
# NOTE(review): presumably a true value makes report_message() terminate
# the program -- confirm against COD::ErrorHandler.
my $die_on_error_level = {
    'ERROR'   => 1,
    'WARNING' => 0,
    'NOTE'    => 0,
};

# Names of the generated files are reported on STDERR unless --quiet
# switches this flag off.
my $verbose = 1;

# Directory that receives the split files; the empty string means that
# no directory prefix is added to the generated file names.
my $output_dir = '';

#* OPTIONS:
#*   -o, --output-dir out/
#*                     Put all split files into the directory out/
#*                     (default './').
#*   -v, --verbose
#*                     Print names of the generated files to STDERR.
#*   -q, --quiet
#*                     Do not print file names to STDERR.
#*   --help, --usage
#*                     Output a short usage message (this message) and exit.
#*   --version
#*                     Output version information and exit.
#**
# Process the command line options; whatever getOptions() returns
# replaces @ARGV and is subsequently read as input by the main loop.
@ARGV = getOptions(
    '-o,--output-dir' => \$output_dir,
    '-v,--verbose'    => sub { $verbose = 1 },
    '-q,--quiet'      => sub { $verbose = 0 },
    '--options'       => sub { options; exit },
    '--help,--usage'  => sub { usage; exit },
    '--version'       => sub {
        print 'cod-tools version ', $COD::ToolsVersion::Version, "\n";
        exit;
    },
);

# Remove trailing slashes so that the directory and the file name can
# later be joined with exactly one '/'.
# NOTE(review): a value that consists only of slashes (e.g. '/') becomes
# the empty string here and is then treated as "no output directory".
$output_dir =~ s{/+$}{};

# State of the line-oriented splitter. Everything except %files, %has_end
# and $last_output_file is reset whenever the end of an input file is
# reached (see the 'continue' block of the main loop).
my $output_file;                    # file that currently receives the lines
my $last_output_file;               # most recently opened file (diagnostics only)
my @initial_comments = ();          # comment/blank lines at the top of the input
my @data_global = ();               # lines of the first data_global section
my %files = ();                     # output files created so far
my %has_end = ();                   # output files that got a '#=...END' line;
                                    # only used by the disabled code below
my $global = 0;                     # 1 while inside a data_global section
my $skip = 1;                       # 1 while the input lines are discarded
my $reading_initial_comments = 1;   # 1 until the first "real" input line
my $data_id;

while(<>) {

    # Comment and blank lines that precede everything else in an input
    # file are collected; they are copied into each newly created file.
    if( $reading_initial_comments && /^\s*\#|^\s*$/ ) {
        push @initial_comments, $_;
        next;
    }

    $reading_initial_comments = 0;

    # Only the first data_global section is remembered; any further one
    # is reported and skipped.
    if( /^\s*data_global\s*$/ || /^\s*data_global\s+/ ) {
        if( !@data_global ) {
            push @data_global, $_;
            $global = 1;
            $skip = 0;
        } else {
            report_message( {
                'program'   => $0,
                'err_level' => 'WARNING',
                'message'   => 'second data_global encountered -- skipping'
            }, $die_on_error_level->{'WARNING'} );
            $global = 1;
            $skip = 1;
        }
        next;
    }

    # A data block header: redirect STDOUT to the file of this data block.
    if( /^\s*data_([^\s]+)/ ) {
        $data_id = $1;
        # The capture already excludes the leading 'data_'; this only
        # removes a repeated prefix (e.g. 'data_data_x' yields 'x').
        $data_id =~ s/^data_//;

        $global = 0;
        $skip = 0;

        close STDOUT or die "$0: STDOUT: ERROR, could not close the STDOUT\n";

        # Keep only characters that are safe in a file name.
        my $suffix = $data_id;
        $suffix =~ s/[^-+._a-zA-Z0-9]/_/g;

        # Input read from STDIN ('-') contributes no file name prefix.
        my $basename = basename( $ARGV, '.cif' );
        if( $basename ne '-' ) {
            $output_file = $basename . "_${suffix}" . '.cif';
        } else {
            $output_file = $suffix . '.cif';
        }
        if( $output_dir ne '' ) {
            $output_file = $output_dir . '/' . $output_file;
        }
        $last_output_file = $output_file;

        if( !exists $files{$output_file} ) {
            open STDOUT, '>', "$output_file" or
                die "$0: $output_file: ERROR, could not open file for "
                  . 'writing -- ' . lcfirst($!) . "\n";

            $files{$output_file} = $output_file;

            print STDERR "$output_file\n" if $verbose;
            # A new file starts with the initial comments and the
            # data_global section collected so far.
            print @initial_comments;
            print @data_global;
        } else {
            # The same file name was generated before during this run:
            # append instead of overwriting the earlier data block.
            open STDOUT, '>>', "$output_file" or
                die "$0: $output_file: ERROR, could not open file for "
                  . 'appending -- ' . lcfirst($!) . "\n";

            print STDERR "$output_file (appending)\n" if $verbose;
        }
    }

    # Guarded because no output file exists before the first data block.
    if( defined $output_file && /^\s*\#=+END/ ) {
        $has_end{$output_file} = 1;
    }

    if( $global == 1 && $skip == 0 ) {
        push @data_global, $_;
        next;
    }

    if( $skip == 1 ) {
        next;
    }

    print;

} continue {
    # Executed for every input line, including those that left the loop
    # body via 'next', so that the state is reset at the end of each input
    # file no matter how its last line was handled.
    if( eof ARGV ) {
        $output_file = undef;
        @data_global = ();
        @initial_comments = ();
        $global = 0;
        $skip = 1;
        $reading_initial_comments = 1;
    }
}

# $output_file has been reset at the end of the input by now, therefore
# the last opened file (or plain STDOUT) is named in the message.
my $closed_file = defined $last_output_file ? $last_output_file : 'STDOUT';
close STDOUT or die "$0: $closed_file: ERROR, could not close file after "
                  . 'reading -- ' . lcfirst($!) . "\n";
## foreach $output_file (keys %files) {
##     unless( defined $has_end{$output_file} ) {
##         open( STDOUT, ">>$output_file" ) or
##             die( "Could not open file '${output_file}' for appending: $!" );
##         print "#===END\n";
##         close STDOUT;
##     }
## }
