#! /bin/sh
#!perl -w # --*- Perl -*--
eval 'exec perl -x $0 ${1+"$@"}'
    if 0;
#------------------------------------------------------------------------------
#$Author: andrius $
#$Date: 2019-01-21 09:23:55 +0200 (Pr, 21 saus. 2019) $
#$Revision: 6648 $
#$URL: svn://www.crystallography.net/cod-tools/tags/v2.3/scripts/cif_validate $
#------------------------------------------------------------------------------
#*
#* Parse CIF file(s) and CIF dictionary(ies).
#* Check CIF file against CIF dictionaries.
#*
#* USAGE:
#*    $0 --dictionaries 'cif_core.dic,cif_cod.dic' --options input1.cif input*.cif
#**

use strict;
use warnings;
use File::Basename qw( basename );
use List::MoreUtils qw( any uniq );
use COD::CIF::Parser qw( parse_cif ) ;
use COD::CIF::DDL::Ranges qw( parse_range
                              range_to_string
                              is_in_range );
use COD::CIF::Tags::Manage qw( has_special_value has_numeric_value );
use COD::CIF::Tags::CanonicalNames qw( canonicalize_all_names );
use COD::CIF::DDL::Validate qw( check_enumeration_set );
use COD::SOptions qw( getOptions get_value );
use COD::SUsage qw( usage options );
use COD::ErrorHandler qw( process_warnings
                          process_parser_messages
                          report_message );
use COD::ToolsVersion;

# DDL1 core dictionary version 1.4.1
my %ddl1_enumeration_defaults = (
    '_list' => 'no',
    '_list_mandatory' => 'no',
    '_type_conditions' => 'no',
    '_type_construct' => '.*',
    '_list_level' => '1',
);

my @dict_files;
my $use_parser = 'c';
my $enum_as_set_tags = ['_atom_site_refinement_flags'];
my $ignore_case = 0;
my $report_local_tags = 0;
my $report_deprecated = 0;
my $debug = 0;

my $die_on_errors   = 1;
my $die_on_warnings = 0;
my $die_on_notes    = 0;

sub get_dict($$);
sub check_list_link_parent($$$);

#* OPTIONS:
#*   -d, --dictionaries 'cif_core.dic,cif_cod.dic'
#*                     A list of CIF dictionary files (according to DDL2)
#*                     to be used in CIF file validation. List elements
#*                     are separated either by ',' or by ' '. To include
#*                     dictionaries with filenames containing these symbols,
#*                     the --add-dictionary option is used.
#*   -D, --add-dictionary 'cif new dictionary.dic'
#*                     Add additional CIF dictionary to the list.
#*   --clear-dictionaries
#*                     Remove all CIF dictionaries from the list.
#*   --treat-as-set _atom_site_refinement_flags
#*                     Treat values of given data items as a set. For example,
#*                     more than one enumeration value could be defined
#*                     for a single element. Any number of data item tags can
#*                     be provided in the following way:
#*                     $0 --treat-as-set _tag_1 --treat-as-set _tag_2
#*                     Default is the '_atom_site_refinement_flags' data item.
#*   --no-treat-as-set
#*                     Do not treat values of any data items as sets.
#*                     (see --treat-as-set).
#*   --report-local-tags
#*                     Validate data items having tags prefixed with
#*                     the '_[local]' prefix. The prefix was used in the COD
#*                     to denote locally used data items prior to the creation
#*                     of cif_cod.dic. The default is to ignore these data items.
#*   --no-report-local-tags, --ignore-local-tags
#*                     Ignore data items having tags prefixed with '_[local]'
#*                     prefix. Default.
#*   --ignore-case
#*                     Ignore letter case while validating enumeration values.
#*                     For example, even though '_atom_site_adp_type' is
#*                     restricted to values ('Uani', 'Uiso', 'Uovl', ...),
#*                     value 'UANI' would still be treated as valid.
#*   --respect-case, --case-sensitive, --dont-ignore-case
#*                     Respect letter case while validating enumeration
#*                     values. Default.
#*   --report-deprecated
#*                     Report data items that are marked as deprecated in the
#*                     dictionaries. Data item deprecation usually means that
#*                     it has been replaced with an another data item(s).
#*   --ignore-deprecated
#*                     Do not report data items that are marked as deprecated
#*                     in the dictionaries (default).
#*   --use-perl-parser
#*                     Use Perl parser to parse CIF files.
#*   --use-c-parser
#*                     Use C parser to parse CIF files (default)
#*
#*   -c, --always-continue
#*                     Continue processing and return successful return status
#*                     even if errors are diagnosed.
#*   -c-, --always-die
#*                     Stop and return error status if errors are diagnosed.
#*   --continue-on-errors
#*                     Do not terminate script if errors are raised (default).
#*   --die-on-errors
#*                     Terminate script immediately if errors are raised.
#*   --continue-on-warnings
#*                     Do not terminate script if warnings are raised (default).
#*   --die-on-warnings
#*                     Terminate script immediately if warnings are raised.
#*   --continue-on-notes
#*                     Do not terminate script if notes are raised (default).
#*   --die-on-notes
#*                     Terminate script immediately if notes are raised.
#*   --debug
#*                     Output extra information for debugging purposes.
#*   --help, --usage
#*                     Output a short usage message (this message) and exit.
#*   -v, --version
#*                     Output version information and exit.
#**
@ARGV = getOptions(
    '-d,--dictionaries'    => sub{ @dict_files = split m/,|\s+/, get_value() },
    '-D,--add-dictionary'  => sub{ push @dict_files, get_value() },
    '--clear-dictionaries' => sub{ @dict_files = () },

    '--treat-as-set'                    => $enum_as_set_tags,
    '--no-treat-as-set'                 => sub{ $enum_as_set_tags = [] },

    '--ignore-case'                     => sub{ $ignore_case = 1 },
    '--dont-ignore-case,--respect-case' => sub{ $ignore_case = 0 },
    '--case-sensitive'                  => sub{ $ignore_case = 0 },

    '--report-local-tags'               => sub{ $report_local_tags = 1 },
    '--no-report-local-tags'            => sub{ $report_local_tags = 0 },
    '--ignore-local-tags'               => sub{ $report_local_tags = 0 },

    '--report-deprecated'               => sub{ $report_deprecated = 1 },
    '--ignore-deprecated'               => sub{ $report_deprecated = 0 },

    '--use-perl-parser'                 => sub{ $use_parser = 'perl' },
    '--use-c-parser'                    => sub{ $use_parser = 'c' },

    '-c,--always-continue'              => sub { $die_on_errors   = 0;
                                                 $die_on_warnings = 0;
                                                 $die_on_notes    = 0 },
    '-c-,--always-die'                  => sub { $die_on_errors   = 1;
                                                 $die_on_warnings = 1;
                                                 $die_on_notes    = 1 },

    '--continue-on-errors'          => sub { $die_on_errors = 0 },
    '--die-on-errors'               => sub { $die_on_errors = 1 },

    '--continue-on-warnings' => sub { $die_on_warnings = 0 },
    '--die-on-warnings'      => sub { $die_on_warnings = 1 },

    '--continue-on-notes'    => sub { $die_on_notes = 0 },
    '--die-on-notes'         => sub { $die_on_notes = 1 },

    '--options'         => sub{ options; exit },
    '--help,--usage'    => sub{ usage; exit; },
    '--debug'           => sub{ $debug = 1 },
    '-v,--version'      => sub { print 'cod-tools version ',
                                 $COD::ToolsVersion::Version, "\n";
                                 exit }
);

my $die_on_error_level = {
    'ERROR'   => $die_on_errors,
    'WARNING' => $die_on_warnings,
    'NOTE'    => $die_on_notes
};

binmode STDOUT, ':encoding(UTF-8)';
binmode STDERR, ':encoding(UTF-8)';

# Reading dictionary files

my %dict_tags;
if( @dict_files ) {
    my $tag_count = 0;
    my $options = { 'parser' => $use_parser, 'no_print' => 1 };
    foreach my $dict ( @dict_files ) {
        my ( $data, $err_count, $messages ) = parse_cif( $dict, $options );
        process_parser_messages( $messages, $die_on_error_level );

        local $SIG{__WARN__} = sub { process_warnings( {
                                       'message'       => @_,
                                       'program'       => $0,
                                       'filename'      => $dict,
                                     }, $die_on_error_level ) };

        $dict_tags{$dict} = get_dict( $data, \%ddl1_enumeration_defaults );
        if ( scalar( keys %{$dict_tags{$dict}} ) == 0 ) {
            warn "no data item definitions found\n";
        }
        $tag_count += scalar( keys %{$dict_tags{$dict}} );
    }

    if( $tag_count == 0 ) {
        report_message( {
            'program'   => $0,
            'err_level' => 'ERROR',
            'message'   => 'no data item definitions were found in the '
                         . 'provided dictionary files '
                         . '(\'' . join( '\', \'', @dict_files ) . '\')'
        }, $die_on_errors );
    }
} else {
    report_message( {
        'program'   => $0,
        'err_level' => 'ERROR',
        'message'   => 'at least one dictionary file should be provided by '
                     . 'using the \'--dictionaries\' option. Automatic '
                     . 'dictionary download is not implemeted yet'
    }, $die_on_errors );
    my $dict_iucr_uri = 'ftp://ftp.iucr.org/pub/cif_core.dic';
}

# Iterating through the CIF files

@ARGV = ('-') unless @ARGV;

my $validation_options = {
    'report_deprecated' => $report_deprecated,
    'ignore_case'       => $ignore_case,
    'enum_as_set_tags'  => $enum_as_set_tags
};

for my $filename ( @ARGV ) {
    my $options = { 'parser' => $use_parser, 'no_print' => 1 };
    my ( $data, $err_count, $messages ) = parse_cif( $filename, $options );
    process_parser_messages( $messages, $die_on_error_level );

    next if !defined $data;

    # convert all tags to a 'canonical' form
    canonicalize_all_names( $data );

    for my $block ( @{$data} ) {
        my $dataname = 'data_' . $block->{'name'};

        local $SIG{__WARN__} = sub {
            process_warnings( {
                'message'  => @_,
                'program'  => $0,
                'filename' => $filename,
                'add_pos'  => $dataname
            }, $die_on_error_level )
        };

        my @tag_value_notes;
        for my $dict_f ( sort keys %dict_tags ) {
            push @tag_value_notes,
                 @{ validate_data_block( $block, $dict_tags{$dict_f},
                                         $validation_options ) };
        };

        push @tag_value_notes,
             @{ report_unrecognised_data_names( $block, \%dict_tags,
                                                $report_local_tags ) };

        my %tag_value_note_count;
        foreach (@tag_value_notes) {
            $tag_value_note_count{$_}++
        }

        my $print_note_count = 1;
        foreach my $note (sort keys %tag_value_note_count) {
            warn "NOTE, $note" .
                 ( $print_note_count && $tag_value_note_count{$note} > 1 ?
                   " ($tag_value_note_count{$note} times)\n" : "\n" );
        }
    }
}

##
# Extracts data items from dictionary (parsed using COD::CIF::Parser).
# @param $dict_f
#       Reference to COD::CIF::Parser output CIF object.
# @param $ddl1_defaults
#       Reference to a hash containing default values of data items
#       that appears in DDL1 data item definitions.
# @return
#       Hash of tags and related references to parsed data blocks.
##
sub get_dict($$)
{
    my ($dict_f, $ddl1_defaults) = @_;
    my $tags = {};
    my $data_block = $dict_f->[0];
    if ( @{$data_block->{'save_blocks'}} ) {
        # DDL2 and DDLm data item definitions are stored in save frames.
        my $values = $data_block->{values};
        if( exists $values->{'_dictionary.ddl_conformance'} &&
            $values->{'_dictionary.ddl_conformance'}[0] =~ /^3[.]/ ) {
            warn 'dictionary is DDLm-compliant and can not be handled ' .
                 'in this version' . "\n";
        }

        for my $saveblk ( @{$data_block->{save_blocks}} ) {
            next if !exists $saveblk->{values}{'_item.name'};
            for ( @{$saveblk->{values}{'_item.name'}} ) {
                $tags->{lc $_} = $saveblk;
                $tags->{lc $_}{values}{_dataname} = $_;
            }
        }
    } else {
        # DDL1 data item definitions are stored in data blocks.
        $tags = get_ddl1_dict($dict_f, $ddl1_defaults);
    }

    return $tags;
}

##
# Builds a dictionary structure from a parsed DDL1 dictionary.
# @param $dict_data_blocks
#       Reference to DDL1 dictionary structure as returned by the
#       COD::CIF::Parser. Normally, a DDL1 dictionary consists of
#       multiple data blocks each defining a data category or a
#       data item.
# @param $ddl1_defaults
#       Reference to a hash containing default values of data items
#       that appears in DDL1 data item definitions.
# @return
#       Reference to a hash containing data item definitions.
##
sub get_ddl1_dict
{
    my ($dict_data_blocks, $ddl1_defaults) = @_;

    my %definitions;
    for my $data_block (@{$dict_data_blocks}) {
        # category definitions usually do no contain the '_type' data item
        next if !exists $data_block->{'values'}{'_type'};
        $data_block = add_default_data_items( $data_block, $ddl1_defaults );
        for ( map { lc } @{$data_block->{'values'}{'_name'}} ) {
            $definitions{$_} = $data_block;
            $definitions{$_}{values}{'_dataname'} = $data_block->{'name'};
        }
    }

    return \%definitions;
}

##
# Adds data items with the default values to the given data frame
# if they are not already present in the data frame.
# @param $data_block
#       Reference to data block or a save frame as returned by the
#       COD::CIF::Parser that should be modified.
# @param $ddl1_defaults
#       Reference to a hash containing default values of data items
#       that appears in DDL1 data item definitions.
# @return
#       Reference to the data frame with the default data items added.
##
sub add_default_data_items
{
    my ($data_block, $default_values) = @_;

    for my $tag ( keys %{$default_values} ) {
        if ( !exists $data_block->{'values'}{$tag} ) {
            $data_block->{'values'}{$tag} = [ $default_values->{$tag} ];
        }
    }

    return $data_block;
}

sub validate_data_block
{
    my ( $data_block, $dict, $options ) = @_;

    $options = {} if !defined $options;
    my $report_deprecated = exists $options->{'report_deprecated'} ?
                                   $options->{'report_deprecated'} : 0;
    my $ignore_case       = exists $options->{'ignore_case'} ?
                                   $options->{'ignore_case'} : 0;
    my $enum_as_set_tags  = exists $options->{'enum_as_set_tags'} ?
                                   $options->{'enum_as_set_tags'} : [];

    my @validation_messages;
    push @validation_messages,
         @{ validate_block_loop_reference_items($data_block, $dict) };

    for my $tag ( @{$data_block->{'tags'}} ) {
        my $lc_tag = lc $tag;

        next if !exists $dict->{$lc_tag};

        if( $report_deprecated ) {
            push @validation_messages,
                 @{ report_deprecated( $data_block, $tag, $dict ) }
        };

        my $dict_item = $dict->{$lc_tag}{values};

        push @validation_messages,
             @{ validate_list_mandatory( $data_block, $tag, $dict_item ) };

        push @validation_messages,
             @{ check_list_link_parent( $data_block, $tag, $dict_item ) };

        push @validation_messages,
             @{ validate_enumeration_set(
                    $data_block, $tag, $dict_item,
                    {
                        'ignore_case'  => $ignore_case,
                        'treat_as_set' => any { lc($_) eq $lc_tag }
                                                        @{$enum_as_set_tags}
                    }
             ) };

       push @validation_messages,
            @{ validate_su( $data_block, $tag, $dict_item ) };

       push @validation_messages,
            @{ validate_range( $data_block, $tag, $dict_item ) };

       push @validation_messages,
            @{ validate_data_type( $data_block, $tag, $dict_item ) };

       # TODO: move this validation step to the ddl2_validate_data_block
       # subroutine once it is created
       push @validation_messages,
            @{ ddl2_validate_data_type( $data_block, $tag, $dict_item )};
    }

    return \@validation_messages;
}

##
# Returns an array of tags of data items that have superseded the data item.
# @param $dict
#       Reference to a dictionary object as returned by the 'get_dict()'
#       subroutine.
# @param $tag
#       Lowercased name of the data item.
# @return
#       Array of tags of data items that have superseded the data item.
##
sub get_replacement_tags
{
    my ( $dict, $tag ) = @_;

    return [] if !exists $dict->{$tag};
    my $dict_item = $dict->{$tag}{'values'};
    return [] if !exists $dict_item->{'_related_item'};

    my @replace_with;
    # check if data items are deprecated (replaced with other data items)
    for( my $i = 0; $i < @{$dict_item->{'_related_item'}}; $i++ ) {
        if( $dict_item->{'_related_function'}[$i] eq 'replace' ) {
            push @replace_with, $dict_item->{'_related_item'}[$i];
        }
    }

    return \@replace_with;
}

##
# Returns an array of tags of the data items that are required to be present
# in the loop containing the analyzed data item.
# @param $dict
#       Reference to a dictionary object as returned by 'get_dict' subroutine.
# @param $tag
#       Lowercased name of the data item to analyze.
# @return $list_reference_tags
#       A reference to an array of tags of data items that are required to
#       be present in the loop containing the analyzed data items.
##
sub get_list_reference_tags
{
    my ( $dict, $tag ) = @_;

    return [] if !exists $dict->{$tag};
    my $dict_item = $dict->{$tag}{values};
    return [] if !exists $dict_item->{'_list_reference'};

    my @list_reference_tags;
    # _list_reference identifies data item(s) that must collectively be
    # in a loop. They are referenced by the names of their data blocks
    for my $ref_dataname (@{$dict_item->{'_list_reference'}}) {
      for my $dict_tag ( sort keys %{$dict} ) {
          if ( '_' . $dict->{$dict_tag}{values}{'_dataname'} eq $ref_dataname ) {
              push @list_reference_tags, $dict_tag;
          }
      }
    }

    return \@list_reference_tags;
}

##
# Checks the existence of parent (foreign) keys as specified by a DDL1 dictionary.
# @param $data_block
#       Data frame that should be validated as returned by the CIF::COD::Parser.
# @param $tag
#       The data name of the item that should be validated.
# @param $dict_item
#       Dictionary definition of the validated data item as returned by
#       get_dict() subroutine.
# @return
#       Array reference to a list of validation messages.
##
sub check_list_link_parent($$$)
{
    my ( $block, $tag, $dict_item ) = @_;

    return [] if !exists $dict_item->{'_list_link_parent'};
    my $parents = $dict_item->{'_list_link_parent'};

    # TODO: not handled yet, unsure how to do that
    return [] if @{$parents} > 1;

    my $parent = $parents->[0];
    return [] if !exists $block->{values}{$parent};

    my %parent_values = map { $_ => 1 } @{$block->{values}{$parent}};

    my @unmatched = uniq sort grep { !exists $parent_values{$_} }
                         @{$block->{values}{$tag}};

    my @validation_messages;
    for my $value (@unmatched) {
        # FIXME: these special CIF values should be handled properly
        # by taking their quotation into account
        next if ( $value eq '.' || $value eq '?' );
        push @validation_messages,
             "data item '$tag' contains value '$value' that was not found " .
             "among the values of the parent data item '$parent'";
    }

    return \@validation_messages;
}

##
# Checks enumeration values against a DDL1 dictionary.
# @param $data_block
#       Data frame that should be validated as returned by the CIF::COD::Parser.
# @param $tag
#       The data name of the item that should be validated.
# @param $dict_item
#       Dictionary definition of the validated data item as returned by
#       get_dict() subroutine.
# @param $options
#       Reference to a hash of options. The following options are recognised:
#       {
#       # Ignore the case while matching enumerators
#           'ignore_case'  => 0
#       # Treat data values as potentially consisting of a
#       # combination of several enumeration values
#           'treat_as_set' => 0
#       }
# @return
#       Array reference to a list of validation messages.
##
sub validate_enumeration_set
{
    my ($data_block, $tag, $dict_item, $options) = @_;

    return [] if !exists $dict_item->{'_enumeration'};
    my $enum_set = $dict_item->{'_enumeration'};

    my @values;
    for ( my $i = 0; $i < @{$data_block->{'values'}{$tag}}; $i++ ) {
        next if has_special_value($data_block, $tag, $i);
        push @values, $data_block->{'values'}{$tag}[$i];
    }

    my @messages;
    my $is_proper_enum = check_enumeration_set( \@values, $enum_set, $options );
    for ( my $i = 0; $i < @{ $is_proper_enum }; $i++ ) {
        if ( $is_proper_enum->[$i] ) {
            push @messages,
                "data item '$tag' value '$values[$i]' must be one of the "
              . 'enumeration values [' . ( join ', ', @{$enum_set} ) . ']';
        }
    };

    return \@messages;
}

##
# Checks values with standard uncertainties against a DDL1 dictionary.
# @param $data_block
#       Data frame that should be validated as returned by the CIF::COD::Parser.
# @param $tag
#       The data name of the item that should be validated.
# @param $dict_item
#       Dictionary definition of the validated data item as returned by
#       get_dict() subroutine.
# @return
#       Array reference to a list of validation messages.
##
sub validate_su
{
    my ( $data_block, $tag, $dict_item ) = @_;

    return [] if is_su_permitted($dict_item);

    my @validation_messages;
    for (my $i = 0; $i < @{$data_block->{'values'}{$tag}}; $i++) {
        next if  has_special_value($data_block, $tag, $i);
        next if !has_numeric_value($data_block, $tag, $i);

        my $value = $data_block->{'values'}{$tag}[$i];
        if ( $value =~ /([(]\d+[)])$/ ) {
        push @validation_messages,
             "data item '$tag' value '$value' is not permitted to " .
             'contain standard uncertainty \'' . $1 . '\' -- '
           . 'standard uncertainty will be ignored in further validation';
        }
    }

    return \@validation_messages;
}

##
# Evaluates if the DDL1 dictionary definition permits data item values
# to contain standard uncertainty values.
# @param $dict_item
#       Dictionary definition of the data item as returned by get_dict()
#       subroutine.
# @return
#       1 is the s.u. value is permitted, 0 otherwise.
##
sub is_su_permitted
{
    my ( $dict_item ) = @_;

    return 1 if !exists $dict_item->{'_type'};
    return 1 if $dict_item->{'_type'}[0] ne 'numb';

    return any { $_ eq 'esd' || $_ eq 'su' } @{$dict_item->{'_type_conditions'}};
}

##
# Checks if values are within the range specified by a DDL1 dictionary.
# @param $data_block
#       Data frame that should be validated as returned by the CIF::COD::Parser.
# @param $tag
#       The data name of the item that should be validated.
# @param $dict_item
#       Dictionary definition of the validated data item as returned by
#       get_dict() subroutine.
# @return
#       Array reference to a list of validation messages.
##
sub validate_range
{
    my ( $data_block, $tag, $dict_item ) = @_;

    return [] if !exists $dict_item->{'_enumeration_range'};

    my $range = parse_range($dict_item->{'_enumeration_range'}[0]);
    my $range_type = $dict_item->{'_type'}[0];
    my $is_su_permitted = is_su_permitted($dict_item);

    my @validation_messages;
    for (my $i = 0; $i < @{$data_block->{'values'}{$tag}}; $i++) {
        next if has_special_value($data_block, $tag, $i);
        next if !has_numeric_value($data_block, $tag, $i) &&
                $range_type eq 'numb';

        my $value = $data_block->{'values'}{$tag}[$i];
        if ( $range_type eq 'numb' ) {
            $value =~ s/[(]\d+[)]$//;
        }

        my $su;
        if( $is_su_permitted ) {
            $su = $data_block->{'precisions'}{$tag}[$i]
        }

        if( is_in_range( $value,
                { 'type'  => $range_type,
                  'range' => $range,
                  'sigma' => $su, } ) <= 0 ) {
            push @validation_messages,
                 "data item '$tag' value '" .
                 $data_block->{'values'}{$tag}[$i] .
                 '\' should be in range ' .
                 range_to_string( $range, { 'type' => $range_type } );
        }
    }

    return \@validation_messages;
}

##
# Checks if values satisfy the DDL1 data type constraints.
# @param $data_block
#       Data frame that should be validated as returned by the CIF::COD::Parser.
# @param $tag
#       The data name of the item that should be validated.
# @param $dict_item
#       Dictionary definition of the validated data item as returned by
#       get_dict() subroutine.
# @return
#       Array reference to a list of validation messages.
##
sub validate_data_type
{
    my ( $data_block, $tag, $dict_item ) = @_;

    return [] if !$dict_item->{'_type'};
    return [] if  $dict_item->{'_type'}[0] ne 'numb';

    my @validation_messages;
    for ( my $i = 0; $i < @{$data_block->{'values'}{$tag}}; $i++ ) {
        next if has_special_value($data_block, $tag, $i);
        next if has_numeric_value($data_block, $tag, $i);
        push @validation_messages,
            "data item '$tag' value '" . $data_block->{'values'}{$tag}[$i] .
            '\' is of type \'' . $data_block->{'types'}{$tag}[$i] .
            '\' while it should be numeric, i.e. \'FLOAT\' or \'INT\'';
    }

    return \@validation_messages;
}

##
# Checks if values satisfy the DDL2 data type constraints.
# @param $data_frame
#       Data frame that should be validated as returned by the CIF::COD::Parser.
# @param $tag
#       The data name of the item that should be validated.
# @param $dict_item
#       Dictionary definition of the validated data item as returned by
#       get_dict() subroutine.
# @return
#       Array reference to a list of validation messages.
##
sub ddl2_validate_data_type
{
    my ( $data_frame, $tag, $dict_item ) = @_;

    # FIXME: the DDL2 data type validation is much more complex than
    # assumed in the current implementation. For example, the basic
    # data type are described in the DDL2 dictionary using regular
    # expressions, but these data types can be extended or even overridden
    # in any other DDL2 dict
    return [] if !$dict_item->{'_item_type.code'};
    return [] if  $dict_item->{'_item_type.code'}[0] ne 'float' &&
                  $dict_item->{'_item_type.code'}[0] ne 'int';

    my @validation_messages;
    for ( my $i = 0; $i < @{$data_frame->{'values'}{$tag}}; $i++ ) {
        next if has_special_value($data_frame, $tag, $i);
        next if has_numeric_value($data_frame, $tag, $i);
        push @validation_messages,
            "data item '$tag' value '" . $data_frame->{'values'}{$tag}[$i] .
            '\' is of type \'' . $data_frame->{'types'}{$tag}[$i] .
            '\' while it should be numeric, i.e. \'FLOAT\' or \'INT\'';
    }

    return \@validation_messages;
}

##
# Checks if data names are defined in at least one of the given dictionaries.
# @param $data_block
#       Data frame that should be validated as returned by the CIF::COD::Parser.
# @param $dicts
#       Reference to a hash of dictionaries as returned by the
#       get_dict() subroutine.
# @return
#       Array reference to a list of validation messages.
##
sub report_unrecognised_data_names
{
    my ($data_block, $dicts, $report_local_tags) = @_;

    my @validation_messages;

    my @tags = sort @{$data_block->{'tags'}};
    if ( !$report_local_tags ) {
        @tags = grep { $_ !~ m/^_\[local\]/ } @tags;
    }

    for my $dict ( values %{$dicts} ) {
        @tags = grep { !exists $dict->{lc $_} } @tags;
    }

    @validation_messages = map {
              "data item '$_' was not found in the provided dictionaries";
          } @tags;

    return \@validation_messages;
}

##
# Checks if data block loops contain reference data items as specified
# by a DDL1 dictionary.
# @param $data_block
#       Data frame that should be validated as returned by the CIF::COD::Parser.
# @param $dict
#       Reference to a DDL1 dictionary structure as returned by the
#       get_dict() subroutine.
# @return
#       Array reference to a list of validation messages.
##
sub validate_block_loop_reference_items
{
    my ($data_block, $dict) = @_;

    my @validation_messages;
    for my $loop ( @{$data_block->{'loops'}} ) {
        push @validation_messages,
             @{ validate_loop_reference_items( $loop, $dict ) };
    }

    return \@validation_messages;
}

##
# Checks if a loop contains reference data items that together act as a
# primary loop key as specified by a DDL1 dictionary.
# @param $loop_tags
#       Reference to an array of data names residing in a loop.
# @param $dict
#       Reference to a DDL1 dictionary structure as returned by the
#       get_dict() subroutine.
# @return
#       Array reference to a list of validation messages.
##
sub validate_loop_reference_items
{
    my ($loop_tags, $dict) = @_;

    my @reported_key;
    my @validation_messages;
    for my $tag ( map { lc } @{$loop_tags} ) {
        next if !exists $dict->{$tag};
        for my $ref_tag ( @{ get_list_reference_tags($dict, $tag) } ) {
            next if any { $_ eq $ref_tag } @reported_key;
            next if any { lc $_ eq $ref_tag } @{$loop_tags};

            push @reported_key, $ref_tag;
            push @validation_messages,
                 "data item '$ref_tag' is mandatory in loop when " .
                 'data item(s) [' .
                 ( join ', ', map {"'$_'"}
                        @{$dict->{$tag}{'values'}{'_name'}} ) .
                 '] are defined, but was not found';
        }
    }

    return \@validation_messages;
}

##
# Checks if a data item reside in a correct loop context as specified
# by a DDL1 dictionary.
# @param $data_block
#       Data frame that should be validated as returned by the CIF::COD::Parser.
# @param $tag
#       The data name of the item that should be validated.
# @param $dict_item
#       Dictionary definition of the validated data item as returned by
#       get_dict() subroutine.
# @return
#       Array reference to a list of validation messages.
##
sub validate_list_mandatory
{
    my ( $data_block, $tag, $dict_item ) = @_;

    return [] if !exists $dict_item->{'_list'};

    my @validation_messages;
    if ( !exists $data_block->{'inloop'}{$tag} ) {
        if ( $dict_item->{'_list'}[0] eq 'yes' ) {
            push @validation_messages,
                 "data item '$tag' must appear in a loop";
        }
    } elsif ( $dict_item->{'_list'}[0] eq 'no' ) {
        push @validation_messages,
             "data item '$tag' must not appear in a loop";
    }

    return \@validation_messages;
}

##
# Checks if a data item is deprecated as specified by a DDL1 dictionary.
# Cases when both the replaced and the replacing data item reside in the
# same data block are also reported.
# @param $data_block
#       Data frame that should be validated as returned by the CIF::COD::Parser.
# @param $tag
#       The data name of the item that should be validated.
# @param $dict
#       Reference to a DDL1 dictionary structure as returned by the
#       get_dict() subroutine.
# @return
#       Array reference to a list of validation messages.
##
sub report_deprecated
{
    my ($data_block, $tag, $dict) = @_;

    my $replacement_tags = get_replacement_tags($dict, lc $tag);
    return [] if !@{$replacement_tags};

    my @validation_messages;

    push @validation_messages,
         "data item '$tag' has been replaced by data item(-s) [" .
         join(', ', map {"'$_'"} @{$replacement_tags}) . ']';

    my @existing_replacement_tags =
        grep { exists $data_block->{values}{$_} } @{$replacement_tags};
    if( @existing_replacement_tags ) {
        push @validation_messages,
             "both the replaced data item '$tag' and the replacing " .
             'data item(-s) [' .
             join( ', ', map {"'$_'"} @{$replacement_tags}) .
             '] appear in the same data block';
    }

    return \@validation_messages;
}
