# tagline-funs.awk -- AWK functions used for manipulating arch taglines
#
#  Copyright (C) 2003, 2004  Miles Bader <miles@gnu.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Written by Miles Bader <miles@gnu.org>

BEGIN {
  ## Default filename -> tagline-type rules
  ##
  ## Keys of tagline_type are (LEVEL, KIND, REGEXP); the value is the
  ## tagline type.  The special type "explicit" means that no tagline
  ## should be added to matching files.

  # Level 9: autotools inputs
  _tagline_default_rules(9, "ext",  "automake", "am")
  _tagline_default_rules(9, "ext",  "autoconf", "ac")
  _tagline_default_rules(9, "name", "autoconf", "configure\\.in")

  # Keep `configure' from being recognized as a shell script
  _tagline_default_rules(9, "name", "explicit", "configure")

  # Autoconf input files have no comment syntax that is automatically
  # removed from the generated file, so a tagline in FOO.in would clash
  # with the one in the generated FOO.  Such files must use explicit
  # tags instead (sigh).
  _tagline_default_rules(9, "ext", "explicit", "in")

  # Level 10: rules matching whole file names
  _tagline_default_rules(10, "name", "sh",
			 "\\.arch-inventory [Mm]akefile.* \\.gdbinit.*")
  _tagline_default_rules(10, "name", "lisp", "ChangeLog.*")
  _tagline_default_rules(10, "name", "c",    "Imakefile")     # run through cpp
  _tagline_default_rules(10, "name", "texi", "texinfo\\.tex") # texinfo, not tex

  # Level 10: rules matching file extensions

  #:  /* arch-tag: ...\n  ... */
  # (`.s' files are fed through cpp)
  _tagline_default_rules(10, "ext", "c", "c h s")

  #:  // arch-tag: ...
  # (`.pov' files are povray scene files)
  _tagline_default_rules(10, "ext", "c++",
			 "c\\+\\+ cc cxx cpp C CC h\\+\\+ hh hxx hpp H HH pov")

  #:  ;; arch-tag: ...
  _tagline_default_rules(10, "ext", "lisp", "el l")

  #:  (* arch-tag: ...\n  ... *)
  _tagline_default_rules(10, "ext", "pascal", "pas dpr pp")

  #:  # arch-tag: ...
  # (`.pl' is perl, `.py' is python; `.tit' and `.inp' by examination)
  _tagline_default_rules(10, "ext", "sh",
			 "sh bash csh sed awk perl pl py tit inp")

  _tagline_default_rules(10, "ext", "m4", "m4")

  #:  % arch-tag: ...
  # (`.erl' and `.hrl' are erlang)
  _tagline_default_rules(10, "ext", "tex", "tex sty erl hrl")

  #:  @c arch-tag: ...
  _tagline_default_rules(10, "ext", "texi", "texi texinfo")

  #:  .\" arch-tag: ...
  _tagline_default_rules(10, "ext", "roff", "[1-9]")

  #:  <!-- arch-tag: ...\n -->
  _tagline_default_rules(10, "ext", "html", "html xml")

  #:  rem arch-tag: ...
  _tagline_default_rules(10, "ext", "bat", "bat")

  # Level 10: the various script magic numbers all map to "sh".  This
  # regexp contains spaces, so it is stored directly rather than through
  # the whitespace-splitting helper.
  tagline_type[10, "header", "#! *[^ ]*/[a-z]*(sh|awk|perl)( .*)?"] = "sh"

  ## Tagline format for each tagline type (%s is replaced by the tag)

  # Single-line comment syntaxes
  file_type_tagline["sh"]       = "# arch-tag: %s"
  file_type_tagline["c++"]      = "// arch-tag: %s"
  file_type_tagline["lisp"]     = ";; arch-tag: %s"
  file_type_tagline["roff"]     = ".\\\" arch-tag: %s"
  file_type_tagline["tex"]      = "%% arch-tag: %s"
  file_type_tagline["automake"] = "## arch-tag: %s"
  file_type_tagline["null"]     = "arch-tag: %s"

  # Bracketed comment syntaxes
  file_type_tagline["c"]      = "/* arch-tag: %s\n   (do not change this comment) */"
  file_type_tagline["pascal"] = "(* arch-tag: %s\n   (do not change this comment) *)"
  file_type_tagline["html"]   = "<!-- arch-tag: %s\n     (do not change this comment) -->"

  # For the remaining types the most natural comment syntax (shown in
  # the comment before each entry) runs afoul of the rule that
  # arch-tag: must only be preceded by punctuation, so various
  # workarounds are used instead; hopefully this problem will be fixed
  # with the switch to arch-id:.

  # natural form:  dnl arch-tag: %s
  file_type_tagline["m4"] = "ifelse(dnl\tDo not change this comment\n   arch-tag: %s\n)dnl"
  # natural form:  dnl arch-tag: %s
  file_type_tagline["autoconf"] = "m4_if(dnl\tDo not change this comment\n   arch-tag: %s\n)dnl"
  # natural form:  @c arch-tag: %s
  file_type_tagline["texi"] = "@ignore\n   arch-tag: %s\n@end ignore"
  # natural form:  rem arch-tag: %s
  file_type_tagline["bat"] = "goto skipArchTag\n   arch-tag: %s\n:skipArchTag"

  ## Regexps for per-type end markers (should be a bit loose)
  file_type_end_marker_re["sh"]   = "# .* ends here"
  file_type_end_marker_re["lisp"] = ";; .* ends here"
}

# Enter a default rule of kind KIND at priority LEVEL, mapping to tagline
# type TYPE, for each whitespace-separated regexp in REGEXPS.
function _tagline_default_rules(level, kind, type, regexps  ,list,count,i)
{
  count = split (regexps, list, " ")
  for (i = 1; i <= count; i++)
    tagline_type[level, kind, list[i]] = type
}

# Read the project's tagline rules file, "{arch}/=tagline-rules", and merge
# its contents into the global tables tagline_type, file_type_tagline and
# file_type_end_marker_re.  The file is read at most once; later calls do
# nothing.  A missing or unreadable file is silently ignored.
#
# All parameters are local variables; call with no arguments.
function init_tagline_rules(  type,level,kind,regexp,idx,parts,line,nparts)
{
  if (! _tagline_rules_initialized) {
    ## Initialization

    # Read project tagline rules file
    #
    # The syntax is:
    #
    #       KIND[.LEVEL]  REGEXP  TYPE
    #  or:  tagline       TYPE    FORMAT
    #  or:  end-marker    TYPE    REGEXP
    #  or:  # COMMENT
    #
    # where KIND is "name" to match whole filenames, "ext" to match file
    # extensions, "header" to match the first line of the file, and "dir" to
    # match directories (in that order of priority).  LEVEL is a priority,
    # where lower levels are searched first; default rules are level 9 or
    # 10, and user rules with no explicit priority are level 5.
    #
    # REGEXP is an egrep-style (`extended') regular expression used for
    # matching (within the context of KIND), and a positive match means a
    # tagline of type TYPE is used.
    #
    # FORMAT should contain the actual tagline to add for TYPE, with a
    # single %s where the actual tag value should be substituted (any other
    # occurrences of % should be escaped by doubling them, e.g. %%).
    #
    # Both REGEXP and FORMAT may contain the following special backslash
    # escape sequences: \n \t \s \\  (\s means a space)
    # other occurrences of backslash are left unchanged.
    #
    while ((getline line < "{arch}/=tagline-rules") > 0) {
      # Skip blank lines and comments, however they are indented
      if (line ~ /^[ \t]*(#.*)?$/)
        continue

      nparts = split (line, parts)

      if (parts[1] == "tagline") {
        # FORMAT is the rest of the line after the keyword and TYPE; it
        # may itself contain whitespace, so strip the first two fields
        # from LINE instead of using parts[3].
        sub (/^[ \t]*tagline[ \t]*[^ \t]*[ \t]*/, "", line)
        file_type_tagline[parts[2]] = _file_tagline_unesc(line)
      } else if (parts[1] == "end-marker") {
        sub (/^[ \t]*end-marker[ \t]*[^ \t]*[ \t]*/, "", line)
        regexp = _file_tagline_unesc(line)
        # The default end markers live in file_type_end_marker_re, so
        # project rules must be stored there to extend or override them.
        # file_type_end_marker is still set too, in case existing code
        # reads the array this function used to fill.
        file_type_end_marker_re[parts[2]] = regexp
        file_type_end_marker[parts[2]] = regexp
      } else if (nparts >= 3) {
        # A rule without a TYPE would create an empty tagline type that
        # can shadow valid rules of the same level and kind, so lines
        # with fewer than three fields are ignored.
        level = 5
        kind = parts[1]
        regexp = _file_tagline_unesc(parts[2])
        type = parts[3]
        if (kind ~ /[.]/) {
          # KIND.LEVEL: split off the explicit priority
          split (kind, parts, /[.]/)
          kind = parts[1]
          level = parts[2] + 0
        }
        tagline_type[level, kind, regexp] = type
      }
    }
    close ("{arch}/=tagline-rules")

    _tagline_rules_initialized = 1
  }
}

# Compile the rules in tagline_type into one anchored regexp per
# (LEVEL, KIND, TYPE) combination, stored in _file_tagline_type_re.
# Also fills in _file_tagline_levels (the set of levels in use),
# _file_tagline_max_level (the highest such level) and
# _file_tagline_types (the set of known tagline types).  The work is
# done once; later calls do nothing.
#
# All parameters are local variables; call with no arguments.
function _file_tagline_init(  type,level,kind,regexp,idx,parts,line)
{
  if (! _file_tagline_initialized) {
    init_tagline_rules()

    for (idx in tagline_type) {
      split (idx, parts, SUBSEP)
      level = parts[1] + 0	# force a numeric comparison below
      kind = parts[2]
      regexp = parts[3]

      if (level > _file_tagline_max_level)
        _file_tagline_max_level = level
      _file_tagline_levels[level] = 1

      type = tagline_type[idx]

      # An extension rule matches any name ending in `.' plus the
      # extension.  The rule's regexp gets its own group so that an
      # alternation inside it (e.g. "c|h") stays behind the `.*\.'
      # prefix instead of matching a whole file name.
      if (kind == "ext")
        regexp = ".*\\.(" regexp ")"

      if ((level, kind, type) in _file_tagline_type_re)
        # Add another alternative to the group opened earlier
        regexp = _file_tagline_type_re[level, kind, type] "|" regexp
      else if (kind == "dir")
        # Directory rules match only the last path component(s)
        regexp = "(^|/)(" regexp
      else
        regexp = "^(" regexp

      _file_tagline_type_re[level, kind, type] = regexp

      _file_tagline_types[type] = 1
    }

    # Close the group opened for each compiled regexp and anchor it at
    # the end of the string
    for (idx in _file_tagline_type_re)
      _file_tagline_type_re[idx] = _file_tagline_type_re[idx] ")$"

    _file_tagline_initialized = 1
  }
}

# Return STRING with the backslash escapes allowed in tagline rule files
# expanded:  \n -> newline,  \t -> tab,  \s -> space,  \\ -> backslash.
# Any other backslash, including one at the end of STRING, is left
# unchanged.
#
# STRING is scanned once from left to right, so an escaped backslash
# never combines with the character after it (`\\n' yields a backslash
# followed by `n'), and no placeholder sequence is needed that could
# collide with the input text.
#
# RESULT, POS and ESC are local variables; call with STRING only.
function _file_tagline_unesc(string  ,result,pos,esc)
{
  result = ""
  while ((pos = index (string, "\\")) > 0) {
    # Copy the text before the backslash, then look at the escape char
    result = result substr (string, 1, pos - 1)
    esc = substr (string, pos + 1, 1)

    if (esc == "n")
      result = result "\n"
    else if (esc == "t")
      result = result "\t"
    else if (esc == "s")
      result = result " "
    else if (esc == "\\")
      result = result "\\"
    else {
      # Not a recognized escape: keep the backslash and continue
      # scanning from the character after it.
      result = result "\\"
      string = substr (string, pos + 1)
      continue
    }

    # Drop the two-character escape sequence just handled
    string = substr (string, pos + 2)
  }
  return result string
}

# Return the first tagline type whose compiled regexp for priority LEVEL
# and rule kind KIND matches STRING, or 0 if none does.  Types are tried
# in the (unspecified) order in which awk enumerates _file_tagline_types.
#
# CANDIDATE is a local variable.
function _file_tagline_find_type(string, level, kind  ,candidate)
{
  for (candidate in _file_tagline_types) {
    # Types with no rule of this level and kind cannot match
    if (! ((level, kind, candidate) in _file_tagline_type_re))
      continue
    if (string ~ _file_tagline_type_re[level, kind, candidate])
      return candidate
  }
  return 0
}

# Return the first tagline type whose "header" regexp for priority LEVEL
# matches the first line of FILE, or 0 if none does.  The first line is
# fetched with file_first_line, and only once some type is found to
# have a header rule at this level; if it comes back empty (or
# otherwise false), 0 is returned at once.
#
# CANDIDATE and FIRST_LINE are local variables.
function _file_tagline_find_header_type(file, level  ,candidate,first_line)
{
  for (candidate in _file_tagline_types) {
    if (! ((level, "header", candidate) in _file_tagline_type_re))
      continue

    # Fetch the header lazily, on the first rule that needs it
    if (! first_line) {
      first_line = file_first_line(file)
      if (! first_line)
        return 0
    }

    if (first_line ~ _file_tagline_type_re[level, "header", candidate])
      return candidate
  }
  return 0
}

# Return the `tagline type' of FILE, which selects the commenting
# convention used when adding a tagline to it, or 0 if FILE should not
# get a tagline (no rule matches, or the matching rule has the type
# "explicit").
#
# Levels are tried in increasing order starting from 0; within a level
# the rule kinds are tried in the order "name", "ext", "header", "dir",
# and the first match wins.
function file_tagline_type(file  ,base_name,dir,type,level)
{
  _file_tagline_init()

  # Split FILE into its directory and its last component; a name with
  # no slash is taken to be in ".".
  dir = "."
  base_name = file
  if (index (file, "/") > 0) {
    dir = file
    sub (/\/[^\/]*$/, "", dir)
    sub (/.*\//, "", base_name)
  }

  type = 0
  for (level = 0; level <= _file_tagline_max_level; level++) {
    # Only levels that actually have rules are worth searching
    if (! (level in _file_tagline_levels))
      continue

    type = _file_tagline_find_type(base_name, level, "name")
    if (! type)
      type = _file_tagline_find_type(base_name, level, "ext")
    if (! type)
      type = _file_tagline_find_header_type(file, level)
    if (! type)
      type = _file_tagline_find_type(dir, level, "dir")

    if (type)
      break
  }

  # "explicit" marks files that must not be given a tagline
  return (type == "explicit") ? 0 : type
}

# arch-tag: e20e538e-6101-4bc6-a79d-7afc472eea3d
