#!/usr/bin/perl
# $Id: $
# Written by Adrian Mariano, additional features by Eric Backus and
# Jeff Conrad.

# Script to translate a texinfo file into an nroff/troff manual page.
# last revision: 15 October 2018 Jeff Conrad

$version="1.01u";

$html=0;
$example=0;
$ignore=0;
$tex=0;
$doman=0;
$title=0;
$diditem=0;
$justdidlp=1;
$noman=0;
$manprefix="";
$args=($#ARGV < 0) ? "stdin" : "@ARGV";

printf(".\\\"Do not edit this file.  It was created from %s\n", $args);
printf(".\\\"using texi2man version %s on %s", $version, `date`);

while(<>)
{
    # use font CW in tables
    if (/\@c man\s+l\s/)
    {
	s/\@c man //;
	s/l/lfCWp-1/;
	print;
	next;
    }
    if (s/\@c man //)
    {
	print;
	if (/\.TH/) { add_extensions(); }
	next;
    }
    if (/\@c noman/) { $noman=1; next; }
    if (/\@c end noman/) { $noman=0; next; }
    if ($noman) { next; }

    if (/\@c ifman\s*(.*)/) { $doman=1; $manprefix = $1; next; }
    if (/\@c end ifman/) { $doman=0; $manprefix = ""; next; }

    if (/^\@c [^m]/) { next; }

    if (/^\\input/) { next; }
    if (/^\*/) { next; }
    if (/^START-INFO-DIR-ENTRY/) { next; }
    if (/^END-INFO-DIR-ENTRY/) { next; }

    if (/\@titlepage/) { $title=1; next; }
    if (/\@end titlepage/) { $title=0; next; }
    if (/\@tex/) { $tex=1; next; }
    if (/\@end tex/) { $tex=0; next; }
    if (/\@ignore/) { $ignore=1; next; }
    if (/\@end ignore/) { $ignore=0; next; }
    if (/\@ifhtml/) { $html=1; next; }
    if (/\@end ifhtml/) { $html=0; next; }
    if (/\@html/) { $html=1; next; }
    if (/\@end html/) { $html=0; next; }
    if (!$doman && ($ignore || $html || $title || $tex)) { next; }
    if (/\@codequoteundirected/) { next; }

    s/\@\*$/\n\.br/g;
    s/^\@\*/.br/g;
    s/\@\*$/\n.br/g;
    s/\@ / /g;
    s/\@dmn\{([^}]*)}/\\|$1/g;
    s/\@tie\{}/\@no_break_space\{}/g;
    s/\@w\{}/\@no_break_space\{}/g;
    s/\@backslashchar\{}/\\e/g;

    # ellipsis, defined in extensions
    s/\@dots\{}/\\*(El/g;

    s/\@cite\{([^}]*)}/\@in_sgl_quotes\{$1}/g;
    s/\@url\{([^}]*)}/\@in_sgl_quotes\{$1}/g;
    s/\@email\{([^}]*)}/\@in_sgl_quotes\{$1}/g;

    s/\@dfn\{([^}]*)}/\@in_italics\{$1}/g;

    s/\@emph\{([^}]*)}/\@in_italics\{$1}/g;
    s/\@i\{([^}]*)}/\@in_italics\{$1}/g;
    s/\@r\{([^}]*)}/\@in_roman\{$1}/g;
    s/\@var\{([^}]*)}/\@in_italics\{$1}/g;

    s/\@b\{([^}]*)}/\@in_bold\{$1}/g;
    s/\@strong\{([^}]*)}/\@in_bold\{$1}/g;

    # remove trailing comma from xref because man won't include the page number
    s/\@xref\{([^}]*)},/\@xref\{$1}/g;
    s/\@xref\{([^}]*)}/See \@in_italics\{$1}/g;
    s/\@ref\{([^}]*)}/\@ref\{$1}/g;
    s/\@ref\{([^}]*)}/\@in_italics\{$1}/g;
    s/\@pxref\{([^}]*)}/see \@in_italics\{$1}/g;
    s/\@uref\{([^}]*)}/\@in_roman\{$1}/g;

    if (/\@chapter.*\@command/)
    {
	s/\@command\{([^}]*)}/\@in_italics\{$1}/g;
    }

    # show in constant-width font
    s/\@code\{([^}]*)}/\@constwid\{$1}/g;
    s/\@command\{([^}]*)}/\@constwid\{$1}/g;
    s/\@env\{([^}]*)}/\@constwid\{$1}/g;

    # show in constant-width oblique font
    s/\@kbd\{([^}]*)}/\@constwidI\{$1}/g;

    # show in constant-width font with single quotes
    s/\@file\{([^}]*)}/\@constwidQ\{$1}/g;
    s/\@option\{([^}]*)}/\@constwidQ\{$1}/g;
    # Pass ASCII double quotes to .CQ encoded as two double quotes
    # disallow single quotes here because groff converts them to
    # typographical closing quotes.
    # This substitution works only in very limited circumstances, and
    # needs extension to handle the general case of ASCII quotes in
    # sample text
    s/\@samp\{([^}]*)["']{2,2}}/\@samp\{$1""""}/g;
    s/\@samp\{(.*\@(tie|no_break_space)\{})["']{2,2}}/\@samp\{$1""""}/g;
    s/\@samp\{([^}]*)}/\@constwidQ\{$1}/g;

    s/\@sc\{([^}]*)}/\@to_upper\{$1}/g;

    s/\@key\{([^}]*)}/\@in_italics\{$1}/g;
    s/\@footnote\{([^}]*)}/\@in_square_br\{$1}/g;

    s/\@math\{([^}]*)}/\@no_decoration\{$1}/g;

    if (/\@w\{([^}]*)}/) {
	s/\@w\{([^}]*)}/\@no_break_word\{$1}/g;
    }

    # leave minus (dash) lists so they can be recognized later
    if (! /^\@itemize/) { s/\@minus\{}/\\-/g; }
    s/\@copyright\{}/\\(co/g;
    s/\@noindent//;
    s/\@\{/{/g;
    s/\@}/}/g;
    s/\@\@/@/g;
    s/---/\\(em/g;
    # allowable line break escape: groff only?
    s/\@\//\\:/g;
    s/^\@raggedright/.na/;
    s/^\@end raggedright/.ad b/;

    s/\@in_sgl_quotes\{([^}]+)}/`$1'/g;
    s/\@in_dbl_quotes\{([^}]+)}/\"$1\"/g;
    s/\@in_italics\{([^}]+)}/\\fI$1\\fP/g;
    s/\@in_roman\{([^}]+)}/\\fR$1\\fP/g;
    s/\@in_bold\{([^}]+)}/\\fB$1\\fP/g;
    s/\@to_upper\{([^}]*)}/\U$1\E/g;
    s/\@no_decoration\{([^}]*)}/$1/g;
    if (/\@no_break_word\{([^}]+)}(\S*)/) {
	$_ = no_break_word("$_", '@no_break_word');
    }
    s/\@no_break_space\{}/\\ /g;
    s/\@[ ]/ /g;
    s/\@in_angle_br\{([^}]*)}/<$1>/g;
    s/\@in_square_br\{([^}]*)}/[$1]/g;

    # set up to use CW, CI, and CQ macros
    # put every instance on a new line
    # ensure that prepended and appended macros go on separate lines
    # separate concatenated commands with spaces
    s/([}])(\@constwid[IQ]*)/$1 $2/g;
    s/(\@constwid[IQ]*\{[^}]+})(\@)/$1 $2/g;
    # space before -> newline
    s/\s+(\S*\@constwid[IQ]*\{[^}]+}\S*)/\n$1/g;
    # space after -> newline
    s/(\S*\@constwid[IQ]*\{[^}]+}\S*)\s+/$1\n/g;

    if (/(\S*)\@constwidI\{([^}]+)}(\S*)/) {
	$_ = CW_macro("$_", '@constwidI', ".CI");
    }
    if (/(\S*)\@constwidQ\{([^}]+)}(\S*)/) {
	$_ = CW_macro("$_", '@constwidQ', ".CQ");
    }
    if (/(\S*)\@constwid\{([^}]+)}(\S*)/) {
	$_ = CW_macro("$_", '@constwid', ".CW");
    }

    # handle backslash character in sample
    s/(\.C[IQW]\s+\S+\s+)"\\"/$1"\\e"/g;
    s/(\.C[IQW]\s+)"\\"/$1"\\e"/g;
    # handle backslash character in Windows pathname
    # starts with a drive specifier ...
    if (/(\.C[IQW]\s+"[[:alpha:]]:)/) {
	# don't change font switches or escaped spaces
	s/(\S)\\(?!(\s|f[RIBP]|f\([A-Z]{2}))/$1\\e/g;
    }
    # some versions of n/troff don't have \(en, so use \-
    # don't replace double hyphens in C[IQW] macros; assume true
    # en dashes will be closed up to previous word
    s/([^" ]+?)--/$1\\-/g;

    s/\@value\{([^\s]+)}/$value{$1}/eg;
    if (/\@set\s+([^\s]+)\s+(.*)$/) { $value{$1} = $2; next; }
    if (/\@clear\s+([^\s]+)\s+(.*)$/) { delete $value{$1}; next; }

    # tables of command-line options as used in units(1)
    if (/\@table (.*)/) { $intable = 1; next; }
    if (/\@end  *table/) { $intable = 0; next; }
    if ($intable == 1)
    {
	if (/\@itemx (.*)/)
	{
	    $samp = $1;
	    # add hair space to visually separate the hyphens in roman type
	    $samp =~ s/--/-\\^-/;
	    $samp =~ s/-([[:alnum:]])/-\\^$1/;
	    if (!$diditem)
		{ printf(".TP\n.BR \"$samp\""); }
	    else
		{ printf(" \", \" \"$samp\""); }
	    $diditem=1; next;
	}
	elsif ($diditem) { printf("\n"); $diditem=0; }
	if (/\@item (.*)/)
	{
	    $samp = $1;
	    # add hair space to visually separate the hyphens in roman type
	    $samp =~ s/--/-\\^-/;
	    $samp =~ s/-([[:alnum:]])/-\\^$1/;
	    printf("%s.TP\n%s.BR \"$samp\"", $manprefix, $manprefix);
	    $diditem=1;
	    next;
	}
    }

    # unordered list: bullet or minus
    if (/^\@itemize *$/ || /^\@itemize +@(bullet|minus)(\{})?/)
    {
	if ($1 =~ "minus") { $listmark = "\\-"; }
	else { $listmark = "\\(bu"; }
	$in_ulist = 1;
	next;
    }
    if ($in_ulist == 1 && /^\@end +itemize/) { $in_ulist = 0; next; }
    if ($in_ulist == 1)
    {
	if (/^\@item *$/) { printf("%s.IP \\h'1n'%s 4n\n", $manprefix, $listmark); }

    }

    if (s/\@chapter (.*)/.SH \U$1\E/)
    {
	# restore proper case on font switches
	s/\\FR/\\fR/g;
	s/\\FI/\\f(BI/g;	# chapter headings (SH in man) are bold
	s/\\FP/\\fP/g;
	printf("%s%s", $manprefix, $_);
	$justdidlp=1;
	next;
    }

    if (s/\@section (.*)/$1/)
    {
	printf("%s.SS %s", $manprefix, $_);
	next;
    }

    # FIXME? why do we need $manprefix for these?
    # input/output example macros
    if (/\@example/) { printf("%s.ES\n", $manprefix); $example=1; next; }
    if (/\@end example/) { printf("%s.EE\n", $manprefix); $example=0; $justdidlp=0; next; }

    if (/\@smallexample/) { printf("%s.ES S\n", $manprefix); $example=1; next; }
    if (/\@end smallexample/) { printf("%s.EE\n", $manprefix); $example=0; $justdidlp=0; next; }

    # no CW font
    if (/\@display/) { printf("%s.DS\n", $manprefix, $manprefix); $example=1; next; }
    if (/\@end display/) { printf("%s.DE\n", $manprefix, $manprefix); $example=0; next; }

    # no CW font, no indent
    if (/\@format/) { printf("%s.nf\n", $manprefix); $example=1; next; }
    if (/\@end format/) { printf("%s.fi\n", $manprefix); $example=0; next; }


    if ($example) { s/\\\s*$/\\e\n/ };

    if (!$example && /^\s*$/ && !$doman)
    {
	if ($justdidlp) { next; }
	printf(".PP\n");
	$justdidlp=1;
	next;
    }

    if (/^\@/) { next; }

    printf("%s%s", $manprefix, $_);

    if (!$doman) { $justdidlp=0; }
}

# Extensions to legacy man macro package. groff loads the man macro file
# after the call of TH, so these definitions must likewise follow that
# call of TH if they are overwrite any groff extensions with the same
# names that might be added in the future.

sub add_extensions
{
    # ensure that ASCII circumflex U+005E (^) is not remapped with groff
    printf(".\\\"\n");
    printf(".\\\" ensure that ASCII circumflex U+005E (^) is not remapped with groff\n");
    printf(".if \\n(.g .tr ^\\(ha\n");

    # ellipsis: space periods with troff but not with nroff
    printf(".\\\" ellipsis: space periods with troff but not with nroff\n");
    printf(".if n .ds El \\&...\n");
    printf(".if t .ds El \\&.\\ .\\ .\n");

    # constant-width font
    printf(".\\\"\n");
    printf(".\\\" Extensions to man macros\n");
    printf(".\\\"\n");
    printf(".\\\" Constant-width font\n");
    printf(".de CW\n");
    printf(".hy 0\n");
    # just single quotes with nroff
    printf(".if n \\{\\\n");
    printf(".ie \\\\n(.\$>2 \\&\\\\\$1'\\\\\$2'\\\\\$3\n");
    printf(".el \\&'\\\\\$1'\\\\\$2\n");
    printf(".\\}\n");
    # constant-width font with troff
    printf(".if t \\{\\\n");
    printf(".ie \\\\n(.\$>2 \\&\\\\\$1\\f(CW\\\\\$2\\fR\\\\\$3\n");
    printf(".el \\&\\f(CW\\\\\$1\\fR\\\\\$2\n");
    printf(".\\}\n");
    printf(".hy 14\n");
    printf("..\n");

    # constant-width oblique font
    printf(".\\\" Constant-width oblique font\n");
    printf(".de CI\n");
    printf(".hy 0\n");
    # single quotes with nroff
    printf(".if n \\{\\\n");
    printf(".ie \\\\n(.\$>2 \\&\\\\\$1'\\fI\\\\\$2\\fR'\\\\\$3\n");
    printf(".el \\&'\\fI\\\\\$1\\fR'\\\\\$2\n");
    printf(".\\}\n");
    # constant-width oblique font with troff
    printf(".if t \\{\\\n");
    printf(".ie \\\\n(.\$>2 \\&\\\\\$1\\f(CI\\\\\$2\\fR\\\\\$3\n");
    printf(".el \\&\\f(CI\\\\\$1\\fR\\\\\$2\n");
    printf(".\\}\n");
    printf(".hy 14\n");
    printf("..\n");

    # constant-width font with quotes with troff
    printf(".\\\" Constant-width font with quotes\n");
    printf(".de CQ\n");
    printf(".hy 0\n");
    # just single quotes with nroff
    printf(".if n \\{\\\n");
    printf(".ie \\\\n(.\$>2 \\&\\\\\$1'\\\\\$2'\\\\\$3\n");
    printf(".el \\&'\\\\\$1'\\\\\$2\n");
    printf(".\\}\n");
    # constant-width font with troff
    printf(".if t \\{\\\n");
    # quotes passed as literal text encoded as \(fm
    # make it a double quote because groff converts ` and ' to opening and
    # closing quotes
    printf(".ie \\\\n(.\$>2 \\&\\\\\$1`\\f(CW\\\\\$2\\fR'\\\\\$3\n");
    printf(".el \\&`\\f(CW\\\\\$1\\fR'\\\\\$2\n");
    printf(".\\}\n");
    printf(".hy 14\n");
    printf("..\n");

    # Display Start--indent, no fill
    printf(".\\\" Display start\n");
    printf(".de DS\n");
    printf(".hy 0\n");
    printf(".if t .in +4n\n");
    printf(".if n .in +3n\n");
    printf(".nf\n");
    printf("..\n");

    # Display End
    printf(".\\\" Display end\n");
    printf(".de DE\n");
    printf(".fi\n");
    printf(".in\n");
    printf(".hy 14\n");
    printf("..\n");

    # Example Start--like display, but with font CW
    printf(".\\\" Example start\n");
    printf(".de ES\n");
    # call before size or font change to get consistent indent
    printf(".DS\n");
    # CW font with troff; optionally reduce size
    printf(".if t \\{\\\n");
    printf(".if '\\\\\$1'S' \\{\\\n");
    printf(".nr Ex 1\n");
    printf(".ps -1\n");
    printf(".\\}\n");
    printf(".el .nr Ex 0\n");
    printf(".nr mE \\\\n(.f\n");
    printf(".ft CW\n");
    printf(".\\}\n");
    printf("..\n");

    # Example End
    printf(".\\\" Example end\n");
    printf(".de EE\n");
    # restore font and size with troff
    printf(".if t \\{\\\n");
    printf(".ft \\\\n(mE\n");
    printf(".if \\\\n(Ex=1 .ps\n");
    printf(".\\}\n");
    printf(".DE\n");
    printf("..\n");
}

# convert texinfo commands to .C[IQW] macros
sub CW_macro
{
    my $line = shift;
    my $from = shift;
    my $to = shift;

    # prepended and appended punctuation
    $line =~ s/(\S+)$from\{([^}]+)}(\S+)/$to $1 "$2" $3/g;
    # prepended punctuation
    $line =~ s/(\S+)$from\{([^}]+)}/$to $1 "$2" ""/g;
    # appended punctuation
    $line =~ s/$from\{([^}]+)}(\S+)/$to "$1" $2/g;
    # just the argument
    $line =~ s/$from\{([^}]+)}/$to "$1"/g;

    return $line;
}

# convert all spaces within @w{...} to unbreakable
sub no_break_word
{
    my $line = shift;
    my $pattern = (shift) . "\{";
    my $len = length($pattern);
    my $ndx = -1;
    my $bracelevel = 0;
    my $char;

    while (($ndx = index($line, $pattern, $ndx)) > -1) {
	# get rid of the @ command and opening brace
	substr($line, $ndx, $len, '');
	$bracelevel = 1;
	while ($bracelevel > 0) {
	    $char = substr($line, $ndx, 1);
	    # end of line and braces not closed
	    if ($char eq "") {
		last;
	    }
	    elsif ($char eq '{') {
		$bracelevel++;
	    }
	    elsif ($char eq '}') {
		$bracelevel--;
	    }
	    # make spaces nonbreaking
	    if ($char eq ' ') {
		substr($line, $ndx++, 1, '\ ');
		$ndx++;
		# assume multiple spaces are not wanted
		while (substr($line, $ndx, 1) eq ' ') {
		    substr($line, $ndx, 1, '');
		}
	    }
	    $ndx++;
	}
	# get rid of the closing brace for the @ command. This should
	# always be true unless there's an internal brace mismatch ...
	if (substr($line, $ndx - 1, 1) eq '}' ) {
	    substr($line, $ndx - 1, 1, '');
	}
	else {
	    die "Missing closing '}'";
	}
    }

    return $line;
}


