#!/usr/bin/env python3
#####
# findsym2.py
# Translated from findsym.pl
#
# This script scans through multiple C++ source files to find all occurrences
# of the SYM(symbol_name) macro. It collects these unique symbol names and
# generates a header file containing declarations for these symbols. The purpose
# is to ensure that each symbol is translated only once when creating the
# symbol table.
#####

import re
import sys
import textwrap

# Retrieve the first command-line argument, which should be the name of the
# output header file.
# Matches SYM(name) where name is a C-style identifier; the single capture
# group is the bare symbol name.
_SYM_PATTERN = re.compile(r"SYM\(([_A-Za-z][_A-Za-z0-9]*)\)")

# Fixed preamble written at the top of every generated header.
_HEADER_PREAMBLE = textwrap.dedent(
    """\
    /*****
     * This file is automatically generated by findsym.py
     * Changes will be overwritten.
     *****/

    // If the ADDSYMBOL macro is not already defined, define it with the default
    // purpose of referring to an external pre-translated symbol, such that
    // SYM(name) also refers to that symbol.
    #ifndef ADDSYMBOL
        #define ADDSYMBOL(name) extern sym::symbol PRETRANSLATED_SYMBOL_##name
        #define SYM(name) PRETRANSLATED_SYMBOL_##name
    #endif

    """
)


def _collect_symbols(innames):
    """Return the set of names used as SYM(name) in the given source files.

    Args:
        innames: Iterable of paths to the source files to scan.

    Returns:
        A set of unique symbol-name strings.

    Raises:
        OSError: If one of the input files cannot be opened or read.
    """
    symbols = set()
    for inname in innames:
        # The pattern only matches ASCII identifiers, so replacing undecodable
        # bytes cannot alter the result; it just keeps a stray non-UTF-8 byte
        # (e.g. in a comment) from aborting the whole run.
        with open(inname, "r", encoding="utf-8", errors="replace") as infile:
            for line in infile:
                symbols.update(_SYM_PATTERN.findall(line))
    return symbols


def _write_header(outname, symbols):
    """Write the preamble and one ADDSYMBOL line per symbol, in sorted order.

    Args:
        outname: Path of the header file to create or overwrite.
        symbols: Iterable of symbol-name strings.

    Raises:
        OSError: If the output file cannot be opened or written.
    """
    with open(outname, "w", encoding="utf-8") as header:
        header.write(_HEADER_PREAMBLE)
        header.writelines(
            f"ADDSYMBOL({symbol});\n" for symbol in sorted(symbols)
        )


def main(argv=None):
    """Generate the symbol header described by the command line.

    Args:
        argv: Full argument vector (program name, output header path, then
            zero or more input source paths). Defaults to ``sys.argv``.

    Returns:
        The process exit status: 0 on success, 1 if the output file name is
        missing from the arguments.

    Raises:
        OSError: If an input file cannot be read or the output file cannot be
            written.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) < 2:
        print(
            "usage: ./findsym2.py out_symbols.h file1.cc file2.cc ...",
            file=sys.stderr,
        )
        return 1

    _, outname, *innames = argv

    # Scan every input before touching the output, so that a missing or
    # unreadable source file does not leave a truncated header behind.
    symbols = _collect_symbols(innames)
    _write_header(outname, symbols)
    return 0


if __name__ == "__main__":
    sys.exit(main())
