/* BEGIN software license
 *
 * MsXpertSuite - mass spectrometry software suite
 * -----------------------------------------------
 * Copyright(C) 2009,...,2018 Filippo Rusconi
 *
 * http://www.msxpertsuite.org
 *
 * This file is part of the MsXpertSuite project.
 *
 * The MsXpertSuite project is the successor of the massXpert project. This
 * project now includes various independent modules:
 *
 * - massXpert, model polymer chemistries and simulate mass spectrometric data;
 * - mineXpert, a powerful TIC chromatogram/mass spectrum viewer/miner;
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 * END software license
 */


/////////////////////// Qt includes
#include <QObject>
#include <QFile>


/////////////////////// Local includes
#include "MsXpS/libXpertMassCore/MonomerDictionary.hpp"
#include "MsXpS/libXpertMassCore/Sequence.hpp"

namespace MsXpS
{
namespace libXpertMassCore
{

/*!
\class MsXpS::libXpertMassCore::MonomerDictionary
\inmodule libXpertMassCore
\ingroup XpertMassCoreUtilities
\inheaderfile MonomerDictionary.hpp

\brief The MonomerDictionary class provides a Monomer code dictionary allowing
the user to automatically translate Monomer codes from x-letter codes to
y-letter codes. For example, a monomer dictionary file can define how to
translate 3-letter monomer codes to 1-letter codes. This is typically useful
when working with Protein Data Bank (PDB) files.

The format of the dictionary file is the following:

\code
# Converts from code on the left of '>' to code on the right.
# Number of letters allowed in each code is
# described with syntax 3>1 and that line should be the first
# non-comment line in this file.
3>1
ALA>A
CYS>C
ASP>D
\endcode

There might be more than one \e{section} in the file, with, for example, 3>1
translations and then 1>3 translations.
*/

/*!
\variable MsXpS::libXpertMassCore::MonomerDictionary::m_filePath

\brief Path to the file documenting the translations.
*/

/*!
\variable MsXpS::libXpertMassCore::MonomerDictionary::m_dictionaryHash

\brief The hash that documents the translations.
 */

/*!
\variable MsXpS::libXpertMassCore::MonomerDictionary::m_dictionaryLoaded

\brief Indicates if the dictionary file has been loaded already.
 */

/*!
\variable MsXpS::libXpertMassCore::MonomerDictionary::m_inputChainStringList

\brief The list of sequences to be converted.
 */

/*!
\variable MsXpS::libXpertMassCore::MonomerDictionary::m_inputCodeLength

\brief The count of letters in the input Monomer code.

In a dictionary file that has a section

\code
3>1
\endcode

this value would be \e 3.
 */

/*!
\variable MsXpS::libXpertMassCore::MonomerDictionary::m_outputCodeLength

\brief The count of letters in the output Monomer code.

In a dictionary file that has a section

\code
3>1
\endcode

this value would be \e 1.
 */

/*!
\brief Constructs a MonomerDictionary instance.

\list
\li \a file_path: path to the file holding the Monomer dictionary.
\li \a input_chain_string_list: the sequences that are to be converted.
\li \a input_code_length: number of letters of each Monomer code in the
input sequences.
\li \a output_code_length: number of letters of each Monomer code in the
output sequences.
\endlist

The arguments are only stored in the corresponding members; the dictionary
file is not read by the constructor.
*/
MonomerDictionary::MonomerDictionary(
  QString file_path,
  const QStringList &input_chain_string_list,
  int input_code_length,
  int output_code_length)
  : m_filePath(file_path),
    m_inputChainStringList(input_chain_string_list),
    m_inputCodeLength(input_code_length),
    m_outputCodeLength(output_code_length)
{
  // Nothing more to do: all the work happens in the initializer list.
}

/*!
\brief Destroys this MonomerDictionary instance.

No explicit cleanup is performed here.
*/
MonomerDictionary::~MonomerDictionary() = default;

/*!
\brief Stores \a file_path as the path to the Monomer dictionary file.

Only the path is recorded; the file is not opened by this function.
*/
void
MonomerDictionary::setFilePath(QString &file_path)
{
  this->m_filePath = file_path;
}

/*!
\brief Sets the list of input sequences to \a input_chain_string_list.
*/
void
MonomerDictionary::setInputChainStringList(
  const QStringList &input_chain_string_list)
{
  m_inputChainStringList = input_chain_string_list;
}

/*!
\brief Set the count of letters in the input Monomer codes to \a code_length.
*/
void
MonomerDictionary::setInputCodeLength(int code_length)
{
  m_inputCodeLength = code_length;
}

/*!
\brief Set the count of letters in the output Monomer codes to \a code_length.
*/
void
MonomerDictionary::setOutputCodeLength(int code_length)
{
  m_outputCodeLength = code_length;
}

/*!
\brief Return true if the \a line parsed is in the form X>Y, that is, that it specifies the kind of Monomer code translation.
*/
bool
MonomerDictionary::isLineProperSectionDivider(const QString &line)
{
  // Section dividers in the monomer dictionary file format are
  // lines containing the following syntax: X>Y, that is for example
  // 3>1. This means that the following translation rules (like
  // ILE>I) should convert 3-letter codes into 1-letter codes.

  // However, this line should only be considered proper if X is
  // actually the value of m_inputCodeLength and Y the value of
  // m_outputCodeLength.

  //     qDebug() << __FILE__ << __LINE__
  // 	     << "Checking if line is proper section divider :" << line;

  if(line.contains(QRegularExpression("[0-9]+>[0-9]+")))
    {
      // We are opening a new section, get the input/output code
      // lengths and if they math what we expect, then set the
      // current stream position and call the section parser.

      int greaterThanIndex = line.indexOf('>');

      QString codeLengthString = line.left(greaterThanIndex);

      // 	qDebug() << __FILE__ << __LINE__
      // 		 << "Left codeLengthString:" << codeLengthString
      // 		 << "m_inputCodeLength:" << m_inputCodeLength;

      bool ok        = false;
      int codeLength = codeLengthString.toInt(&ok, 10);

      if(!codeLength && !ok)
        {
          qDebug() << __FILE__ << __LINE__ << "Monomer dictionary"
                   << "Failed to parse file " << m_filePath << "at line "
                   << line;

          return false;
        }

      if(codeLength != m_inputCodeLength)
        {
          return false;
        }

      codeLengthString = line.mid(greaterThanIndex + 1, -1);

      // 	qDebug() << __FILE__ << __LINE__
      // 		 << "Right codeLengthString:" << codeLengthString
      // 		 << "m_outputCodeLength:" << m_outputCodeLength;

      ok         = false;
      codeLength = codeLengthString.toInt(&ok, 10);

      if(!codeLength && !ok)
        {
          qDebug() << __FILE__ << __LINE__ << "Monomer dictionary"
                   << "Failed to parse file " << m_filePath << "at line "
                   << line;

          return false;
        }

      if(codeLength != m_outputCodeLength)
        {
          return false;
        }

      // At this point, it seems we are in the proper
      // section.

      return true;
    }

  // If we are here, that means that the section is not for us.

  //     qDebug() << __FILE__ << __LINE__
  // 	     << "Line is no proper section divider.";

  return false;
}

/*!
\brief Reads and discards lines from \a stream until the end of the current
section.

The function returns as soon as a line containing the \c{{END}} marker has
been read, or when the end of \a stream is reached, whichever comes first.
*/
void
MonomerDictionary::skipSection(QTextStream *stream)
{
  Q_ASSERT(stream);

  const qint64 lineLength = 1024;

  while(!stream->atEnd())
    {
      // Read exactly one line per iteration so that every line is checked
      // for the marker; reading a second line here would let an {END}
      // marker slip through unnoticed.
      const QString line = stream->readLine(lineLength);

      if(line.contains("{END}"))
        return;
    }
}

/*!
\brief Parses the Monomer dictionary file section in \a stream and fills in
the \l m_dictionaryHash with the translation pairs.

Lines are read from the current position of \a stream until a line containing
\c{{END}} is found or the end of the stream is reached. Each translation line
must have the form \c{INPUT>OUTPUT}, with INPUT being m_inputCodeLength
characters long and OUTPUT being m_outputCodeLength characters long. Empty
lines and lines starting with '#' are ignored.

If a translation line is malformed or has codes of the wrong length, the hash
is emptied and parsing stops.

Sets m_dictionaryLoaded to true if the hash is not empty after parsing, to
false otherwise.

Returns the number of items in the hash after parsing.
*/
int
MonomerDictionary::parseSection(QTextStream *stream)
{
  Q_ASSERT(stream);

  const qint64 lineLength = 1024;

  // Iterate in the file using the stream and for each translation line
  // create an item to insert into the dictionary hash.

  while(!stream->atEnd())
    {
      // Surrounding white space is not part of the codes.
      const QString line = stream->readLine(lineLength).trimmed();

      // We might encounter the end of the section, that is a line
      // having {END} as its content.
      if(line.contains("{END}"))
        break;

      // Blank lines and comment lines carry no translation; they must not
      // invalidate the translations collected so far.
      if(line.isEmpty() || line.startsWith('#'))
        continue;

      const QStringList stringList = line.split('>');

      // A translation line has exactly one '>' separator. Without this
      // check, a line with no '>' at all would have identical first and
      // last items and be accepted as a code translating to itself whenever
      // the input and output code lengths are equal.
      if(stringList.size() != 2)
        {
          qDebug() << __FILE__ << __LINE__ << QObject::tr("Monomer dictionary:")
                   << QObject::tr("Failed to load dictionary.")
                   << QObject::tr("Malformed translation line:") << line;

          // We have to empty the hash
          m_dictionaryHash.clear();

          break;
        }

      const QString inputCode  = stringList.first();
      const QString outputCode = stringList.last();

      // Check that the monomer codes have the proper length.

      if(inputCode.length() != m_inputCodeLength ||
         outputCode.length() != m_outputCodeLength)
        {
          qDebug() << __FILE__ << __LINE__ << QObject::tr("Monomer dictionary:")
                   << QObject::tr("Failed to load dictionary.")
                   << QObject::tr("Monomer code lengths do not match:")
                   << QObject::tr("inputCode:") << inputCode
                   << QObject::tr("outputCode:") << outputCode;

          // We have to empty the hash
          m_dictionaryHash.clear();

          break;
        }

      m_dictionaryHash.insert(inputCode, outputCode);
    }

  // At this point the parsing is finished, either because we
  // encountered the {END} section-ending delimiter, because we
  // reached the end of file, or because an invalid line was found.

  const int hashSize = m_dictionaryHash.size();

  if(hashSize)
    m_dictionaryLoaded = true;
  else
    {
      qDebug() << __FILE__ << __LINE__ << QObject::tr("Monomer dictionary:")
               << QObject::tr("Failed to load dictionary.");

      m_dictionaryLoaded = false;
    }

  return hashSize;
}

/*!
\brief Returns true if the Monomer dictionary file could be loaded successfully, false otherwise.
*/
bool
MonomerDictionary::loadDictionary()
{
  // Load the file and for each line deconstruct the item into two
  // QString objects that are used to make a QHash entry in
  // QHash<QString, QString> m_dictionaryHash.
  bool success      = true;
  qint64 lineLength = 1024;
  QString line;

  QFile file(m_filePath);

  if(!file.open(QIODevice::ReadOnly))
    {

      m_dictionaryLoaded = false;

      qDebug() << __FILE__ << __LINE__ << "Monomer dictionary:"
               << "Failed to open file" << m_filePath << "for writing.";

      return false;
    }

  if(m_inputCodeLength < 1 || m_outputCodeLength < 1)
    {
      qDebug() << __FILE__ << __LINE__ << "Monomer dictionary:"
               << "Failed to parse file " << m_filePath
               << "Please, set the m_inputCodeLength and "
                  "m_ouputCodeLength variables first.";

      return false;
    }

  QTextStream *stream = new QTextStream(&file);
  stream->setEncoding(QStringConverter::Utf8);

  while(!stream->atEnd())
    {
      line = stream->readLine(lineLength);

      // 	  qDebug() << __FILE__ << __LINE__
      // 		   << "line: " << line;

      // Remove spaces from start and end of line.
      line = line.simplified();

      if(line.startsWith('#') || line.isEmpty())
        {
          line = stream->readLine(lineLength);
          continue;
        }

      // There might be any number of sections in the file, all
      // delimited with a X>Y directive, indicating how many
      // characters are allowed for the input code and for the
      // output code.

      if(!isLineProperSectionDivider(line))
        {
          // 	    qDebug() << __FILE__ << __LINE__
          // 		     << "skipping line:" << line;

          line = stream->readLine(lineLength);
          continue;
        }
      else
        {
          // 	    qDebug() << __FILE__ << __LINE__
          // 		     << "parsing section: " << line;

          if(parseSection(stream) < 1)
            {
              qDebug() << __FILE__ << __LINE__ << "Monomer dictionary:"
                       << "Failed to parse file " << m_filePath;

              success = false;
              break;
            }
          else
            {
              // We successfully parsed the section. Our work is done.

              success = true;
              break;
            }
        }
    }

  delete stream;

  return success;
}

/*!
\brief Performs the translation from the input Monomer code form to the
output Monomer code form on all the strings contained in
\a chain_string_list.

If \a chain_string_list is not empty, it replaces the list stored in
m_inputChainStringList. If it is empty, the list already stored in
m_inputChainStringList is translated instead.

Each input string is a white space-separated list of Monomer codes. Each code
is looked up in m_dictionaryHash and the translated codes of one string are
concatenated, without separator, into one output string.

Returns a newly allocated list holding one translated string per input
string; the list is empty if there was no sequence to translate. The list is
allocated with \c new and this function keeps no pointer to it, so the caller
is in charge of deleting it. Returns nullptr if any Monomer code could not be
found in the dictionary.
*/
QStringList *
MonomerDictionary::translate(const QStringList &chain_string_list)
{
  if(!chain_string_list.isEmpty())
    m_inputChainStringList = chain_string_list;

  // Always iterate in the member list: when chain_string_list is empty the
  // member list might still hold sequences set earlier, and indexing the
  // (empty) argument in that case would be an out-of-bounds access.
  const QStringList &inputChains = m_inputChainStringList;

  // Built once, used for every chain.
  const QRegularExpression whiteSpaceRegExp("\\s+");

  // If there is nothing to do this list stays empty, so that caller
  // knows nothing is actually wrong, only there is no sequence to
  // translate.
  QStringList translatedChains;

  for(const QString &chain : inputChains)
    {
      const QStringList codeList =
        chain.split(whiteSpaceRegExp, Qt::SkipEmptyParts);

      QStringList translatedCodes;
      translatedCodes.reserve(codeList.size());

      for(const QString &code : codeList)
        {
          const QHash<QString, QString>::const_iterator hashIter =
            m_dictionaryHash.constFind(code);

          if(hashIter == m_dictionaryHash.constEnd())
            {
              // Return a null pointer so that caller knows something
              // has gone wrong. Nothing was allocated yet, so there is
              // nothing to free.

              qDebug() << __FILE__ << __LINE__ << "Monomer dictionary:"
                       << "Failed to convert monomer code " << code;

              return nullptr;
            }

          translatedCodes.append(hashIter.value());
        }

      // At this point the sequence codes have been translated. Join all
      // the items into one single string.

      translatedChains.append(translatedCodes.join(QString("")));
    }

  return new QStringList(translatedChains);
}


} // namespace libXpertMassCore
} // namespace MsXpS
