// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009 Mark Borgerding mark a borgerding net
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_FFT_MODULE_H
#define EIGEN_FFT_MODULE_H

#include <complex>
#include <vector>
#include <map>
#include "../../Eigen/Core"

/**
 * \defgroup FFT_Module Fast Fourier Transform module
 *
 * \code
 * #include <unsupported/Eigen/FFT>
 * \endcode
 *
 * This module provides Fast Fourier transformation, with a configurable backend
 * implementation.
 *
 * The default implementation is based on kissfft. It is a small, free, and
 * reasonably efficient default.
 *
 * There are currently four implementation backend:
 *
 * - kissfft(https://github.com/mborgerding/kissfft) : Simple and not so fast, BSD-3-Clause.
 *   It is a mixed-radix Fast Fourier Transform based up on the principle, "Keep It Simple, Stupid."
 *   Notice that:kissfft fails to handle "atypically-sized" inputs(i.e., sizes with large factors),a workaround is using
 * fftw or pocketfft.
 * - fftw (http://www.fftw.org) : faster, GPL -- incompatible with Eigen in LGPL form, bigger code size.
 * - MKL (https://www.intel.com/content/www/us/en/developer/tools/oneapi/onemkl-download.html) : fastest, free -- may be
 * incompatible with Eigen in GPL form.
 * - pocketfft (https://gitlab.mpcdf.mpg.de/mtr/pocketfft) : faster than kissfft, BSD 3-clause.
 *   It is a heavily modified implementation of FFTPack, with the following advantages:
 *   1.strictly C++11 compliant
 *   2.more accurate twiddle factor computation
 *   3.very fast plan generation
 *   4.worst case complexity for transform sizes with large prime factors is N*log(N), because Bluestein's algorithm is
 * used for these cases
 *
 * \section FFTDesign Design
 *
 * The following design decisions were made concerning scaling and
 * half-spectrum for real FFT.
 *
 * The intent is to facilitate generic programming and ease migrating code
 * from  Matlab/octave.
 * We think the default behavior of Eigen/FFT should favor correctness and
 * generality over speed. Of course, the caller should be able to "opt-out" from this
 * behavior and get the speed increase if they want it.
 *
 * 1) %Scaling:
 * Other libraries (FFTW,IMKL,KISSFFT)  do not perform scaling, so there
 * is a constant gain incurred after the forward&inverse transforms , so
 * IFFT(FFT(x)) = Kx;  this is done to avoid a vector-by-value multiply.
 * The downside is that algorithms that worked correctly in Matlab/octave
 * don't behave the same way once implemented in C++.
 *
 * How Eigen/FFT differs: invertible scaling is performed so IFFT( FFT(x) ) = x.
 *
 * 2) Real FFT half-spectrum
 * Other libraries use only half the frequency spectrum (plus one extra
 * sample for the Nyquist bin) for a real FFT, the other half is the
 * conjugate-symmetric of the first half.  This saves them a copy and some
 * memory.  The downside is the caller needs to have special logic for the
 * number of bins in complex vs real.
 *
 * How Eigen/FFT differs: The full spectrum is returned from the forward
 * transform.  This facilitates generic template programming by obviating
 * separate specializations for real vs complex.  On the inverse
 * transform, only half the spectrum is actually used if the output type is real.
 */

#include "../../Eigen/src/Core/util/DisableStupidWarnings.h"

// IWYU pragma: begin_exports

#ifdef EIGEN_FFTW_DEFAULT
// FFTW: faster, GPL -- incompatible with Eigen in LGPL form, bigger code size
#include <fftw3.h>
#include "src/FFT/ei_fftw_impl.h"
namespace Eigen {
// template <typename T> typedef struct internal::fftw_impl  default_fft_impl; this does not work
template <typename T>
struct default_fft_impl : public internal::fftw_impl<T> {};
}  // namespace Eigen
#elif defined EIGEN_MKL_DEFAULT
// intel Math Kernel Library: fastest, free -- may be incompatible with Eigen in GPL form
#include "src/FFT/ei_imklfft_impl.h"
namespace Eigen {
template <typename T>
struct default_fft_impl : public internal::imklfft::imklfft_impl<T> {};
}  // namespace Eigen
#elif defined EIGEN_POCKETFFT_DEFAULT
// internal::pocketfft_impl: a heavily modified implementation of FFTPack, with many advantages.
#include <pocketfft_hdronly.h>
#include "src/FFT/ei_pocketfft_impl.h"
namespace Eigen {
template <typename T>
struct default_fft_impl : public internal::pocketfft_impl<T> {};
}  // namespace Eigen
#else
// internal::kissfft_impl:  small, free, reasonably efficient default, derived from kissfft
#include "src/FFT/ei_kissfft_impl.h"
namespace Eigen {
template <typename T>
struct default_fft_impl : public internal::kissfft_impl<T> {};
}  // namespace Eigen
#endif

// IWYU pragma: end_exports

namespace Eigen {

//
template <typename T_SrcMat, typename T_FftIfc>
struct fft_fwd_proxy;
template <typename T_SrcMat, typename T_FftIfc>
struct fft_inv_proxy;

namespace internal {
template <typename T_SrcMat, typename T_FftIfc>
struct traits<fft_fwd_proxy<T_SrcMat, T_FftIfc> > {
  typedef typename T_SrcMat::PlainObject ReturnType;
};
template <typename T_SrcMat, typename T_FftIfc>
struct traits<fft_inv_proxy<T_SrcMat, T_FftIfc> > {
  typedef typename T_SrcMat::PlainObject ReturnType;
};
}  // namespace internal

template <typename T_SrcMat, typename T_FftIfc>
struct fft_fwd_proxy : public ReturnByValue<fft_fwd_proxy<T_SrcMat, T_FftIfc> > {
  typedef DenseIndex Index;

  fft_fwd_proxy(const T_SrcMat& src, T_FftIfc& fft, Index nfft) : m_src(src), m_ifc(fft), m_nfft(nfft) {}

  template <typename T_DestMat>
  void evalTo(T_DestMat& dst) const;

  Index rows() const { return m_src.rows(); }
  Index cols() const { return m_src.cols(); }

 protected:
  const T_SrcMat& m_src;
  T_FftIfc& m_ifc;
  Index m_nfft;
};

template <typename T_SrcMat, typename T_FftIfc>
struct fft_inv_proxy : public ReturnByValue<fft_inv_proxy<T_SrcMat, T_FftIfc> > {
  typedef DenseIndex Index;

  fft_inv_proxy(const T_SrcMat& src, T_FftIfc& fft, Index nfft) : m_src(src), m_ifc(fft), m_nfft(nfft) {}

  template <typename T_DestMat>
  void evalTo(T_DestMat& dst) const;

  Index rows() const { return m_src.rows(); }
  Index cols() const { return m_src.cols(); }

 protected:
  const T_SrcMat& m_src;
  T_FftIfc& m_ifc;
  Index m_nfft;
};

template <typename T_Scalar, typename T_Impl = default_fft_impl<T_Scalar> >
class FFT {
 public:
  typedef T_Impl impl_type;
  typedef DenseIndex Index;
  typedef typename impl_type::Scalar Scalar;
  typedef typename impl_type::Complex Complex;

  using Flag = int;
  static constexpr Flag Default = 0;
  static constexpr Flag Unscaled = 1;
  static constexpr Flag HalfSpectrum = 2;
  static constexpr Flag Speedy = 32767;

  FFT(const impl_type& impl = impl_type(), Flag flags = Default) : m_impl(impl), m_flag(flags) {
    eigen_assert((flags == Default || flags == Unscaled || flags == HalfSpectrum || flags == Speedy) &&
                 "invalid flags argument");
  }

  inline bool HasFlag(Flag f) const { return (m_flag & (int)f) == f; }

  inline void SetFlag(Flag f) { m_flag |= (int)f; }

  inline void ClearFlag(Flag f) { m_flag &= (~(int)f); }

  inline void fwd(Complex* dst, const Scalar* src, Index nfft) {
    m_impl.fwd(dst, src, static_cast<int>(nfft));
    if (HasFlag(HalfSpectrum) == false) ReflectSpectrum(dst, nfft);
  }

  inline void fwd(Complex* dst, const Complex* src, Index nfft) { m_impl.fwd(dst, src, static_cast<int>(nfft)); }

#if defined EIGEN_FFTW_DEFAULT || defined EIGEN_POCKETFFT_DEFAULT || defined EIGEN_MKL_DEFAULT
  inline void fwd2(Complex* dst, const Complex* src, int n0, int n1) { m_impl.fwd2(dst, src, n0, n1); }
#endif

  template <typename Input_>
  inline void fwd(std::vector<Complex>& dst, const std::vector<Input_>& src) {
    if (NumTraits<Input_>::IsComplex == 0 && HasFlag(HalfSpectrum))
      dst.resize((src.size() >> 1) + 1);  // half the bins + Nyquist bin
    else
      dst.resize(src.size());
    fwd(&dst[0], &src[0], src.size());
  }

  template <typename InputDerived, typename ComplexDerived>
  inline void fwd(MatrixBase<ComplexDerived>& dst, const MatrixBase<InputDerived>& src, Index nfft = -1) {
    typedef typename ComplexDerived::Scalar dst_type;
    typedef typename InputDerived::Scalar src_type;
    EIGEN_STATIC_ASSERT_VECTOR_ONLY(InputDerived)
    EIGEN_STATIC_ASSERT_VECTOR_ONLY(ComplexDerived)
    EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(ComplexDerived, InputDerived)  // size at compile-time
    EIGEN_STATIC_ASSERT(
        (internal::is_same<dst_type, Complex>::value),
        YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
    EIGEN_STATIC_ASSERT(int(InputDerived::Flags) & int(ComplexDerived::Flags) & DirectAccessBit,
                        THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES)

    if (nfft < 1) nfft = src.size();

    Index dst_size = nfft;
    if (NumTraits<src_type>::IsComplex == 0 && HasFlag(HalfSpectrum)) {
      dst_size = (nfft >> 1) + 1;
    }
    dst.derived().resize(dst_size);

    if (src.innerStride() != 1 || src.size() < nfft) {
      Matrix<src_type, 1, Dynamic> tmp;
      if (src.size() < nfft) {
        tmp.setZero(nfft);
        tmp.block(0, 0, src.size(), 1) = src;
      } else {
        tmp = src;
      }
      if (dst.innerStride() != 1) {
        Matrix<dst_type, 1, Dynamic> out(1, dst_size);
        fwd(&out[0], &tmp[0], nfft);
        dst.derived() = out;
      } else {
        fwd(&dst[0], &tmp[0], nfft);
      }
    } else {
      if (dst.innerStride() != 1) {
        Matrix<dst_type, 1, Dynamic> out(1, dst_size);
        fwd(&out[0], &src[0], nfft);
        dst.derived() = out;
      } else {
        fwd(&dst[0], &src[0], nfft);
      }
    }
  }

  template <typename InputDerived>
  inline fft_fwd_proxy<MatrixBase<InputDerived>, FFT<T_Scalar, T_Impl> > fwd(const MatrixBase<InputDerived>& src,
                                                                             Index nfft = -1) {
    return fft_fwd_proxy<MatrixBase<InputDerived>, FFT<T_Scalar, T_Impl> >(src, *this, nfft);
  }

  template <typename InputDerived>
  inline fft_inv_proxy<MatrixBase<InputDerived>, FFT<T_Scalar, T_Impl> > inv(const MatrixBase<InputDerived>& src,
                                                                             Index nfft = -1) {
    return fft_inv_proxy<MatrixBase<InputDerived>, FFT<T_Scalar, T_Impl> >(src, *this, nfft);
  }

  inline void inv(Complex* dst, const Complex* src, Index nfft) {
    m_impl.inv(dst, src, static_cast<int>(nfft));
    if (HasFlag(Unscaled) == false) scale(dst, Scalar(1. / nfft), nfft);  // scale the time series
  }

  inline void inv(Scalar* dst, const Complex* src, Index nfft) {
    m_impl.inv(dst, src, static_cast<int>(nfft));
    if (HasFlag(Unscaled) == false) scale(dst, Scalar(1. / nfft), nfft);  // scale the time series
  }

  template <typename OutputDerived, typename ComplexDerived>
  inline void inv(MatrixBase<OutputDerived>& dst, const MatrixBase<ComplexDerived>& src, Index nfft = -1) {
    typedef typename ComplexDerived::Scalar src_type;
    typedef typename ComplexDerived::RealScalar real_type;
    typedef typename OutputDerived::Scalar dst_type;
    const bool realfft = (NumTraits<dst_type>::IsComplex == 0);
    EIGEN_STATIC_ASSERT_VECTOR_ONLY(OutputDerived)
    EIGEN_STATIC_ASSERT_VECTOR_ONLY(ComplexDerived)
    EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(ComplexDerived, OutputDerived)  // size at compile-time
    EIGEN_STATIC_ASSERT(
        (internal::is_same<src_type, Complex>::value),
        YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
    EIGEN_STATIC_ASSERT(int(OutputDerived::Flags) & int(ComplexDerived::Flags) & DirectAccessBit,
                        THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES)

    if (nfft < 1) {  // automatic FFT size determination
      if (realfft && HasFlag(HalfSpectrum))
        nfft = 2 * (src.size() - 1);  // assume even fft size
      else
        nfft = src.size();
    }
    dst.derived().resize(nfft);

    // check for nfft that does not fit the input data size
    Index resize_input = (realfft && HasFlag(HalfSpectrum)) ? ((nfft / 2 + 1) - src.size()) : (nfft - src.size());

    if (src.innerStride() != 1 || resize_input) {
      // if the vector is strided, then we need to copy it to a packed temporary
      Matrix<src_type, 1, Dynamic> tmp;
      if (resize_input) {
        size_t ncopy = (std::min)(src.size(), src.size() + resize_input);
        tmp.setZero(src.size() + resize_input);
        if (realfft && HasFlag(HalfSpectrum)) {
          // pad at the Nyquist bin
          tmp.head(ncopy) = src.head(ncopy);
          tmp(ncopy - 1) = real(tmp(ncopy - 1));  // enforce real-only Nyquist bin
        } else {
          size_t nhead, ntail;
          nhead = 1 + ncopy / 2 - 1;  // range  [0:pi)
          ntail = ncopy / 2 - 1;      // range (-pi:0)
          tmp.head(nhead) = src.head(nhead);
          tmp.tail(ntail) = src.tail(ntail);
          if (resize_input <
              0) {  // shrinking -- create the Nyquist bin as the average of the two bins that fold into it
            tmp(nhead) = (src(nfft / 2) + src(src.size() - nfft / 2)) * real_type(.5);
          } else {  // expanding -- split the old Nyquist bin into two halves
            tmp(nhead) = src(nhead) * real_type(.5);
            tmp(tmp.size() - nhead) = tmp(nhead);
          }
        }
      } else {
        tmp = src;
      }

      if (dst.innerStride() != 1) {
        Matrix<dst_type, 1, Dynamic> out(1, nfft);
        inv(&out[0], &tmp[0], nfft);
        dst.derived() = out;
      } else {
        inv(&dst[0], &tmp[0], nfft);
      }
    } else {
      if (dst.innerStride() != 1) {
        Matrix<dst_type, 1, Dynamic> out(1, nfft);
        inv(&out[0], &src[0], nfft);
        dst.derived() = out;
      } else {
        inv(&dst[0], &src[0], nfft);
      }
    }
  }

  template <typename Output_>
  inline void inv(std::vector<Output_>& dst, const std::vector<Complex>& src, Index nfft = -1) {
    if (nfft < 1)
      nfft = (NumTraits<Output_>::IsComplex == 0 && HasFlag(HalfSpectrum)) ? 2 * (src.size() - 1) : src.size();
    dst.resize(nfft);
    inv(&dst[0], &src[0], nfft);
  }

#if defined EIGEN_FFTW_DEFAULT || defined EIGEN_POCKETFFT_DEFAULT || defined EIGEN_MKL_DEFAULT
  inline void inv2(Complex* dst, const Complex* src, int n0, int n1) {
    m_impl.inv2(dst, src, n0, n1);
    if (HasFlag(Unscaled) == false) scale(dst, 1. / (n0 * n1), n0 * n1);
  }
#endif

  inline impl_type& impl() { return m_impl; }

 private:
  template <typename T_Data>
  inline void scale(T_Data* x, Scalar s, Index nx) {
#if 1
    for (int k = 0; k < nx; ++k) *x++ *= s;
#else
    if (((ptrdiff_t)x) & 15)
      Matrix<T_Data, Dynamic, 1>::Map(x, nx) *= s;
    else
      Matrix<T_Data, Dynamic, 1>::MapAligned(x, nx) *= s;
      // Matrix<T_Data, Dynamic, Dynamic>::Map(x,nx) * s;
#endif
  }

  inline void ReflectSpectrum(Complex* freq, Index nfft) {
    // create the implicit right-half spectrum (conjugate-mirror of the left-half)
    Index nhbins = (nfft >> 1) + 1;
    for (Index k = nhbins; k < nfft; ++k) freq[k] = conj(freq[nfft - k]);
  }

  impl_type m_impl;
  int m_flag;
};

template <typename T_SrcMat, typename T_FftIfc>
template <typename T_DestMat>
inline void fft_fwd_proxy<T_SrcMat, T_FftIfc>::evalTo(T_DestMat& dst) const {
  m_ifc.fwd(dst, m_src, m_nfft);
}

template <typename T_SrcMat, typename T_FftIfc>
template <typename T_DestMat>
inline void fft_inv_proxy<T_SrcMat, T_FftIfc>::evalTo(T_DestMat& dst) const {
  m_ifc.inv(dst, m_src, m_nfft);
}

}  // namespace Eigen

#include "../../Eigen/src/Core/util/ReenableStupidWarnings.h"

#endif
