kfr

Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)
Log | Files | Refs | README

dft.cpp (5697B)


      1 /** @addtogroup dft
      2  *  @{
      3  */
      4 /*
      5   Copyright (C) 2016-2023 Dan Cazarin (https://www.kfrlib.com)
      6   This file is part of KFR
      7 
      8   KFR is free software: you can redistribute it and/or modify
      9   it under the terms of the GNU General Public License as published by
     10   the Free Software Foundation, either version 2 of the License, or
     11   (at your option) any later version.
     12 
     13   KFR is distributed in the hope that it will be useful,
     14   but WITHOUT ANY WARRANTY; without even the implied warranty of
     15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     16   GNU General Public License for more details.
     17 
     18   You should have received a copy of the GNU General Public License
     19   along with KFR.
     20 
     21   If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
     22   Buying a commercial license is mandatory as soon as you develop commercial activities without
     23   disclosing the source code of your own applications.
     24   See https://www.kfrlib.com for details.
     25  */
     26 
     27 #include <kfr/dft/fft.hpp>
     28 #include <kfr/multiarch.h>
     29 
     30 namespace kfr
     31 {
     32 
     33 template <typename T>
     34 void dft_plan<T>::dump() const
     35 {
     36     for (const std::unique_ptr<dft_stage<T>>& s : all_stages)
     37     {
     38         s->dump();
     39     }
     40 }
     41 
     42 template <typename T>
     43 void dft_plan<T>::calc_disposition()
     44 {
     45     for (bool inverse : { false, true })
     46     {
     47         auto&& stages = this->stages[inverse];
     48         bitset can_inplace_per_stage;
     49         for (int i = 0; i < stages.size(); ++i)
     50         {
     51             can_inplace_per_stage[i] = stages[i]->can_inplace;
     52         }
     53 
     54         disposition_inplace[static_cast<int>(inverse)] =
     55             precompute_disposition(stages.size(), can_inplace_per_stage, true);
     56         disposition_outofplace[static_cast<int>(inverse)] =
     57             precompute_disposition(stages.size(), can_inplace_per_stage, false);
     58     }
     59 }
     60 
     61 template <typename T>
     62 typename dft_plan<T>::bitset dft_plan<T>::precompute_disposition(int num_stages, bitset can_inplace_per_stage,
     63                                                                  bool inplace_requested)
     64 {
     65     static bitset even{ 0x5555555555555555ull };
     66     bitset mask = ~bitset() >> (DFT_MAX_STAGES - num_stages);
     67     bitset result;
     68     // disposition indicates where is input for corresponding stage
     69     // first bit : 0 - input,  1 - scratch
     70     // other bits: 0 - output, 1 - scratch
     71 
     72     // build disposition that works always
     73     if (num_stages % 2 == 0)
     74     { // even
     75         result = ~even & mask;
     76     }
     77     else
     78     { // odd
     79         result = even & mask;
     80     }
     81 
     82     int num_inplace = can_inplace_per_stage.count();
     83 
     84 #ifdef KFR_DFT_ELIMINATE_MEMCPY
     85     if (num_inplace > 0 && inplace_requested)
     86     {
     87         if (result.test(0)) // input is in scratch
     88         {
     89             // num_inplace must be odd
     90             if (num_inplace % 2 == 0)
     91                 --num_inplace;
     92         }
     93         else
     94         {
     95             // num_inplace must be even
     96             if (num_inplace % 2 != 0)
     97                 --num_inplace;
     98         }
     99     }
    100 #endif
    101     if (num_inplace > 0)
    102     {
    103         for (int i = num_stages - 1; i >= 0; --i)
    104         {
    105             if (can_inplace_per_stage.test(i))
    106             {
    107                 result ^= ~bitset() >> (DFT_MAX_STAGES - (i + 1));
    108 
    109                 if (--num_inplace == 0)
    110                     break;
    111             }
    112         }
    113     }
    114 
    115     if (!inplace_requested) // out-of-place first stage; IN->OUT
    116         result.reset(0);
    117 
    118     return result;
    119 }
    120 
    121 template struct dft_plan<float>;
    122 template struct dft_plan<double>;
    123 
    124 CMT_MULTI_PROTO(namespace impl {
    125     template <typename T>
    126     void dft_initialize(dft_plan<T> & plan);
    127     template <typename T>
    128     void dft_real_initialize(dft_plan_real<T> & plan);
    129     template <typename T, bool inverse>
    130     void dft_execute(const dft_plan<T>& plan, cbool_t<inverse>, complex<T>* out, const complex<T>* in,
    131                      u8* temp);
    132     template <typename T>
    133     void dft_initialize_transpose(internal_generic::fn_transpose<T> & transpose);
    134 })
    135 
    136 #ifdef CMT_MULTI_NEEDS_GATE
    137 
    138 namespace internal_generic
    139 {
    140 
    141 template <typename T>
    142 void dft_initialize(dft_plan<T>& plan)
    143 {
    144     CMT_MULTI_GATE(ns::impl::dft_initialize(plan));
    145 }
    146 template <typename T>
    147 void dft_real_initialize(dft_plan_real<T>& plan)
    148 {
    149     CMT_MULTI_GATE(ns::impl::dft_real_initialize(plan));
    150 }
    151 template <typename T, bool inverse>
    152 void dft_execute(const dft_plan<T>& plan, cbool_t<inverse>, complex<T>* out, const complex<T>* in, u8* temp)
    153 {
    154     CMT_MULTI_GATE(ns::impl::dft_execute(plan, cbool<inverse>, out, in, temp));
    155 }
    156 template <typename T>
    157 void dft_initialize_transpose(fn_transpose<T>& transpose)
    158 {
    159     CMT_MULTI_GATE(ns::impl::dft_initialize_transpose(transpose));
    160 }
    161 
    162 template void dft_initialize<float>(dft_plan<float>&);
    163 template void dft_initialize<double>(dft_plan<double>&);
    164 template void dft_real_initialize<float>(dft_plan_real<float>&);
    165 template void dft_real_initialize<double>(dft_plan_real<double>&);
    166 template void dft_execute<float>(const dft_plan<float>&, cbool_t<false>, complex<float>*,
    167                                  const complex<float>*, u8*);
    168 template void dft_execute<float>(const dft_plan<float>&, cbool_t<true>, complex<float>*,
    169                                  const complex<float>*, u8*);
    170 template void dft_execute<double>(const dft_plan<double>&, cbool_t<false>, complex<double>*,
    171                                   const complex<double>*, u8*);
    172 template void dft_execute<double>(const dft_plan<double>&, cbool_t<true>, complex<double>*,
    173                                   const complex<double>*, u8*);
    174 template void dft_initialize_transpose<float>(fn_transpose<float>&);
    175 template void dft_initialize_transpose<double>(fn_transpose<double>&);
    176 
    177 } // namespace internal_generic
    178 
    179 #endif
    180 
    181 } // namespace kfr