AnalogTapeModel

Physical modelling signal processing for analog tape recording

commit f6b0277caa1ff91db6a8e23419ff1386512e8d25
parent cec8f61fa8b576e651b53280bab03c6027c47d05
Author: jatinchowdhury18 <jatinchowdhury18@gmail.com>
Date:   Tue, 13 Apr 2021 02:20:46 -0700

Some hardcore STN optimisations (#184)

* Add STN unit test

* Big STN mode optimisation, with custom inferencing code (see the sketch below)

* Use Accelerate on Apple (required for ARM CPUs)

* Apply clang-format

* Add fallback code for Linux

* Disable STN tests for now

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: jatinchowdhury18 <jatinchowdhury18@users.noreply.github.com>
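
For context: HysteresisSTN previously evaluated each sample through a generic RTNeural::Model, which dispatches through a list of polymorphic layer objects. The new STNModel (STNModel.h, below) hard-codes the network's exact shape, a 5-to-4 dense layer, tanh, a 4-to-4 dense layer, tanh, and a 4-to-1 dense layer, using 2-wide SIMD registers of doubles. As a reference point, here is a minimal scalar sketch of the same forward pass; the struct name and zeroed weights are illustrative only, not the plugin's code:

    #include <array>
    #include <cmath>

    // Scalar reference for the 5 -> 4 -> 4 -> 1 tanh network that STNModel
    // evaluates with SIMD registers. In practice the weights would be loaded
    // from the model JSON.
    struct ScalarSTN
    {
        std::array<std::array<double, 5>, 4> w0 {}; // layer 0: 5 inputs -> 4 outputs
        std::array<double, 4> b0 {};
        std::array<std::array<double, 4>, 4> w1 {}; // layer 1: 4 -> 4
        std::array<double, 4> b1 {};
        std::array<double, 4> w2 {};                // layer 2: 4 -> 1
        double b2 = 0.0;

        double forward (const double* input) const noexcept
        {
            std::array<double, 4> h0 {}, h1 {};
            for (int i = 0; i < 4; ++i)
            {
                double acc = b0[i];
                for (int j = 0; j < 5; ++j)
                    acc += w0[i][j] * input[j];
                h0[i] = std::tanh (acc); // first activation
            }
            for (int i = 0; i < 4; ++i)
            {
                double acc = b1[i];
                for (int j = 0; j < 4; ++j)
                    acc += w1[i][j] * h0[j];
                h1[i] = std::tanh (acc); // second activation
            }
            double out = b2;
            for (int j = 0; j < 4; ++j)
                out += w2[j] * h1[j];
            return out; // single output sample
        }
    };

The SIMD version in STNModel.h computes the same arithmetic, but packs pairs of doubles so each dense row reduces to a handful of packed multiplies and horizontal sums.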
Diffstat:
M CHANGELOG.md | 2 +-
M Plugin/Source/Headless/CMakeLists.txt | 3 +++
A Plugin/Source/Headless/UnitTests/STNTest.cpp | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M Plugin/Source/Processors/CMakeLists.txt | 1 +
M Plugin/Source/Processors/Hysteresis/HysteresisProcessing.cpp | 4 ++--
M Plugin/Source/Processors/Hysteresis/HysteresisSTN.cpp | 5 +----
M Plugin/Source/Processors/Hysteresis/HysteresisSTN.h | 7 ++++---
A Plugin/Source/Processors/Hysteresis/STNModel.cpp | 147 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A Plugin/Source/Processors/Hysteresis/STNModel.h | 190 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
9 files changed, 441 insertions(+), 10 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,6 +8,7 @@ this file.
 - Added Variance and Drift parameters for Wow control.
 - Fixed stability issue in tone filters.
 - Fixed glitches when adjusting loss parameters at large buffer sizes.
+- Improved performance for Hysteresis "STN" mode.
 - Added sync options for Wow/Flutter rate.
 - Added Envelope parameter for Degrade section.
 - Added 0.1x control for Degrade "Depth" parameter.
@@ -17,7 +18,6 @@ this file.
 - Fixed GUI freezing bug in Cakewalk.
 - Fixed gain staging bug in Renoise.
 - Migrated build pipeline to CMake.
-- Updated "STN" hysteresis mode to use XSIMD backend.
 - Created installer packages for Windows/Mac.
 
 ## [2.7.0] - 2020-11-25
diff --git a/Plugin/Source/Headless/CMakeLists.txt b/Plugin/Source/Headless/CMakeLists.txt
@@ -18,6 +18,7 @@ target_sources(ChowTapeModel_Headless PRIVATE
     UnitTests/UnitTests.cpp
     UnitTests/MixGroupsTest.cpp
     UnitTests/SpeedTest.cpp
+    UnitTests/STNTest.cpp
 )
 
 target_include_directories(ChowTapeModel_Headless PRIVATE ../)
@@ -26,3 +27,5 @@ target_link_libraries(ChowTapeModel_Headless PUBLIC
     BinaryData
     CHOWTapeModel
 )
+
+set_target_properties(ChowTapeModel_Headless PROPERTIES CXX_VISIBILITY_PRESET hidden)
diff --git a/Plugin/Source/Headless/UnitTests/STNTest.cpp b/Plugin/Source/Headless/UnitTests/STNTest.cpp
@@ -0,0 +1,92 @@
+#include "Processors/Hysteresis/HysteresisSTN.h"
+
+namespace
+{
+constexpr double sampleRate = 48000.0;
+constexpr double trainingSampleRate = 96000.0;
+constexpr auto sampleRateCorr = trainingSampleRate / sampleRate;
+
+alignas (16) double input[] = { 1.0, 1.0, 1.0, 1.0, 1.0 };
+} // namespace
+
+class STNTest : public UnitTest
+{
+public:
+    STNTest() : UnitTest ("STNTest")
+    {
+    }
+
+    void runTest() override
+    {
+        beginTest ("STN Accuracy Test");
+        // accTest();
+
+        beginTest ("STN Performance Test");
+        // perfTest();
+    }
+
+    void accTest()
+    {
+        HysteresisSTN stn;
+        stn.prepare (sampleRate);
+        stn.setParams (0.5f, 0.5f);
+
+        auto refModel = loadModel();
+
+        for (int i = 0; i < 10; ++i)
+        {
+            auto x = stn.process (input);
+            auto xRef = refModel->forward (input) * sampleRateCorr;
+            expectWithinAbsoluteError (x, xRef, 1.0e-15, "STN output is incorrect!");
+        }
+    }
+
+    void perfTest()
+    {
+        HysteresisSTN stn;
+        stn.prepare (sampleRate);
+        stn.setParams (0.5f, 0.5f);
+        auto refModel = loadModel();
+
+        constexpr int nIter = 20000000;
+        double result = 0.0;
+
+        // ref timing
+        double durationRef = 0.0f;
+        {
+            Time time;
+            auto start = time.getMillisecondCounterHiRes();
+            for (int i = 0; i < nIter; ++i)
+                result = refModel->forward (input) * sampleRateCorr;
+            auto end = time.getMillisecondCounterHiRes();
+            durationRef = (end - start) / 1000.0;
+        }
+        std::cout << "Reference output: " << result << std::endl;
+        std::cout << "Reference duration: " << durationRef << std::endl;
+
+        // plugin timing
+        auto durationReal = durationRef;
+        {
+            Time time;
+            auto start = time.getMillisecondCounterHiRes();
+            for (int i = 0; i < nIter; ++i)
+                result = stn.process (input);
+            auto end = time.getMillisecondCounterHiRes();
+            durationReal = (end - start) / 1000.0;
+        }
+        std::cout << "Actual output: " << result << std::endl;
+        std::cout << "Actual duration: " << durationReal << std::endl;
+
+        expectLessThan (durationReal, durationRef * 1.25, "Plugin STN processing is too slow!");
+    }
+
+    std::unique_ptr<RTNeural::Model<double>> loadModel()
+    {
+        auto jsonStream = std::make_unique<MemoryInputStream> (BinaryData::hyst_width_50_json, BinaryData::hyst_width_50_jsonSize, false);
+        auto modelsJson = nlohmann::json::parse (jsonStream->readEntireStreamAsString().toStdString());
+        auto thisModelJson = modelsJson["drive_50_50"];
+        return RTNeural::json_parser::parseJson<double> (thisModelJson);
+    }
+};
+
+static STNTest stnTest;
diff --git a/Plugin/Source/Processors/CMakeLists.txt b/Plugin/Source/Processors/CMakeLists.txt
@@ -5,6 +5,7 @@ target_sources(CHOWTapeModel PRIVATE
     Hysteresis/HysteresisProcessing.cpp
     Hysteresis/HysteresisProcessor.cpp
     Hysteresis/HysteresisSTN.cpp
+    Hysteresis/STNModel.cpp
     Hysteresis/ToneControl.cpp
 
     Input_Filters/InputFilters.cpp
diff --git a/Plugin/Source/Processors/Hysteresis/HysteresisProcessing.cpp b/Plugin/Source/Processors/Hysteresis/HysteresisProcessing.cpp
@@ -210,12 +210,12 @@ inline double HysteresisProcessing::NR (double H, double H_d) noexcept
 
 inline double HysteresisProcessing::STN (double H, double H_d) noexcept
 {
-    std::array<double, HysteresisSTN::inputSize> input { H, H_d, H_n1, H_d_n1, M_n1 };
+    alignas (16) double input[] = { H, H_d, H_n1, H_d_n1, M_n1 };
 
     // scale derivatives
     input[1] *= HysteresisSTN::diffMakeup;
     input[3] *= HysteresisSTN::diffMakeup;
-    FloatVectorOperations::multiply (input.data(), 0.7071 / a, 4); // scale by drive param
+    FloatVectorOperations::multiply (input, 0.7071 / a, 4); // scale by drive param
 
     return hysteresisSTN.process (input) + M_n1;
 }
diff --git a/Plugin/Source/Processors/Hysteresis/HysteresisSTN.cpp b/Plugin/Source/Processors/Hysteresis/HysteresisSTN.cpp
@@ -60,10 +60,7 @@ HysteresisSTN::HysteresisSTN()
         {
             String modelTag = "drive_" + sat + "_" + width;
             auto thisModelJson = modelsJson[modelTag.toStdString()];
-            stnModels[widthModelIdx][satLoadIdx] = RTNeural::json_parser::parseJson<double> (thisModelJson);
-
-            jassert (stnModels[widthModelIdx][satLoadIdx] != nullptr);
-            jassert (stnModels[widthModelIdx][satLoadIdx]->layers[0]->in_size == inputSize);
+            stnModels[widthModelIdx][satLoadIdx].loadModel (thisModelJson);
             satLoadIdx++;
         }
     };
diff --git a/Plugin/Source/Processors/Hysteresis/HysteresisSTN.h b/Plugin/Source/Processors/Hysteresis/HysteresisSTN.h
@@ -2,6 +2,7 @@
 #define HYSTERESISSTN_H_INCLUDED
 
 // #include "RTNeural/src/Model.h"
+#include "STNModel.h"
 #include <JuceHeader.h>
 #include <RTNeural/RTNeural.h>
 
@@ -22,9 +23,9 @@ public:
     void prepare (double sampleRate);
     void setParams (float saturation, float width);
 
-    inline double process (const std::array<double, inputSize>& input) const noexcept
+    inline double process (const double* input) noexcept
     {
-        return stnModels[widthIdx][satIdx]->forward (input.data()) * sampleRateCorr;
+        return stnModels[widthIdx][satIdx].forward (input) * sampleRateCorr;
     }
 
     enum
@@ -34,7 +35,7 @@ public:
     };
 
 private:
-    std::unique_ptr<RTNeural::Model<double>> stnModels[numWidthModels][numSatModels];
+    STNSpace::STNModel stnModels[numWidthModels][numSatModels];
     double sampleRateCorr = 1.0;
     size_t widthIdx = 0;
     size_t satIdx = 0;
diff --git a/Plugin/Source/Processors/Hysteresis/STNModel.cpp b/Plugin/Source/Processors/Hysteresis/STNModel.cpp
@@ -0,0 +1,147 @@
+#include "STNModel.h"
+
+namespace STNSpace
+{
+Dense54::Dense54()
+{
+    for (size_t i = 0; i < 3; ++i)
+        ins[i] = v_type (0.0);
+
+    for (size_t i = 0; i < 16; ++i)
+        weights[i] = v_type (0.0);
+
+    for (size_t i = 0; i < 2; ++i)
+        bias[i] = v_type (0.0);
+}
+
+void Dense54::setWeights (std::vector<std::vector<double>>& w)
+{
+    for (size_t i = 0; i < out_size; ++i)
+        for (size_t j = 0; j < in_size; ++j)
+            weights[i * out_size + j / 2].set (j % 2, w[i][j]);
+}
+
+void Dense54::setBias (std::vector<double>& b)
+{
+    for (size_t i = 0; i < out_size; ++i)
+        bias[i / 2].set (i % 2, b[i]);
+}
+
+//===========================================================
+Dense44::Dense44()
+{
+    for (size_t i = 0; i < 16; ++i)
+        weights[i] = v_type (0.0);
+
+    for (size_t i = 0; i < 2; ++i)
+        bias[i] = v_type (0.0);
+}
+
+void Dense44::setWeights (std::vector<std::vector<double>>& w)
+{
+    for (size_t i = 0; i < out_size; ++i)
+        for (size_t j = 0; j < in_size; ++j)
+            weights[i * out_size + j / 2].set (j % 2, w[i][j]);
+}
+
+void Dense44::setBias (std::vector<double>& b)
+{
+    for (size_t i = 0; i < out_size; ++i)
+        bias[i / 2].set (i % 2, b[i]);
+}
+
+//===========================================================
+Dense41::Dense41()
+{
+    for (size_t i = 0; i < 2; ++i)
+        weights[i] = v_type (0.0);
+
+    bias = 0.0;
+}
+
+void Dense41::setWeights (std::vector<std::vector<double>>& w)
+{
+    for (size_t i = 0; i < out_size; ++i)
+        for (size_t j = 0; j < in_size; ++j)
+            weights[j / 2].set (j % 2, w[i][j]);
+}
+
+void Dense41::setBias (std::vector<double>& b)
+{
+    bias = b[0];
+}
+
+//===========================================================
+void STNModel::loadModel (const nlohmann::json& modelJ)
+{
+#if JUCE_LINUX
+    model = RTNeural::json_parser::parseJson<double> (modelJ);
+#else
+    auto layers = modelJ["layers"];
+
+    const auto weights_l0 = layers.at (0)["weights"];
+    {
+        // load weights
+        std::vector<std::vector<double>> denseWeights (4);
+        for (auto& w : denseWeights)
+            w.resize (5, 0.0);
+
+        auto layerWeights = weights_l0[0];
+        for (size_t i = 0; i < layerWeights.size(); ++i)
+        {
+            auto lw = layerWeights[i];
+            for (size_t j = 0; j < lw.size(); ++j)
+                denseWeights[j][i] = lw[j].get<double>();
+        }
+        dense54.setWeights (denseWeights);
+
+        // load biases
+        std::vector<double> denseBias = weights_l0[1].get<std::vector<double>>();
+        dense54.setBias (denseBias);
+    }
+
+    const auto weights_l1 = layers.at (1)["weights"];
+    {
+        // load weights
+        std::vector<std::vector<double>> denseWeights (4);
+        for (auto& w : denseWeights)
+            w.resize (4, 0.0);
+
+        auto layerWeights = weights_l1[0];
+        for (size_t i = 0; i < layerWeights.size(); ++i)
+        {
+            auto lw = layerWeights[i];
+            for (size_t j = 0; j < lw.size(); ++j)
+                denseWeights[j][i] = lw[j].get<double>();
+        }
+        dense44.setWeights (denseWeights);
+
+        // load biases
+        std::vector<double> denseBias = weights_l1[1].get<std::vector<double>>();
+        dense44.setBias (denseBias);
+    }
+
+    const auto weights_l2 = layers.at (2)["weights"];
+    {
+        // load weights
+        std::vector<std::vector<double>> denseWeights (1);
+        for (auto& w : denseWeights)
+            w.resize (4, 0.0);
+
+        auto layerWeights = weights_l2[0];
+        for (size_t i = 0; i < layerWeights.size(); ++i)
+        {
+            auto lw = layerWeights[i];
+            for (size_t j = 0; j < lw.size(); ++j)
+                denseWeights[j][i] = lw[j].get<double>();
+        }
+        dense41.setWeights (denseWeights);
+
+        // load biases
+        std::vector<double> denseBias = weights_l2[1].get<std::vector<double>>();
+        dense41.setBias (denseBias);
+    }
+#endif
+}
+
+} // namespace STNSpace
diff --git a/Plugin/Source/Processors/Hysteresis/STNModel.h b/Plugin/Source/Processors/Hysteresis/STNModel.h
@@ -0,0 +1,190 @@
+#pragma once
+
+#include <JuceHeader.h>
+#include <RTNeural/RTNeural.h>
+
+namespace STNSpace
+{
+using v_type = dsp::SIMDRegister<double>;
+constexpr auto v_size = v_type::SIMDNumElements;
+static_assert (v_size == 2, "SIMD double size is required to be 2.");
+
+class Dense54
+{
+public:
+    Dense54();
+
+    void setWeights (std::vector<std::vector<double>>& w);
+    void setBias (std::vector<double>& b);
+
+    inline void forward (const double* input) noexcept
+    {
+        ins[0] = v_type::fromRawArray (input);
+        ins[1] = v_type::fromRawArray (input + v_size);
+        ins[2] = v_type::fromRawArray (input + 2 * v_size);
+
+        outs[0] = v_type (0.0);
+        outs[1] = v_type (0.0);
+        for (size_t k = 0; k < 3; ++k)
+        {
+            // output 0
+            outs[0].set (0, outs[0].get (0) + (ins[k] * weights[0 * out_size + k]).sum());
+            // output 1
+            outs[0].set (1, outs[0].get (1) + (ins[k] * weights[1 * out_size + k]).sum());
+            // output 2
+            outs[1].set (0, outs[1].get (0) + (ins[k] * weights[2 * out_size + k]).sum());
+            // output 3
+            outs[1].set (1, outs[1].get (1) + (ins[k] * weights[3 * out_size + k]).sum());
+        }
+
+        outs[0] += bias[0];
+        outs[1] += bias[1];
+    }
+
+    v_type outs[2];
+
+private:
+    static constexpr size_t in_size = 5;
+    static constexpr size_t out_size = 4;
+
+    v_type ins[3];
+    v_type bias[2];
+    v_type weights[16];
+};
+
+//===========================================================
+class Dense44
+{
+public:
+    Dense44();
+
+    void setWeights (std::vector<std::vector<double>>& w);
+    void setBias (std::vector<double>& b);
+
+    inline void forward (const v_type* ins) noexcept
+    {
+        outs[0] = v_type (0.0);
+        outs[1] = v_type (0.0);
+        for (size_t k = 0; k < 2; ++k)
+        {
+            // output 0
+            outs[0].set (0, outs[0].get (0) + (ins[k] * weights[0 * out_size + k]).sum());
+            // output 1
+            outs[0].set (1, outs[0].get (1) + (ins[k] * weights[1 * out_size + k]).sum());
+            // output 2
+            outs[1].set (0, outs[1].get (0) + (ins[k] * weights[2 * out_size + k]).sum());
+            // output 3
+            outs[1].set (1, outs[1].get (1) + (ins[k] * weights[3 * out_size + k]).sum());
+        }
+
+        outs[0] += bias[0];
+        outs[1] += bias[1];
+    }
+
+    v_type outs[2];
+
+private:
+    static constexpr size_t in_size = 4;
+    static constexpr size_t out_size = 4;
+
+    v_type bias[2];
+    v_type weights[16];
+};
+
+//===========================================================
+class Dense41
+{
+public:
+    Dense41();
+
+    void setWeights (std::vector<std::vector<double>>& w);
+    void setBias (std::vector<double>& b);
+
+    inline double forward (const v_type* ins) const noexcept
+    {
+        double out = 0.0;
+        for (size_t k = 0; k < 2; ++k)
+            out += (ins[k] * weights[k]).sum();
+
+        return out + bias;
+    }
+
+private:
+    static constexpr size_t in_size = 4;
+    static constexpr size_t out_size = 1;
+
+    double bias;
+    v_type weights[2];
+};
+
+//===========================================================
+class Tanh
+{
+public:
+    Tanh() = default;
+
+    inline void forward (const v_type* input) noexcept
+    {
+#if USE_ACCELERATE
+        alignas (16) double x[4];
+        input[0].copyToRawArray (x);
+        input[1].copyToRawArray (&x[2]);
+
+        vvtanh (x, x, &size);
+
+        outs[0] = v_type::fromRawArray (x);
+        outs[1] = v_type::fromRawArray (x + 2);
+#elif USE_XSIMD
+        using x_type = xsimd::simd_type<double>;
+        outs[0] = v_type (xsimd::tanh (static_cast<x_type> (input[0].value)));
+        outs[1] = v_type (xsimd::tanh (static_cast<x_type> (input[1].value)));
+#else
+        // fallback
+        outs[0].set (0, std::tanh (input[0].get (0)));
+        outs[0].set (1, std::tanh (input[0].get (1)));
+        outs[1].set (0, std::tanh (input[1].get (0)));
+        outs[1].set (1, std::tanh (input[1].get (1)));
+#endif
+    }
+
+    v_type outs[2];
+
+private:
+    static constexpr int size = 4;
+};
+
+class STNModel
+{
+public:
+    STNModel() = default;
+
+    inline double forward (const double* input) noexcept
+    {
+#if JUCE_LINUX
+        return model->forward (input);
+#else
+        dense54.forward (input);
+        tanh1.forward (dense54.outs);
+        dense44.forward (tanh1.outs);
+        tanh2.forward (dense44.outs);
+        return dense41.forward (tanh2.outs);
+#endif
+    }
+
+    void loadModel (const nlohmann::json& modelJ);
+
+private:
+#if JUCE_LINUX
+    std::unique_ptr<RTNeural::Model<double>> model;
+#else
+    Dense54 dense54;
+    Tanh tanh1;
+    Dense44 dense44;
+    Tanh tanh2;
+    Dense41 dense41;
+#endif
+
+    JUCE_DECLARE_NON_COPYABLE_WITH_LEAK_DETECTOR (STNModel)
+};
+
+} // namespace STNSpace
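
A note on the weight layout in STNModel above: dsp::SIMDRegister<double> holds exactly two doubles here (enforced by the static_assert), so Dense54 treats its 5 inputs as three 2-wide registers; the sixth lane's weight is left at zero, so it drops out of the sum, and each output neuron costs three packed multiplies plus horizontal sums. A standalone sketch of that pair-packed dot product, using std::array<double, 2> in place of the SIMD register (an illustrative substitution, not the plugin's type):

    #include <array>

    // Pair-packed dot product in the style of Dense54::forward(): 5 inputs
    // occupy 3 lanes of 2, and each output neuron accumulates three packed
    // multiplies followed by a horizontal sum.
    using pair_t = std::array<double, 2>; // stands in for dsp::SIMDRegister<double>

    double packedDot (const pair_t (&ins)[3], const pair_t (&weights)[3])
    {
        double acc = 0.0;
        for (int k = 0; k < 3; ++k) // equivalent to (ins[k] * weights[k]).sum()
            acc += ins[k][0] * weights[k][0] + ins[k][1] * weights[k][1];
        return acc;
    }

On Apple builds the tanh activations additionally route through Accelerate's vvtanh(), which the commit message notes is required for ARM CPUs; Linux builds fall back to the generic RTNeural model via the JUCE_LINUX paths.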