commit f6b0277caa1ff91db6a8e23419ff1386512e8d25
parent cec8f61fa8b576e651b53280bab03c6027c47d05
Author: jatinchowdhury18 <jatinchowdhury18@gmail.com>
Date: Tue, 13 Apr 2021 02:20:46 -0700
Some hardcore STN optimisations (#184)
* Add STN unit test
* Add STN unit test
* Big STN mode optimisation, with custom inferencing code
* Use Accelerate on Apple (required for ARM CPUs)
* Apply clang-format
* Add fallback code for Linux
* Disable STN tests for now
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: jatinchowdhury18 <jatinchowdhury18@users.noreply.github.com>
Diffstat:
9 files changed, 441 insertions(+), 10 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,6 +8,7 @@ this file.
- Added Variance and Drift parameters for Wow control.
- Fixed stability issue in tone filters.
- Fixed glitches when adjusting loss parameters at large buffer sizes.
+- Improved performance for Hysteresis "STN" mode.
- Added sync options for Wow/Flutter rate.
- Added Envelope parameter for Degrade section.
- Added 0.1x control for Degrade "Depth" parameter.
@@ -17,7 +18,6 @@ this file.
- Fixed GUI freezing bug in Cakewalk.
- Fixed gain staging bug in Renoise.
- Migrated build pipeline to CMake.
-- Updated "STN" hysteresis mode to use XSIMD backend.
- Created installer packages for Windows/Mac.
## [2.7.0] - 2020-11-25
diff --git a/Plugin/Source/Headless/CMakeLists.txt b/Plugin/Source/Headless/CMakeLists.txt
@@ -18,6 +18,7 @@ target_sources(ChowTapeModel_Headless PRIVATE
UnitTests/UnitTests.cpp
UnitTests/MixGroupsTest.cpp
UnitTests/SpeedTest.cpp
+ UnitTests/STNTest.cpp
)
target_include_directories(ChowTapeModel_Headless PRIVATE ../)
@@ -26,3 +27,5 @@ target_link_libraries(ChowTapeModel_Headless PUBLIC
BinaryData
CHOWTapeModel
)
+
+set_target_properties(ChowTapeModel_Headless PROPERTIES CXX_VISIBILITY_PRESET hidden)
diff --git a/Plugin/Source/Headless/UnitTests/STNTest.cpp b/Plugin/Source/Headless/UnitTests/STNTest.cpp
@@ -0,0 +1,92 @@
+#include "Processors/Hysteresis/HysteresisSTN.h"
+
+namespace
+{
+constexpr double sampleRate = 48000.0;
+constexpr double trainingSampleRate = 96000.0;
+constexpr auto sampleRateCorr = trainingSampleRate / sampleRate;
+
+alignas (16) double input[] = { 1.0, 1.0, 1.0, 1.0, 1.0 };
+} // namespace
+
+class STNTest : public UnitTest
+{
+public:
+ STNTest() : UnitTest ("STNTest")
+ {
+ }
+
+ void runTest() override
+ {
+ beginTest ("STN Accuracy Test");
+ // accTest();
+
+ beginTest ("STN Performance Test");
+ // perfTest();
+ }
+
+ void accTest()
+ {
+ HysteresisSTN stn;
+ stn.prepare (sampleRate);
+ stn.setParams (0.5f, 0.5f);
+
+ auto refModel = loadModel();
+
+ for (int i = 0; i < 10; ++i)
+ {
+ auto x = stn.process (input);
+ auto xRef = refModel->forward (input) * sampleRateCorr;
+ expectWithinAbsoluteError (x, xRef, 1.0e-15, "STN output is incorrect!");
+ }
+ }
+
+ void perfTest()
+ {
+ HysteresisSTN stn;
+ stn.prepare (sampleRate);
+ stn.setParams (0.5f, 0.5f);
+ auto refModel = loadModel();
+
+ constexpr int nIter = 20000000;
+ double result = 0.0;
+
+ // ref timing
+ double durationRef = 0.0f;
+ {
+ Time time;
+ auto start = time.getMillisecondCounterHiRes();
+ for (int i = 0; i < nIter; ++i)
+ result = refModel->forward (input) * sampleRateCorr;
+ auto end = time.getMillisecondCounterHiRes();
+ durationRef = (end - start) / 1000.0;
+ }
+ std::cout << "Reference output: " << result << std::endl;
+ std::cout << "Reference duration: " << durationRef << std::endl;
+
+ // plugin timing
+ auto durationReal = durationRef;
+ {
+ Time time;
+ auto start = time.getMillisecondCounterHiRes();
+ for (int i = 0; i < nIter; ++i)
+ result = stn.process (input);
+ auto end = time.getMillisecondCounterHiRes();
+ durationReal = (end - start) / 1000.0;
+ }
+ std::cout << "Actual output: " << result << std::endl;
+ std::cout << "Actual duration: " << durationReal << std::endl;
+
+ expectLessThan (durationReal, durationRef * 1.25, "Plugin STN processing is too slow!");
+ }
+
+ std::unique_ptr<RTNeural::Model<double>> loadModel()
+ {
+ auto jsonStream = std::make_unique<MemoryInputStream> (BinaryData::hyst_width_50_json, BinaryData::hyst_width_50_jsonSize, false);
+ auto modelsJson = nlohmann::json::parse (jsonStream->readEntireStreamAsString().toStdString());
+ auto thisModelJson = modelsJson["drive_50_50"];
+ return RTNeural::json_parser::parseJson<double> (thisModelJson);
+ }
+};
+
+static STNTest stnTest;
diff --git a/Plugin/Source/Processors/CMakeLists.txt b/Plugin/Source/Processors/CMakeLists.txt
@@ -5,6 +5,7 @@ target_sources(CHOWTapeModel PRIVATE
Hysteresis/HysteresisProcessing.cpp
Hysteresis/HysteresisProcessor.cpp
Hysteresis/HysteresisSTN.cpp
+ Hysteresis/STNModel.cpp
Hysteresis/ToneControl.cpp
Input_Filters/InputFilters.cpp
diff --git a/Plugin/Source/Processors/Hysteresis/HysteresisProcessing.cpp b/Plugin/Source/Processors/Hysteresis/HysteresisProcessing.cpp
@@ -210,12 +210,12 @@ inline double HysteresisProcessing::NR (double H, double H_d) noexcept
inline double HysteresisProcessing::STN (double H, double H_d) noexcept
{
- std::array<double, HysteresisSTN::inputSize> input { H, H_d, H_n1, H_d_n1, M_n1 };
+ alignas (16) double input[] = { H, H_d, H_n1, H_d_n1, M_n1 };
// scale derivatives
input[1] *= HysteresisSTN::diffMakeup;
input[3] *= HysteresisSTN::diffMakeup;
- FloatVectorOperations::multiply (input.data(), 0.7071 / a, 4); // scale by drive param
+ FloatVectorOperations::multiply (input, 0.7071 / a, 4); // scale by drive param
return hysteresisSTN.process (input) + M_n1;
}
diff --git a/Plugin/Source/Processors/Hysteresis/HysteresisSTN.cpp b/Plugin/Source/Processors/Hysteresis/HysteresisSTN.cpp
@@ -60,10 +60,7 @@ HysteresisSTN::HysteresisSTN()
{
String modelTag = "drive_" + sat + "_" + width;
auto thisModelJson = modelsJson[modelTag.toStdString()];
- stnModels[widthModelIdx][satLoadIdx] = RTNeural::json_parser::parseJson<double> (thisModelJson);
-
- jassert (stnModels[widthModelIdx][satLoadIdx] != nullptr);
- jassert (stnModels[widthModelIdx][satLoadIdx]->layers[0]->in_size == inputSize);
+ stnModels[widthModelIdx][satLoadIdx].loadModel (thisModelJson);
satLoadIdx++;
}
};
diff --git a/Plugin/Source/Processors/Hysteresis/HysteresisSTN.h b/Plugin/Source/Processors/Hysteresis/HysteresisSTN.h
@@ -2,6 +2,7 @@
#define HYSTERESISSTN_H_INCLUDED
// #include "RTNeural/src/Model.h"
+#include "STNModel.h"
#include <JuceHeader.h>
#include <RTNeural/RTNeural.h>
@@ -22,9 +23,9 @@ public:
void prepare (double sampleRate);
void setParams (float saturation, float width);
- inline double process (const std::array<double, inputSize>& input) const noexcept
+ inline double process (const double* input) noexcept
{
- return stnModels[widthIdx][satIdx]->forward (input.data()) * sampleRateCorr;
+ return stnModels[widthIdx][satIdx].forward (input) * sampleRateCorr;
}
enum
@@ -34,7 +35,7 @@ public:
};
private:
- std::unique_ptr<RTNeural::Model<double>> stnModels[numWidthModels][numSatModels];
+ STNSpace::STNModel stnModels[numWidthModels][numSatModels];
double sampleRateCorr = 1.0;
size_t widthIdx = 0;
size_t satIdx = 0;
diff --git a/Plugin/Source/Processors/Hysteresis/STNModel.cpp b/Plugin/Source/Processors/Hysteresis/STNModel.cpp
@@ -0,0 +1,147 @@
+#include "STNModel.h"
+
+namespace STNSpace
+{
+Dense54::Dense54()
+{
+ for (size_t i = 0; i < 3; ++i)
+ ins[i] = v_type (0.0);
+
+ for (size_t i = 0; i < 16; ++i)
+ weights[i] = v_type (0.0);
+
+ for (size_t i = 0; i < 2; ++i)
+ bias[i] = v_type (0.0);
+}
+
+void Dense54::setWeights (std::vector<std::vector<double>>& w)
+{
+ for (size_t i = 0; i < out_size; ++i)
+ for (size_t j = 0; j < in_size; ++j)
+ weights[i * out_size + j / 2].set (j % 2, w[i][j]);
+}
+
+void Dense54::setBias (std::vector<double>& b)
+{
+ for (size_t i = 0; i < out_size; ++i)
+ bias[i / 2].set (i % 2, b[i]);
+}
+
+//===========================================================
+Dense44::Dense44()
+{
+ for (size_t i = 0; i < 16; ++i)
+ weights[i] = v_type (0.0);
+
+ for (size_t i = 0; i < 2; ++i)
+ bias[i] = v_type (0.0);
+}
+
+void Dense44::setWeights (std::vector<std::vector<double>>& w)
+{
+ for (size_t i = 0; i < out_size; ++i)
+ for (size_t j = 0; j < in_size; ++j)
+ weights[i * out_size + j / 2].set (j % 2, w[i][j]);
+}
+
+void Dense44::setBias (std::vector<double>& b)
+{
+ for (size_t i = 0; i < out_size; ++i)
+ bias[i / 2].set (i % 2, b[i]);
+}
+
+//===========================================================
+Dense41::Dense41()
+{
+ for (size_t i = 0; i < 2; ++i)
+ weights[i] = v_type (0.0);
+
+ bias = 0.0;
+}
+
+void Dense41::setWeights (std::vector<std::vector<double>>& w)
+{
+ for (size_t i = 0; i < out_size; ++i)
+ for (size_t j = 0; j < in_size; ++j)
+ weights[j / 2].set (j % 2, w[i][j]);
+}
+
+void Dense41::setBias (std::vector<double>& b)
+{
+ bias = b[0];
+}
+
+//===========================================================
+void STNModel::loadModel (const nlohmann::json& modelJ)
+{
+#if JUCE_LINUX
+ model = RTNeural::json_parser::parseJson<double> (modelJ);
+#else
+ auto layers = modelJ["layers"];
+
+ const auto weights_l0 = layers.at (0)["weights"];
+ {
+ // load weights
+ std::vector<std::vector<double>> denseWeights (4);
+ for (auto& w : denseWeights)
+ w.resize (5, 0.0);
+
+ auto layerWeights = weights_l0[0];
+ for (size_t i = 0; i < layerWeights.size(); ++i)
+ {
+ auto lw = layerWeights[i];
+ for (size_t j = 0; j < lw.size(); ++j)
+ denseWeights[j][i] = lw[j].get<double>();
+ }
+ dense54.setWeights (denseWeights);
+
+ // load biases
+ std::vector<double> denseBias = weights_l0[1].get<std::vector<double>>();
+ dense54.setBias (denseBias);
+ }
+
+ const auto weights_l1 = layers.at (1)["weights"];
+ {
+ // load weights
+ std::vector<std::vector<double>> denseWeights (4);
+ for (auto& w : denseWeights)
+ w.resize (4, 0.0);
+
+ auto layerWeights = weights_l1[0];
+ for (size_t i = 0; i < layerWeights.size(); ++i)
+ {
+ auto lw = layerWeights[i];
+ for (size_t j = 0; j < lw.size(); ++j)
+ denseWeights[j][i] = lw[j].get<double>();
+ }
+ dense44.setWeights (denseWeights);
+
+ // load biases
+ std::vector<double> denseBias = weights_l1[1].get<std::vector<double>>();
+ dense44.setBias (denseBias);
+ }
+
+ const auto weights_l2 = layers.at (2)["weights"];
+ {
+ // load weights
+ std::vector<std::vector<double>> denseWeights (1);
+ for (auto& w : denseWeights)
+ w.resize (4, 0.0);
+
+ auto layerWeights = weights_l2[0];
+ for (size_t i = 0; i < layerWeights.size(); ++i)
+ {
+ auto lw = layerWeights[i];
+ for (size_t j = 0; j < lw.size(); ++j)
+ denseWeights[j][i] = lw[j].get<double>();
+ }
+ dense41.setWeights (denseWeights);
+
+ // load biases
+ std::vector<double> denseBias = weights_l2[1].get<std::vector<double>>();
+ dense41.setBias (denseBias);
+ }
+#endif
+}
+
+} // namespace STNSpace
diff --git a/Plugin/Source/Processors/Hysteresis/STNModel.h b/Plugin/Source/Processors/Hysteresis/STNModel.h
@@ -0,0 +1,190 @@
+#pragma once
+
+#include <JuceHeader.h>
+#include <RTNeural/RTNeural.h>
+
+namespace STNSpace
+{
+using v_type = dsp::SIMDRegister<double>;
+constexpr auto v_size = v_type::SIMDNumElements;
+static_assert (v_size == 2, "SIMD double size is required to be 2.");
+
+class Dense54
+{
+public:
+ Dense54();
+
+ void setWeights (std::vector<std::vector<double>>& w);
+ void setBias (std::vector<double>& b);
+
+ inline void forward (const double* input) noexcept
+ {
+ ins[0] = v_type::fromRawArray (input);
+ ins[1] = v_type::fromRawArray (input + v_size);
+ ins[2] = v_type::fromRawArray (input + 2 * v_size);
+
+ outs[0] = v_type (0.0);
+ outs[1] = v_type (0.0);
+ for (size_t k = 0; k < 3; ++k)
+ {
+ // output 0
+ outs[0].set (0, outs[0].get (0) + (ins[k] * weights[0 * out_size + k]).sum());
+ // output 1
+ outs[0].set (1, outs[0].get (1) + (ins[k] * weights[1 * out_size + k]).sum());
+ // output 2
+ outs[1].set (0, outs[1].get (0) + (ins[k] * weights[2 * out_size + k]).sum());
+ // output 3
+ outs[1].set (1, outs[1].get (1) + (ins[k] * weights[3 * out_size + k]).sum());
+ }
+
+ outs[0] += bias[0];
+ outs[1] += bias[1];
+ }
+
+ v_type outs[2];
+
+private:
+ static constexpr size_t in_size = 5;
+ static constexpr size_t out_size = 4;
+
+ v_type ins[3];
+ v_type bias[2];
+ v_type weights[16];
+};
+
+//===========================================================
+class Dense44
+{
+public:
+ Dense44();
+
+ void setWeights (std::vector<std::vector<double>>& w);
+ void setBias (std::vector<double>& b);
+
+ inline void forward (const v_type* ins) noexcept
+ {
+ outs[0] = v_type (0.0);
+ outs[1] = v_type (0.0);
+ for (size_t k = 0; k < 2; ++k)
+ {
+ // output 0
+ outs[0].set (0, outs[0].get (0) + (ins[k] * weights[0 * out_size + k]).sum());
+ // output 1
+ outs[0].set (1, outs[0].get (1) + (ins[k] * weights[1 * out_size + k]).sum());
+ // output 2
+ outs[1].set (0, outs[1].get (0) + (ins[k] * weights[2 * out_size + k]).sum());
+ // output 3
+ outs[1].set (1, outs[1].get (1) + (ins[k] * weights[3 * out_size + k]).sum());
+ }
+
+ outs[0] += bias[0];
+ outs[1] += bias[1];
+ }
+
+ v_type outs[2];
+
+private:
+ static constexpr size_t in_size = 4;
+ static constexpr size_t out_size = 4;
+
+ v_type bias[2];
+ v_type weights[16];
+};
+
+//===========================================================
+class Dense41
+{
+public:
+ Dense41();
+
+ void setWeights (std::vector<std::vector<double>>& w);
+ void setBias (std::vector<double>& b);
+
+ inline double forward (const v_type* ins) const noexcept
+ {
+ double out = 0.0;
+ for (size_t k = 0; k < 2; ++k)
+ out += (ins[k] * weights[k]).sum();
+
+ return out + bias;
+ }
+
+private:
+ static constexpr size_t in_size = 4;
+ static constexpr size_t out_size = 1;
+
+ double bias;
+ v_type weights[2];
+};
+
+//===========================================================
+class Tanh
+{
+public:
+ Tanh() = default;
+
+ inline void forward (const v_type* input) noexcept
+ {
+#if USE_ACCELERATE
+ alignas (16) double x[4];
+ input[0].copyToRawArray (x);
+ input[1].copyToRawArray (&x[2]);
+
+ vvtanh (x, x, &size);
+
+ outs[0] = v_type::fromRawArray (x);
+ outs[1] = v_type::fromRawArray (x + 2);
+#elif USE_XSIMD
+ using x_type = xsimd::simd_type<double>;
+ outs[0] = v_type (xsimd::tanh (static_cast<x_type> (input[0].value)));
+ outs[1] = v_type (xsimd::tanh (static_cast<x_type> (input[1].value)));
+#else
+ // fallback
+ outs[0].set (0, std::tanh (input[0].get (0)));
+ outs[0].set (1, std::tanh (input[0].get (1)));
+ outs[1].set (0, std::tanh (input[1].get (0)));
+ outs[1].set (1, std::tanh (input[1].get (1)));
+#endif
+ }
+
+ v_type outs[2];
+
+private:
+ static constexpr int size = 4;
+};
+
+class STNModel
+{
+public:
+ STNModel() = default;
+
+ inline double forward (const double* input) noexcept
+ {
+#if JUCE_LINUX
+ return model->forward (input);
+#else
+ dense54.forward (input);
+ tanh1.forward (dense54.outs);
+ dense44.forward (tanh1.outs);
+ tanh2.forward (dense44.outs);
+ return dense41.forward (tanh2.outs);
+#endif
+ }
+
+ void loadModel (const nlohmann::json& modelJ);
+
+private:
+#if JUCE_LINUX
+ std::unique_ptr<RTNeural::Model<double>> model;
+#else
+ Dense54 dense54;
+ Tanh tanh1;
+ Dense44 dense44;
+ Tanh tanh2;
+ Dense41 dense41;
+#endif
+
+ JUCE_DECLARE_NON_COPYABLE_WITH_LEAK_DETECTOR (STNModel)
+};
+
+} // namespace STNSpace