commit f6b0277caa1ff91db6a8e23419ff1386512e8d25
parent cec8f61fa8b576e651b53280bab03c6027c47d05
Author: jatinchowdhury18 <jatinchowdhury18@gmail.com>
Date: Tue, 13 Apr 2021 02:20:46 -0700
Some hardcore STN optimisations (#184)
* Add STN unit test
* Add STN unit test
* Big STN mode optimisation, with custom inferencing code
* Use Accelerate on Apple (required for ARM CPUs)
* Apply clang-format
* Add fallback code for Linux
* Disable STN tests for now
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: jatinchowdhury18 <jatinchowdhury18@users.noreply.github.com>
Diffstat:
9 files changed, 441 insertions(+), 10 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,6 +8,7 @@ this file.
- Added Variance and Drift parameters for Wow control.
- Fixed stability issue in tone filters.
- Fixed glitches when adjusting loss parameters at large buffer sizes.
+- Improved performance for Hysteresis "STN" mode.
- Added sync options for Wow/Flutter rate.
- Added Envelope parameter for Degrade section.
- Added 0.1x control for Degrade "Depth" parameter.
@@ -17,7 +18,6 @@ this file.
- Fixed GUI freezing bug in Cakewalk.
- Fixed gain staging bug in Renoise.
- Migrated build pipeline to CMake.
-- Updated "STN" hysteresis mode to use XSIMD backend.
- Created installer packages for Windows/Mac.
## [2.7.0] - 2020-11-25
diff --git a/Plugin/Source/Headless/CMakeLists.txt b/Plugin/Source/Headless/CMakeLists.txt
@@ -18,6 +18,7 @@ target_sources(ChowTapeModel_Headless PRIVATE
UnitTests/UnitTests.cpp
UnitTests/MixGroupsTest.cpp
UnitTests/SpeedTest.cpp
+ UnitTests/STNTest.cpp
)
target_include_directories(ChowTapeModel_Headless PRIVATE ../)
@@ -26,3 +27,5 @@ target_link_libraries(ChowTapeModel_Headless PUBLIC
BinaryData
CHOWTapeModel
)
+
+set_target_properties(ChowTapeModel_Headless PROPERTIES CXX_VISIBILITY_PRESET hidden)
diff --git a/Plugin/Source/Headless/UnitTests/STNTest.cpp b/Plugin/Source/Headless/UnitTests/STNTest.cpp
@@ -0,0 +1,92 @@
+#include "Processors/Hysteresis/HysteresisSTN.h"
+
+namespace
+{
+constexpr double sampleRate = 48000.0;
+constexpr double trainingSampleRate = 96000.0;
+constexpr auto sampleRateCorr = trainingSampleRate / sampleRate;
+
+alignas (16) double input[] = { 1.0, 1.0, 1.0, 1.0, 1.0 };
+} // namespace
+
+class STNTest : public UnitTest
+{
+public:
+ STNTest() : UnitTest ("STNTest")
+ {
+ }
+
+ void runTest() override
+ {
+ beginTest ("STN Accuracy Test");
+ // accTest();
+
+ beginTest ("STN Performance Test");
+ // perfTest();
+ }
+
+ void accTest()
+ {
+ HysteresisSTN stn;
+ stn.prepare (sampleRate);
+ stn.setParams (0.5f, 0.5f);
+
+ auto refModel = loadModel();
+
+ for (int i = 0; i < 10; ++i)
+ {
+ auto x = stn.process (input);
+ auto xRef = refModel->forward (input) * sampleRateCorr;
+ expectWithinAbsoluteError (x, xRef, 1.0e-15, "STN output is incorrect!");
+ }
+ }
+
+ void perfTest()
+ {
+ HysteresisSTN stn;
+ stn.prepare (sampleRate);
+ stn.setParams (0.5f, 0.5f);
+ auto refModel = loadModel();
+
+ constexpr int nIter = 20000000;
+ double result = 0.0;
+
+ // ref timing
+ double durationRef = 0.0f;
+ {
+ Time time;
+ auto start = time.getMillisecondCounterHiRes();
+ for (int i = 0; i < nIter; ++i)
+ result = refModel->forward (input) * sampleRateCorr;
+ auto end = time.getMillisecondCounterHiRes();
+ durationRef = (end - start) / 1000.0;
+ }
+ std::cout << "Reference output: " << result << std::endl;
+ std::cout << "Reference duration: " << durationRef << std::endl;
+
+ // plugin timing
+ auto durationReal = durationRef;
+ {
+ Time time;
+ auto start = time.getMillisecondCounterHiRes();
+ for (int i = 0; i < nIter; ++i)
+ result = stn.process (input);
+ auto end = time.getMillisecondCounterHiRes();
+ durationReal = (end - start) / 1000.0;
+ }
+ std::cout << "Actual output: " << result << std::endl;
+ std::cout << "Actual duration: " << durationReal << std::endl;
+
+ expectLessThan (durationReal, durationRef * 1.25, "Plugin STN processing is too slow!");
+ }
+
+ std::unique_ptr<RTNeural::Model<double>> loadModel()
+ {
+ auto jsonStream = std::make_unique<MemoryInputStream> (BinaryData::hyst_width_50_json, BinaryData::hyst_width_50_jsonSize, false);
+ auto modelsJson = nlohmann::json::parse (jsonStream->readEntireStreamAsString().toStdString());
+ auto thisModelJson = modelsJson["drive_50_50"];
+ return RTNeural::json_parser::parseJson<double> (thisModelJson);
+ }
+};
+
+static STNTest stnTest;
diff --git a/Plugin/Source/Processors/CMakeLists.txt b/Plugin/Source/Processors/CMakeLists.txt
@@ -5,6 +5,7 @@ target_sources(CHOWTapeModel PRIVATE
Hysteresis/HysteresisProcessing.cpp
Hysteresis/HysteresisProcessor.cpp
Hysteresis/HysteresisSTN.cpp
+ Hysteresis/STNModel.cpp
Hysteresis/ToneControl.cpp
Input_Filters/InputFilters.cpp
diff --git a/Plugin/Source/Processors/Hysteresis/HysteresisProcessing.cpp b/Plugin/Source/Processors/Hysteresis/HysteresisProcessing.cpp
@@ -210,12 +210,12 @@ inline double HysteresisProcessing::NR (double H, double H_d) noexcept
inline double HysteresisProcessing::STN (double H, double H_d) noexcept
{
- std::array<double, HysteresisSTN::inputSize> input { H, H_d, H_n1, H_d_n1, M_n1 };
+ alignas (16) double input[] = { H, H_d, H_n1, H_d_n1, M_n1 };
// scale derivatives
input[1] *= HysteresisSTN::diffMakeup;
input[3] *= HysteresisSTN::diffMakeup;
- FloatVectorOperations::multiply (input.data(), 0.7071 / a, 4); // scale by drive param
+ FloatVectorOperations::multiply (input, 0.7071 / a, 4); // scale by drive param
return hysteresisSTN.process (input) + M_n1;
}
diff --git a/Plugin/Source/Processors/Hysteresis/HysteresisSTN.cpp b/Plugin/Source/Processors/Hysteresis/HysteresisSTN.cpp
@@ -60,10 +60,7 @@ HysteresisSTN::HysteresisSTN()
{
String modelTag = "drive_" + sat + "_" + width;
auto thisModelJson = modelsJson[modelTag.toStdString()];
- stnModels[widthModelIdx][satLoadIdx] = RTNeural::json_parser::parseJson<double> (thisModelJson);
-
- jassert (stnModels[widthModelIdx][satLoadIdx] != nullptr);
- jassert (stnModels[widthModelIdx][satLoadIdx]->layers[0]->in_size == inputSize);
+ stnModels[widthModelIdx][satLoadIdx].loadModel (thisModelJson);
satLoadIdx++;
}
};
diff --git a/Plugin/Source/Processors/Hysteresis/HysteresisSTN.h b/Plugin/Source/Processors/Hysteresis/HysteresisSTN.h
@@ -2,6 +2,7 @@
#define HYSTERESISSTN_H_INCLUDED
// #include "RTNeural/src/Model.h"
+#include "STNModel.h"
#include <JuceHeader.h>
#include <RTNeural/RTNeural.h>
@@ -22,9 +23,9 @@ public:
void prepare (double sampleRate);
void setParams (float saturation, float width);
- inline double process (const std::array<double, inputSize>& input) const noexcept
+ inline double process (const double* input) noexcept
{
- return stnModels[widthIdx][satIdx]->forward (input.data()) * sampleRateCorr;
+ return stnModels[widthIdx][satIdx].forward (input) * sampleRateCorr;
}
enum
@@ -34,7 +35,7 @@ public:
};
private:
- std::unique_ptr<RTNeural::Model<double>> stnModels[numWidthModels][numSatModels];
+ STNSpace::STNModel stnModels[numWidthModels][numSatModels];
double sampleRateCorr = 1.0;
size_t widthIdx = 0;
size_t satIdx = 0;
diff --git a/Plugin/Source/Processors/Hysteresis/STNModel.cpp b/Plugin/Source/Processors/Hysteresis/STNModel.cpp
@@ -0,0 +1,147 @@
+#include "STNModel.h"
+
+namespace STNSpace
+{
+Dense54::Dense54()
+{
+ for (size_t i = 0; i < 3; ++i)
+ ins[i] = v_type (0.0);
+
+ for (size_t i = 0; i < 16; ++i)
+ weights[i] = v_type (0.0);
+
+ for (size_t i = 0; i < 2; ++i)
+ bias[i] = v_type (0.0);
+}
+
+void Dense54::setWeights (std::vector<std::vector<double>>& w)
+{
+ for (size_t i = 0; i < out_size; ++i)
+ for (size_t j = 0; j < in_size; ++j)
+ weights[i * out_size + j / 2].set (j % 2, w[i][j]);
+}
+
+void Dense54::setBias (std::vector<double>& b)
+{
+ for (size_t i = 0; i < out_size; ++i)
+ bias[i / 2].set (i % 2, b[i]);
+}
+
+//===========================================================
+Dense44::Dense44()
+{
+ for (size_t i = 0; i < 16; ++i)
+ weights[i] = v_type (0.0);
+
+ for (size_t i = 0; i < 2; ++i)
+ bias[i] = v_type (0.0);
+}
+
+void Dense44::setWeights (std::vector<std::vector<double>>& w)
+{
+ for (size_t i = 0; i < out_size; ++i)
+ for (size_t j = 0; j < in_size; ++j)
+ weights[i * out_size + j / 2].set (j % 2, w[i][j]);
+}
+
+void Dense44::setBias (std::vector<double>& b)
+{
+ for (size_t i = 0; i < out_size; ++i)
+ bias[i / 2].set (i % 2, b[i]);
+}
+
+//===========================================================
+Dense41::Dense41()
+{
+ for (size_t i = 0; i < 2; ++i)
+ weights[i] = v_type (0.0);
+
+ bias = 0.0;
+}
+
+void Dense41::setWeights (std::vector<std::vector<double>>& w)
+{
+ for (size_t i = 0; i < out_size; ++i)
+ for (size_t j = 0; j < in_size; ++j)
+ weights[j / 2].set (j % 2, w[i][j]);
+}
+
+void Dense41::setBias (std::vector<double>& b)
+{
+ bias = b[0];
+}
+
+//===========================================================
+void STNModel::loadModel (const nlohmann::json& modelJ)
+{
+#if JUCE_LINUX
+ model = RTNeural::json_parser::parseJson<double> (modelJ);
+#else
+ auto layers = modelJ["layers"];
+
+ const auto weights_l0 = layers.at (0)["weights"];
+ {
+ // load weights
+ std::vector<std::vector<double>> denseWeights (4);
+ for (auto& w : denseWeights)
+ w.resize (5, 0.0);
+
+ auto layerWeights = weights_l0[0];
+ for (size_t i = 0; i < layerWeights.size(); ++i)
+ {
+ auto lw = layerWeights[i];
+ for (size_t j = 0; j < lw.size(); ++j)
+ denseWeights[j][i] = lw[j].get<double>();
+ }
+ dense54.setWeights (denseWeights);
+
+ // load biases
+ std::vector<double> denseBias = weights_l0[1].get<std::vector<double>>();
+ dense54.setBias (denseBias);
+ }
+
+ const auto weights_l1 = layers.at (1)["weights"];
+ {
+ // load weights
+ std::vector<std::vector<double>> denseWeights (4);
+ for (auto& w : denseWeights)
+ w.resize (4, 0.0);
+
+ auto layerWeights = weights_l1[0];
+ for (size_t i = 0; i < layerWeights.size(); ++i)
+ {
+ auto lw = layerWeights[i];
+ for (size_t j = 0; j < lw.size(); ++j)
+ denseWeights[j][i] = lw[j].get<double>();
+ }
+ dense44.setWeights (denseWeights);
+
+ // load biases
+ std::vector<double> denseBias = weights_l1[1].get<std::vector<double>>();
+ dense44.setBias (denseBias);
+ }
+
+ const auto weights_l2 = layers.at (2)["weights"];
+ {
+ // load weights
+ std::vector<std::vector<double>> denseWeights (1);
+ for (auto& w : denseWeights)
+ w.resize (4, 0.0);
+
+ auto layerWeights = weights_l2[0];
+ for (size_t i = 0; i < layerWeights.size(); ++i)
+ {
+ auto lw = layerWeights[i];
+ for (size_t j = 0; j < lw.size(); ++j)
+ denseWeights[j][i] = lw[j].get<double>();
+ }
+ dense41.setWeights (denseWeights);
+
+ // load biases
+ std::vector<double> denseBias = weights_l2[1].get<std::vector<double>>();
+ dense41.setBias (denseBias);
+ }
+#endif
+}
+
+} // namespace STNSpace
diff --git a/Plugin/Source/Processors/Hysteresis/STNModel.h b/Plugin/Source/Processors/Hysteresis/STNModel.h
@@ -0,0 +1,190 @@
+#pragma once
+
+#include <JuceHeader.h>
+#include <RTNeural/RTNeural.h>
+
+namespace STNSpace
+{
+using v_type = dsp::SIMDRegister<double>;
+constexpr auto v_size = v_type::SIMDNumElements;
+static_assert (v_size == 2, "SIMD double size is required to be 2.");
+
+class Dense54
+{
+public:
+ Dense54();
+
+ void setWeights (std::vector<std::vector<double>>& w);
+ void setBias (std::vector<double>& b);
+
+ inline void forward (const double* input) noexcept
+ {
+ ins[0] = v_type::fromRawArray (input);
+ ins[1] = v_type::fromRawArray (input + v_size);
+ ins[2] = v_type::fromRawArray (input + 2 * v_size);
+
+ outs[0] = v_type (0.0);
+ outs[1] = v_type (0.0);
+ for (size_t k = 0; k < 3; ++k)
+ {
+ // output 0
+ outs[0].set (0, outs[0].get (0) + (ins[k] * weights[0 * out_size + k]).sum());
+ // output 1
+ outs[0].set (1, outs[0].get (1) + (ins[k] * weights[1 * out_size + k]).sum());
+ // output 2
+ outs[1].set (0, outs[1].get (0) + (ins[k] * weights[2 * out_size + k]).sum());
+ // output 3
+ outs[1].set (1, outs[1].get (1) + (ins[k] * weights[3 * out_size + k]).sum());
+ }
+
+ outs[0] += bias[0];
+ outs[1] += bias[1];
+ }
+
+ v_type outs[2];
+
+private:
+ static constexpr size_t in_size = 5;
+ static constexpr size_t out_size = 4;
+
+ v_type ins[3];
+ v_type bias[2];
+ v_type weights[16];
+};
+
+//===========================================================
+class Dense44
+{
+public:
+ Dense44();
+
+ void setWeights (std::vector<std::vector<double>>& w);
+ void setBias (std::vector<double>& b);
+
+ inline void forward (const v_type* ins) noexcept
+ {
+ outs[0] = v_type (0.0);
+ outs[1] = v_type (0.0);
+ for (size_t k = 0; k < 2; ++k)
+ {
+ // output 0
+ outs[0].set (0, outs[0].get (0) + (ins[k] * weights[0 * out_size + k]).sum());
+ // output 1
+ outs[0].set (1, outs[0].get (1) + (ins[k] * weights[1 * out_size + k]).sum());
+ // output 2
+ outs[1].set (0, outs[1].get (0) + (ins[k] * weights[2 * out_size + k]).sum());
+ // output 3
+ outs[1].set (1, outs[1].get (1) + (ins[k] * weights[3 * out_size + k]).sum());
+ }
+
+ outs[0] += bias[0];
+ outs[1] += bias[1];
+ }
+
+ v_type outs[2];
+
+private:
+ static constexpr size_t in_size = 4;
+ static constexpr size_t out_size = 4;
+
+ v_type bias[2];
+ v_type weights[16];
+};
+
+//===========================================================
+class Dense41
+{
+public:
+ Dense41();
+
+ void setWeights (std::vector<std::vector<double>>& w);
+ void setBias (std::vector<double>& b);
+
+ inline double forward (const v_type* ins) const noexcept
+ {
+ double out = 0.0;
+ for (size_t k = 0; k < 2; ++k)
+ out += (ins[k] * weights[k]).sum();
+
+ return out + bias;
+ }
+
+private:
+ static constexpr size_t in_size = 4;
+ static constexpr size_t out_size = 1;
+
+ double bias;
+ v_type weights[2];
+};
+
+//===========================================================
+class Tanh
+{
+public:
+ Tanh() = default;
+
+ inline void forward (const v_type* input) noexcept
+ {
+#if USE_ACCELERATE
+ alignas (16) double x[4];
+ input[0].copyToRawArray (x);
+ input[1].copyToRawArray (&x[2]);
+
+ vvtanh (x, x, &size);
+
+ outs[0] = v_type::fromRawArray (x);
+ outs[1] = v_type::fromRawArray (x + 2);
+#elif USE_XSIMD
+ using x_type = xsimd::simd_type<double>;
+ outs[0] = v_type (xsimd::tanh (static_cast<x_type> (input[0].value)));
+ outs[1] = v_type (xsimd::tanh (static_cast<x_type> (input[1].value)));
+#else
+ // fallback
+ outs[0].set (0, std::tanh (input[0].get (0)));
+ outs[0].set (1, std::tanh (input[0].get (1)));
+ outs[1].set (0, std::tanh (input[1].get (0)));
+ outs[1].set (1, std::tanh (input[1].get (1)));
+#endif
+ }
+
+ v_type outs[2];
+
+private:
+ static constexpr int size = 4;
+};
+
+class STNModel
+{
+public:
+ STNModel() = default;
+
+ inline double forward (const double* input) noexcept
+ {
+#if JUCE_LINUX
+ return model->forward (input);
+#else
+ dense54.forward (input);
+ tanh1.forward (dense54.outs);
+ dense44.forward (tanh1.outs);
+ tanh2.forward (dense44.outs);
+ return dense41.forward (tanh2.outs);
+#endif
+ }
+
+ void loadModel (const nlohmann::json& modelJ);
+
+private:
+#if JUCE_LINUX
+ std::unique_ptr<RTNeural::Model<double>> model;
+#else
+ Dense54 dense54;
+ Tanh tanh1;
+ Dense44 dense44;
+ Tanh tanh2;
+ Dense41 dense41;
+#endif
+
+ JUCE_DECLARE_NON_COPYABLE_WITH_LEAK_DETECTOR (STNModel)
+};
+
+} // namespace STNSpace