Use RTNeural static model for STN (#196) - AnalogTapeModel - Physical modelling signal processing for analog tape recording

commit a865d64efc21feb848bb536acd89818a927f2050
parent eb958a89f1de0272f627a493899ad72f4e0b5ce0
Author: jatinchowdhury18 <[email protected]>
Date:   Sun, 16 May 2021 11:14:52 -0700

Use RTNeural static model for STN (#196)

* Use RTNeural static model for STN

* {Apply clang-format}

* Try to fix compilation issues on ARM

* Add <Accelerate> include

* Change aligned allocation flags on ARM Mac

* Update RTNeural submodule

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Diffstat:
M .github/workflows/cmake.yml  | 1 +
M Plugin/Source/Headless/UnitTests/STNTest.cpp  | 25 ++++++++++++++++++++++---
M Plugin/Source/Processors/Hysteresis/HysteresisProcessing.cpp  | 2 +-
M Plugin/Source/Processors/Hysteresis/STNModel.cpp  | 4 +++-
M Plugin/Source/Processors/Hysteresis/STNModel.h  | 39 +++++++++++++++++++++++++++++++++++----
M Plugin/modules/CMakeLists.txt  | 7 ++-----
M Plugin/modules/cmake/WarningFlags.cmake  | 2 ++

7 files changed, 66 insertions(+), 14 deletions(-)
diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml
@@ -56,6 +56,7 @@ jobs:
       run: cmake --build build --config Release --parallel 4
 
     - name: Unit Tests
+      shell: bash
       working-directory: ${{github.workspace}}/Plugin
       run: build/ChowTapeModel --unit-tests --all
 
diff --git a/Plugin/Source/Headless/UnitTests/STNTest.cpp b/Plugin/Source/Headless/UnitTests/STNTest.cpp
@@ -6,7 +6,7 @@ constexpr double sampleRate = 48000.0;
 constexpr double trainingSampleRate = 96000.0;
 constexpr auto sampleRateCorr = trainingSampleRate / sampleRate;
 
-alignas (16) double input[] = { 1.0, 1.0, 1.0, 1.0, 1.0 };
+double input alignas (16)[5] = { 1.0, 1.0, 1.0, 1.0, 1.0 };
 } // namespace STNTestUtils
 
 class STNTest : public UnitTest
@@ -25,7 +25,7 @@ public:
         accTest();
 
         beginTest ("STN Performance Test");
-        // perfTest(); // Keep this disabled most of the time for CI
+        perfTest(); // Keep this disabled most of the time for CI
     }
 
     void accTest()
@@ -55,7 +55,7 @@ public:
         stn.setParams (0.5f, 0.5f);
         auto refModel = loadModel();
 
-        constexpr int nIter = 5000000;
+        constexpr int nIter = 400000;
         double result = 0.0;
 
         // ref timing
@@ -71,6 +71,25 @@ public:
         std::cout << "Reference output: " << result << std::endl;
         std::cout << "Reference duration: " << durationRef << std::endl;
 
+        // static STN timing
+        auto durationStatic = durationRef;
+        {
+            auto jsonStream = std::make_unique<MemoryInputStream> (BinaryData::hyst_width_50_json, BinaryData::hyst_width_50_jsonSize, false);
+            auto modelsJson = nlohmann::json::parse (jsonStream->readEntireStreamAsString().toStdString());
+            auto thisModelJson = modelsJson["drive_50_50"];
+            RTNeural::ModelT<double, 5, 1, RTNeural::DenseT<double, 5, 4>, RTNeural::TanhActivationT<double, 4>, RTNeural::DenseT<double, 4, 4>, RTNeural::TanhActivationT<double, 4>, RTNeural::DenseT<double, 4, 1>> staticModel;
+            staticModel.parseJson (thisModelJson);
+
+            Time time;
+            auto start = time.getMillisecondCounterHiRes();
+            for (int i = 0; i < nIter; ++i)
+                result = staticModel.forward (input) * sampleRateCorr;
+            auto end = time.getMillisecondCounterHiRes();
+            durationStatic = (end - start) / 1000.0;
+        }
+        std::cout << "Static output: " << result << std::endl;
+        std::cout << "Static duration: " << durationStatic << std::endl;
+
         // plugin timing
         auto durationReal = durationRef;
         {
diff --git a/Plugin/Source/Processors/Hysteresis/HysteresisProcessing.cpp b/Plugin/Source/Processors/Hysteresis/HysteresisProcessing.cpp
@@ -210,7 +210,7 @@ inline double HysteresisProcessing::NR (double H, double H_d) noexcept
 
 inline double HysteresisProcessing::STN (double H, double H_d) noexcept
 {
-    alignas (16) double input[] = { H, H_d, H_n1, H_d_n1, M_n1 };
+    double input alignas (16)[5] = { H, H_d, H_n1, H_d_n1, M_n1 };
 
     // scale derivatives
     input[1] *= HysteresisSTN::diffMakeup;
diff --git a/Plugin/Source/Processors/Hysteresis/STNModel.cpp b/Plugin/Source/Processors/Hysteresis/STNModel.cpp
@@ -74,7 +74,9 @@ void Dense41::setBias (std::vector<double>& b)
 //===========================================================
 void STNModel::loadModel (const nlohmann::json& modelJ)
 {
-#if JUCE_LINUX
+#if USE_RTNEURAL_STATIC
+    model.parseJson (modelJ);
+#elif USE_RTNEURAL_POLY
     model = RTNeural::json_parser::parseJson<double> (modelJ);
 #else
     auto layers = modelJ["layers"];
diff --git a/Plugin/Source/Processors/Hysteresis/STNModel.h b/Plugin/Source/Processors/Hysteresis/STNModel.h
@@ -3,6 +3,18 @@
 #include <JuceHeader.h>
 #include <RTNeural/RTNeural.h>
 
+// include <Accelerate> on Apple devices so we can use vvtanh
+#if JUCE_MAC || JUCE_IOS
+#define Point CarbonDummyPointName
+#define Component CarbonDummyCompName
+#include <Accelerate/Accelerate.h>
+#undef Point
+#undef Component
+#endif
+
+#define USE_RTNEURAL_POLY 0
+#define USE_RTNEURAL_STATIC 1
+
 namespace STNSpace
 {
 using v_type = dsp::SIMDRegister<double>;
@@ -125,7 +137,7 @@ public:
 
     inline void forward (const v_type* input) noexcept
     {
-#if USE_ACCELERATE
+#if defined(_M_ARM64) || defined(__arm64__) || defined(__aarch64__)
         alignas (16) double x[4];
         input[0].copyToRawArray (x);
         input[1].copyToRawArray (&x[2]);
@@ -153,14 +165,31 @@ private:
     static constexpr int size = 4;
 };
 
+static bool printed = false;
+
 class STNModel
 {
 public:
-    STNModel() = default;
+    STNModel()
+    {
+        if (! printed)
+        {
+#if USE_RTNEURAL_STATIC
+            std::cout << "Using RTNeural ModelT STN" << std::endl;
+#elif USE_RTNEURAL_POLY
+            std::cout << "Using RTNeural polymorphic STN" << std::endl;
+#else
+            std::cout << "Using hand-coded STN" << std::endl;
+#endif
+            printed = true;
+        }
+    }
 
     inline double forward (const double* input) noexcept
     {
-#if JUCE_LINUX
+#if USE_RTNEURAL_STATIC
+        return model.forward (input);
+#elif USE_RTNEURAL_POLY
         return model->forward (input);
 #else
         dense54.forward (input);
@@ -174,7 +203,9 @@ public:
     void loadModel (const nlohmann::json& modelJ);
 
 private:
-#if JUCE_LINUX
+#if USE_RTNEURAL_STATIC
+    RTNeural::ModelT<double, 5, 1, RTNeural::DenseT<double, 5, 4>, RTNeural::TanhActivationT<double, 4>, RTNeural::DenseT<double, 4, 4>, RTNeural::TanhActivationT<double, 4>, RTNeural::DenseT<double, 4, 1>> model;
+#elif USE_RTNEURAL_POLY
     std::unique_ptr<RTNeural::Model<double>> model;
 #else
     Dense54 dense54;
diff --git a/Plugin/modules/CMakeLists.txt b/Plugin/modules/CMakeLists.txt
@@ -6,12 +6,9 @@ message(STATUS "VERSION for JUCE: ${juce_version}")
 juce_add_modules(foleys_gui_magic)
 juce_add_modules(chowdsp_utils)
 
-# Using RTNeural with XSimd or Accelerate backend
-if(APPLE)
-set(RTNEURAL_ACCELERATE ON CACHE BOOL "Use RTNeural with Accelerate backend" FORCE)
-else()
+# Using RTNeural with XSimd backend
 set(RTNEURAL_XSIMD ON CACHE BOOL "Use RTNeural with XSimd backend" FORCE)
-endif()
+add_definitions(-DUSE_XSIMD=1)
 add_subdirectory(RTNeural)
 
 include(cmake/WarningFlags.cmake)
diff --git a/Plugin/modules/cmake/WarningFlags.cmake b/Plugin/modules/cmake/WarningFlags.cmake
@@ -26,6 +26,8 @@ elseif((CMAKE_CXX_COMPILER_ID STREQUAL "Clang") OR (CMAKE_CXX_COMPILER_ID STREQU
         # For XSIMD
         -Wno-cast-align -Wno-shadow -Wno-implicit-int-conversion
         -Wno-zero-as-null-pointer-constant
+        # For aligned allocation on ARM
+        -faligned-allocation
     )
 elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
     target_compile_options(warning_flags INTERFACE

	AnalogTapeModel - Physical modelling signal processing for analog tape recording
	Log | Files | Refs | Submodules | README | LICENSE

M .github/workflows/cmake.yml  | 1 +
M Plugin/Source/Headless/UnitTests/STNTest.cpp  | 25 ++++++++++++++++++++++---
M Plugin/Source/Processors/Hysteresis/HysteresisProcessing.cpp  | 2 +-
M Plugin/Source/Processors/Hysteresis/STNModel.cpp  | 4 +++-
M Plugin/Source/Processors/Hysteresis/STNModel.h  | 39 +++++++++++++++++++++++++++++++++++----
M Plugin/modules/CMakeLists.txt  | 7 ++-----
M Plugin/modules/cmake/WarningFlags.cmake  | 2 ++