commit a865d64efc21feb848bb536acd89818a927f2050
parent eb958a89f1de0272f627a493899ad72f4e0b5ce0
Author: jatinchowdhury18 <[email protected]>
Date: Sun, 16 May 2021 11:14:52 -0700
Use RTNeural static model for STN (#196)
* Use RTNeural static model for STN
* {Apply clang-format}
* Try to fix compilation issues on ARM
* Add <Accelerate> include
* Change aligned allocation flags on ARM Mac
* Update RTNeural submodule
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Diffstat:
7 files changed, 66 insertions(+), 14 deletions(-)
diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml
@@ -56,6 +56,7 @@ jobs:
run: cmake --build build --config Release --parallel 4
- name: Unit Tests
+ shell: bash
working-directory: ${{github.workspace}}/Plugin
run: build/ChowTapeModel --unit-tests --all
diff --git a/Plugin/Source/Headless/UnitTests/STNTest.cpp b/Plugin/Source/Headless/UnitTests/STNTest.cpp
@@ -6,7 +6,7 @@ constexpr double sampleRate = 48000.0;
constexpr double trainingSampleRate = 96000.0;
constexpr auto sampleRateCorr = trainingSampleRate / sampleRate;
-alignas (16) double input[] = { 1.0, 1.0, 1.0, 1.0, 1.0 };
+double input alignas (16)[5] = { 1.0, 1.0, 1.0, 1.0, 1.0 };
} // namespace STNTestUtils
class STNTest : public UnitTest
@@ -25,7 +25,7 @@ public:
accTest();
beginTest ("STN Performance Test");
- // perfTest(); // Keep this disabled most of the time for CI
+ perfTest(); // Usually kept disabled for CI; enabled in this change
}
void accTest()
@@ -55,7 +55,7 @@ public:
stn.setParams (0.5f, 0.5f);
auto refModel = loadModel();
- constexpr int nIter = 5000000;
+ constexpr int nIter = 400000;
double result = 0.0;
// ref timing
@@ -71,6 +71,25 @@ public:
std::cout << "Reference output: " << result << std::endl;
std::cout << "Reference duration: " << durationRef << std::endl;
+ // static STN timing
+ auto durationStatic = durationRef;
+ {
+ auto jsonStream = std::make_unique<MemoryInputStream> (BinaryData::hyst_width_50_json, BinaryData::hyst_width_50_jsonSize, false);
+ auto modelsJson = nlohmann::json::parse (jsonStream->readEntireStreamAsString().toStdString());
+ auto thisModelJson = modelsJson["drive_50_50"];
+ RTNeural::ModelT<double, 5, 1, RTNeural::DenseT<double, 5, 4>, RTNeural::TanhActivationT<double, 4>, RTNeural::DenseT<double, 4, 4>, RTNeural::TanhActivationT<double, 4>, RTNeural::DenseT<double, 4, 1>> staticModel;
+ staticModel.parseJson (thisModelJson);
+
+ Time time;
+ auto start = time.getMillisecondCounterHiRes();
+ for (int i = 0; i < nIter; ++i)
+ result = staticModel.forward (input) * sampleRateCorr;
+ auto end = time.getMillisecondCounterHiRes();
+ durationStatic = (end - start) / 1000.0;
+ }
+ std::cout << "Static output: " << result << std::endl;
+ std::cout << "Static duration: " << durationStatic << std::endl;
+
// plugin timing
auto durationReal = durationRef;
{
diff --git a/Plugin/Source/Processors/Hysteresis/HysteresisProcessing.cpp b/Plugin/Source/Processors/Hysteresis/HysteresisProcessing.cpp
@@ -210,7 +210,7 @@ inline double HysteresisProcessing::NR (double H, double H_d) noexcept
inline double HysteresisProcessing::STN (double H, double H_d) noexcept
{
- alignas (16) double input[] = { H, H_d, H_n1, H_d_n1, M_n1 };
+ double input alignas (16)[5] = { H, H_d, H_n1, H_d_n1, M_n1 };
// scale derivatives
input[1] *= HysteresisSTN::diffMakeup;
diff --git a/Plugin/Source/Processors/Hysteresis/STNModel.cpp b/Plugin/Source/Processors/Hysteresis/STNModel.cpp
@@ -74,7 +74,9 @@ void Dense41::setBias (std::vector<double>& b)
//===========================================================
void STNModel::loadModel (const nlohmann::json& modelJ)
{
-#if JUCE_LINUX
+#if USE_RTNEURAL_STATIC
+ model.parseJson (modelJ);
+#elif USE_RTNEURAL_POLY
model = RTNeural::json_parser::parseJson<double> (modelJ);
#else
auto layers = modelJ["layers"];
diff --git a/Plugin/Source/Processors/Hysteresis/STNModel.h b/Plugin/Source/Processors/Hysteresis/STNModel.h
@@ -3,6 +3,18 @@
#include <JuceHeader.h>
#include <RTNeural/RTNeural.h>
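+// Include <Accelerate> on Apple platforms so we can use vvtanh. Point and Component are temporarily #defined to dummy names around the include (presumably to avoid clashes with the JUCE classes of the same names).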
+#if JUCE_MAC || JUCE_IOS
+#define Point CarbonDummyPointName
+#define Component CarbonDummyCompName
+#include <Accelerate/Accelerate.h>
+#undef Point
+#undef Component
+#endif
+
+#define USE_RTNEURAL_POLY 0
+#define USE_RTNEURAL_STATIC 1
+
namespace STNSpace
{
using v_type = dsp::SIMDRegister<double>;
@@ -125,7 +137,7 @@ public:
inline void forward (const v_type* input) noexcept
{
-#if USE_ACCELERATE
+#if defined(_M_ARM64) || defined(__arm64__) || defined(__aarch64__)
alignas (16) double x[4];
input[0].copyToRawArray (x);
input[1].copyToRawArray (&x[2]);
@@ -153,14 +165,31 @@ private:
static constexpr int size = 4;
};
+static bool printed = false;
+
class STNModel
{
public:
- STNModel() = default;
+ STNModel() // Prints to stdout which STN implementation was selected by the USE_RTNEURAL_* macros.
+ {
+ if (! printed) // `printed` is the static flag declared just before this class; skip the message once it is set
+ {
+#if USE_RTNEURAL_STATIC
+ std::cout << "Using RTNeural ModelT STN" << std::endl;
+#elif USE_RTNEURAL_POLY
+ std::cout << "Using RTNeural polymorphic STN" << std::endl;
+#else
+ std::cout << "Using hand-coded STN" << std::endl;
+#endif
+ printed = true; // NOTE(review): `printed` is `static` in a header, so each translation unit presumably gets its own copy and the message may appear once per TU -- confirm this is intended
+ }
+ }
inline double forward (const double* input) noexcept
{
-#if JUCE_LINUX
+#if USE_RTNEURAL_STATIC
+ return model.forward (input);
+#elif USE_RTNEURAL_POLY
return model->forward (input);
#else
dense54.forward (input);
@@ -174,7 +203,9 @@ public:
void loadModel (const nlohmann::json& modelJ);
private:
-#if JUCE_LINUX
+#if USE_RTNEURAL_STATIC
+ RTNeural::ModelT<double, 5, 1, RTNeural::DenseT<double, 5, 4>, RTNeural::TanhActivationT<double, 4>, RTNeural::DenseT<double, 4, 4>, RTNeural::TanhActivationT<double, 4>, RTNeural::DenseT<double, 4, 1>> model;
+#elif USE_RTNEURAL_POLY
std::unique_ptr<RTNeural::Model<double>> model;
#else
Dense54 dense54;
diff --git a/Plugin/modules/CMakeLists.txt b/Plugin/modules/CMakeLists.txt
@@ -6,12 +6,9 @@ message(STATUS "VERSION for JUCE: ${juce_version}")
juce_add_modules(foleys_gui_magic)
juce_add_modules(chowdsp_utils)
-# Using RTNeural with XSimd or Accelerate backend
-if(APPLE)
-set(RTNEURAL_ACCELERATE ON CACHE BOOL "Use RTNeural with Accelerate backend" FORCE)
-else()
+# Using RTNeural with XSimd backend
set(RTNEURAL_XSIMD ON CACHE BOOL "Use RTNeural with XSimd backend" FORCE)
-endif()
+add_definitions(-DUSE_XSIMD=1)
add_subdirectory(RTNeural)
include(cmake/WarningFlags.cmake)
diff --git a/Plugin/modules/cmake/WarningFlags.cmake b/Plugin/modules/cmake/WarningFlags.cmake
@@ -26,6 +26,8 @@ elseif((CMAKE_CXX_COMPILER_ID STREQUAL "Clang") OR (CMAKE_CXX_COMPILER_ID STREQU
# For XSIMD
-Wno-cast-align -Wno-shadow -Wno-implicit-int-conversion
-Wno-zero-as-null-pointer-constant
+ # For aligned allocation on ARM
+ -faligned-allocation
)
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
target_compile_options(warning_flags INTERFACE