kfr

Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)
Log | Files | Refs | README

commit a7c7a5a715441b16fef0c1eddfaa4da88a654dd1
parent a38bef7ed41b72f71eb08848c3203e25e318a11c
Author: [email protected] <[email protected]>
Date:   Tue, 19 Nov 2019 17:16:07 +0000

Reading MP3 (using dr_libs)

Diffstat:
M CMakeLists.txt | 3 +--
M format-all.py | 2 +-
M include/kfr/dft/impl/fft-impl.hpp | 16 ++++++++--------
M include/kfr/dft/impl/ft.hpp | 38 +++++++++++++++++++-------------------
M include/kfr/io/audiofile.hpp | 201 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------
M include/kfr/io/dr/README.txt | 5 +++--
M include/kfr/io/dr/dr_flac.h | 11909 +++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------
A include/kfr/io/dr/dr_mp3.h | 4203 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M include/kfr/io/dr/dr_wav.h | 5336 +++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------
M include/kfr/io/impl/audiofile-impl.cpp | 9 +++++++--
M include/kfr/simd/impl/backend_generic.hpp | 22 ++++++++++++++++++----
M include/kfr/simd/shuffle.hpp | 4 ++--
M tests/CMakeLists.txt | 9 ++++++++-
M tests/asm_test.cpp | 6 ++++++
M tests/io_test.cpp | 24 ++++++++++++++++++++----
A tests/test-audio/sine.mp3 | 0
16 files changed, 16255 insertions(+), 5532 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt @@ -132,7 +132,7 @@ if (NOT IOS) endif () endif () if (MSVC) - target_compile_options(kfr INTERFACE -bigobj) + target_compile_options(kfr INTERFACE -bigobj -EHsc) else () target_link_libraries(kfr INTERFACE ${STD_LIB} ${PTHREAD_LIB} m) endif () @@ -190,7 +190,6 @@ endif() add_library(kfr_io ${KFR_IO_SRC}) target_link_libraries(kfr_io kfr) -target_compile_definitions(kfr_io PUBLIC KFR_ENABLE_FLAC=1) install(TARGETS kfr kfr_io ARCHIVE DESTINATION lib LIBRARY DESTINATION lib) diff --git a/format-all.py b/format-all.py @@ -10,7 +10,7 @@ import glob path = os.path.dirname(os.path.realpath(__file__)) masks = ['*.hpp', '*.h', '*.cpp', '*.c', '*.cxx'] -ignore = ['build/*', 'build-*', 'cmake-*', '.*'] +ignore = ['build/*', 'build-*', 'cmake-*', '.*', 'include/kfr/io/dr'] filenames = [] for root, dirnames, files in os.walk(path, path): diff --git a/include/kfr/dft/impl/fft-impl.hpp b/include/kfr/dft/impl/fft-impl.hpp @@ -113,8 +113,8 @@ KFR_INTRINSIC cvec<T, width> radix4_apply_twiddle(csize_t<width>, ctrue_t /*spli const cvec<T, width>& w, const cvec<T, width>& tw) { vec<T, width> re1, im1, twre, twim; - split(w, re1, im1); - split(tw, twre, twim); + split<T, 2 * width>(w, re1, im1); + split<T, 2 * width>(tw, twre, twim); const vec<T, width> b1re = re1 * twre; const vec<T, width> b1im = im1 * twre; @@ -136,10 +136,10 @@ KFR_INTRINSIC void radix4_body(size_t N, csize_t<width>, ctrue_t, cbool_t<splito vec<T, width> re0, im0, re1, im1, re2, im2, re3, im3; - split(cread_split<width, aligned, read_split>(in + N4 * 0), re0, im0); - split(cread_split<width, aligned, read_split>(in + N4 * 1), re1, im1); - split(cread_split<width, aligned, read_split>(in + N4 * 2), re2, im2); - split(cread_split<width, aligned, read_split>(in + N4 * 3), re3, im3); + split<T, 2 * width>(cread_split<width, aligned, read_split>(in + N4 * 0), re0, im0); + split<T, 2 * width>(cread_split<width, aligned, read_split>(in + N4 * 1), re1, im1); + split<T, 2 * 
width>(cread_split<width, aligned, read_split>(in + N4 * 2), re2, im2); + split<T, 2 * width>(cread_split<width, aligned, read_split>(in + N4 * 3), re3, im3); const vec<T, width> sum02re = re0 + re2; const vec<T, width> sum02im = im0 + im2; @@ -586,7 +586,7 @@ struct fft_specialization<T, 1> : dft_stage<T> KFR_MEM_INTRINSIC void do_execute(complex<T>* out, const complex<T>* in, u8*) { cvec<T, 1> a0, a1; - split(cread<2, aligned>(in), a0, a1); + split<T, 4>(cread<2, aligned>(in), a0, a1); cwrite<2, aligned>(out, concat(a0 + a1, a0 - a1)); } }; @@ -602,7 +602,7 @@ struct fft_specialization<T, 2> : dft_stage<T> KFR_MEM_INTRINSIC void do_execute(complex<T>* out, const complex<T>* in, u8*) { cvec<T, 1> a0, a1, a2, a3; - split(cread<4>(in), a0, a1, a2, a3); + split<T, 8>(cread<4>(in), a0, a1, a2, a3); butterfly(cbool_t<inverse>(), a0, a1, a2, a3, a0, a1, a2, a3); cwrite<4>(out, concat(concat(a0, a1), concat(a2, a3))); } diff --git a/include/kfr/dft/impl/ft.hpp b/include/kfr/dft/impl/ft.hpp @@ -222,7 +222,7 @@ inline void cwrite_split<8, false, true, f32>(complex<f32>* dest, const cvec<f32 concat(shuffle<0, 8 + 0, 1, 8 + 1>(low(x), high(x)), shuffle<2, 8 + 2, 3, 8 + 3>(low(x), high(x))); cvec<f32, 2> a, b, c, d; - split(xx, a, b, c, d); + split<f32, 16>(xx, a, b, c, d); cwrite<2>(dest, a); cwrite<2>(dest + 4, b); cwrite<2>(dest + 2, c); @@ -235,7 +235,7 @@ inline void cwrite_split<8, true, true, f32>(complex<f32>* dest, const cvec<f32, concat(shuffle<0, 8 + 0, 1, 8 + 1>(low(x), high(x)), shuffle<2, 8 + 2, 3, 8 + 3>(low(x), high(x))); cvec<f32, 2> a, b, c, d; - split(xx, a, b, c, d); + split<f32, 16>(xx, a, b, c, d); cwrite<2, true>(dest + 0, a); cwrite<2, true>(dest + 4, b); cwrite<2, true>(dest + 2, c); @@ -415,10 +415,10 @@ KFR_INTRINSIC void transpose4(cvec<T, 16>& a, cvec<T, 16>& b, cvec<T, 16>& c, cv cvec<T, 4> c0, c1, c2, c3; cvec<T, 4> d0, d1, d2, d3; - split(a, a0, a1, a2, a3); - split(b, b0, b1, b2, b3); - split(c, c0, c1, c2, c3); - split(d, d0, d1, d2, d3); + 
split<T, 32>(a, a0, a1, a2, a3); + split<T, 32>(b, b0, b1, b2, b3); + split<T, 32>(c, c0, c1, c2, c3); + split<T, 32>(d, d0, d1, d2, d3); a = concat(a0, b0, c0, d0); b = concat(a1, b1, c1, d1); @@ -434,10 +434,10 @@ KFR_INTRINSIC void transpose4(cvec<T, 16>& a, cvec<T, 16>& b, cvec<T, 16>& c, cv cvec<T, 4> c0, c1, c2, c3; cvec<T, 4> d0, d1, d2, d3; - split(a, a0, a1, a2, a3); - split(b, b0, b1, b2, b3); - split(c, c0, c1, c2, c3); - split(d, d0, d1, d2, d3); + split<T, 32>(a, a0, a1, a2, a3); + split<T, 32>(b, b0, b1, b2, b3); + split<T, 32>(c, c0, c1, c2, c3); + split<T, 32>(d, d0, d1, d2, d3); aa = concat(a0, b0, c0, d0); bb = concat(a1, b1, c1, d1); @@ -662,7 +662,7 @@ KFR_INTRINSIC void butterfly8(cvec<T, 2>& a01, cvec<T, 2>& a23, cvec<T, 2>& a45, cvec<T, 8> b01234567 = concat(b01, b23, b45, b67); cvec<T, 8> b02461357 = concat(even<2>(b01234567), odd<2>(b01234567)); - split(b02461357, b02, b46, b13, b57); + split<T, 16>(b02461357, b02, b46, b13, b57); b13 = cmul(b13, fixed_twiddle<T, 2, 8, 0, 1, inverse>()); b57 = cmul(b57, fixed_twiddle<T, 2, 8, 2, 1, inverse>()); @@ -676,7 +676,7 @@ template <bool inverse = false, typename T> KFR_INTRINSIC void butterfly8(cvec<T, 8>& v8) { cvec<T, 2> w0, w1, w2, w3; - split(v8, w0, w1, w2, w3); + split<T, 16>(v8, w0, w1, w2, w3); butterfly8<inverse>(w0, w1, w2, w3); v8 = concat(w0, w1, w2, w3); } @@ -685,7 +685,7 @@ template <bool inverse = false, typename T> KFR_INTRINSIC void butterfly32(cvec<T, 32>& v32) { cvec<T, 4> w0, w1, w2, w3, w4, w5, w6, w7; - split(v32, w0, w1, w2, w3, w4, w5, w6, w7); + split<T, 64>(v32, w0, w1, w2, w3, w4, w5, w6, w7); butterfly8<4, inverse>(w0, w1, w2, w3, w4, w5, w6, w7); w1 = cmul(w1, fixed_twiddle<T, 4, 32, 0, 1, inverse>()); @@ -710,7 +710,7 @@ KFR_INTRINSIC void butterfly4(cvec<T, N * 4>& a0123) cvec<T, N> a1; cvec<T, N> a2; cvec<T, N> a3; - split(a0123, a0, a1, a2, a3); + split<T, N * 4 * 2>(a0123, a0, a1, a2, a3); butterfly4<N, inverse>(cfalse, a0, a1, a2, a3, a0, a1, a2, a3); a0123 = 
concat(a0, a1, a2, a3); } @@ -731,8 +731,8 @@ KFR_INTRINSIC void apply_twiddle(const cvec<T, N>& a1, const cvec<T, N>& tw1, cv if (split_format) { vec<T, N> re1, im1, tw1re, tw1im; - split(a1, re1, im1); - split(tw1, tw1re, tw1im); + split<T, 2 * N>(a1, re1, im1); + split<T, 2 * N>(tw1, tw1re, tw1im); vec<T, N> b1re = re1 * tw1re; vec<T, N> b1im = im1 * tw1re; if (inverse) @@ -821,7 +821,7 @@ KFR_INTRINSIC void apply_twiddles4(cvec<T, N * 4>& __restrict a0123) cvec<T, N> a1; cvec<T, N> a2; cvec<T, N> a3; - split(a0123, a0, a1, a2, a3); + split<T, 2 * N * 4>(a0123, a0, a1, a2, a3); cvec<T, N> tw1 = fixed_twiddle<T, N, 64, n2 * nnstep * 1, nnstep * 1, inverse>(), tw2 = fixed_twiddle<T, N, 64, n2 * nnstep * 2, nnstep * 2, inverse>(), @@ -1472,7 +1472,7 @@ KFR_INTRINSIC void cread_transposed(cbool_t<true>, const complex<f32>* ptr, cvec cvec<f32, 4> w3; cvec<f32, 16> v16 = concat(cread<4>(ptr), cread<4>(ptr + 3), cread<4>(ptr + 6), cread<4>(ptr + 9)); v16 = digitreverse4<2>(v16); - split(v16, w0, w1, w2, w3); + split<f32, 32>(v16, w0, w1, w2, w3); } KFR_INTRINSIC void cread_transposed(cbool_t<true>, const complex<f32>* ptr, cvec<f32, 4>& w0, @@ -1480,7 +1480,7 @@ KFR_INTRINSIC void cread_transposed(cbool_t<true>, const complex<f32>* ptr, cvec { cvec<f32, 16> v16 = concat(cread<4>(ptr), cread<4>(ptr + 5), cread<4>(ptr + 10), cread<4>(ptr + 15)); v16 = digitreverse4<2>(v16); - split(v16, w0, w1, w2, w3); + split<f32, 32>(v16, w0, w1, w2, w3); w4 = cgather<4, 5>(ptr + 4); } diff --git a/include/kfr/io/audiofile.hpp b/include/kfr/io/audiofile.hpp @@ -32,23 +32,21 @@ #include "../simd/vec.hpp" #include "file.hpp" -#ifndef KFR_ENABLE_WAV -#define KFR_ENABLE_WAV 1 -#endif -#ifndef KFR_ENABLE_FLAC -#define KFR_ENABLE_FLAC 0 -#endif - -#if KFR_ENABLE_WAV +#ifndef KFR_DISABLE_WAV #define DR_WAV_NO_STDIO #define DR_WAV_NO_CONVERSION_API #include "dr/dr_wav.h" #endif -#if KFR_ENABLE_FLAC +#ifndef KFR_DISABLE_FLAC #define DR_FLAC_NO_STDIO #define DR_FLAC_NO_CONVERSION_API #include 
"dr/dr_flac.h" #endif +#ifndef KFR_DISABLE_MP3 +#define DR_MP3_NO_STDIO +#define DR_MP3_NO_CONVERSION_API +#include "dr/dr_mp3.h" +#endif namespace kfr { @@ -113,7 +111,7 @@ struct audio_writer : public abstract_writer<T> namespace internal_generic { -#if KFR_ENABLE_WAV +#ifndef KFR_DISABLE_WAV static inline size_t drwav_writer_write_proc(abstract_writer<void>* file, const void* pData, size_t bytesToWrite) { @@ -134,7 +132,7 @@ static inline drwav_bool32 drwav_reader_seek_proc(abstract_reader<void>* file, i return file->seek(offset, origin == drwav_seek_origin_start ? seek_origin::begin : seek_origin::current); } #endif -#if KFR_ENABLE_FLAC +#ifndef KFR_DISABLE_FLAC static inline size_t drflac_reader_read_proc(abstract_reader<void>* file, void* pBufferOut, size_t bytesToRead) { @@ -146,17 +144,28 @@ static inline drflac_bool32 drflac_reader_seek_proc(abstract_reader<void>* file, return file->seek(offset, origin == drflac_seek_origin_start ? seek_origin::begin : seek_origin::current); } #endif +#ifndef KFR_DISABLE_MP3 +static inline size_t drmp3_reader_read_proc(abstract_reader<void>* file, void* pBufferOut, size_t bytesToRead) +{ + return file->read(pBufferOut, bytesToRead); +} +static inline drmp3_bool32 drmp3_reader_seek_proc(abstract_reader<void>* file, int offset, + drmp3_seek_origin origin) +{ + return file->seek(offset, origin == drmp3_seek_origin_start ? 
seek_origin::begin : seek_origin::current); +} +#endif } // namespace internal_generic -#if KFR_ENABLE_WAV +#ifndef KFR_DISABLE_WAV /// @brief WAV format writer template <typename T> struct audio_writer_wav : audio_writer<T> { /// @brief Constructs WAV writer using target writer and format audio_writer_wav(std::shared_ptr<abstract_writer<>>&& writer, const audio_format& fmt) - : writer(std::move(writer)), f(nullptr), fmt(fmt) + : writer(std::move(writer)), fmt(fmt) { drwav_data_format wav_fmt; wav_fmt.channels = static_cast<drwav_uint32>(fmt.channels); @@ -165,41 +174,47 @@ struct audio_writer_wav : audio_writer<T> fmt.type >= audio_sample_type::first_float ? DR_WAVE_FORMAT_IEEE_FLOAT : DR_WAVE_FORMAT_PCM; wav_fmt.bitsPerSample = static_cast<drwav_uint32>(audio_sample_bit_depth(fmt.type)); wav_fmt.container = fmt.use_w64 ? drwav_container_w64 : drwav_container_riff; - f = drwav_open_write(&wav_fmt, (drwav_write_proc)&internal_generic::drwav_writer_write_proc, - (drwav_seek_proc)&internal_generic::drwav_writer_seek_proc, this->writer.get()); + closed = !drwav_init_write(&f, &wav_fmt, (drwav_write_proc)&internal_generic::drwav_writer_write_proc, + (drwav_seek_proc)&internal_generic::drwav_writer_seek_proc, + this->writer.get(), nullptr); } ~audio_writer_wav() { close(); } using audio_writer<T>::write; /// @brief Write data to underlying binary writer + /// data is PCM samples in interleaved format + /// size is the number of samples (PCM frames * channels) size_t write(const T* data, size_t size) override { - if (!f) + if (closed) return 0; if (fmt.type == audio_sample_type::unknown) return 0; if (fmt.type == audio_sample_traits<T>::type) { - const size_t sz = drwav_write(f, size, data); - fmt.length += sz / fmt.channels; - return sz; + const size_t sz = drwav_write_pcm_frames_le(&f, size, data); + fmt.length += sz; + return sz * fmt.channels; } else { univector<uint8_t> native(size * audio_sample_sizeof(fmt.type)); convert(native.data(), fmt.type, data, size); - 
const size_t sz = drwav_write(f, size, native.data()); - fmt.length += sz / fmt.channels; - return sz; + const size_t sz = drwav_write_pcm_frames_le(&f, size / fmt.channels, native.data()); + fmt.length += sz; + return sz * fmt.channels; } } void close() override { - drwav_close(f); - f = nullptr; - writer.reset(); + if (!closed) + { + drwav_uninit(&f); + writer.reset(); + closed = true; + } } const audio_format_and_length& format() const override { return fmt; } @@ -210,8 +225,9 @@ struct audio_writer_wav : audio_writer<T> private: std::shared_ptr<abstract_writer<>> writer; - drwav* f; + drwav f; audio_format_and_length fmt; + bool closed = false; }; /// @brief WAV format reader @@ -223,15 +239,15 @@ struct audio_reader_wav : audio_reader<T> /// @brief Constructs WAV reader audio_reader_wav(std::shared_ptr<abstract_reader<>>&& reader) : reader(std::move(reader)) { - f = drwav_open((drwav_read_proc)&internal_generic::drwav_reader_read_proc, - (drwav_seek_proc)&internal_generic::drwav_reader_seek_proc, this->reader.get()); - fmt.channels = f->channels; - fmt.samplerate = f->sampleRate; - fmt.length = f->totalSampleCount / fmt.channels; - switch (f->translatedFormatTag) + drwav_init(&f, (drwav_read_proc)&internal_generic::drwav_reader_read_proc, + (drwav_seek_proc)&internal_generic::drwav_reader_seek_proc, this->reader.get(), nullptr); + fmt.channels = f.channels; + fmt.samplerate = f.sampleRate; + fmt.length = f.totalPCMFrameCount; + switch (f.translatedFormatTag) { case DR_WAVE_FORMAT_IEEE_FLOAT: - switch (f->bitsPerSample) + switch (f.bitsPerSample) { case 32: fmt.type = audio_sample_type::f32; @@ -245,7 +261,7 @@ struct audio_reader_wav : audio_reader<T> } break; case DR_WAVE_FORMAT_PCM: - switch (f->bitsPerSample) + switch (f.bitsPerSample) { case 8: fmt.type = audio_sample_type::i8; @@ -272,7 +288,7 @@ struct audio_reader_wav : audio_reader<T> break; } } - ~audio_reader_wav() { drwav_close(f); } + ~audio_reader_wav() { drwav_uninit(&f); } /// @brief Returns 
audio format description const audio_format_and_length& format() const override { return fmt; } @@ -284,14 +300,17 @@ struct audio_reader_wav : audio_reader<T> return 0; if (fmt.type == audio_sample_traits<T>::type) { - return drwav_read(f, size, data); + const size_t sz = drwav_read_pcm_frames(&f, size / fmt.channels, data); + position += sz; + return sz * fmt.channels; } else { univector<uint8_t> native(size * audio_sample_sizeof(fmt.type)); - const size_t sz = drwav_read(f, size, native.data()); - convert(data, native.data(), fmt.type, sz); - return sz; + const size_t sz = drwav_read_pcm_frames(&f, size / fmt.channels, native.data()); + position += sz; + convert(data, native.data(), fmt.type, sz * fmt.channels); + return sz * fmt.channels; } } @@ -304,11 +323,11 @@ struct audio_reader_wav : audio_reader<T> switch (origin) { case seek_origin::current: - return drwav_seek_to_sample(f, this->position + offset); + return drwav_seek_to_pcm_frame(&f, this->position + offset); case seek_origin::begin: - return drwav_seek_to_sample(f, offset); + return drwav_seek_to_pcm_frame(&f, offset); case seek_origin::end: - return drwav_seek_to_sample(f, fmt.length + offset); + return drwav_seek_to_pcm_frame(&f, fmt.length + offset); default: return false; } @@ -316,13 +335,13 @@ struct audio_reader_wav : audio_reader<T> private: std::shared_ptr<abstract_reader<>> reader; - drwav* f; + drwav f; audio_format_and_length fmt; imax position = 0; }; #endif -#if KFR_ENABLE_FLAC +#ifndef KFR_DISABLE_FLAC /// @brief FLAC format reader template <typename T> @@ -332,10 +351,11 @@ struct audio_reader_flac : audio_reader<T> audio_reader_flac(std::shared_ptr<abstract_reader<>>&& reader) : reader(std::move(reader)) { f = drflac_open((drflac_read_proc)&internal_generic::drflac_reader_read_proc, - (drflac_seek_proc)&internal_generic::drflac_reader_seek_proc, this->reader.get()); + (drflac_seek_proc)&internal_generic::drflac_reader_seek_proc, this->reader.get(), + nullptr); fmt.channels = 
f->channels; fmt.samplerate = f->sampleRate; - fmt.length = f->totalSampleCount / fmt.channels; + fmt.length = f->totalPCMFrameCount; fmt.type = audio_sample_type::i32; } ~audio_reader_flac() { drflac_close(f); } @@ -350,14 +370,17 @@ struct audio_reader_flac : audio_reader<T> return 0; if (audio_sample_traits<T>::type == audio_sample_type::i32) { - return drflac_read_s32(f, size, reinterpret_cast<i32*>(data)); + const size_t sz = drflac_read_pcm_frames_s32(f, size / fmt.channels, reinterpret_cast<i32*>(data)); + position += sz; + return sz * fmt.channels; } else { univector<i32> native(size * sizeof(i32)); - const size_t sz = drflac_read_s32(f, size, native.data()); - convert(data, native.data(), sz); - return sz; + const size_t sz = drflac_read_pcm_frames_s32(f, size / fmt.channels, native.data()); + position += sz; + convert(data, native.data(), sz * fmt.channels); + return sz * fmt.channels; } } @@ -370,11 +393,11 @@ struct audio_reader_flac : audio_reader<T> switch (origin) { case seek_origin::current: - return drflac_seek_to_sample(f, this->position + offset); + return drflac_seek_to_pcm_frame(f, this->position + offset); case seek_origin::begin: - return drflac_seek_to_sample(f, offset); + return drflac_seek_to_pcm_frame(f, offset); case seek_origin::end: - return drflac_seek_to_sample(f, fmt.length + offset); + return drflac_seek_to_pcm_frame(f, fmt.length + offset); default: return false; } @@ -388,4 +411,76 @@ private: }; #endif +#ifndef KFR_DISABLE_MP3 + +/// @brief MP3 format reader +template <typename T> +struct audio_reader_mp3 : audio_reader<T> +{ + /// @brief Constructs MP3 reader + audio_reader_mp3(std::shared_ptr<abstract_reader<>>&& reader) : reader(std::move(reader)) + { + drmp3_init(&f, (drmp3_read_proc)&internal_generic::drmp3_reader_read_proc, + (drmp3_seek_proc)&internal_generic::drmp3_reader_seek_proc, this->reader.get(), &config, + nullptr); + fmt.channels = f.channels; + fmt.samplerate = f.sampleRate; + fmt.length = 
drmp3_get_pcm_frame_count(&f); + fmt.type = audio_sample_type::i16; + } + ~audio_reader_mp3() { drmp3_uninit(&f); } + + drmp3_config config{ 0, 0 }; + + /// @brief Returns audio format description + const audio_format_and_length& format() const override { return fmt; } + + /// @brief Reads and decodes audio data + size_t read(T* data, size_t size) override + { + if (fmt.type == audio_sample_type::unknown) + return 0; + if (audio_sample_traits<T>::type == audio_sample_type::i16) + { + const size_t sz = drmp3_read_pcm_frames_s16(&f, size / fmt.channels, reinterpret_cast<i16*>(data)); + position += sz; + return sz * fmt.channels; + } + else + { + univector<i16> native(size * sizeof(i16)); + const size_t sz = drmp3_read_pcm_frames_s16(&f, size / fmt.channels, native.data()); + position += sz; + convert(data, native.data(), sz * fmt.channels); + return sz * fmt.channels; + } + } + + /// @brief Returns current position + imax tell() const override { return position; } + + /// @brief Seeks to specific sample + bool seek(imax offset, seek_origin origin) override + { + switch (origin) + { + case seek_origin::current: + return drmp3_seek_to_pcm_frame(&f, this->position + offset); + case seek_origin::begin: + return drmp3_seek_to_pcm_frame(&f, offset); + case seek_origin::end: + return drmp3_seek_to_pcm_frame(&f, fmt.length + offset); + default: + return false; + } + } + +private: + std::shared_ptr<abstract_reader<>> reader; + drmp3 f; + audio_format_and_length fmt; + imax position = 0; +}; +#endif + } // namespace kfr diff --git a/include/kfr/io/dr/README.txt b/include/kfr/io/dr/README.txt @@ -1,8 +1,9 @@ -Code for reading/writing wav and reading flac files. +Code for reading/writing wav and reading flac and mp3 files. -These two files are released to public domain by the author: +These 3 files are released to public domain by the author: dr_flac.h dr_wav.h +dr_mp3.h See also information in these files. 
diff --git a/include/kfr/io/dr/dr_flac.h b/include/kfr/io/dr/dr_flac.h @@ -1,113 +1,228 @@ -// clang-format off -// FLAC audio decoder. Public domain. See "unlicense" statement at the end of this file. -// dr_flac - v0.10.0 - 2018-09-11 -// -// David Reid - [email protected] - -// USAGE -// -// dr_flac is a single-file library. To use it, do something like the following in one .c file. -// #define DR_FLAC_IMPLEMENTATION -// #include "dr_flac.h" -// -// You can then #include this file in other parts of the program as you would with any other header file. To decode audio data, -// do something like the following: -// -// drflac* pFlac = drflac_open_file("MySong.flac"); -// if (pFlac == NULL) { -// // Failed to open FLAC file -// } -// -// drflac_int32* pSamples = malloc(pFlac->totalSampleCount * sizeof(drflac_int32)); -// drflac_uint64 numberOfInterleavedSamplesActuallyRead = drflac_read_s32(pFlac, pFlac->totalSampleCount, pSamples); -// -// The drflac object represents the decoder. It is a transparent type so all the information you need, such as the number of -// channels and the bits per sample, should be directly accessible - just make sure you don't change their values. Samples are -// always output as interleaved signed 32-bit PCM. In the example above a native FLAC stream was opened, however dr_flac has -// seamless support for Ogg encapsulated FLAC streams as well. -// -// You do not need to decode the entire stream in one go - you just specify how many samples you'd like at any given time and -// the decoder will give you as many samples as it can, up to the amount requested. Later on when you need the next batch of -// samples, just call it again. Example: -// -// while (drflac_read_s32(pFlac, chunkSize, pChunkSamples) > 0) { -// do_something(); -// } -// -// You can seek to a specific sample with drflac_seek_to_sample(). The given sample is based on interleaving. 
So for example, -// if you were to seek to the sample at index 0 in a stereo stream, you'll be seeking to the first sample of the left channel. -// The sample at index 1 will be the first sample of the right channel. The sample at index 2 will be the second sample of the -// left channel, etc. -// -// -// If you just want to quickly decode an entire FLAC file in one go you can do something like this: -// -// unsigned int channels; -// unsigned int sampleRate; -// drflac_uint64 totalSampleCount; -// drflac_int32* pSampleData = drflac_open_and_decode_file_s32("MySong.flac", &channels, &sampleRate, &totalSampleCount); -// if (pSampleData == NULL) { -// // Failed to open and decode FLAC file. -// } -// -// ... -// -// drflac_free(pSampleData); -// -// -// You can read samples as signed 16-bit integer and 32-bit floating-point PCM with the *_s16() and *_f32() family of APIs -// respectively, but note that these should be considered lossy. -// -// -// If you need access to metadata (album art, etc.), use drflac_open_with_metadata(), drflac_open_file_with_metdata() or -// drflac_open_memory_with_metadata(). The rationale for keeping these APIs separate is that they're slightly slower than the -// normal versions and also just a little bit harder to use. -// -// dr_flac reports metadata to the application through the use of a callback, and every metadata block is reported before -// drflac_open_with_metdata() returns. -// -// -// The main opening APIs (drflac_open(), etc.) will fail if the header is not present. The presents a problem in certain -// scenarios such as broadcast style streams like internet radio where the header may not be present because the user has -// started playback mid-stream. To handle this, use the relaxed APIs: drflac_open_relaxed() and drflac_open_with_metadata_relaxed(). -// -// It is not recommended to use these APIs for file based streams because a missing header would usually indicate a -// corrupted or perverse file. 
In addition, these APIs can take a long time to initialize because they may need to spend -// a lot of time finding the first frame. -// -// -// -// OPTIONS -// #define these options before including this file. -// -// #define DR_FLAC_NO_STDIO -// Disable drflac_open_file() and family. -// -// #define DR_FLAC_NO_OGG -// Disables support for Ogg/FLAC streams. -// -// #define DR_FLAC_BUFFER_SIZE <number> -// Defines the size of the internal buffer to store data from onRead(). This buffer is used to reduce the number of calls -// back to the client for more data. Larger values means more memory, but better performance. My tests show diminishing -// returns after about 4KB (which is the default). Consider reducing this if you have a very efficient implementation of -// onRead(), or increase it if it's very inefficient. Must be a multiple of 8. -// -// #define DR_FLAC_NO_CRC -// Disables CRC checks. This will offer a performance boost when CRC is unnecessary. -// -// #define DR_FLAC_NO_SIMD -// Disables SIMD optimizations (SSE on x86/x64 architectures). Use this if you are having compatibility issues with your -// compiler. -// -// -// -// QUICK NOTES -// - dr_flac does not currently support changing the sample rate nor channel count mid stream. -// - Audio data is output as signed 32-bit PCM, regardless of the bits per sample the FLAC stream is encoded as. -// - This has not been tested on big-endian architectures. -// - dr_flac is not thread-safe, but its APIs can be called from any thread so long as you do your own synchronization. -// - When using Ogg encapsulation, a corrupted metadata block will result in drflac_open_with_metadata() and drflac_open() -// returning inconsistent samples. +/* +FLAC audio decoder. Choice of public domain or MIT-0. See license statements at the end of this file. 
+dr_flac - v0.12.2 - 2019-10-07 + +David Reid - [email protected] +*/ + +/* +RELEASE NOTES - v0.12.0 +======================= +Version 0.12.0 has breaking API changes including changes to the existing API and the removal of deprecated APIs. + + +Improved Client-Defined Memory Allocation +----------------------------------------- +The main change with this release is the addition of a more flexible way of implementing custom memory allocation routines. The +existing system of DRFLAC_MALLOC, DRFLAC_REALLOC and DRFLAC_FREE are still in place and will be used by default when no custom +allocation callbacks are specified. + +To use the new system, you pass in a pointer to a drflac_allocation_callbacks object to drflac_open() and family, like this: + + void* my_malloc(size_t sz, void* pUserData) + { + return malloc(sz); + } + void* my_realloc(void* p, size_t sz, void* pUserData) + { + return realloc(p, sz); + } + void my_free(void* p, void* pUserData) + { + free(p); + } + + ... + + drflac_allocation_callbacks allocationCallbacks; + allocationCallbacks.pUserData = &myData; + allocationCallbacks.onMalloc = my_malloc; + allocationCallbacks.onRealloc = my_realloc; + allocationCallbacks.onFree = my_free; + drflac* pFlac = drflac_open_file("my_file.flac", &allocationCallbacks); + +The advantage of this new system is that it allows you to specify user data which will be passed in to the allocation routines. + +Passing in null for the allocation callbacks object will cause dr_flac to use defaults which is the same as DRFLAC_MALLOC, +DRFLAC_REALLOC and DRFLAC_FREE and the equivalent of how it worked in previous versions. + +Every API that opens a drflac object now takes this extra parameter. 
These include the following: + + drflac_open() + drflac_open_relaxed() + drflac_open_with_metadata() + drflac_open_with_metadata_relaxed() + drflac_open_file() + drflac_open_file_with_metadata() + drflac_open_memory() + drflac_open_memory_with_metadata() + drflac_open_and_read_pcm_frames_s32() + drflac_open_and_read_pcm_frames_s16() + drflac_open_and_read_pcm_frames_f32() + drflac_open_file_and_read_pcm_frames_s32() + drflac_open_file_and_read_pcm_frames_s16() + drflac_open_file_and_read_pcm_frames_f32() + drflac_open_memory_and_read_pcm_frames_s32() + drflac_open_memory_and_read_pcm_frames_s16() + drflac_open_memory_and_read_pcm_frames_f32() + + + +Optimizations +------------- +Seeking performance has been greatly improved. A new binary search based seeking algorithm has been introduced which significantly +improves performance over the brute force method which was used when no seek table was present. Seek table based seeking also takes +advantage of the new binary search seeking system to further improve performance there as well. Note that this depends on CRC which +means it will be disabled when DR_FLAC_NO_CRC is used. + +The SSE4.1 pipeline has been cleaned up and optimized. You should see some improvements with decoding speed of 24-bit files in +particular. 16-bit streams should also see some improvement. + +drflac_read_pcm_frames_s16() has been optimized. Previously this sat on top of drflac_read_pcm_frames_s32() and performed it's s32 +to s16 conversion in a second pass. This is now all done in a single pass. This includes SSE2 and ARM NEON optimized paths. + +A minor optimization has been implemented for drflac_read_pcm_frames_s32(). This will now use an SSE2 optimized pipeline for stereo +channel reconstruction which is the last part of the decoding process. + +The ARM build has seen a few improvements. The CLZ (count leading zeroes) and REV (byte swap) instructions are now used when +compiling with GCC and Clang which is achieved using inline assembly. 
The CLZ instruction requires ARM architecture version 5 at +compile time and the REV instruction requires ARM architecture version 6. + +An ARM NEON optimized pipeline has been implemented. To enable this you'll need to add -mfpu=neon to the command line when compiling. + + +Removed APIs +------------ +The following APIs were deprecated in version 0.11.0 and have been completely removed in version 0.12.0: + + drflac_read_s32() -> drflac_read_pcm_frames_s32() + drflac_read_s16() -> drflac_read_pcm_frames_s16() + drflac_read_f32() -> drflac_read_pcm_frames_f32() + drflac_seek_to_sample() -> drflac_seek_to_pcm_frame() + drflac_open_and_decode_s32() -> drflac_open_and_read_pcm_frames_s32() + drflac_open_and_decode_s16() -> drflac_open_and_read_pcm_frames_s16() + drflac_open_and_decode_f32() -> drflac_open_and_read_pcm_frames_f32() + drflac_open_and_decode_file_s32() -> drflac_open_file_and_read_pcm_frames_s32() + drflac_open_and_decode_file_s16() -> drflac_open_file_and_read_pcm_frames_s16() + drflac_open_and_decode_file_f32() -> drflac_open_file_and_read_pcm_frames_f32() + drflac_open_and_decode_memory_s32() -> drflac_open_memory_and_read_pcm_frames_s32() + drflac_open_and_decode_memory_s16() -> drflac_open_memory_and_read_pcm_frames_s16() + drflac_open_and_decode_memory_f32() -> drflac_open_memroy_and_read_pcm_frames_f32() + +Prior versions of dr_flac operated on a per-sample basis whereas now it operates on PCM frames. The removed APIs all relate +to the old per-sample APIs. You now need to use the "pcm_frame" versions. +*/ + + +/* +USAGE +===== +dr_flac is a single-file library. To use it, do something like the following in one .c file. + + #define DR_FLAC_IMPLEMENTATION + #include "dr_flac.h" + +You can then #include this file in other parts of the program as you would with any other header file. 
To decode audio data, +do something like the following: + + drflac* pFlac = drflac_open_file("MySong.flac", NULL); + if (pFlac == NULL) { + // Failed to open FLAC file + } + + drflac_int32* pSamples = malloc(pFlac->totalPCMFrameCount * pFlac->channels * sizeof(drflac_int32)); + drflac_uint64 numberOfInterleavedSamplesActuallyRead = drflac_read_pcm_frames_s32(pFlac, pFlac->totalPCMFrameCount, pSamples); + +The drflac object represents the decoder. It is a transparent type so all the information you need, such as the number of +channels and the bits per sample, should be directly accessible - just make sure you don't change their values. Samples are +always output as interleaved signed 32-bit PCM. In the example above a native FLAC stream was opened, however dr_flac has +seamless support for Ogg encapsulated FLAC streams as well. + +You do not need to decode the entire stream in one go - you just specify how many samples you'd like at any given time and +the decoder will give you as many samples as it can, up to the amount requested. Later on when you need the next batch of +samples, just call it again. Example: + + while (drflac_read_pcm_frames_s32(pFlac, chunkSizeInPCMFrames, pChunkSamples) > 0) { + do_something(); + } + +You can seek to a specific sample with drflac_seek_to_sample(). The given sample is based on interleaving. So for example, +if you were to seek to the sample at index 0 in a stereo stream, you'll be seeking to the first sample of the left channel. +The sample at index 1 will be the first sample of the right channel. The sample at index 2 will be the second sample of the +left channel, etc. 
+ + +If you just want to quickly decode an entire FLAC file in one go you can do something like this: + + unsigned int channels; + unsigned int sampleRate; + drflac_uint64 totalPCMFrameCount; + drflac_int32* pSampleData = drflac_open_file_and_read_pcm_frames_s32("MySong.flac", &channels, &sampleRate, &totalPCMFrameCount, NULL); + if (pSampleData == NULL) { + // Failed to open and decode FLAC file. + } + + ... + + drflac_free(pSampleData); + + +You can read samples as signed 16-bit integer and 32-bit floating-point PCM with the *_s16() and *_f32() family of APIs +respectively, but note that these should be considered lossy. + + +If you need access to metadata (album art, etc.), use drflac_open_with_metadata(), drflac_open_file_with_metadata() or +drflac_open_memory_with_metadata(). The rationale for keeping these APIs separate is that they're slightly slower than the +normal versions and also just a little bit harder to use. + +dr_flac reports metadata to the application through the use of a callback, and every metadata block is reported before +drflac_open_with_metadata() returns. + + +The main opening APIs (drflac_open(), etc.) will fail if the header is not present. This presents a problem in certain +scenarios such as broadcast style streams or internet radio where the header may not be present because the user has +started playback mid-stream. To handle this, use the relaxed APIs: drflac_open_relaxed() and drflac_open_with_metadata_relaxed(). + +It is not recommended to use these APIs for file based streams because a missing header would usually indicate a +corrupt or perverse file. In addition, these APIs can take a long time to initialize because they may need to spend +a lot of time finding the first frame. + + + +OPTIONS +======= +#define these options before including this file. + +#define DR_FLAC_NO_STDIO + Disable drflac_open_file() and family. + +#define DR_FLAC_NO_OGG + Disables support for Ogg/FLAC streams. 
+ +#define DR_FLAC_BUFFER_SIZE <number> + Defines the size of the internal buffer to store data from onRead(). This buffer is used to reduce the number of calls + back to the client for more data. Larger values means more memory, but better performance. My tests show diminishing + returns after about 4KB (which is the default). Consider reducing this if you have a very efficient implementation of + onRead(), or increase it if it's very inefficient. Must be a multiple of 8. + +#define DR_FLAC_NO_CRC + Disables CRC checks. This will offer a performance boost when CRC is unnecessary. This will disable binary search seeking. + When seeking, the seek table will be used if available. Otherwise the seek will be performed using brute force. + +#define DR_FLAC_NO_SIMD + Disables SIMD optimizations (SSE on x86/x64 architectures, NEON on ARM architectures). Use this if you are having + compatibility issues with your compiler. + + + +QUICK NOTES +=========== +- dr_flac does not currently support changing the sample rate nor channel count mid stream. +- This has not been tested on big-endian architectures. +- dr_flac is not thread-safe, but its APIs can be called from any thread so long as you do your own synchronization. +- When using Ogg encapsulation, a corrupted metadata block will result in drflac_open_with_metadata() and drflac_open() + returning inconsistent samples. +*/ #ifndef dr_flac_h #define dr_flac_h @@ -139,9 +254,25 @@ typedef drflac_uint32 drflac_bool32; #define DRFLAC_TRUE 1 #define DRFLAC_FALSE 0 -// As data is read from the client it is placed into an internal buffer for fast access. This controls the -// size of that buffer. Larger values means more speed, but also more memory. In my testing there is diminishing -// returns after about 4KB, but you can fiddle with this to suit your own needs. Must be a multiple of 8. 
+#if defined(_MSC_VER) && _MSC_VER >= 1700 /* Visual Studio 2012 */ + #define DRFLAC_DEPRECATED __declspec(deprecated) +#elif (defined(__GNUC__) && __GNUC__ >= 4) /* GCC 4 */ + #define DRFLAC_DEPRECATED __attribute__((deprecated)) +#elif defined(__has_feature) /* Clang */ + #if __has_feature(attribute_deprecated) + #define DRFLAC_DEPRECATED __attribute__((deprecated)) + #else + #define DRFLAC_DEPRECATED + #endif +#else + #define DRFLAC_DEPRECATED +#endif + +/* +As data is read from the client it is placed into an internal buffer for fast access. This controls the +size of that buffer. Larger values means more speed, but also more memory. In my testing there is diminishing +returns after about 4KB, but you can fiddle with this to suit your own needs. Must be a multiple of 8. +*/ #ifndef DR_FLAC_BUFFER_SIZE #define DR_FLAC_BUFFER_SIZE 4096 #endif @@ -150,7 +281,7 @@ typedef drflac_uint32 drflac_bool32; extern "C" { #endif -// Check if we can enable 64-bit optimizations. +/* Check if we can enable 64-bit optimizations. */ #if defined(_WIN64) || defined(_LP64) || defined(__LP64__) #define DRFLAC_64BIT #endif @@ -161,7 +292,7 @@ typedef drflac_uint64 drflac_cache_t; typedef drflac_uint32 drflac_cache_t; #endif -// The various metadata block types. +/* The various metadata block types. */ #define DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO 0 #define DRFLAC_METADATA_BLOCK_TYPE_PADDING 1 #define DRFLAC_METADATA_BLOCK_TYPE_APPLICATION 2 @@ -171,7 +302,7 @@ typedef drflac_uint32 drflac_cache_t; #define DRFLAC_METADATA_BLOCK_TYPE_PICTURE 6 #define DRFLAC_METADATA_BLOCK_TYPE_INVALID 127 -// The various picture types specified in the PICTURE block. +/* The various picture types specified in the PICTURE block. 
*/ #define DRFLAC_PICTURE_TYPE_OTHER 0 #define DRFLAC_PICTURE_TYPE_FILE_ICON 1 #define DRFLAC_PICTURE_TYPE_OTHER_FILE_ICON 2 @@ -207,40 +338,42 @@ typedef enum drflac_seek_origin_current } drflac_seek_origin; -// Packing is important on this structure because we map this directly to the raw data within the SEEKTABLE metadata block. +/* Packing is important on this structure because we map this directly to the raw data within the SEEKTABLE metadata block. */ #pragma pack(2) typedef struct { - drflac_uint64 firstSample; - drflac_uint64 frameOffset; // The offset from the first byte of the header of the first frame. - drflac_uint16 sampleCount; + drflac_uint64 firstPCMFrame; + drflac_uint64 flacFrameOffset; /* The offset from the first byte of the header of the first frame. */ + drflac_uint16 pcmFrameCount; } drflac_seekpoint; #pragma pack() typedef struct { - drflac_uint16 minBlockSize; - drflac_uint16 maxBlockSize; - drflac_uint32 minFrameSize; - drflac_uint32 maxFrameSize; + drflac_uint16 minBlockSizeInPCMFrames; + drflac_uint16 maxBlockSizeInPCMFrames; + drflac_uint32 minFrameSizeInPCMFrames; + drflac_uint32 maxFrameSizeInPCMFrames; drflac_uint32 sampleRate; drflac_uint8 channels; drflac_uint8 bitsPerSample; - drflac_uint64 totalSampleCount; + drflac_uint64 totalPCMFrameCount; drflac_uint8 md5[16]; } drflac_streaminfo; typedef struct { - // The metadata type. Use this to know how to interpret the data below. + /* The metadata type. Use this to know how to interpret the data below. */ drflac_uint32 type; - // A pointer to the raw data. This points to a temporary buffer so don't hold on to it. It's best to - // not modify the contents of this buffer. Use the structures below for more meaningful and structured - // information about the metadata. It's possible for this to be null. + /* + A pointer to the raw data. This points to a temporary buffer so don't hold on to it. It's best to + not modify the contents of this buffer. 
Use the structures below for more meaningful and structured + information about the metadata. It's possible for this to be null. + */ const void* pRawData; - // The size in bytes of the block and the buffer pointed to by pRawData if it's non-NULL. + /* The size in bytes of the block and the buffer pointed to by pRawData if it's non-NULL. */ drflac_uint32 rawDataSize; union @@ -300,40 +433,57 @@ typedef struct } drflac_metadata; -// Callback for when data needs to be read from the client. -// -// pUserData [in] The user data that was passed to drflac_open() and family. -// pBufferOut [out] The output buffer. -// bytesToRead [in] The number of bytes to read. -// -// Returns the number of bytes actually read. -// -// A return value of less than bytesToRead indicates the end of the stream. Do _not_ return from this callback until -// either the entire bytesToRead is filled or you have reached the end of the stream. +/* +Callback for when data needs to be read from the client. + +pUserData [in] The user data that was passed to drflac_open() and family. +pBufferOut [out] The output buffer. +bytesToRead [in] The number of bytes to read. + +Returns the number of bytes actually read. + +A return value of less than bytesToRead indicates the end of the stream. Do _not_ return from this callback until +either the entire bytesToRead is filled or you have reached the end of the stream. +*/ typedef size_t (* drflac_read_proc)(void* pUserData, void* pBufferOut, size_t bytesToRead); -// Callback for when data needs to be seeked. -// -// pUserData [in] The user data that was passed to drflac_open() and family. -// offset [in] The number of bytes to move, relative to the origin. Will never be negative. -// origin [in] The origin of the seek - the current position or the start of the stream. -// -// Returns whether or not the seek was successful. -// -// The offset will never be negative. 
Whether or not it is relative to the beginning or current position is determined -// by the "origin" parameter which will be either drflac_seek_origin_start or drflac_seek_origin_current. +/* +Callback for when data needs to be seeked. + +pUserData [in] The user data that was passed to drflac_open() and family. +offset [in] The number of bytes to move, relative to the origin. Will never be negative. +origin [in] The origin of the seek - the current position or the start of the stream. + +Returns whether or not the seek was successful. + +The offset will never be negative. Whether or not it is relative to the beginning or current position is determined +by the "origin" parameter which will be either drflac_seek_origin_start or drflac_seek_origin_current. + +When seeking to a PCM frame using drflac_seek_to_pcm_frame(), dr_flac may call this with an offset beyond the end of +the FLAC stream. This needs to be detected and handled by returning DRFLAC_FALSE. +*/ typedef drflac_bool32 (* drflac_seek_proc)(void* pUserData, int offset, drflac_seek_origin origin); -// Callback for when a metadata block is read. -// -// pUserData [in] The user data that was passed to drflac_open() and family. -// pMetadata [in] A pointer to a structure containing the data of the metadata block. -// -// Use pMetadata->type to determine which metadata block is being handled and how to read the data. +/* +Callback for when a metadata block is read. + +pUserData [in] The user data that was passed to drflac_open() and family. +pMetadata [in] A pointer to a structure containing the data of the metadata block. + +Use pMetadata->type to determine which metadata block is being handled and how to read the data. +*/ typedef void (* drflac_meta_proc)(void* pUserData, drflac_metadata* pMetadata); -// Structure for internal use. Only used for decoders opened with drflac_open_memory. 
+typedef struct +{ + void* pUserData; + void* (* onMalloc)(size_t sz, void* pUserData); + void* (* onRealloc)(void* p, size_t sz, void* pUserData); + void (* onFree)(void* p, void* pUserData); +} drflac_allocation_callbacks; + +/* Structure for internal use. Only used for decoders opened with drflac_open_memory. */ typedef struct { const drflac_uint8* data; @@ -341,402 +491,442 @@ typedef struct size_t currentReadPos; } drflac__memory_stream; -// Structure for internal use. Used for bit streaming. +/* Structure for internal use. Used for bit streaming. */ typedef struct { - // The function to call when more data needs to be read. + /* The function to call when more data needs to be read. */ drflac_read_proc onRead; - // The function to call when the current read position needs to be moved. + /* The function to call when the current read position needs to be moved. */ drflac_seek_proc onSeek; - // The user data to pass around to onRead and onSeek. + /* The user data to pass around to onRead and onSeek. */ void* pUserData; - // The number of unaligned bytes in the L2 cache. This will always be 0 until the end of the stream is hit. At the end of the - // stream there will be a number of bytes that don't cleanly fit in an L1 cache line, so we use this variable to know whether - // or not the bistreamer needs to run on a slower path to read those last bytes. This will never be more than sizeof(drflac_cache_t). + /* + The number of unaligned bytes in the L2 cache. This will always be 0 until the end of the stream is hit. At the end of the + stream there will be a number of bytes that don't cleanly fit in an L1 cache line, so we use this variable to know whether + or not the bistreamer needs to run on a slower path to read those last bytes. This will never be more than sizeof(drflac_cache_t). + */ size_t unalignedByteCount; - // The content of the unaligned bytes. + /* The content of the unaligned bytes. 
*/ drflac_cache_t unalignedCache; - // The index of the next valid cache line in the "L2" cache. + /* The index of the next valid cache line in the "L2" cache. */ drflac_uint32 nextL2Line; - // The number of bits that have been consumed by the cache. This is used to determine how many valid bits are remaining. + /* The number of bits that have been consumed by the cache. This is used to determine how many valid bits are remaining. */ drflac_uint32 consumedBits; - // The cached data which was most recently read from the client. There are two levels of cache. Data flows as such: - // Client -> L2 -> L1. The L2 -> L1 movement is aligned and runs on a fast path in just a few instructions. + /* + The cached data which was most recently read from the client. There are two levels of cache. Data flows as such: + Client -> L2 -> L1. The L2 -> L1 movement is aligned and runs on a fast path in just a few instructions. + */ drflac_cache_t cacheL2[DR_FLAC_BUFFER_SIZE/sizeof(drflac_cache_t)]; drflac_cache_t cache; - // CRC-16. This is updated whenever bits are read from the bit stream. Manually set this to 0 to reset the CRC. For FLAC, this - // is reset to 0 at the beginning of each frame. + /* + CRC-16. This is updated whenever bits are read from the bit stream. Manually set this to 0 to reset the CRC. For FLAC, this + is reset to 0 at the beginning of each frame. + */ drflac_uint16 crc16; - drflac_cache_t crc16Cache; // A cache for optimizing CRC calculations. This is filled when when the L1 cache is reloaded. - drflac_uint32 crc16CacheIgnoredBytes; // The number of bytes to ignore when updating the CRC-16 from the CRC-16 cache. + drflac_cache_t crc16Cache; /* A cache for optimizing CRC calculations. This is filled when when the L1 cache is reloaded. */ + drflac_uint32 crc16CacheIgnoredBytes; /* The number of bytes to ignore when updating the CRC-16 from the CRC-16 cache. 
*/ } drflac_bs; typedef struct { - // The type of the subframe: SUBFRAME_CONSTANT, SUBFRAME_VERBATIM, SUBFRAME_FIXED or SUBFRAME_LPC. + /* The type of the subframe: SUBFRAME_CONSTANT, SUBFRAME_VERBATIM, SUBFRAME_FIXED or SUBFRAME_LPC. */ drflac_uint8 subframeType; - // The number of wasted bits per sample as specified by the sub-frame header. + /* The number of wasted bits per sample as specified by the sub-frame header. */ drflac_uint8 wastedBitsPerSample; - // The order to use for the prediction stage for SUBFRAME_FIXED and SUBFRAME_LPC. + /* The order to use for the prediction stage for SUBFRAME_FIXED and SUBFRAME_LPC. */ drflac_uint8 lpcOrder; - // The number of bits per sample for this subframe. This is not always equal to the current frame's bit per sample because - // an extra bit is required for side channels when interchannel decorrelation is being used. - drflac_uint32 bitsPerSample; - - // A pointer to the buffer containing the decoded samples in the subframe. This pointer is an offset from drflac::pExtraData. Note that - // it's a signed 32-bit integer for each value. - drflac_int32* pDecodedSamples; + /* A pointer to the buffer containing the decoded samples in the subframe. This pointer is an offset from drflac::pExtraData. */ + drflac_int32* pSamplesS32; } drflac_subframe; typedef struct { - // If the stream uses variable block sizes, this will be set to the index of the first sample. If fixed block sizes are used, this will - // always be set to 0. - drflac_uint64 sampleNumber; + /* + If the stream uses variable block sizes, this will be set to the index of the first PCM frame. If fixed block sizes are used, this will + always be set to 0. + */ + drflac_uint64 pcmFrameNumber; - // If the stream uses fixed block sizes, this will be set to the frame number. If variable block sizes are used, this will always be 0. - drflac_uint32 frameNumber; + /* If the stream uses fixed block sizes, this will be set to the frame number. 
If variable block sizes are used, this will always be 0. */ + drflac_uint32 flacFrameNumber; - // The sample rate of this frame. + /* The sample rate of this frame. */ drflac_uint32 sampleRate; - // The number of samples in each sub-frame within this frame. - drflac_uint16 blockSize; + /* The number of PCM frames in each sub-frame within this frame. */ + drflac_uint16 blockSizeInPCMFrames; - // The channel assignment of this frame. This is not always set to the channel count. If interchannel decorrelation is being used this - // will be set to DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE, DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE or DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE. + /* + The channel assignment of this frame. This is not always set to the channel count. If interchannel decorrelation is being used this + will be set to DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE, DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE or DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE. + */ drflac_uint8 channelAssignment; - // The number of bits per sample within this frame. + /* The number of bits per sample within this frame. */ drflac_uint8 bitsPerSample; - // The frame's CRC. + /* The frame's CRC. */ drflac_uint8 crc8; } drflac_frame_header; typedef struct { - // The header. + /* The header. */ drflac_frame_header header; - // The number of samples left to be read in this frame. This is initially set to the block size multiplied by the channel count. As samples - // are read, this will be decremented. When it reaches 0, the decoder will see this frame as fully consumed and load the next frame. - drflac_uint32 samplesRemaining; + /* + The number of PCM frames left to be read in this FLAC frame. This is initially set to the block size. As PCM frames are read, + this will be decremented. When it reaches 0, the decoder will see this frame as fully consumed and load the next frame. + */ + drflac_uint32 pcmFramesRemaining; - // The list of sub-frames within the frame. 
There is one sub-frame for each channel, and there's a maximum of 8 channels. + /* The list of sub-frames within the frame. There is one sub-frame for each channel, and there's a maximum of 8 channels. */ drflac_subframe subframes[8]; } drflac_frame; typedef struct { - // The function to call when a metadata block is read. + /* The function to call when a metadata block is read. */ drflac_meta_proc onMeta; - // The user data posted to the metadata callback function. + /* The user data posted to the metadata callback function. */ void* pUserDataMD; + /* Memory allocation callbacks. */ + drflac_allocation_callbacks allocationCallbacks; + - // The sample rate. Will be set to something like 44100. + /* The sample rate. Will be set to something like 44100. */ drflac_uint32 sampleRate; - // The number of channels. This will be set to 1 for monaural streams, 2 for stereo, etc. Maximum 8. This is set based on the - // value specified in the STREAMINFO block. + /* + The number of channels. This will be set to 1 for monaural streams, 2 for stereo, etc. Maximum 8. This is set based on the + value specified in the STREAMINFO block. + */ drflac_uint8 channels; - // The bits per sample. Will be set to something like 16, 24, etc. + /* The bits per sample. Will be set to something like 16, 24, etc. */ drflac_uint8 bitsPerSample; - // The maximum block size, in samples. This number represents the number of samples in each channel (not combined). - drflac_uint16 maxBlockSize; + /* The maximum block size, in samples. This number represents the number of samples in each channel (not combined). */ + drflac_uint16 maxBlockSizeInPCMFrames; - // The total number of samples making up the stream. This includes every channel. For example, if the stream has 2 channels, - // with each channel having a total of 4096, this value will be set to 2*4096 = 8192. Can be 0 in which case it's still a - // valid stream, but just means the total sample count is unknown. 
Likely the case with streams like internet radio. - drflac_uint64 totalSampleCount; + /* + The total number of PCM Frames making up the stream. Can be 0 in which case it's still a valid stream, but just means + the total PCM frame count is unknown. Likely the case with streams like internet radio. + */ + drflac_uint64 totalPCMFrameCount; - // The container type. This is set based on whether or not the decoder was opened from a native or Ogg stream. + /* The container type. This is set based on whether or not the decoder was opened from a native or Ogg stream. */ drflac_container container; - // The number of seekpoints in the seektable. + /* The number of seekpoints in the seektable. */ drflac_uint32 seekpointCount; - // Information about the frame the decoder is currently sitting on. - drflac_frame currentFrame; + /* Information about the frame the decoder is currently sitting on. */ + drflac_frame currentFLACFrame; - // The index of the sample the decoder is currently sitting on. This is only used for seeking. - drflac_uint64 currentSample; - // The position of the first frame in the stream. This is only ever used for seeking. - drflac_uint64 firstFramePos; + /* The index of the PCM frame the decoder is currently sitting on. This is only used for seeking. */ + drflac_uint64 currentPCMFrame; + + /* The position of the first FLAC frame in the stream. This is only ever used for seeking. */ + drflac_uint64 firstFLACFramePosInBytes; - // A hack to avoid a malloc() when opening a decoder with drflac_open_memory(). + /* A hack to avoid a malloc() when opening a decoder with drflac_open_memory(). */ drflac__memory_stream memoryStream; - // A pointer to the decoded sample data. This is an offset of pExtraData. + /* A pointer to the decoded sample data. This is an offset of pExtraData. */ drflac_int32* pDecodedSamples; - // A pointer to the seek table. This is an offset of pExtraData, or NULL if there is no seek table. + /* A pointer to the seek table. 
This is an offset of pExtraData, or NULL if there is no seek table. */ drflac_seekpoint* pSeekpoints; - // Internal use only. Only used with Ogg containers. Points to a drflac_oggbs object. This is an offset of pExtraData. + /* Internal use only. Only used with Ogg containers. Points to a drflac_oggbs object. This is an offset of pExtraData. */ void* _oggbs; - // The bit streamer. The raw FLAC data is fed through this object. + /* Internal use only. Used for profiling and testing different seeking modes. */ + drflac_bool32 _noSeekTableSeek : 1; + drflac_bool32 _noBinarySearchSeek : 1; + drflac_bool32 _noBruteForceSeek : 1; + + /* The bit streamer. The raw FLAC data is fed through this object. */ drflac_bs bs; - // Variable length extra data. We attach this to the end of the object so we can avoid unnecessary mallocs. + /* Variable length extra data. We attach this to the end of the object so we can avoid unnecessary mallocs. */ drflac_uint8 pExtraData[1]; } drflac; +/* +Opens a FLAC decoder. + +onRead [in] The function to call when data needs to be read from the client. +onSeek [in] The function to call when the read position of the client data needs to move. +pUserData [in, optional] A pointer to application defined data that will be passed to onRead and onSeek. +pAllocationCallbacks [in, optional] A pointer to application defined callbacks for managing memory allocations. + +Returns a pointer to an object representing the decoder. + +Close the decoder with drflac_close(). + +pAllocationCallbacks can be NULL in which case it will use DRFLAC_MALLOC, DRFLAC_REALLOC and DRFLAC_FREE. + +This function will automatically detect whether or not you are attempting to open a native or Ogg encapsulated +FLAC, both of which should work seamlessly without any manual intervention. Ogg encapsulation also works with +multiplexed streams which basically means it can play FLAC encoded audio tracks in videos. + +This is the lowest level function for opening a FLAC stream. 
You can also use drflac_open_file() and drflac_open_memory() +to open the stream from a file or from a block of memory respectively. + +The STREAMINFO block must be present for this to succeed. Use drflac_open_relaxed() to open a FLAC stream where +the header may not be present. + +See also: drflac_open_file(), drflac_open_memory(), drflac_open_with_metadata(), drflac_close() +*/ +drflac* drflac_open(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks); + +/* +The same as drflac_open(), except attempts to open the stream even when a header block is not present. + +Because the header is not necessarily available, the caller must explicitly define the container (Native or Ogg). Do +not set this to drflac_container_unknown - that is for internal use only. + +Opening in relaxed mode will continue reading data from onRead until it finds a valid frame. If a frame is never +found it will continue forever. To abort, force your onRead callback to return 0, which dr_flac will use as an +indicator that the end of the stream was found. +*/ +drflac* drflac_open_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks); + +/* +Opens a FLAC decoder and notifies the caller of the metadata chunks (album art, etc.). + +onRead [in] The function to call when data needs to be read from the client. +onSeek [in] The function to call when the read position of the client data needs to move. +onMeta [in] The function to call for every metadata block. +pUserData [in, optional] A pointer to application defined data that will be passed to onRead, onSeek and onMeta. +pAllocationCallbacks [in, optional] A pointer to application defined callbacks for managing memory allocations. + +Returns a pointer to an object representing the decoder. + +Close the decoder with drflac_close(). 
+ +pAllocationCallbacks can be NULL in which case it will use DRFLAC_MALLOC, DRFLAC_REALLOC and DRFLAC_FREE. + +This is slower than drflac_open(), so avoid this one if you don't need metadata. Internally, this will allocate and free +memory on the heap for every metadata block except for STREAMINFO and PADDING blocks. + +The caller is notified of the metadata via the onMeta callback. All metadata blocks will be handled before the function +returns. + +The STREAMINFO block must be present for this to succeed. Use drflac_open_with_metadata_relaxed() to open a FLAC +stream where the header may not be present. + +Note that this will behave inconsistently with drflac_open() if the stream is an Ogg encapsulated stream and a metadata +block is corrupted. This is due to the way the Ogg stream recovers from corrupted pages. When drflac_open_with_metadata() +is being used, the open routine will try to read the contents of the metadata block, whereas drflac_open() will simply +seek past it (for the sake of efficiency). This inconsistency can result in different samples being returned depending on +whether or not the stream is being opened with metadata. -// Opens a FLAC decoder. -// -// onRead [in] The function to call when data needs to be read from the client. -// onSeek [in] The function to call when the read position of the client data needs to move. -// pUserData [in, optional] A pointer to application defined data that will be passed to onRead and onSeek. -// -// Returns a pointer to an object representing the decoder. -// -// Close the decoder with drflac_close(). -// -// This function will automatically detect whether or not you are attempting to open a native or Ogg encapsulated -// FLAC, both of which should work seamlessly without any manual intervention. Ogg encapsulation also works with -// multiplexed streams which basically means it can play FLAC encoded audio tracks in videos. -// -// This is the lowest level function for opening a FLAC stream. 
You can also use drflac_open_file() and drflac_open_memory() -// to open the stream from a file or from a block of memory respectively. -// -// The STREAMINFO block must be present for this to succeed. Use drflac_open_relaxed() to open a FLAC stream where -// the header may not be present. -// -// See also: drflac_open_file(), drflac_open_memory(), drflac_open_with_metadata(), drflac_close() -drflac* drflac_open(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData); - -// The same as drflac_open(), except attempts to open the stream even when a header block is not present. -// -// Because the header is not necessarily available, the caller must explicitly define the container (Native or Ogg). Do -// not set this to drflac_container_unknown - that is for internal use only. -// -// Opening in relaxed mode will continue reading data from onRead until it finds a valid frame. If a frame is never -// found it will continue forever. To abort, force your onRead callback to return 0, which dr_flac will use as an -// indicator that the end of the stream was found. -drflac* drflac_open_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_container container, void* pUserData); - -// Opens a FLAC decoder and notifies the caller of the metadata chunks (album art, etc.). -// -// onRead [in] The function to call when data needs to be read from the client. -// onSeek [in] The function to call when the read position of the client data needs to move. -// onMeta [in] The function to call for every metadata block. -// pUserData [in, optional] A pointer to application defined data that will be passed to onRead, onSeek and onMeta. -// -// Returns a pointer to an object representing the decoder. -// -// Close the decoder with drflac_close(). -// -// This is slower than drflac_open(), so avoid this one if you don't need metadata. Internally, this will do a DRFLAC_MALLOC() -// and DRFLAC_FREE() for every metadata block except for STREAMINFO and PADDING blocks. 
-// -// The caller is notified of the metadata via the onMeta callback. All metadata blocks will be handled before the function -// returns. -// -// The STREAMINFO block must be present for this to succeed. Use drflac_open_with_metadata_relaxed() to open a FLAC -// stream where the header may not be present. -// -// Note that this will behave inconsistently with drflac_open() if the stream is an Ogg encapsulated stream and a metadata -// block is corrupted. This is due to the way the Ogg stream recovers from corrupted pages. When drflac_open_with_metadata() -// is being used, the open routine will try to read the contents of the metadata block, whereas drflac_open() will simply -// seek past it (for the sake of efficiency). This inconsistency can result in different samples being returned depending on -// whether or not the stream is being opened with metadata. -// -// See also: drflac_open_file_with_metadata(), drflac_open_memory_with_metadata(), drflac_open(), drflac_close() -drflac* drflac_open_with_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData); - -// The same as drflac_open_with_metadata(), except attempts to open the stream even when a header block is not present. -// -// See also: drflac_open_with_metadata(), drflac_open_relaxed() -drflac* drflac_open_with_metadata_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData); - -// Closes the given FLAC decoder. -// -// pFlac [in] The decoder to close. -// -// This will destroy the decoder object. 
+See also: drflac_open_file_with_metadata(), drflac_open_memory_with_metadata(), drflac_open(), drflac_close() +*/ +drflac* drflac_open_with_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks); + +/* +The same as drflac_open_with_metadata(), except attempts to open the stream even when a header block is not present. + +See also: drflac_open_with_metadata(), drflac_open_relaxed() +*/ +drflac* drflac_open_with_metadata_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks); + +/* +Closes the given FLAC decoder. + +pFlac [in] The decoder to close. + +This will destroy the decoder object. +*/ void drflac_close(drflac* pFlac); -// Reads sample data from the given FLAC decoder, output as interleaved signed 32-bit PCM. -// -// pFlac [in] The decoder. -// samplesToRead [in] The number of samples to read. -// pBufferOut [out, optional] A pointer to the buffer that will receive the decoded samples. -// -// Returns the number of samples actually read. -// -// pBufferOut can be null, in which case the call will act as a seek, and the return value will be the number of samples -// seeked. -drflac_uint64 drflac_read_s32(drflac* pFlac, drflac_uint64 samplesToRead, drflac_int32* pBufferOut); - -// Same as drflac_read_s32(), except outputs samples as 16-bit integer PCM rather than 32-bit. -// -// pFlac [in] The decoder. -// samplesToRead [in] The number of samples to read. -// pBufferOut [out, optional] A pointer to the buffer that will receive the decoded samples. -// -// Returns the number of samples actually read. -// -// pBufferOut can be null, in which case the call will act as a seek, and the return value will be the number of samples -// seeked. -// -// Note that this is lossy for streams where the bits per sample is larger than 16. 
-drflac_uint64 drflac_read_s16(drflac* pFlac, drflac_uint64 samplesToRead, drflac_int16* pBufferOut); - -// Same as drflac_read_s32(), except outputs samples as 32-bit floating-point PCM. -// -// pFlac [in] The decoder. -// samplesToRead [in] The number of samples to read. -// pBufferOut [out, optional] A pointer to the buffer that will receive the decoded samples. -// -// Returns the number of samples actually read. -// -// pBufferOut can be null, in which case the call will act as a seek, and the return value will be the number of samples -// seeked. -// -// Note that this should be considered lossy due to the nature of floating point numbers not being able to exactly -// represent every possible number. -drflac_uint64 drflac_read_f32(drflac* pFlac, drflac_uint64 samplesToRead, float* pBufferOut); - -// Seeks to the sample at the given index. -// -// pFlac [in] The decoder. -// sampleIndex [in] The index of the sample to seek to. See notes below. -// -// Returns DRFLAC_TRUE if successful; DRFLAC_FALSE otherwise. -// -// The sample index is based on interleaving. In a stereo stream, for example, the sample at index 0 is the first sample -// in the left channel; the sample at index 1 is the first sample on the right channel, and so on. -// -// When seeking, you will likely want to ensure it's rounded to a multiple of the channel count. You can do this with -// something like drflac_seek_to_sample(pFlac, (mySampleIndex + (mySampleIndex % pFlac->channels))) -drflac_bool32 drflac_seek_to_sample(drflac* pFlac, drflac_uint64 sampleIndex); +/* +Reads sample data from the given FLAC decoder, output as interleaved signed 32-bit PCM. + +pFlac [in] The decoder. +framesToRead [in] The number of PCM frames to read. +pBufferOut [out, optional] A pointer to the buffer that will receive the decoded samples. + +Returns the number of PCM frames actually read. 
+ +pBufferOut can be null, in which case the call will act as a seek, and the return value will be the number of frames +seeked. +*/ +drflac_uint64 drflac_read_pcm_frames_s32(drflac* pFlac, drflac_uint64 framesToRead, drflac_int32* pBufferOut); + +/* +Same as drflac_read_pcm_frames_s32(), except outputs samples as 16-bit integer PCM rather than 32-bit. + +Note that this is lossy for streams where the bits per sample is larger than 16. +*/ +drflac_uint64 drflac_read_pcm_frames_s16(drflac* pFlac, drflac_uint64 framesToRead, drflac_int16* pBufferOut); + +/* +Same as drflac_read_pcm_frames_s32(), except outputs samples as 32-bit floating-point PCM. + +Note that this should be considered lossy due to the nature of floating point numbers not being able to exactly +represent every possible number. +*/ +drflac_uint64 drflac_read_pcm_frames_f32(drflac* pFlac, drflac_uint64 framesToRead, float* pBufferOut); + +/* +Seeks to the PCM frame at the given index. + +pFlac [in] The decoder. +pcmFrameIndex [in] The index of the PCM frame to seek to. See notes below. + +Returns DRFLAC_TRUE if successful; DRFLAC_FALSE otherwise. +*/ +drflac_bool32 drflac_seek_to_pcm_frame(drflac* pFlac, drflac_uint64 pcmFrameIndex); #ifndef DR_FLAC_NO_STDIO -// Opens a FLAC decoder from the file at the given path. -// -// filename [in] The path of the file to open, either absolute or relative to the current directory. -// -// Returns a pointer to an object representing the decoder. -// -// Close the decoder with drflac_close(). -// -// This will hold a handle to the file until the decoder is closed with drflac_close(). Some platforms will restrict the -// number of files a process can have open at any given time, so keep this mind if you have many decoders open at the -// same time. 
-// -// See also: drflac_open(), drflac_open_file_with_metadata(), drflac_close() -drflac* drflac_open_file(const char* filename); - -// Opens a FLAC decoder from the file at the given path and notifies the caller of the metadata chunks (album art, etc.) -// -// Look at the documentation for drflac_open_with_metadata() for more information on how metadata is handled. -drflac* drflac_open_file_with_metadata(const char* filename, drflac_meta_proc onMeta, void* pUserData); -#endif - -// Opens a FLAC decoder from a pre-allocated block of memory -// -// This does not create a copy of the data. It is up to the application to ensure the buffer remains valid for -// the lifetime of the decoder. -drflac* drflac_open_memory(const void* data, size_t dataSize); - -// Opens a FLAC decoder from a pre-allocated block of memory and notifies the caller of the metadata chunks (album art, etc.) -// -// Look at the documentation for drflac_open_with_metadata() for more information on how metadata is handled. -drflac* drflac_open_memory_with_metadata(const void* data, size_t dataSize, drflac_meta_proc onMeta, void* pUserData); - - - -//// High Level APIs //// - -// Opens a FLAC stream from the given callbacks and fully decodes it in a single operation. The return value is a -// pointer to the sample data as interleaved signed 32-bit PCM. The returned data must be freed with DRFLAC_FREE(). -// -// Sometimes a FLAC file won't keep track of the total sample count. In this situation the function will continuously -// read samples into a dynamically sized buffer on the heap until no samples are left. -// -// Do not call this function on a broadcast type of stream (like internet radio streams and whatnot). -drflac_int32* drflac_open_and_decode_s32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount); - -// Same as drflac_open_and_decode_s32(), except returns signed 16-bit integer samples. 
-drflac_int16* drflac_open_and_decode_s16(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount); - -// Same as drflac_open_and_decode_s32(), except returns 32-bit floating-point samples. -float* drflac_open_and_decode_f32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount); +/* +Opens a FLAC decoder from the file at the given path. + +filename [in] The path of the file to open, either absolute or relative to the current directory. +pAllocationCallbacks [in, optional] A pointer to application defined callbacks for managing memory allocations. + +Returns a pointer to an object representing the decoder. + +Close the decoder with drflac_close(). + +This will hold a handle to the file until the decoder is closed with drflac_close(). Some platforms will restrict the +number of files a process can have open at any given time, so keep this mind if you have many decoders open at the +same time. + +See also: drflac_open(), drflac_open_file_with_metadata(), drflac_close() +*/ +drflac* drflac_open_file(const char* filename, const drflac_allocation_callbacks* pAllocationCallbacks); + +/* +Opens a FLAC decoder from the file at the given path and notifies the caller of the metadata chunks (album art, etc.) + +Look at the documentation for drflac_open_with_metadata() for more information on how metadata is handled. +*/ +drflac* drflac_open_file_with_metadata(const char* filename, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks); +#endif + +/* +Opens a FLAC decoder from a pre-allocated block of memory + +This does not create a copy of the data. It is up to the application to ensure the buffer remains valid for +the lifetime of the decoder. 
+*/ +drflac* drflac_open_memory(const void* data, size_t dataSize, const drflac_allocation_callbacks* pAllocationCallbacks); + +/* +Opens a FLAC decoder from a pre-allocated block of memory and notifies the caller of the metadata chunks (album art, etc.) + +Look at the documentation for drflac_open_with_metadata() for more information on how metadata is handled. +*/ +drflac* drflac_open_memory_with_metadata(const void* data, size_t dataSize, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks); + + + +/* High Level APIs */ + +/* +Opens a FLAC stream from the given callbacks and fully decodes it in a single operation. The return value is a +pointer to the sample data as interleaved signed 32-bit PCM. The returned data must be freed with drflac_free(). + +You can pass in custom memory allocation callbacks via the pAllocationCallbacks parameter. This can be NULL in which +case it will use DRFLAC_MALLOC, DRFLAC_REALLOC and DRFLAC_FREE. + +Sometimes a FLAC file won't keep track of the total sample count. In this situation the function will continuously +read samples into a dynamically sized buffer on the heap until no samples are left. + +Do not call this function on a broadcast type of stream (like internet radio streams and whatnot). +*/ +drflac_int32* drflac_open_and_read_pcm_frames_s32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks); + +/* Same as drflac_open_and_read_pcm_frames_s32(), except returns signed 16-bit integer samples. 
*/ +drflac_int16* drflac_open_and_read_pcm_frames_s16(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks); + +/* Same as drflac_open_and_read_pcm_frames_s32(), except returns 32-bit floating-point samples. */ +float* drflac_open_and_read_pcm_frames_f32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks); #ifndef DR_FLAC_NO_STDIO -// Same as drflac_open_and_decode_s32() except opens the decoder from a file. -drflac_int32* drflac_open_and_decode_file_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount); +/* Same as drflac_open_and_read_pcm_frames_s32() except opens the decoder from a file. */ +drflac_int32* drflac_open_file_and_read_pcm_frames_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks); -// Same as drflac_open_and_decode_file_s32(), except returns signed 16-bit integer samples. -drflac_int16* drflac_open_and_decode_file_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount); +/* Same as drflac_open_file_and_read_pcm_frames_s32(), except returns signed 16-bit integer samples. */ +drflac_int16* drflac_open_file_and_read_pcm_frames_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks); -// Same as drflac_open_and_decode_file_f32(), except returns 32-bit floating-point samples. 
-float* drflac_open_and_decode_file_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount); +/* Same as drflac_open_file_and_read_pcm_frames_s32(), except returns 32-bit floating-point samples. */ +float* drflac_open_file_and_read_pcm_frames_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks); #endif -// Same as drflac_open_and_decode_s32() except opens the decoder from a block of memory. -drflac_int32* drflac_open_and_decode_memory_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount); +/* Same as drflac_open_and_read_pcm_frames_s32() except opens the decoder from a block of memory. */ +drflac_int32* drflac_open_memory_and_read_pcm_frames_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks); + +/* Same as drflac_open_memory_and_read_pcm_frames_s32(), except returns signed 16-bit integer samples. */ +drflac_int16* drflac_open_memory_and_read_pcm_frames_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks); -// Same as drflac_open_and_decode_memory_s32(), except returns signed 16-bit integer samples. -drflac_int16* drflac_open_and_decode_memory_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount); +/* Same as drflac_open_memory_and_read_pcm_frames_s32(), except returns 32-bit floating-point samples. 
*/ +float* drflac_open_memory_and_read_pcm_frames_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks); -// Same as drflac_open_and_decode_memory_s32(), except returns 32-bit floating-point samples. -float* drflac_open_and_decode_memory_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount); +/* +Frees memory that was allocated internally by dr_flac. -// Frees memory that was allocated internally by dr_flac. -void drflac_free(void* p); +Set pAllocationCallbacks to the same object that was passed to drflac_open_*_and_read_pcm_frames_*(). If you originally passed in NULL, pass in NULL for this. +*/ +void drflac_free(void* p, const drflac_allocation_callbacks* pAllocationCallbacks); -// Structure representing an iterator for vorbis comments in a VORBIS_COMMENT metadata block. +/* Structure representing an iterator for vorbis comments in a VORBIS_COMMENT metadata block. */ typedef struct { drflac_uint32 countRemaining; const char* pRunningData; } drflac_vorbis_comment_iterator; -// Initializes a vorbis comment iterator. This can be used for iterating over the vorbis comments in a VORBIS_COMMENT -// metadata block. +/* +Initializes a vorbis comment iterator. This can be used for iterating over the vorbis comments in a VORBIS_COMMENT +metadata block. +*/ void drflac_init_vorbis_comment_iterator(drflac_vorbis_comment_iterator* pIter, drflac_uint32 commentCount, const void* pComments); -// Goes to the next vorbis comment in the given iterator. If null is returned it means there are no more comments. The -// returned string is NOT null terminated. +/* +Goes to the next vorbis comment in the given iterator. If null is returned it means there are no more comments. The +returned string is NOT null terminated. 
+*/ const char* drflac_next_vorbis_comment(drflac_vorbis_comment_iterator* pIter, drflac_uint32* pCommentLengthOut); -// Structure representing an iterator for cuesheet tracks in a CUESHEET metadata block. +/* Structure representing an iterator for cuesheet tracks in a CUESHEET metadata block. */ typedef struct { drflac_uint32 countRemaining; const char* pRunningData; } drflac_cuesheet_track_iterator; -// Packing is important on this structure because we map this directly to the raw data within the CUESHEET metadata block. +/* Packing is important on this structure because we map this directly to the raw data within the CUESHEET metadata block. */ #pragma pack(4) typedef struct { @@ -757,27 +947,39 @@ typedef struct const drflac_cuesheet_track_index* pIndexPoints; } drflac_cuesheet_track; -// Initializes a cuesheet track iterator. This can be used for iterating over the cuesheet tracks in a CUESHEET metadata -// block. +/* +Initializes a cuesheet track iterator. This can be used for iterating over the cuesheet tracks in a CUESHEET metadata +block. +*/ void drflac_init_cuesheet_track_iterator(drflac_cuesheet_track_iterator* pIter, drflac_uint32 trackCount, const void* pTrackData); -// Goes to the next cuesheet track in the given iterator. If DRFLAC_FALSE is returned it means there are no more comments. +/* Goes to the next cuesheet track in the given iterator. If DRFLAC_FALSE is returned it means there are no more comments. 
*/ drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterator* pIter, drflac_cuesheet_track* pCuesheetTrack); - #ifdef __cplusplus } #endif -#endif //dr_flac_h +#endif /* dr_flac_h */ + +/************************************************************************************************************************************************************ + ************************************************************************************************************************************************************ -/////////////////////////////////////////////////////////////////////////////// -// -// IMPLEMENTATION -// -/////////////////////////////////////////////////////////////////////////////// + IMPLEMENTATION + + ************************************************************************************************************************************************************ + ************************************************************************************************************************************************************/ #ifdef DR_FLAC_IMPLEMENTATION + +/* Disable some annoying warnings. */ +#if defined(__GNUC__) + #pragma GCC diagnostic push + #if __GNUC__ >= 7 + #pragma GCC diagnostic ignored "-Wimplicit-fallthrough" + #endif +#endif + #ifdef __linux__ #ifndef _BSD_SOURCE #define _BSD_SOURCE @@ -791,7 +993,26 @@ drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterator* pIter, #include <stdlib.h> #include <string.h> -// CPU architecture. +#ifdef _MSC_VER + #define DRFLAC_INLINE __forceinline +#elif defined(__GNUC__) + /* + I've had a bug report where GCC is emitting warnings about functions possibly not being inlineable. This warning happens when + the __attribute__((always_inline)) attribute is defined without an "inline" statement. I think therefore there must be some + case where "__inline__" is not always defined, thus the compiler emitting these warnings. 
When using -std=c89 or -ansi on the + command line, we cannot use the "inline" keyword and instead need to use "__inline__". In an attempt to work around this issue + I am using "__inline__" only when we're compiling in strict ANSI mode. + */ + #if defined(__STRICT_ANSI__) + #define DRFLAC_INLINE __inline__ __attribute__((always_inline)) + #else + #define DRFLAC_INLINE inline __attribute__((always_inline)) + #endif +#else + #define DRFLAC_INLINE +#endif + +/* CPU architecture. */ #if defined(__x86_64__) || defined(_M_X64) #define DRFLAC_X64 #elif defined(__i386) || defined(_M_IX86) @@ -800,7 +1021,63 @@ drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterator* pIter, #define DRFLAC_ARM #endif -// Compile-time CPU feature support. +/* Intrinsics Support */ +#if !defined(DR_FLAC_NO_SIMD) + #if defined(DRFLAC_X64) || defined(DRFLAC_X86) + #if defined(_MSC_VER) && !defined(__clang__) + /* MSVC. */ + #if _MSC_VER >= 1400 && !defined(DRFLAC_NO_SSE2) /* 2005 */ + #define DRFLAC_SUPPORT_SSE2 + #endif + #if _MSC_VER >= 1600 && !defined(DRFLAC_NO_SSE41) /* 2010 */ + #define DRFLAC_SUPPORT_SSE41 + #endif + #else + /* Assume GNUC-style. */ + #if defined(__SSE2__) && !defined(DRFLAC_NO_SSE2) + #define DRFLAC_SUPPORT_SSE2 + #endif + #if defined(__SSE4_1__) && !defined(DRFLAC_NO_SSE41) + #define DRFLAC_SUPPORT_SSE41 + #endif + #endif + + /* If at this point we still haven't determined compiler support for the intrinsics just fall back to __has_include. 
*/ + #if !defined(__GNUC__) && !defined(__clang__) && defined(__has_include) + #if !defined(DRFLAC_SUPPORT_SSE2) && !defined(DRFLAC_NO_SSE2) && __has_include(<emmintrin.h>) + #define DRFLAC_SUPPORT_SSE2 + #endif + #if !defined(DRFLAC_SUPPORT_SSE41) && !defined(DRFLAC_NO_SSE41) && __has_include(<smmintrin.h>) + #define DRFLAC_SUPPORT_SSE41 + #endif + #endif + + #if defined(DRFLAC_SUPPORT_SSE41) + #include <smmintrin.h> + #elif defined(DRFLAC_SUPPORT_SSE2) + #include <emmintrin.h> + #endif + #endif + + #if defined(DRFLAC_ARM) + #if !defined(DRFLAC_NO_NEON) && (defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM64)) + #define DRFLAC_SUPPORT_NEON + #endif + + /* Fall back to looking for the #include file. */ + #if !defined(__GNUC__) && !defined(__clang__) && defined(__has_include) + #if !defined(DRFLAC_SUPPORT_NEON) && !defined(DRFLAC_NO_NEON) && __has_include(<arm_neon.h>) + #define DRFLAC_SUPPORT_NEON + #endif + #endif + + #if defined(DRFLAC_SUPPORT_NEON) + #include <arm_neon.h> + #endif + #endif +#endif + +/* Compile-time CPU feature support. */ #if !defined(DR_FLAC_NO_SIMD) && (defined(DRFLAC_X86) || defined(DRFLAC_X64)) #if defined(_MSC_VER) && !defined(__clang__) #if _MSC_VER >= 1400 @@ -816,11 +1093,13 @@ drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterator* pIter, #if defined(__GNUC__) || defined(__clang__) static void drflac__cpuid(int info[4], int fid) { - // It looks like the -fPIC option uses the ebx register which GCC complains about. We can work around this by just using a different register, the - // specific register of which I'm letting the compiler decide on. The "k" prefix is used to specify a 32-bit register. The {...} syntax is for - // supporting different assembly dialects. - // - // What's basically happening is that we're saving and restoring the ebx register manually. + /* + It looks like the -fPIC option uses the ebx register which GCC complains about. 
We can work around this by just using a different register, the + specific register of which I'm letting the compiler decide on. The "k" prefix is used to specify a 32-bit register. The {...} syntax is for + supporting different assembly dialects. + + What's basically happening is that we're saving and restoring the ebx register manually. + */ #if defined(DRFLAC_X86) && defined(__PIC__) __asm__ __volatile__ ( "xchg{l} {%%}ebx, %k1;" @@ -842,14 +1121,66 @@ drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterator* pIter, #define DRFLAC_NO_CPUID #endif +static DRFLAC_INLINE drflac_bool32 drflac_has_sse2() +{ +#if defined(DRFLAC_SUPPORT_SSE2) + #if (defined(DRFLAC_X64) || defined(DRFLAC_X86)) && !defined(DRFLAC_NO_SSE2) + #if defined(DRFLAC_X64) + return DRFLAC_TRUE; /* 64-bit targets always support SSE2. */ + #elif (defined(_M_IX86_FP) && _M_IX86_FP == 2) || defined(__SSE2__) + return DRFLAC_TRUE; /* If the compiler is allowed to freely generate SSE2 code we can assume support. */ + #else + #if defined(DRFLAC_NO_CPUID) + return DRFLAC_FALSE; + #else + int info[4]; + drflac__cpuid(info, 1); + return (info[3] & (1 << 26)) != 0; + #endif + #endif + #else + return DRFLAC_FALSE; /* SSE2 is only supported on x86 and x64 architectures. */ + #endif +#else + return DRFLAC_FALSE; /* No compiler support. */ +#endif +} + +static DRFLAC_INLINE drflac_bool32 drflac_has_sse41() +{ +#if defined(DRFLAC_SUPPORT_SSE41) + #if (defined(DRFLAC_X64) || defined(DRFLAC_X86)) && !defined(DRFLAC_NO_SSE41) + #if defined(DRFLAC_X64) + return DRFLAC_TRUE; /* 64-bit targets always support SSE4.1. */ + #elif (defined(_M_IX86_FP) && _M_IX86_FP == 2) || defined(__SSE4_1__) + return DRFLAC_TRUE; /* If the compiler is allowed to freely generate SSE41 code we can assume support. 
*/ + #else + #if defined(DRFLAC_NO_CPUID) + return DRFLAC_FALSE; + #else + int info[4]; + drflac__cpuid(info, 1); + return (info[2] & (1 << 19)) != 0; + #endif + #endif + #else + return DRFLAC_FALSE; /* SSE41 is only supported on x86 and x64 architectures. */ + #endif +#else + return DRFLAC_FALSE; /* No compiler support. */ +#endif +} + #if defined(_MSC_VER) && _MSC_VER >= 1500 && (defined(DRFLAC_X86) || defined(DRFLAC_X64)) #define DRFLAC_HAS_LZCNT_INTRINSIC #elif (defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7))) #define DRFLAC_HAS_LZCNT_INTRINSIC #elif defined(__clang__) - #if __has_builtin(__builtin_clzll) || __has_builtin(__builtin_clzl) - #define DRFLAC_HAS_LZCNT_INTRINSIC + #if defined(__has_builtin) + #if __has_builtin(__builtin_clzll) || __has_builtin(__builtin_clzl) + #define DRFLAC_HAS_LZCNT_INTRINSIC + #endif #endif #endif @@ -858,14 +1189,16 @@ drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterator* pIter, #define DRFLAC_HAS_BYTESWAP32_INTRINSIC #define DRFLAC_HAS_BYTESWAP64_INTRINSIC #elif defined(__clang__) - #if __has_builtin(__builtin_bswap16) - #define DRFLAC_HAS_BYTESWAP16_INTRINSIC - #endif - #if __has_builtin(__builtin_bswap32) - #define DRFLAC_HAS_BYTESWAP32_INTRINSIC - #endif - #if __has_builtin(__builtin_bswap64) - #define DRFLAC_HAS_BYTESWAP64_INTRINSIC + #if defined(__has_builtin) + #if __has_builtin(__builtin_bswap16) + #define DRFLAC_HAS_BYTESWAP16_INTRINSIC + #endif + #if __has_builtin(__builtin_bswap32) + #define DRFLAC_HAS_BYTESWAP32_INTRINSIC + #endif + #if __has_builtin(__builtin_bswap64) + #define DRFLAC_HAS_BYTESWAP64_INTRINSIC + #endif #endif #elif defined(__GNUC__) #if ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) @@ -878,7 +1211,7 @@ drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterator* pIter, #endif -// Standard library stuff. +/* Standard library stuff. 
*/ #ifndef DRFLAC_ASSERT #include <assert.h> #define DRFLAC_ASSERT(expression) assert(expression) @@ -899,21 +1232,11 @@ drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterator* pIter, #define DRFLAC_ZERO_MEMORY(p, sz) memset((p), 0, (sz)) #endif -#define DRFLAC_MAX_SIMD_VECTOR_SIZE 64 // 64 for AVX-512 in the future. - -#ifdef _MSC_VER -#define DRFLAC_INLINE __forceinline -#else -#ifdef __GNUC__ -#define DRFLAC_INLINE inline __attribute__((always_inline)) -#else -#define DRFLAC_INLINE inline -#endif -#endif +#define DRFLAC_MAX_SIMD_VECTOR_SIZE 64 /* 64 for AVX-512 in the future. */ typedef drflac_int32 drflac_result; #define DRFLAC_SUCCESS 0 -#define DRFLAC_ERROR -1 // A generic error. +#define DRFLAC_ERROR -1 /* A generic error. */ #define DRFLAC_INVALID_ARGS -2 #define DRFLAC_END_OF_STREAM -128 #define DRFLAC_CRC_MISMATCH -129 @@ -932,37 +1255,96 @@ typedef drflac_int32 drflac_result; #define DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE 9 #define DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE 10 - #define drflac_align(x, a) ((((x) + (a) - 1) / (a)) * (a)) -#define drflac_assert DRFLAC_ASSERT -#define drflac_copy_memory DRFLAC_COPY_MEMORY -#define drflac_zero_memory DRFLAC_ZERO_MEMORY -// CPU caps. +/* CPU caps. 
*/ +#if defined(__has_feature) + #if __has_feature(thread_sanitizer) + #define DRFLAC_NO_THREAD_SANITIZE __attribute__((no_sanitize("thread"))) + #else + #define DRFLAC_NO_THREAD_SANITIZE + #endif +#else + #define DRFLAC_NO_THREAD_SANITIZE +#endif + +#if defined(DRFLAC_HAS_LZCNT_INTRINSIC) static drflac_bool32 drflac__gIsLZCNTSupported = DRFLAC_FALSE; -#ifndef DRFLAC_NO_CPUID -static drflac_bool32 drflac__gIsSSE42Supported = DRFLAC_FALSE; -static void drflac__init_cpu_caps() -{ - int info[4] = {0}; +#endif - // LZCNT - drflac__cpuid(info, 0x80000001); - drflac__gIsLZCNTSupported = (info[2] & (1 << 5)) != 0; +#ifndef DRFLAC_NO_CPUID +static drflac_bool32 drflac__gIsSSE2Supported = DRFLAC_FALSE; +static drflac_bool32 drflac__gIsSSE41Supported = DRFLAC_FALSE; - // SSE4.2 - drflac__cpuid(info, 1); - drflac__gIsSSE42Supported = (info[2] & (1 << 19)) != 0; +/* +I've had a bug report that Clang's ThreadSanitizer presents a warning in this function. Having reviewed this, this does +actually make sense. However, since CPU caps should never differ for a running process, I don't think the trade off of +complicating internal API's by passing around CPU caps versus just disabling the warnings is worthwhile. I'm therefore +just going to disable these warnings. This is disabled via the DRFLAC_NO_THREAD_SANITIZE attribute. +*/ +DRFLAC_NO_THREAD_SANITIZE static void drflac__init_cpu_caps() +{ + static drflac_bool32 isCPUCapsInitialized = DRFLAC_FALSE; + + if (!isCPUCapsInitialized) { + int info[4] = {0}; + + /* LZCNT */ +#if defined(DRFLAC_HAS_LZCNT_INTRINSIC) + drflac__cpuid(info, 0x80000001); + drflac__gIsLZCNTSupported = (info[2] & (1 << 5)) != 0; +#endif + + /* SSE2 */ + drflac__gIsSSE2Supported = drflac_has_sse2(); + + /* SSE4.1 */ + drflac__gIsSSE41Supported = drflac_has_sse41(); + + /* Initialized. 
*/ + isCPUCapsInitialized = DRFLAC_TRUE; + } +} +#else +static drflac_bool32 drflac__gIsNEONSupported = DRFLAC_FALSE; + +static DRFLAC_INLINE drflac_bool32 drflac__has_neon() +{ +#if defined(DRFLAC_SUPPORT_NEON) + #if defined(DRFLAC_ARM) && !defined(DRFLAC_NO_NEON) + #if (defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM64)) + return DRFLAC_TRUE; /* If the compiler is allowed to freely generate NEON code we can assume support. */ + #else + /* TODO: Runtime check. */ + return DRFLAC_FALSE; + #endif + #else + return DRFLAC_FALSE; /* NEON is only supported on ARM architectures. */ + #endif +#else + return DRFLAC_FALSE; /* No compiler support. */ +#endif +} + +DRFLAC_NO_THREAD_SANITIZE static void drflac__init_cpu_caps() +{ + drflac__gIsNEONSupported = drflac__has_neon(); + +#if defined(DRFLAC_HAS_LZCNT_INTRINSIC) && defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 5) + drflac__gIsLZCNTSupported = DRFLAC_TRUE; +#endif } #endif -//// Endian Management //// +/* Endian Management */ static DRFLAC_INLINE drflac_bool32 drflac__is_little_endian() { #if defined(DRFLAC_X86) || defined(DRFLAC_X64) return DRFLAC_TRUE; +#elif defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && __BYTE_ORDER == __LITTLE_ENDIAN + return DRFLAC_TRUE; #else int n = 1; return (*(char*)&n) == 1; @@ -991,7 +1373,20 @@ static DRFLAC_INLINE drflac_uint32 drflac__swap_endian_uint32(drflac_uint32 n) #if defined(_MSC_VER) return _byteswap_ulong(n); #elif defined(__GNUC__) || defined(__clang__) - return __builtin_bswap32(n); + #if defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 6) && !defined(DRFLAC_64BIT) /* <-- 64-bit inline assembly has not been tested, so disabling for now. */ + /* Inline assembly optimized implementation for ARM. In my testing, GCC does not generate optimized code with __builtin_bswap32(). */ + drflac_uint32 r; + __asm__ __volatile__ ( + #if defined(DRFLAC_64BIT) + "rev %w[out], %w[in]" : [out]"=r"(r) : [in]"r"(n) /* <-- This is untested. 
If someone in the community could test this, that would be appreciated! */ + #else + "rev %[out], %[in]" : [out]"=r"(r) : [in]"r"(n) + #endif + ); + return r; + #else + return __builtin_bswap32(n); + #endif #else #error "This compiler does not support the byte swap intrinsic." #endif @@ -1028,55 +1423,39 @@ static DRFLAC_INLINE drflac_uint64 drflac__swap_endian_uint64(drflac_uint64 n) static DRFLAC_INLINE drflac_uint16 drflac__be2host_16(drflac_uint16 n) { -#ifdef __linux__ - return be16toh(n); -#else if (drflac__is_little_endian()) { return drflac__swap_endian_uint16(n); } return n; -#endif } static DRFLAC_INLINE drflac_uint32 drflac__be2host_32(drflac_uint32 n) { -#ifdef __linux__ - return be32toh(n); -#else if (drflac__is_little_endian()) { return drflac__swap_endian_uint32(n); } return n; -#endif } static DRFLAC_INLINE drflac_uint64 drflac__be2host_64(drflac_uint64 n) { -#ifdef __linux__ - return be64toh(n); -#else if (drflac__is_little_endian()) { return drflac__swap_endian_uint64(n); } return n; -#endif } static DRFLAC_INLINE drflac_uint32 drflac__le2host_32(drflac_uint32 n) { -#ifdef __linux__ - return le32toh(n); -#else if (!drflac__is_little_endian()) { return drflac__swap_endian_uint32(n); } return n; -#endif } @@ -1093,7 +1472,7 @@ static DRFLAC_INLINE drflac_uint32 drflac__unsynchsafe_32(drflac_uint32 n) -// The CRC code below is based on this document: http://zlib.net/crc_v3.txt +/* The CRC code below is based on this document: http://zlib.net/crc_v3.txt */ static drflac_uint8 drflac__crc8_table[] = { 0x00, 0x07, 0x0E, 0x09, 0x1C, 0x1B, 0x12, 0x15, 0x38, 0x3F, 0x36, 0x31, 0x24, 0x23, 0x2A, 0x2D, 0x70, 0x77, 0x7E, 0x79, 0x6C, 0x6B, 0x62, 0x65, 0x48, 0x4F, 0x46, 0x41, 0x54, 0x53, 0x5A, 0x5D, @@ -1155,8 +1534,6 @@ static DRFLAC_INLINE drflac_uint8 drflac_crc8_byte(drflac_uint8 crc, drflac_uint static DRFLAC_INLINE drflac_uint8 drflac_crc8(drflac_uint8 crc, drflac_uint32 data, drflac_uint32 count) { - drflac_assert(count <= 32); - #ifdef DR_FLAC_NO_CRC 
(void)crc; (void)data; @@ -1164,7 +1541,7 @@ static DRFLAC_INLINE drflac_uint8 drflac_crc8(drflac_uint8 crc, drflac_uint32 da return 0; #else #if 0 - // REFERENCE (use of this implementation requires an explicit flush by doing "drflac_crc8(crc, 0, 8);") + /* REFERENCE (use of this implementation requires an explicit flush by doing "drflac_crc8(crc, 0, 8);") */ drflac_uint8 p = 0x07; for (int i = count-1; i >= 0; --i) { drflac_uint8 bit = (data & (1 << i)) >> i; @@ -1176,13 +1553,19 @@ static DRFLAC_INLINE drflac_uint8 drflac_crc8(drflac_uint8 crc, drflac_uint32 da } return crc; #else - drflac_uint32 wholeBytes = count >> 3; - drflac_uint32 leftoverBits = count - (wholeBytes*8); + drflac_uint32 wholeBytes; + drflac_uint32 leftoverBits; + drflac_uint64 leftoverDataMask; static drflac_uint64 leftoverDataMaskTable[8] = { 0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F }; - drflac_uint64 leftoverDataMask = leftoverDataMaskTable[leftoverBits]; + + DRFLAC_ASSERT(count <= 32); + + wholeBytes = count >> 3; + leftoverBits = count - (wholeBytes*8); + leftoverDataMask = leftoverDataMaskTable[leftoverBits]; switch (wholeBytes) { case 4: crc = drflac_crc8_byte(crc, (drflac_uint8)((data & (0xFF000000UL << leftoverBits)) >> (24 + leftoverBits))); @@ -1201,6 +1584,22 @@ static DRFLAC_INLINE drflac_uint16 drflac_crc16_byte(drflac_uint16 crc, drflac_u return (crc << 8) ^ drflac__crc16_table[(drflac_uint8)(crc >> 8) ^ data]; } +static DRFLAC_INLINE drflac_uint16 drflac_crc16_cache(drflac_uint16 crc, drflac_cache_t data) +{ +#ifdef DRFLAC_64BIT + crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 56) & 0xFF)); + crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 48) & 0xFF)); + crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 40) & 0xFF)); + crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 32) & 0xFF)); +#endif + crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 24) & 0xFF)); + crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 16) & 0xFF)); + crc = 
drflac_crc16_byte(crc, (drflac_uint8)((data >> 8) & 0xFF)); + crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 0) & 0xFF)); + + return crc; +} + static DRFLAC_INLINE drflac_uint16 drflac_crc16_bytes(drflac_uint16 crc, drflac_cache_t data, drflac_uint32 byteCount) { switch (byteCount) @@ -1220,10 +1619,9 @@ static DRFLAC_INLINE drflac_uint16 drflac_crc16_bytes(drflac_uint16 crc, drflac_ return crc; } +#if 0 static DRFLAC_INLINE drflac_uint16 drflac_crc16__32bit(drflac_uint16 crc, drflac_uint32 data, drflac_uint32 count) { - drflac_assert(count <= 64); - #ifdef DR_FLAC_NO_CRC (void)crc; (void)data; @@ -1231,7 +1629,7 @@ static DRFLAC_INLINE drflac_uint16 drflac_crc16__32bit(drflac_uint16 crc, drflac return 0; #else #if 0 - // REFERENCE (use of this implementation requires an explicit flush by doing "drflac_crc16(crc, 0, 16);") + /* REFERENCE (use of this implementation requires an explicit flush by doing "drflac_crc16(crc, 0, 16);") */ drflac_uint16 p = 0x8005; for (int i = count-1; i >= 0; --i) { drflac_uint16 bit = (data & (1ULL << i)) >> i; @@ -1244,13 +1642,19 @@ static DRFLAC_INLINE drflac_uint16 drflac_crc16__32bit(drflac_uint16 crc, drflac return crc; #else - drflac_uint32 wholeBytes = count >> 3; - drflac_uint32 leftoverBits = count - (wholeBytes*8); + drflac_uint32 wholeBytes; + drflac_uint32 leftoverBits; + drflac_uint64 leftoverDataMask; static drflac_uint64 leftoverDataMaskTable[8] = { 0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F }; - drflac_uint64 leftoverDataMask = leftoverDataMaskTable[leftoverBits]; + + DRFLAC_ASSERT(count <= 64); + + wholeBytes = count >> 3; + leftoverBits = count & 7; + leftoverDataMask = leftoverDataMaskTable[leftoverBits]; switch (wholeBytes) { default: @@ -1267,32 +1671,36 @@ static DRFLAC_INLINE drflac_uint16 drflac_crc16__32bit(drflac_uint16 crc, drflac static DRFLAC_INLINE drflac_uint16 drflac_crc16__64bit(drflac_uint16 crc, drflac_uint64 data, drflac_uint32 count) { - drflac_assert(count <= 64); - #ifdef DR_FLAC_NO_CRC 
(void)crc; (void)data; (void)count; return 0; #else - drflac_uint32 wholeBytes = count >> 3; - drflac_uint32 leftoverBits = count - (wholeBytes*8); + drflac_uint32 wholeBytes; + drflac_uint32 leftoverBits; + drflac_uint64 leftoverDataMask; static drflac_uint64 leftoverDataMaskTable[8] = { 0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F }; - drflac_uint64 leftoverDataMask = leftoverDataMaskTable[leftoverBits]; + + DRFLAC_ASSERT(count <= 64); + + wholeBytes = count >> 3; + leftoverBits = count & 7; + leftoverDataMask = leftoverDataMaskTable[leftoverBits]; switch (wholeBytes) { default: - case 8: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & ((drflac_uint64)0xFF00000000000000 << leftoverBits)) >> (56 + leftoverBits))); - case 7: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & ((drflac_uint64)0x00FF000000000000 << leftoverBits)) >> (48 + leftoverBits))); - case 6: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & ((drflac_uint64)0x0000FF0000000000 << leftoverBits)) >> (40 + leftoverBits))); - case 5: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & ((drflac_uint64)0x000000FF00000000 << leftoverBits)) >> (32 + leftoverBits))); - case 4: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & ((drflac_uint64)0x00000000FF000000 << leftoverBits)) >> (24 + leftoverBits))); - case 3: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & ((drflac_uint64)0x0000000000FF0000 << leftoverBits)) >> (16 + leftoverBits))); - case 2: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & ((drflac_uint64)0x000000000000FF00 << leftoverBits)) >> ( 8 + leftoverBits))); - case 1: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & ((drflac_uint64)0x00000000000000FF << leftoverBits)) >> ( 0 + leftoverBits))); + case 8: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0xFF000000 << 32) << leftoverBits)) >> (56 + leftoverBits))); /* Weird "<< 32" bitshift is required for C89 because it doesn't support 64-bit constants. Should be optimized out by a good compiler. 
*/ + case 7: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x00FF0000 << 32) << leftoverBits)) >> (48 + leftoverBits))); + case 6: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x0000FF00 << 32) << leftoverBits)) >> (40 + leftoverBits))); + case 5: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x000000FF << 32) << leftoverBits)) >> (32 + leftoverBits))); + case 4: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0xFF000000 ) << leftoverBits)) >> (24 + leftoverBits))); + case 3: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x00FF0000 ) << leftoverBits)) >> (16 + leftoverBits))); + case 2: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x0000FF00 ) << leftoverBits)) >> ( 8 + leftoverBits))); + case 1: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x000000FF ) << leftoverBits)) >> ( 0 + leftoverBits))); case 0: if (leftoverBits > 0) crc = (crc << leftoverBits) ^ drflac__crc16_table[(crc >> (16 - leftoverBits)) ^ (data & leftoverDataMask)]; } return crc; @@ -1308,6 +1716,7 @@ static DRFLAC_INLINE drflac_uint16 drflac_crc16(drflac_uint16 crc, drflac_cache_ return drflac_crc16__32bit(crc, data, count); #endif } +#endif #ifdef DRFLAC_64BIT @@ -1316,13 +1725,15 @@ static DRFLAC_INLINE drflac_uint16 drflac_crc16(drflac_uint16 crc, drflac_cache_ #define drflac__be2host__cache_line drflac__be2host_32 #endif -// BIT READING ATTEMPT #2 -// -// This uses a 32- or 64-bit bit-shifted cache - as bits are read, the cache is shifted such that the first valid bit is sitting -// on the most significant bit. It uses the notion of an L1 and L2 cache (borrowed from CPU architecture), where the L1 cache -// is a 32- or 64-bit unsigned integer (depending on whether or not a 32- or 64-bit build is being compiled) and the L2 is an -// array of "cache lines", with each cache line being the same size as the L1. 
The L2 is a buffer of about 4KB and is where data -// from onRead() is read into. +/* +BIT READING ATTEMPT #2 + +This uses a 32- or 64-bit bit-shifted cache - as bits are read, the cache is shifted such that the first valid bit is sitting +on the most significant bit. It uses the notion of an L1 and L2 cache (borrowed from CPU architecture), where the L1 cache +is a 32- or 64-bit unsigned integer (depending on whether or not a 32- or 64-bit build is being compiled) and the L2 is an +array of "cache lines", with each cache line being the same size as the L1. The L2 is a buffer of about 4KB and is where data +from onRead() is read into. +*/ #define DRFLAC_CACHE_L1_SIZE_BYTES(bs) (sizeof((bs)->cache)) #define DRFLAC_CACHE_L1_SIZE_BITS(bs) (sizeof((bs)->cache)*8) #define DRFLAC_CACHE_L1_BITS_REMAINING(bs) (DRFLAC_CACHE_L1_SIZE_BITS(bs) - (bs)->consumedBits) @@ -1345,25 +1756,33 @@ static DRFLAC_INLINE void drflac__reset_crc16(drflac_bs* bs) static DRFLAC_INLINE void drflac__update_crc16(drflac_bs* bs) { - bs->crc16 = drflac_crc16_bytes(bs->crc16, bs->crc16Cache, DRFLAC_CACHE_L1_SIZE_BYTES(bs) - bs->crc16CacheIgnoredBytes); - bs->crc16CacheIgnoredBytes = 0; + if (bs->crc16CacheIgnoredBytes == 0) { + bs->crc16 = drflac_crc16_cache(bs->crc16, bs->crc16Cache); + } else { + bs->crc16 = drflac_crc16_bytes(bs->crc16, bs->crc16Cache, DRFLAC_CACHE_L1_SIZE_BYTES(bs) - bs->crc16CacheIgnoredBytes); + bs->crc16CacheIgnoredBytes = 0; + } } static DRFLAC_INLINE drflac_uint16 drflac__flush_crc16(drflac_bs* bs) { - // We should never be flushing in a situation where we are not aligned on a byte boundary. - drflac_assert((DRFLAC_CACHE_L1_BITS_REMAINING(bs) & 7) == 0); + /* We should never be flushing in a situation where we are not aligned on a byte boundary. */ + DRFLAC_ASSERT((DRFLAC_CACHE_L1_BITS_REMAINING(bs) & 7) == 0); - // The bits that were read from the L1 cache need to be accumulated. 
The number of bytes needing to be accumulated is determined - // by the number of bits that have been consumed. + /* + The bits that were read from the L1 cache need to be accumulated. The number of bytes needing to be accumulated is determined + by the number of bits that have been consumed. + */ if (DRFLAC_CACHE_L1_BITS_REMAINING(bs) == 0) { drflac__update_crc16(bs); } else { - // We only accumulate the consumed bits. + /* We only accumulate the consumed bits. */ bs->crc16 = drflac_crc16_bytes(bs->crc16, bs->crc16Cache >> DRFLAC_CACHE_L1_BITS_REMAINING(bs), (bs->consumedBits >> 3) - bs->crc16CacheIgnoredBytes); - // The bits that we just accumulated should never be accumulated again. We need to keep track of how many bytes were accumulated - // so we can handle that later. + /* + The bits that we just accumulated should never be accumulated again. We need to keep track of how many bytes were accumulated + so we can handle that later. + */ bs->crc16CacheIgnoredBytes = bs->consumedBits >> 3; } @@ -1373,19 +1792,24 @@ static DRFLAC_INLINE drflac_uint16 drflac__flush_crc16(drflac_bs* bs) static DRFLAC_INLINE drflac_bool32 drflac__reload_l1_cache_from_l2(drflac_bs* bs) { - // Fast path. Try loading straight from L2. + size_t bytesRead; + size_t alignedL1LineCount; + + /* Fast path. Try loading straight from L2. */ if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) { bs->cache = bs->cacheL2[bs->nextL2Line++]; return DRFLAC_TRUE; } - // If we get here it means we've run out of data in the L2 cache. We'll need to fetch more from the client, if there's - // any left. + /* + If we get here it means we've run out of data in the L2 cache. We'll need to fetch more from the client, if there's + any left. + */ if (bs->unalignedByteCount > 0) { - return DRFLAC_FALSE; // If we have any unaligned bytes it means there's no more aligned bytes left in the client. + return DRFLAC_FALSE; /* If we have any unaligned bytes it means there's no more aligned bytes left in the client. 
*/ } - size_t bytesRead = bs->onRead(bs->pUserData, bs->cacheL2, DRFLAC_CACHE_L2_SIZE_BYTES(bs)); + bytesRead = bs->onRead(bs->pUserData, bs->cacheL2, DRFLAC_CACHE_L2_SIZE_BYTES(bs)); bs->nextL2Line = 0; if (bytesRead == DRFLAC_CACHE_L2_SIZE_BYTES(bs)) { @@ -1394,13 +1818,15 @@ static DRFLAC_INLINE drflac_bool32 drflac__reload_l1_cache_from_l2(drflac_bs* bs } - // If we get here it means we were unable to retrieve enough data to fill the entire L2 cache. It probably - // means we've just reached the end of the file. We need to move the valid data down to the end of the buffer - // and adjust the index of the next line accordingly. Also keep in mind that the L2 cache must be aligned to - // the size of the L1 so we'll need to seek backwards by any misaligned bytes. - size_t alignedL1LineCount = bytesRead / DRFLAC_CACHE_L1_SIZE_BYTES(bs); + /* + If we get here it means we were unable to retrieve enough data to fill the entire L2 cache. It probably + means we've just reached the end of the file. We need to move the valid data down to the end of the buffer + and adjust the index of the next line accordingly. Also keep in mind that the L2 cache must be aligned to + the size of the L1 so we'll need to seek backwards by any misaligned bytes. + */ + alignedL1LineCount = bytesRead / DRFLAC_CACHE_L1_SIZE_BYTES(bs); - // We need to keep track of any unaligned bytes for later use. + /* We need to keep track of any unaligned bytes for later use. 
*/ bs->unalignedByteCount = bytesRead - (alignedL1LineCount * DRFLAC_CACHE_L1_SIZE_BYTES(bs)); if (bs->unalignedByteCount > 0) { bs->unalignedCache = bs->cacheL2[alignedL1LineCount]; @@ -1408,7 +1834,8 @@ static DRFLAC_INLINE drflac_bool32 drflac__reload_l1_cache_from_l2(drflac_bs* bs if (alignedL1LineCount > 0) { size_t offset = DRFLAC_CACHE_L2_LINE_COUNT(bs) - alignedL1LineCount; - for (size_t i = alignedL1LineCount; i > 0; --i) { + size_t i; + for (i = alignedL1LineCount; i > 0; --i) { bs->cacheL2[i-1 + offset] = bs->cacheL2[i-1]; } @@ -1416,7 +1843,7 @@ static DRFLAC_INLINE drflac_bool32 drflac__reload_l1_cache_from_l2(drflac_bs* bs bs->cache = bs->cacheL2[bs->nextL2Line++]; return DRFLAC_TRUE; } else { - // If we get into this branch it means we weren't able to load any L1-aligned data. + /* If we get into this branch it means we weren't able to load any L1-aligned data. */ bs->nextL2Line = DRFLAC_CACHE_L2_LINE_COUNT(bs); return DRFLAC_FALSE; } @@ -1424,11 +1851,13 @@ static DRFLAC_INLINE drflac_bool32 drflac__reload_l1_cache_from_l2(drflac_bs* bs static drflac_bool32 drflac__reload_cache(drflac_bs* bs) { + size_t bytesRead; + #ifndef DR_FLAC_NO_CRC drflac__update_crc16(bs); #endif - // Fast path. Try just moving the next value in the L2 cache to the L1 cache. + /* Fast path. Try just moving the next value in the L2 cache to the L1 cache. */ if (drflac__reload_l1_cache_from_l2(bs)) { bs->cache = drflac__be2host__cache_line(bs->cache); bs->consumedBits = 0; @@ -1438,23 +1867,25 @@ static drflac_bool32 drflac__reload_cache(drflac_bs* bs) return DRFLAC_TRUE; } - // Slow path. + /* Slow path. */ - // If we get here it means we have failed to load the L1 cache from the L2. Likely we've just reached the end of the stream and the last - // few bytes did not meet the alignment requirements for the L2 cache. In this case we need to fall back to a slower path and read the - // data from the unaligned cache. 
- size_t bytesRead = bs->unalignedByteCount; + /* + If we get here it means we have failed to load the L1 cache from the L2. Likely we've just reached the end of the stream and the last + few bytes did not meet the alignment requirements for the L2 cache. In this case we need to fall back to a slower path and read the + data from the unaligned cache. + */ + bytesRead = bs->unalignedByteCount; if (bytesRead == 0) { - bs->consumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs); // <-- The stream has been exhausted, so marked the bits as consumed. + bs->consumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs); /* <-- The stream has been exhausted, so marked the bits as consumed. */ return DRFLAC_FALSE; } - drflac_assert(bytesRead < DRFLAC_CACHE_L1_SIZE_BYTES(bs)); + DRFLAC_ASSERT(bytesRead < DRFLAC_CACHE_L1_SIZE_BYTES(bs)); bs->consumedBits = (drflac_uint32)(DRFLAC_CACHE_L1_SIZE_BYTES(bs) - bytesRead) * 8; bs->cache = drflac__be2host__cache_line(bs->unalignedCache); - bs->cache &= DRFLAC_CACHE_L1_SELECTION_MASK(DRFLAC_CACHE_L1_BITS_REMAINING(bs)); // <-- Make sure the consumed bits are always set to zero. Other parts of the library depend on this property. - bs->unalignedByteCount = 0; // <-- At this point the unaligned bytes have been moved into the cache and we thus have no more unaligned bytes. + bs->cache &= DRFLAC_CACHE_L1_SELECTION_MASK(DRFLAC_CACHE_L1_BITS_REMAINING(bs)); /* <-- Make sure the consumed bits are always set to zero. Other parts of the library depend on this property. */ + bs->unalignedByteCount = 0; /* <-- At this point the unaligned bytes have been moved into the cache and we thus have no more unaligned bytes. */ #ifndef DR_FLAC_NO_CRC bs->crc16Cache = bs->cache >> bs->consumedBits; @@ -1465,10 +1896,10 @@ static drflac_bool32 drflac__reload_cache(drflac_bs* bs) static void drflac__reset_cache(drflac_bs* bs) { - bs->nextL2Line = DRFLAC_CACHE_L2_LINE_COUNT(bs); // <-- This clears the L2 cache. 
- bs->consumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs); // <-- This clears the L1 cache. + bs->nextL2Line = DRFLAC_CACHE_L2_LINE_COUNT(bs); /* <-- This clears the L2 cache. */ + bs->consumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs); /* <-- This clears the L1 cache. */ bs->cache = 0; - bs->unalignedByteCount = 0; // <-- This clears the trailing unaligned bytes. + bs->unalignedByteCount = 0; /* <-- This clears the trailing unaligned bytes. */ bs->unalignedCache = 0; #ifndef DR_FLAC_NO_CRC @@ -1480,10 +1911,10 @@ static void drflac__reset_cache(drflac_bs* bs) static DRFLAC_INLINE drflac_bool32 drflac__read_uint32(drflac_bs* bs, unsigned int bitCount, drflac_uint32* pResultOut) { - drflac_assert(bs != NULL); - drflac_assert(pResultOut != NULL); - drflac_assert(bitCount > 0); - drflac_assert(bitCount <= 32); + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(pResultOut != NULL); + DRFLAC_ASSERT(bitCount > 0); + DRFLAC_ASSERT(bitCount <= 32); if (bs->consumedBits == DRFLAC_CACHE_L1_SIZE_BITS(bs)) { if (!drflac__reload_cache(bs)) { @@ -1492,9 +1923,11 @@ static DRFLAC_INLINE drflac_bool32 drflac__read_uint32(drflac_bs* bs, unsigned i } if (bitCount <= DRFLAC_CACHE_L1_BITS_REMAINING(bs)) { - // If we want to load all 32-bits from a 32-bit cache we need to do it slightly differently because we can't do - // a 32-bit shift on a 32-bit integer. This will never be the case on 64-bit caches, so we can have a slightly - // more optimal solution for this. + /* + If we want to load all 32-bits from a 32-bit cache we need to do it slightly differently because we can't do + a 32-bit shift on a 32-bit integer. This will never be the case on 64-bit caches, so we can have a slightly + more optimal solution for this. 
+ */ #ifdef DRFLAC_64BIT *pResultOut = (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCount); bs->consumedBits += bitCount; @@ -1505,7 +1938,7 @@ static DRFLAC_INLINE drflac_bool32 drflac__read_uint32(drflac_bs* bs, unsigned i bs->consumedBits += bitCount; bs->cache <<= bitCount; } else { - // Cannot shift by 32-bits, so need to do it differently. + /* Cannot shift by 32-bits, so need to do it differently. */ *pResultOut = (drflac_uint32)bs->cache; bs->consumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs); bs->cache = 0; @@ -1514,7 +1947,7 @@ static DRFLAC_INLINE drflac_bool32 drflac__read_uint32(drflac_bs* bs, unsigned i return DRFLAC_TRUE; } else { - // It straddles the cached data. It will never cover more than the next chunk. We just read the number in two parts and combine them. + /* It straddles the cached data. It will never cover more than the next chunk. We just read the number in two parts and combine them. */ drflac_uint32 bitCountHi = DRFLAC_CACHE_L1_BITS_REMAINING(bs); drflac_uint32 bitCountLo = bitCount - bitCountHi; drflac_uint32 resultHi = (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCountHi); @@ -1532,17 +1965,19 @@ static DRFLAC_INLINE drflac_bool32 drflac__read_uint32(drflac_bs* bs, unsigned i static drflac_bool32 drflac__read_int32(drflac_bs* bs, unsigned int bitCount, drflac_int32* pResult) { - drflac_assert(bs != NULL); - drflac_assert(pResult != NULL); - drflac_assert(bitCount > 0); - drflac_assert(bitCount <= 32); - drflac_uint32 result; + drflac_uint32 signbit; + + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(pResult != NULL); + DRFLAC_ASSERT(bitCount > 0); + DRFLAC_ASSERT(bitCount <= 32); + if (!drflac__read_uint32(bs, bitCount, &result)) { return DRFLAC_FALSE; } - drflac_uint32 signbit = ((result >> (bitCount-1)) & 0x01); + signbit = ((result >> (bitCount-1)) & 0x01); result |= (~signbit + 1) << bitCount; *pResult = (drflac_int32)result; @@ -1552,15 +1987,16 @@ static drflac_bool32 drflac__read_int32(drflac_bs* bs, unsigned int 
bitCount, dr #ifdef DRFLAC_64BIT static drflac_bool32 drflac__read_uint64(drflac_bs* bs, unsigned int bitCount, drflac_uint64* pResultOut) { - drflac_assert(bitCount <= 64); - drflac_assert(bitCount > 32); - drflac_uint32 resultHi; + drflac_uint32 resultLo; + + DRFLAC_ASSERT(bitCount <= 64); + DRFLAC_ASSERT(bitCount > 32); + if (!drflac__read_uint32(bs, bitCount - 32, &resultHi)) { return DRFLAC_FALSE; } - drflac_uint32 resultLo; if (!drflac__read_uint32(bs, 32, &resultLo)) { return DRFLAC_FALSE; } @@ -1570,18 +2006,20 @@ static drflac_bool32 drflac__read_uint64(drflac_bs* bs, unsigned int bitCount, d } #endif -// Function below is unused, but leaving it here in case I need to quickly add it again. +/* Function below is unused, but leaving it here in case I need to quickly add it again. */ #if 0 static drflac_bool32 drflac__read_int64(drflac_bs* bs, unsigned int bitCount, drflac_int64* pResultOut) { - drflac_assert(bitCount <= 64); - drflac_uint64 result; + drflac_uint64 signbit; + + DRFLAC_ASSERT(bitCount <= 64); + if (!drflac__read_uint64(bs, bitCount, &result)) { return DRFLAC_FALSE; } - drflac_uint64 signbit = ((result >> (bitCount-1)) & 0x01); + signbit = ((result >> (bitCount-1)) & 0x01); result |= (~signbit + 1) << bitCount; *pResultOut = (drflac_int64)result; @@ -1591,12 +2029,13 @@ static drflac_bool32 drflac__read_int64(drflac_bs* bs, unsigned int bitCount, dr static drflac_bool32 drflac__read_uint16(drflac_bs* bs, unsigned int bitCount, drflac_uint16* pResult) { - drflac_assert(bs != NULL); - drflac_assert(pResult != NULL); - drflac_assert(bitCount > 0); - drflac_assert(bitCount <= 16); - drflac_uint32 result; + + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(pResult != NULL); + DRFLAC_ASSERT(bitCount > 0); + DRFLAC_ASSERT(bitCount <= 16); + if (!drflac__read_uint32(bs, bitCount, &result)) { return DRFLAC_FALSE; } @@ -1608,12 +2047,13 @@ static drflac_bool32 drflac__read_uint16(drflac_bs* bs, unsigned int bitCount, d #if 0 static drflac_bool32 
drflac__read_int16(drflac_bs* bs, unsigned int bitCount, drflac_int16* pResult) { - drflac_assert(bs != NULL); - drflac_assert(pResult != NULL); - drflac_assert(bitCount > 0); - drflac_assert(bitCount <= 16); - drflac_int32 result; + + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(pResult != NULL); + DRFLAC_ASSERT(bitCount > 0); + DRFLAC_ASSERT(bitCount <= 16); + if (!drflac__read_int32(bs, bitCount, &result)) { return DRFLAC_FALSE; } @@ -1625,12 +2065,13 @@ static drflac_bool32 drflac__read_int16(drflac_bs* bs, unsigned int bitCount, dr static drflac_bool32 drflac__read_uint8(drflac_bs* bs, unsigned int bitCount, drflac_uint8* pResult) { - drflac_assert(bs != NULL); - drflac_assert(pResult != NULL); - drflac_assert(bitCount > 0); - drflac_assert(bitCount <= 8); - drflac_uint32 result; + + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(pResult != NULL); + DRFLAC_ASSERT(bitCount > 0); + DRFLAC_ASSERT(bitCount <= 8); + if (!drflac__read_uint32(bs, bitCount, &result)) { return DRFLAC_FALSE; } @@ -1641,12 +2082,13 @@ static drflac_bool32 drflac__read_uint8(drflac_bs* bs, unsigned int bitCount, dr static drflac_bool32 drflac__read_int8(drflac_bs* bs, unsigned int bitCount, drflac_int8* pResult) { - drflac_assert(bs != NULL); - drflac_assert(pResult != NULL); - drflac_assert(bitCount > 0); - drflac_assert(bitCount <= 8); - drflac_int32 result; + + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(pResult != NULL); + DRFLAC_ASSERT(bitCount > 0); + DRFLAC_ASSERT(bitCount <= 8); + if (!drflac__read_int32(bs, bitCount, &result)) { return DRFLAC_FALSE; } @@ -1663,12 +2105,12 @@ static drflac_bool32 drflac__seek_bits(drflac_bs* bs, size_t bitsToSeek) bs->cache <<= bitsToSeek; return DRFLAC_TRUE; } else { - // It straddles the cached data. This function isn't called too frequently so I'm favouring simplicity here. + /* It straddles the cached data. This function isn't called too frequently so I'm favouring simplicity here. 
*/ bitsToSeek -= DRFLAC_CACHE_L1_BITS_REMAINING(bs); bs->consumedBits += DRFLAC_CACHE_L1_BITS_REMAINING(bs); bs->cache = 0; - // Simple case. Seek in groups of the same number as bits that fit within a cache line. + /* Simple case. Seek in groups of the same number as bits that fit within a cache line. */ #ifdef DRFLAC_64BIT while (bitsToSeek >= DRFLAC_CACHE_L1_SIZE_BITS(bs)) { drflac_uint64 bin; @@ -1687,7 +2129,7 @@ static drflac_bool32 drflac__seek_bits(drflac_bs* bs, size_t bitsToSeek) } #endif - // Whole leftover bytes. + /* Whole leftover bytes. */ while (bitsToSeek >= 8) { drflac_uint8 bin; if (!drflac__read_uint8(bs, 8, &bin)) { @@ -1696,38 +2138,41 @@ static drflac_bool32 drflac__seek_bits(drflac_bs* bs, size_t bitsToSeek) bitsToSeek -= 8; } - // Leftover bits. + /* Leftover bits. */ if (bitsToSeek > 0) { drflac_uint8 bin; if (!drflac__read_uint8(bs, (drflac_uint32)bitsToSeek, &bin)) { return DRFLAC_FALSE; } - bitsToSeek = 0; // <-- Necessary for the assert below. + bitsToSeek = 0; /* <-- Necessary for the assert below. */ } - drflac_assert(bitsToSeek == 0); + DRFLAC_ASSERT(bitsToSeek == 0); return DRFLAC_TRUE; } } -// This function moves the bit streamer to the first bit after the sync code (bit 15 of the of the frame header). It will also update the CRC-16. +/* This function moves the bit streamer to the first bit after the sync code (bit 15 of the of the frame header). It will also update the CRC-16. */ static drflac_bool32 drflac__find_and_seek_to_next_sync_code(drflac_bs* bs) { - drflac_assert(bs != NULL); + DRFLAC_ASSERT(bs != NULL); - // The sync code is always aligned to 8 bits. This is convenient for us because it means we can do byte-aligned movements. The first - // thing to do is align to the next byte. + /* + The sync code is always aligned to 8 bits. This is convenient for us because it means we can do byte-aligned movements. The first + thing to do is align to the next byte. 
+ */ if (!drflac__seek_bits(bs, DRFLAC_CACHE_L1_BITS_REMAINING(bs) & 7)) { return DRFLAC_FALSE; } for (;;) { + drflac_uint8 hi; + #ifndef DR_FLAC_NO_CRC drflac__reset_crc16(bs); #endif - drflac_uint8 hi; if (!drflac__read_uint8(bs, 8, &hi)) { return DRFLAC_FALSE; } @@ -1748,12 +2193,12 @@ static drflac_bool32 drflac__find_and_seek_to_next_sync_code(drflac_bs* bs) } } - // Should never get here. - //return DRFLAC_FALSE; + /* Should never get here. */ + /*return DRFLAC_FALSE;*/ } -#if !defined(DR_FLAC_NO_SIMD) && defined(DRFLAC_HAS_LZCNT_INTRINSIC) +#if defined(DRFLAC_HAS_LZCNT_INTRINSIC) #define DRFLAC_IMPLEMENT_CLZ_LZCNT #endif #if defined(_MSC_VER) && _MSC_VER >= 1400 && (defined(DRFLAC_X64) || defined(DRFLAC_X86)) @@ -1762,6 +2207,7 @@ static drflac_bool32 drflac__find_and_seek_to_next_sync_code(drflac_bs* bs) static DRFLAC_INLINE drflac_uint32 drflac__clz_software(drflac_cache_t x) { + drflac_uint32 n; static drflac_uint32 clz_table_4[] = { 0, 4, @@ -1770,13 +2216,17 @@ static DRFLAC_INLINE drflac_uint32 drflac__clz_software(drflac_cache_t x) 1, 1, 1, 1, 1, 1, 1, 1 }; - drflac_uint32 n = clz_table_4[x >> (sizeof(x)*8 - 4)]; + if (x == 0) { + return sizeof(x)*8; + } + + n = clz_table_4[x >> (sizeof(x)*8 - 4)]; if (n == 0) { #ifdef DRFLAC_64BIT - if ((x & 0xFFFFFFFF00000000ULL) == 0) { n = 32; x <<= 32; } - if ((x & 0xFFFF000000000000ULL) == 0) { n += 16; x <<= 16; } - if ((x & 0xFF00000000000000ULL) == 0) { n += 8; x <<= 8; } - if ((x & 0xF000000000000000ULL) == 0) { n += 4; x <<= 4; } + if ((x & ((drflac_uint64)0xFFFFFFFF << 32)) == 0) { n = 32; x <<= 32; } + if ((x & ((drflac_uint64)0xFFFF0000 << 32)) == 0) { n += 16; x <<= 16; } + if ((x & ((drflac_uint64)0xFF000000 << 32)) == 0) { n += 8; x <<= 8; } + if ((x & ((drflac_uint64)0xF0000000 << 32)) == 0) { n += 4; x <<= 4; } #else if ((x & 0xFFFF0000) == 0) { n = 16; x <<= 16; } if ((x & 0xFF000000) == 0) { n += 8; x <<= 8; } @@ -1791,11 +2241,16 @@ static DRFLAC_INLINE drflac_uint32 
drflac__clz_software(drflac_cache_t x) #ifdef DRFLAC_IMPLEMENT_CLZ_LZCNT static DRFLAC_INLINE drflac_bool32 drflac__is_lzcnt_supported() { - // If the compiler itself does not support the intrinsic then we'll need to return false. -#ifdef DRFLAC_HAS_LZCNT_INTRINSIC - return drflac__gIsLZCNTSupported; + /* Fast compile time check for ARM. */ +#if defined(DRFLAC_HAS_LZCNT_INTRINSIC) && defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 5) + return DRFLAC_TRUE; #else - return DRFLAC_FALSE; + /* If the compiler itself does not support the intrinsic then we'll need to return false. */ + #ifdef DRFLAC_HAS_LZCNT_INTRINSIC + return drflac__gIsLZCNTSupported; + #else + return DRFLAC_FALSE; + #endif #endif } @@ -1809,13 +2264,49 @@ static DRFLAC_INLINE drflac_uint32 drflac__clz_lzcnt(drflac_cache_t x) #endif #else #if defined(__GNUC__) || defined(__clang__) - #ifdef DRFLAC_64BIT - return (drflac_uint32)__builtin_clzll((unsigned long long)x); + #if defined(DRFLAC_X64) + { + drflac_uint64 r; + __asm__ __volatile__ ( + "lzcnt{ %1, %0| %0, %1}" : "=r"(r) : "r"(x) + ); + + return (drflac_uint32)r; + } + #elif defined(DRFLAC_X86) + { + drflac_uint32 r; + __asm__ __volatile__ ( + "lzcnt{l %1, %0| %0, %1}" : "=r"(r) : "r"(x) + ); + + return r; + } + #elif defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 5) && !defined(DRFLAC_64BIT) /* <-- I haven't tested 64-bit inline assembly, so only enabling this for the 32-bit build for now. */ + { + unsigned int r; + __asm__ __volatile__ ( + #if defined(DRFLAC_64BIT) + "clz %w[out], %w[in]" : [out]"=r"(r) : [in]"r"(x) /* <-- This is untested. If someone in the community could test this, that would be appreciated! 
*/ + #else + "clz %[out], %[in]" : [out]"=r"(r) : [in]"r"(x) + #endif + ); + + return r; + } #else - return (drflac_uint32)__builtin_clzl((unsigned long)x); + if (x == 0) { + return sizeof(x)*8; + } + #ifdef DRFLAC_64BIT + return (drflac_uint32)__builtin_clzll((drflac_uint64)x); + #else + return (drflac_uint32)__builtin_clzl((drflac_uint32)x); + #endif #endif #else - // Unsupported compiler. + /* Unsupported compiler. */ #error "This compiler does not support the lzcnt intrinsic." #endif #endif @@ -1823,11 +2314,16 @@ static DRFLAC_INLINE drflac_uint32 drflac__clz_lzcnt(drflac_cache_t x) #endif #ifdef DRFLAC_IMPLEMENT_CLZ_MSVC -#include <intrin.h> // For BitScanReverse(). +#include <intrin.h> /* For BitScanReverse(). */ static DRFLAC_INLINE drflac_uint32 drflac__clz_msvc(drflac_cache_t x) { drflac_uint32 n; + + if (x == 0) { + return sizeof(x)*8; + } + #ifdef DRFLAC_64BIT _BitScanReverse64((unsigned long*)&n, x); #else @@ -1839,7 +2335,6 @@ static DRFLAC_INLINE drflac_uint32 drflac__clz_msvc(drflac_cache_t x) static DRFLAC_INLINE drflac_uint32 drflac__clz(drflac_cache_t x) { - // This function assumes at least one bit is set. Checking for 0 needs to be done at a higher level, outside this function. 
#ifdef DRFLAC_IMPLEMENT_CLZ_LZCNT if (drflac__is_lzcnt_supported()) { return drflac__clz_lzcnt(x); @@ -1855,9 +2350,11 @@ static DRFLAC_INLINE drflac_uint32 drflac__clz(drflac_cache_t x) } -static inline drflac_bool32 drflac__seek_past_next_set_bit(drflac_bs* bs, unsigned int* pOffsetOut) +static DRFLAC_INLINE drflac_bool32 drflac__seek_past_next_set_bit(drflac_bs* bs, unsigned int* pOffsetOut) { drflac_uint32 zeroCounter = 0; + drflac_uint32 setBitOffsetPlus1; + while (bs->cache == 0) { zeroCounter += (drflac_uint32)DRFLAC_CACHE_L1_BITS_REMAINING(bs); if (!drflac__reload_cache(bs)) { @@ -1865,7 +2362,7 @@ static inline drflac_bool32 drflac__seek_past_next_set_bit(drflac_bs* bs, unsign } } - drflac_uint32 setBitOffsetPlus1 = drflac__clz(bs->cache); + setBitOffsetPlus1 = drflac__clz(bs->cache); setBitOffsetPlus1 += 1; bs->consumedBits += setBitOffsetPlus1; @@ -1879,12 +2376,14 @@ static inline drflac_bool32 drflac__seek_past_next_set_bit(drflac_bs* bs, unsign static drflac_bool32 drflac__seek_to_byte(drflac_bs* bs, drflac_uint64 offsetFromStart) { - drflac_assert(bs != NULL); - drflac_assert(offsetFromStart > 0); + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(offsetFromStart > 0); - // Seeking from the start is not quite as trivial as it sounds because the onSeek callback takes a signed 32-bit integer (which - // is intentional because it simplifies the implementation of the onSeek callbacks), however offsetFromStart is unsigned 64-bit. - // To resolve we just need to do an initial seek from the start, and then a series of offset seeks to make up the remainder. + /* + Seeking from the start is not quite as trivial as it sounds because the onSeek callback takes a signed 32-bit integer (which + is intentional because it simplifies the implementation of the onSeek callbacks), however offsetFromStart is unsigned 64-bit. + To resolve we just need to do an initial seek from the start, and then a series of offset seeks to make up the remainder. 
+ */ if (offsetFromStart > 0x7FFFFFFF) { drflac_uint64 bytesRemaining = offsetFromStart; if (!bs->onSeek(bs->pUserData, 0x7FFFFFFF, drflac_seek_origin_start)) { @@ -1910,7 +2409,7 @@ static drflac_bool32 drflac__seek_to_byte(drflac_bs* bs, drflac_uint64 offsetFro } } - // The cache should be reset to force a reload of fresh data from the client. + /* The cache should be reset to force a reload of fresh data from the client. */ drflac__reset_cache(bs); return DRFLAC_TRUE; } @@ -1918,12 +2417,18 @@ static drflac_bool32 drflac__seek_to_byte(drflac_bs* bs, drflac_uint64 offsetFro static drflac_result drflac__read_utf8_coded_number(drflac_bs* bs, drflac_uint64* pNumberOut, drflac_uint8* pCRCOut) { - drflac_assert(bs != NULL); - drflac_assert(pNumberOut != NULL); + drflac_uint8 crc; + drflac_uint64 result; + unsigned char utf8[7] = {0}; + int byteCount; + int i; - drflac_uint8 crc = *pCRCOut; + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(pNumberOut != NULL); + DRFLAC_ASSERT(pCRCOut != NULL); + + crc = *pCRCOut; - unsigned char utf8[7] = {0}; if (!drflac__read_uint8(bs, 8, utf8)) { *pNumberOut = 0; return DRFLAC_END_OF_STREAM; @@ -1936,7 +2441,7 @@ static drflac_result drflac__read_utf8_coded_number(drflac_bs* bs, drflac_uint64 return DRFLAC_SUCCESS; } - int byteCount = 1; + /*byteCount = 1;*/ if ((utf8[0] & 0xE0) == 0xC0) { byteCount = 2; } else if ((utf8[0] & 0xF0) == 0xE0) { @@ -1951,14 +2456,14 @@ static drflac_result drflac__read_utf8_coded_number(drflac_bs* bs, drflac_uint64 byteCount = 7; } else { *pNumberOut = 0; - return DRFLAC_CRC_MISMATCH; // Bad UTF-8 encoding. + return DRFLAC_CRC_MISMATCH; /* Bad UTF-8 encoding. */ } - // Read extra bytes. - drflac_assert(byteCount > 1); + /* Read extra bytes. 
*/ + DRFLAC_ASSERT(byteCount > 1); - drflac_uint64 result = (drflac_uint64)(utf8[0] & (0xFF >> (byteCount + 1))); - for (int i = 1; i < byteCount; ++i) { + result = (drflac_uint64)(utf8[0] & (0xFF >> (byteCount + 1))); + for (i = 1; i < byteCount; ++i) { if (!drflac__read_uint8(bs, 8, utf8 + i)) { *pNumberOut = 0; return DRFLAC_END_OF_STREAM; @@ -1975,20 +2480,21 @@ static drflac_result drflac__read_utf8_coded_number(drflac_bs* bs, drflac_uint64 +/* +The next two functions are responsible for calculating the prediction. -// The next two functions are responsible for calculating the prediction. -// -// When the bits per sample is >16 we need to use 64-bit integer arithmetic because otherwise we'll run out of precision. It's -// safe to assume this will be slower on 32-bit platforms so we use a more optimal solution when the bits per sample is <=16. +When the bits per sample is >16 we need to use 64-bit integer arithmetic because otherwise we'll run out of precision. It's +safe to assume this will be slower on 32-bit platforms so we use a more optimal solution when the bits per sample is <=16. +*/ static DRFLAC_INLINE drflac_int32 drflac__calculate_prediction_32(drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples) { - drflac_assert(order <= 32); + drflac_int32 prediction = 0; - // 32-bit version. + DRFLAC_ASSERT(order <= 32); - // VC++ optimizes this to a single jmp. I've not yet verified this for other compilers. - drflac_int32 prediction = 0; + /* 32-bit version. */ + /* VC++ optimizes this to a single jmp. I've not yet verified this for other compilers. 
*/ switch (order) { case 32: prediction += coefficients[31] * pDecodedSamples[-32]; @@ -2030,13 +2536,14 @@ static DRFLAC_INLINE drflac_int32 drflac__calculate_prediction_32(drflac_uint32 static DRFLAC_INLINE drflac_int32 drflac__calculate_prediction_64(drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples) { - drflac_assert(order <= 32); + drflac_int64 prediction; - // 64-bit version. + DRFLAC_ASSERT(order <= 32); - // This method is faster on the 32-bit build when compiling with VC++. See note below. + /* 64-bit version. */ + + /* This method is faster on the 32-bit build when compiling with VC++. See note below. */ #ifndef DRFLAC_64BIT - drflac_int64 prediction; if (order == 8) { prediction = coefficients[0] * (drflac_int64)pDecodedSamples[-1]; @@ -2153,18 +2660,21 @@ static DRFLAC_INLINE drflac_int32 drflac__calculate_prediction_64(drflac_uint32 } else { + int j; + prediction = 0; - for (int j = 0; j < (int)order; ++j) { + for (j = 0; j < (int)order; ++j) { prediction += coefficients[j] * (drflac_int64)pDecodedSamples[-j-1]; } } #endif - // VC++ optimizes this to a single jmp instruction, but only the 64-bit build. The 32-bit build generates less efficient code for some - // reason. The ugly version above is faster so we'll just switch between the two depending on the target platform. + /* + VC++ optimizes this to a single jmp instruction, but only the 64-bit build. The 32-bit build generates less efficient code for some + reason. The ugly version above is faster so we'll just switch between the two depending on the target platform. 
+ */ #ifdef DRFLAC_64BIT - drflac_int64 prediction = 0; - + prediction = 0; switch (order) { case 32: prediction += coefficients[31] * (drflac_int64)pDecodedSamples[-32]; @@ -2205,16 +2715,21 @@ static DRFLAC_INLINE drflac_int32 drflac__calculate_prediction_64(drflac_uint32 return (drflac_int32)(prediction >> shift); } + #if 0 -// Reference implementation for reading and decoding samples with residual. This is intentionally left unoptimized for the -// sake of readability and should only be used as a reference. +/* +Reference implementation for reading and decoding samples with residual. This is intentionally left unoptimized for the +sake of readability and should only be used as a reference. +*/ static drflac_bool32 drflac__decode_samples_with_residual__rice__reference(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) { - drflac_assert(bs != NULL); - drflac_assert(count > 0); - drflac_assert(pSamplesOut != NULL); + drflac_uint32 i; - for (drflac_uint32 i = 0; i < count; ++i) { + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(count > 0); + DRFLAC_ASSERT(pSamplesOut != NULL); + + for (i = 0; i < count; ++i) { drflac_uint32 zeroCounter = 0; for (;;) { drflac_uint8 bit; @@ -2246,7 +2761,7 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__reference(drfla } - if (bitsPerSample > 16) { + if (bitsPerSample+shift >= 32) { pSamplesOut[i] = decodedRice + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + i); } else { pSamplesOut[i] = decodedRice + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + i); @@ -2261,6 +2776,8 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__reference(drfla static drflac_bool32 drflac__read_rice_parts__reference(drflac_bs* bs, drflac_uint8 riceParam, drflac_uint32* pZeroCounterOut, drflac_uint32* pRiceParamPartOut) { drflac_uint32 
zeroCounter = 0; + drflac_uint32 decodedRice; + for (;;) { drflac_uint8 bit; if (!drflac__read_uint8(bs, 1, &bit)) { @@ -2274,7 +2791,6 @@ static drflac_bool32 drflac__read_rice_parts__reference(drflac_bs* bs, drflac_ui } } - drflac_uint32 decodedRice; if (riceParam > 0) { if (!drflac__read_uint32(bs, riceParam, &decodedRice)) { return DRFLAC_FALSE; @@ -2289,13 +2805,20 @@ static drflac_bool32 drflac__read_rice_parts__reference(drflac_bs* bs, drflac_ui } #endif +#if 0 static DRFLAC_INLINE drflac_bool32 drflac__read_rice_parts(drflac_bs* bs, drflac_uint8 riceParam, drflac_uint32* pZeroCounterOut, drflac_uint32* pRiceParamPartOut) { - drflac_assert(riceParam > 0); // <-- riceParam should never be 0. drflac__read_rice_parts__param_equals_zero() should be used instead for this case. + drflac_cache_t riceParamMask; + drflac_uint32 zeroCounter; + drflac_uint32 setBitOffsetPlus1; + drflac_uint32 riceParamPart; + drflac_uint32 riceLength; - drflac_cache_t riceParamMask = DRFLAC_CACHE_L1_SELECTION_MASK(riceParam); + DRFLAC_ASSERT(riceParam > 0); /* <-- riceParam should never be 0. drflac__read_rice_parts__param_equals_zero() should be used instead for this case. 
*/ - drflac_uint32 zeroCounter = 0; + riceParamMask = DRFLAC_CACHE_L1_SELECTION_MASK(riceParam); + + zeroCounter = 0; while (bs->cache == 0) { zeroCounter += (drflac_uint32)DRFLAC_CACHE_L1_BITS_REMAINING(bs); if (!drflac__reload_cache(bs)) { @@ -2303,25 +2826,26 @@ static DRFLAC_INLINE drflac_bool32 drflac__read_rice_parts(drflac_bs* bs, drflac } } - drflac_uint32 setBitOffsetPlus1 = drflac__clz(bs->cache); + setBitOffsetPlus1 = drflac__clz(bs->cache); zeroCounter += setBitOffsetPlus1; setBitOffsetPlus1 += 1; - - drflac_uint32 riceParamPart; - drflac_uint32 riceLength = setBitOffsetPlus1 + riceParam; + riceLength = setBitOffsetPlus1 + riceParam; if (riceLength < DRFLAC_CACHE_L1_BITS_REMAINING(bs)) { riceParamPart = (drflac_uint32)((bs->cache & (riceParamMask >> setBitOffsetPlus1)) >> DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, riceLength)); bs->consumedBits += riceLength; bs->cache <<= riceLength; } else { + drflac_uint32 bitCountLo; + drflac_cache_t resultHi; + bs->consumedBits += riceLength; - bs->cache <<= setBitOffsetPlus1 & (DRFLAC_CACHE_L1_SIZE_BITS(bs)-1); // <-- Equivalent to "if (setBitOffsetPlus1 < DRFLAC_CACHE_L1_SIZE_BITS(bs)) { bs->cache <<= setBitOffsetPlus1; }" + bs->cache <<= setBitOffsetPlus1 & (DRFLAC_CACHE_L1_SIZE_BITS(bs)-1); /* <-- Equivalent to "if (setBitOffsetPlus1 < DRFLAC_CACHE_L1_SIZE_BITS(bs)) { bs->cache <<= setBitOffsetPlus1; }" */ - // It straddles the cached data. It will never cover more than the next chunk. We just read the number in two parts and combine them. - drflac_uint32 bitCountLo = bs->consumedBits - DRFLAC_CACHE_L1_SIZE_BITS(bs); - drflac_cache_t resultHi = DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, riceParam); // <-- Use DRFLAC_CACHE_L1_SELECT_AND_SHIFT_SAFE() if ever this function allows riceParam=0. + /* It straddles the cached data. It will never cover more than the next chunk. We just read the number in two parts and combine them. 
*/ + bitCountLo = bs->consumedBits - DRFLAC_CACHE_L1_SIZE_BITS(bs); + resultHi = DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, riceParam); /* <-- Use DRFLAC_CACHE_L1_SELECT_AND_SHIFT_SAFE() if ever this function allows riceParam=0. */ if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) { #ifndef DR_FLAC_NO_CRC @@ -2333,7 +2857,7 @@ static DRFLAC_INLINE drflac_bool32 drflac__read_rice_parts(drflac_bs* bs, drflac bs->crc16Cache = bs->cache; #endif } else { - // Slow path. We need to fetch more data from the client. + /* Slow path. We need to fetch more data from the client. */ if (!drflac__reload_cache(bs)) { return DRFLAC_FALSE; } @@ -2345,154 +2869,278 @@ static DRFLAC_INLINE drflac_bool32 drflac__read_rice_parts(drflac_bs* bs, drflac bs->cache <<= bitCountLo; } - *pZeroCounterOut = zeroCounter; - *pRiceParamPartOut = riceParamPart; + pZeroCounterOut[0] = zeroCounter; + pRiceParamPartOut[0] = riceParamPart; + return DRFLAC_TRUE; } +#endif -static DRFLAC_INLINE drflac_bool32 drflac__read_rice_parts__param_equals_zero(drflac_bs* bs, drflac_uint32* pZeroCounterOut, drflac_uint32* pRiceParamPartOut) -{ - drflac_cache_t riceParamMask = DRFLAC_CACHE_L1_SELECTION_MASK(0); +static DRFLAC_INLINE drflac_bool32 drflac__read_rice_parts_x1(drflac_bs* bs, drflac_uint8 riceParam, drflac_uint32* pZeroCounterOut, drflac_uint32* pRiceParamPartOut) +{ + drflac_uint32 riceParamPlus1 = riceParam + 1; + /*drflac_cache_t riceParamPlus1Mask = DRFLAC_CACHE_L1_SELECTION_MASK(riceParamPlus1);*/ + drflac_uint32 riceParamPlus1Shift = DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, riceParamPlus1); + drflac_uint32 riceParamPlus1MaxConsumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs) - riceParamPlus1; + + /* + The idea here is to use local variables for the cache in an attempt to encourage the compiler to store them in registers. I have + no idea how this will work in practice... 
+ */ + drflac_cache_t bs_cache = bs->cache; + drflac_uint32 bs_consumedBits = bs->consumedBits; + + /* The first thing to do is find the first unset bit. Most likely a bit will be set in the current cache line. */ + drflac_uint32 lzcount = drflac__clz(bs_cache); + if (lzcount < sizeof(bs_cache)*8) { + pZeroCounterOut[0] = lzcount; + + /* + It is most likely that the riceParam part (which comes after the zero counter) is also on this cache line. When extracting + this, we include the set bit from the unary coded part because it simplifies cache management. This bit will be handled + outside of this function at a higher level. + */ + extract_rice_param_part: + bs_cache <<= lzcount; + bs_consumedBits += lzcount; + + if (bs_consumedBits <= riceParamPlus1MaxConsumedBits) { + /* Getting here means the rice parameter part is wholly contained within the current cache line. */ + pRiceParamPartOut[0] = (drflac_uint32)(bs_cache >> riceParamPlus1Shift); + bs_cache <<= riceParamPlus1; + bs_consumedBits += riceParamPlus1; + } else { + drflac_uint32 riceParamPartHi; + drflac_uint32 riceParamPartLo; + drflac_uint32 riceParamPartLoBitCount; + + /* + Getting here means the rice parameter part straddles the cache line. We need to read from the tail of the current cache + line, reload the cache, and then combine it with the head of the next cache line. + */ + + /* Grab the high part of the rice parameter part. */ + riceParamPartHi = (drflac_uint32)(bs_cache >> riceParamPlus1Shift); + + /* Before reloading the cache we need to grab the size in bits of the low part. */ + riceParamPartLoBitCount = bs_consumedBits - riceParamPlus1MaxConsumedBits; + DRFLAC_ASSERT(riceParamPartLoBitCount > 0 && riceParamPartLoBitCount < 32); + + /* Now reload the cache. 
*/ + if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) { + #ifndef DR_FLAC_NO_CRC + drflac__update_crc16(bs); + #endif + bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]); + bs_consumedBits = riceParamPartLoBitCount; + #ifndef DR_FLAC_NO_CRC + bs->crc16Cache = bs_cache; + #endif + } else { + /* Slow path. We need to fetch more data from the client. */ + if (!drflac__reload_cache(bs)) { + return DRFLAC_FALSE; + } - drflac_uint32 zeroCounter = 0; - while (bs->cache == 0) { - zeroCounter += (drflac_uint32)DRFLAC_CACHE_L1_BITS_REMAINING(bs); - if (!drflac__reload_cache(bs)) { - return DRFLAC_FALSE; + bs_cache = bs->cache; + bs_consumedBits = bs->consumedBits + riceParamPartLoBitCount; + } + + /* We should now have enough information to construct the rice parameter part. */ + riceParamPartLo = (drflac_uint32)(bs_cache >> (DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, riceParamPartLoBitCount))); + pRiceParamPartOut[0] = riceParamPartHi | riceParamPartLo; + + bs_cache <<= riceParamPartLoBitCount; } - } + } else { + /* + Getting here means there are no bits set on the cache line. This is a less optimal case because we just wasted a call + to drflac__clz() and we need to reload the cache. + */ + drflac_uint32 zeroCounter = (drflac_uint32)(DRFLAC_CACHE_L1_SIZE_BITS(bs) - bs_consumedBits); + for (;;) { + if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) { + #ifndef DR_FLAC_NO_CRC + drflac__update_crc16(bs); + #endif + bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]); + bs_consumedBits = 0; + #ifndef DR_FLAC_NO_CRC + bs->crc16Cache = bs_cache; + #endif + } else { + /* Slow path. We need to fetch more data from the client. 
*/ + if (!drflac__reload_cache(bs)) { + return DRFLAC_FALSE; + } - drflac_uint32 setBitOffsetPlus1 = drflac__clz(bs->cache); - zeroCounter += setBitOffsetPlus1; - setBitOffsetPlus1 += 1; + bs_cache = bs->cache; + bs_consumedBits = bs->consumedBits; + } + lzcount = drflac__clz(bs_cache); + zeroCounter += lzcount; - drflac_uint32 riceParamPart; - drflac_uint32 riceLength = setBitOffsetPlus1; - if (riceLength < DRFLAC_CACHE_L1_BITS_REMAINING(bs)) { - riceParamPart = (drflac_uint32)((bs->cache & (riceParamMask >> setBitOffsetPlus1)) >> DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, riceLength)); + if (lzcount < sizeof(bs_cache)*8) { + break; + } + } - bs->consumedBits += riceLength; - bs->cache <<= riceLength; - } else { - // It straddles the cached data. It will never cover more than the next chunk. We just read the number in two parts and combine them. - drflac_uint32 bitCountLo = riceLength + bs->consumedBits - DRFLAC_CACHE_L1_SIZE_BITS(bs); + pZeroCounterOut[0] = zeroCounter; + goto extract_rice_param_part; + } - if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) { -#ifndef DR_FLAC_NO_CRC - drflac__update_crc16(bs); -#endif - bs->cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]); - bs->consumedBits = 0; -#ifndef DR_FLAC_NO_CRC - bs->crc16Cache = bs->cache; -#endif + /* Make sure the cache is restored at the end of it all. */ + bs->cache = bs_cache; + bs->consumedBits = bs_consumedBits; + + return DRFLAC_TRUE; +} + +static DRFLAC_INLINE drflac_bool32 drflac__seek_rice_parts(drflac_bs* bs, drflac_uint8 riceParam) +{ + drflac_uint32 riceParamPlus1 = riceParam + 1; + drflac_uint32 riceParamPlus1MaxConsumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs) - riceParamPlus1; + + /* + The idea here is to use local variables for the cache in an attempt to encourage the compiler to store them in registers. I have + no idea how this will work in practice... 
+ */ + drflac_cache_t bs_cache = bs->cache; + drflac_uint32 bs_consumedBits = bs->consumedBits; + + /* The first thing to do is find the first unset bit. Most likely a bit will be set in the current cache line. */ + drflac_uint32 lzcount = drflac__clz(bs_cache); + if (lzcount < sizeof(bs_cache)*8) { + /* + It is most likely that the riceParam part (which comes after the zero counter) is also on this cache line. When extracting + this, we include the set bit from the unary coded part because it simplifies cache management. This bit will be handled + outside of this function at a higher level. + */ + extract_rice_param_part: + bs_cache <<= lzcount; + bs_consumedBits += lzcount; + + if (bs_consumedBits <= riceParamPlus1MaxConsumedBits) { + /* Getting here means the rice parameter part is wholly contained within the current cache line. */ + bs_cache <<= riceParamPlus1; + bs_consumedBits += riceParamPlus1; } else { - // Slow path. We need to fetch more data from the client. - if (!drflac__reload_cache(bs)) { - return DRFLAC_FALSE; + /* + Getting here means the rice parameter part straddles the cache line. We need to read from the tail of the current cache + line, reload the cache, and then combine it with the head of the next cache line. + */ + + /* Before reloading the cache we need to grab the size in bits of the low part. */ + drflac_uint32 riceParamPartLoBitCount = bs_consumedBits - riceParamPlus1MaxConsumedBits; + DRFLAC_ASSERT(riceParamPartLoBitCount > 0 && riceParamPartLoBitCount < 32); + + /* Now reload the cache. */ + if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) { + #ifndef DR_FLAC_NO_CRC + drflac__update_crc16(bs); + #endif + bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]); + bs_consumedBits = riceParamPartLoBitCount; + #ifndef DR_FLAC_NO_CRC + bs->crc16Cache = bs_cache; + #endif + } else { + /* Slow path. We need to fetch more data from the client. 
*/ + if (!drflac__reload_cache(bs)) { + return DRFLAC_FALSE; + } + + bs_cache = bs->cache; + bs_consumedBits = bs->consumedBits + riceParamPartLoBitCount; } + + bs_cache <<= riceParamPartLoBitCount; } + } else { + /* + Getting here means there are no bits set on the cache line. This is a less optimal case because we just wasted a call + to drflac__clz() and we need to reload the cache. + */ + for (;;) { + if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) { + #ifndef DR_FLAC_NO_CRC + drflac__update_crc16(bs); + #endif + bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]); + bs_consumedBits = 0; + #ifndef DR_FLAC_NO_CRC + bs->crc16Cache = bs_cache; + #endif + } else { + /* Slow path. We need to fetch more data from the client. */ + if (!drflac__reload_cache(bs)) { + return DRFLAC_FALSE; + } - riceParamPart = (drflac_uint32)(DRFLAC_CACHE_L1_SELECT_AND_SHIFT_SAFE(bs, bitCountLo)); + bs_cache = bs->cache; + bs_consumedBits = bs->consumedBits; + } - bs->consumedBits += bitCountLo; - bs->cache <<= bitCountLo; + lzcount = drflac__clz(bs_cache); + if (lzcount < sizeof(bs_cache)*8) { + break; + } + } + + goto extract_rice_param_part; } - *pZeroCounterOut = zeroCounter; - *pRiceParamPartOut = riceParamPart; + /* Make sure the cache is restored at the end of it all. 
*/ + bs->cache = bs_cache; + bs->consumedBits = bs_consumedBits; + return DRFLAC_TRUE; } -static drflac_bool32 drflac__decode_samples_with_residual__rice__simple(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) +static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar_zeroorder(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) { - drflac_assert(bs != NULL); - drflac_assert(count > 0); - drflac_assert(pSamplesOut != NULL); - - static drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF}; - + drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF}; drflac_uint32 zeroCountPart0; - drflac_uint32 zeroCountPart1; - drflac_uint32 zeroCountPart2; - drflac_uint32 zeroCountPart3; drflac_uint32 riceParamPart0; - drflac_uint32 riceParamPart1; - drflac_uint32 riceParamPart2; - drflac_uint32 riceParamPart3; - drflac_uint32 i4 = 0; - drflac_uint32 count4 = count >> 2; - while (i4 < count4) { - // Rice extraction. 
- if (!drflac__read_rice_parts(bs, riceParam, &zeroCountPart0, &riceParamPart0) || - !drflac__read_rice_parts(bs, riceParam, &zeroCountPart1, &riceParamPart1) || - !drflac__read_rice_parts(bs, riceParam, &zeroCountPart2, &riceParamPart2) || - !drflac__read_rice_parts(bs, riceParam, &zeroCountPart3, &riceParamPart3)) { - return DRFLAC_FALSE; - } - - riceParamPart0 |= (zeroCountPart0 << riceParam); - riceParamPart1 |= (zeroCountPart1 << riceParam); - riceParamPart2 |= (zeroCountPart2 << riceParam); - riceParamPart3 |= (zeroCountPart3 << riceParam); + drflac_uint32 riceParamMask; + drflac_uint32 i; - riceParamPart0 = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01]; - riceParamPart1 = (riceParamPart1 >> 1) ^ t[riceParamPart1 & 0x01]; - riceParamPart2 = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01]; - riceParamPart3 = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01]; + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(count > 0); + DRFLAC_ASSERT(pSamplesOut != NULL); - if (bitsPerSample > 16) { - pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 0); - pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 1); - pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 2); - pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 3); - } else { - pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 0); - pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 1); - pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 2); - pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 3); - } + (void)bitsPerSample; + (void)order; + (void)shift; + 
(void)coefficients; - i4 += 1; - pSamplesOut += 4; - } + riceParamMask = (drflac_uint32)~((~0UL) << riceParam); - drflac_uint32 i = i4 << 2; + i = 0; while (i < count) { - // Rice extraction. - if (!drflac__read_rice_parts(bs, riceParam, &zeroCountPart0, &riceParamPart0)) { + /* Rice extraction. */ + if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0)) { return DRFLAC_FALSE; } - // Rice reconstruction. + /* Rice reconstruction. */ + riceParamPart0 &= riceParamMask; riceParamPart0 |= (zeroCountPart0 << riceParam); riceParamPart0 = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01]; - //riceParamPart0 = (riceParamPart0 >> 1) ^ (~(riceParamPart0 & 0x01) + 1); - // Sample reconstruction. - if (bitsPerSample > 16) { - pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 0); - } else { - pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 0); - } + pSamplesOut[i] = riceParamPart0; i += 1; - pSamplesOut += 1; } return DRFLAC_TRUE; } -static drflac_bool32 drflac__decode_samples_with_residual__rice__param_equals_zero(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) +static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) { - drflac_assert(bs != NULL); - drflac_assert(count > 0); - drflac_assert(pSamplesOut != NULL); - - static drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF}; - + drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF}; drflac_uint32 zeroCountPart0; drflac_uint32 zeroCountPart1; drflac_uint32 zeroCountPart2; @@ -2501,56 +3149,104 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__param_equals_ze 
drflac_uint32 riceParamPart1; drflac_uint32 riceParamPart2; drflac_uint32 riceParamPart3; - drflac_uint32 i4 = 0; - drflac_uint32 count4 = count >> 2; - while (i4 < count4) { - // Rice extraction. - if (!drflac__read_rice_parts__param_equals_zero(bs, &zeroCountPart0, &riceParamPart0) || - !drflac__read_rice_parts__param_equals_zero(bs, &zeroCountPart1, &riceParamPart1) || - !drflac__read_rice_parts__param_equals_zero(bs, &zeroCountPart2, &riceParamPart2) || - !drflac__read_rice_parts__param_equals_zero(bs, &zeroCountPart3, &riceParamPart3)) { - return DRFLAC_FALSE; - } + drflac_uint32 riceParamMask; + const drflac_int32* pSamplesOutEnd; + drflac_uint32 i; - riceParamPart0 |= zeroCountPart0; - riceParamPart1 |= zeroCountPart1; - riceParamPart2 |= zeroCountPart2; - riceParamPart3 |= zeroCountPart3; + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(count > 0); + DRFLAC_ASSERT(pSamplesOut != NULL); - riceParamPart0 = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01]; - riceParamPart1 = (riceParamPart1 >> 1) ^ t[riceParamPart1 & 0x01]; - riceParamPart2 = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01]; - riceParamPart3 = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01]; + if (order == 0) { + return drflac__decode_samples_with_residual__rice__scalar_zeroorder(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut); + } + + riceParamMask = (drflac_uint32)~((~0UL) << riceParam); + pSamplesOutEnd = pSamplesOut + (count & ~3); + + if (bitsPerSample+shift > 32) { + while (pSamplesOut < pSamplesOutEnd) { + /* + Rice extraction. It's faster to do this one at a time against local variables than it is to use the x4 version + against an array. Not sure why, but perhaps it's making more efficient use of registers? 
+ */ + if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart1, &riceParamPart1) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart2, &riceParamPart2) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart3, &riceParamPart3)) { + return DRFLAC_FALSE; + } + + riceParamPart0 &= riceParamMask; + riceParamPart1 &= riceParamMask; + riceParamPart2 &= riceParamMask; + riceParamPart3 &= riceParamMask; + + riceParamPart0 |= (zeroCountPart0 << riceParam); + riceParamPart1 |= (zeroCountPart1 << riceParam); + riceParamPart2 |= (zeroCountPart2 << riceParam); + riceParamPart3 |= (zeroCountPart3 << riceParam); + + riceParamPart0 = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01]; + riceParamPart1 = (riceParamPart1 >> 1) ^ t[riceParamPart1 & 0x01]; + riceParamPart2 = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01]; + riceParamPart3 = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01]; + + pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 0); + pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 1); + pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 2); + pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 3); + + pSamplesOut += 4; + } + } else { + while (pSamplesOut < pSamplesOutEnd) { + if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart1, &riceParamPart1) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart2, &riceParamPart2) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart3, &riceParamPart3)) { + return DRFLAC_FALSE; + } + + riceParamPart0 &= riceParamMask; + riceParamPart1 &= riceParamMask; + riceParamPart2 &= riceParamMask; + 
riceParamPart3 &= riceParamMask; + + riceParamPart0 |= (zeroCountPart0 << riceParam); + riceParamPart1 |= (zeroCountPart1 << riceParam); + riceParamPart2 |= (zeroCountPart2 << riceParam); + riceParamPart3 |= (zeroCountPart3 << riceParam); + + riceParamPart0 = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01]; + riceParamPart1 = (riceParamPart1 >> 1) ^ t[riceParamPart1 & 0x01]; + riceParamPart2 = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01]; + riceParamPart3 = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01]; - if (bitsPerSample > 16) { - pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 0); - pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 1); - pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 2); - pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 3); - } else { pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 0); pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 1); pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 2); pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 3); - } - i4 += 1; - pSamplesOut += 4; + pSamplesOut += 4; + } } - drflac_uint32 i = i4 << 2; + i = (count & ~3); while (i < count) { - // Rice extraction. - if (!drflac__read_rice_parts__param_equals_zero(bs, &zeroCountPart0, &riceParamPart0)) { + /* Rice extraction. */ + if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0)) { return DRFLAC_FALSE; } - // Rice reconstruction. - riceParamPart0 |= zeroCountPart0; + /* Rice reconstruction. 
*/ + riceParamPart0 &= riceParamMask; + riceParamPart0 |= (zeroCountPart0 << riceParam); riceParamPart0 = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01]; + /*riceParamPart0 = (riceParamPart0 >> 1) ^ (~(riceParamPart0 & 0x01) + 1);*/ - // Sample reconstruction. - if (bitsPerSample > 16) { + /* Sample reconstruction. */ + if (bitsPerSample+shift > 32) { pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 0); } else { pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 0); @@ -2563,3212 +3259,7072 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__param_equals_ze return DRFLAC_TRUE; } -static drflac_bool32 drflac__decode_samples_with_residual__rice(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) +#if defined(DRFLAC_SUPPORT_SSE2) +static DRFLAC_INLINE __m128i drflac__mm_packs_interleaved_epi32(__m128i a, __m128i b) { -#if 0 - return drflac__decode_samples_with_residual__rice__reference(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut); -#else - if (riceParam != 0) { - return drflac__decode_samples_with_residual__rice__simple(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut); - } else { - return drflac__decode_samples_with_residual__rice__param_equals_zero(bs, bitsPerSample, count, order, shift, coefficients, pSamplesOut); - } -#endif -} + __m128i r; -// Reads and seeks past a string of residual values as Rice codes. The decoder should be sitting on the first bit of the Rice codes. -static drflac_bool32 drflac__read_and_seek_residual__rice(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam) -{ - drflac_assert(bs != NULL); - drflac_assert(count > 0); + /* Pack. 
*/ + r = _mm_packs_epi32(a, b); - drflac_uint32 zeroCountPart; - drflac_uint32 riceParamPart; + /* a3a2 a1a0 b3b2 b1b0 -> a3a2 b3b2 a1a0 b1b0 */ + r = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 1, 2, 0)); - if (riceParam != 0) { - for (drflac_uint32 i = 0; i < count; ++i) { - if (!drflac__read_rice_parts(bs, riceParam, &zeroCountPart, &riceParamPart)) { - return DRFLAC_FALSE; - } - } - } else { - for (drflac_uint32 i = 0; i < count; ++i) { - if (!drflac__read_rice_parts__param_equals_zero(bs, &zeroCountPart, &riceParamPart)) { - return DRFLAC_FALSE; - } - } - } + /* a3a2 b3b2 a1a0 b1b0 -> a3b3 a2b2 a1b1 a0b0 */ + r = _mm_shufflehi_epi16(r, _MM_SHUFFLE(3, 1, 2, 0)); + r = _mm_shufflelo_epi16(r, _MM_SHUFFLE(3, 1, 2, 0)); - return DRFLAC_TRUE; + return r; } +#endif -static drflac_bool32 drflac__decode_samples_with_residual__unencoded(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 unencodedBitsPerSample, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) +#if defined(DRFLAC_SUPPORT_SSE41) +static DRFLAC_INLINE __m128i drflac__mm_not_si128(__m128i a) { - drflac_assert(bs != NULL); - drflac_assert(count > 0); - drflac_assert(unencodedBitsPerSample <= 31); // <-- unencodedBitsPerSample is a 5 bit number, so cannot exceed 31. 
- drflac_assert(pSamplesOut != NULL); - - for (unsigned int i = 0; i < count; ++i) { - if (unencodedBitsPerSample > 0) { - if (!drflac__read_int32(bs, unencodedBitsPerSample, pSamplesOut + i)) { - return DRFLAC_FALSE; - } - } else { - pSamplesOut[i] = 0; - } - - if (bitsPerSample > 16) { - pSamplesOut[i] += drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + i); - } else { - pSamplesOut[i] += drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + i); - } - } + return _mm_xor_si128(a, _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); +} - return DRFLAC_TRUE; +static DRFLAC_INLINE __m128i drflac__mm_hadd_epi32(__m128i x) +{ + __m128i x64 = _mm_add_epi32(x, _mm_shuffle_epi32(x, _MM_SHUFFLE(1, 0, 3, 2))); + __m128i x32 = _mm_shufflelo_epi16(x64, _MM_SHUFFLE(1, 0, 3, 2)); + return _mm_add_epi32(x64, x32); } +static DRFLAC_INLINE __m128i drflac__mm_hadd_epi64(__m128i x) +{ + return _mm_add_epi64(x, _mm_shuffle_epi32(x, _MM_SHUFFLE(1, 0, 3, 2))); +} -// Reads and decodes the residual for the sub-frame the decoder is currently sitting on. This function should be called -// when the decoder is sitting at the very start of the RESIDUAL block. The first <order> residuals will be ignored. The -// <blockSize> and <order> parameters are used to determine how many residual values need to be decoded. -static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 blockSize, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples) +static DRFLAC_INLINE __m128i drflac__mm_srai_epi64(__m128i x, int count) { - drflac_assert(bs != NULL); - drflac_assert(blockSize != 0); - drflac_assert(pDecodedSamples != NULL); // <-- Should we allow NULL, in which case we just seek past the residual rather than do a full decode? + /* + To simplify this we are assuming count < 32. This restriction allows us to work on a low side and a high side. 
The low side + is shifted with zero bits, whereas the high side is shifted with sign bits. + */ + __m128i lo = _mm_srli_epi64(x, count); + __m128i hi = _mm_srai_epi32(x, count); - drflac_uint8 residualMethod; - if (!drflac__read_uint8(bs, 2, &residualMethod)) { - return DRFLAC_FALSE; - } + hi = _mm_and_si128(hi, _mm_set_epi32(0xFFFFFFFF, 0, 0xFFFFFFFF, 0)); /* The high part needs to have the low part cleared. */ - if (residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE && residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) { - return DRFLAC_FALSE; // Unknown or unsupported residual coding method. - } + return _mm_or_si128(lo, hi); +} - // Ignore the first <order> values. - pDecodedSamples += order; +static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41_32(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) +{ + int i; + drflac_uint32 riceParamMask; + drflac_int32* pDecodedSamples = pSamplesOut; + drflac_int32* pDecodedSamplesEnd = pSamplesOut + (count & ~3); + drflac_uint32 zeroCountParts0; + drflac_uint32 zeroCountParts1; + drflac_uint32 zeroCountParts2; + drflac_uint32 zeroCountParts3; + drflac_uint32 riceParamParts0; + drflac_uint32 riceParamParts1; + drflac_uint32 riceParamParts2; + drflac_uint32 riceParamParts3; + __m128i coefficients128_0; + __m128i coefficients128_4; + __m128i coefficients128_8; + __m128i samples128_0; + __m128i samples128_4; + __m128i samples128_8; + __m128i riceParamMask128; + const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF}; - drflac_uint8 partitionOrder; - if (!drflac__read_uint8(bs, 4, &partitionOrder)) { - return DRFLAC_FALSE; - } + riceParamMask = (drflac_uint32)~((~0UL) << riceParam); + riceParamMask128 = _mm_set1_epi32(riceParamMask); - // From the FLAC spec: - // The Rice partition order in a Rice-coded residual section must be less than or equal to 8. 
- if (partitionOrder > 8) { - return DRFLAC_FALSE; - } + /* Pre-load. */ + coefficients128_0 = _mm_setzero_si128(); + coefficients128_4 = _mm_setzero_si128(); + coefficients128_8 = _mm_setzero_si128(); - // Validation check. - if ((blockSize / (1 << partitionOrder)) <= order) { - return DRFLAC_FALSE; - } + samples128_0 = _mm_setzero_si128(); + samples128_4 = _mm_setzero_si128(); + samples128_8 = _mm_setzero_si128(); - drflac_uint32 samplesInPartition = (blockSize / (1 << partitionOrder)) - order; - drflac_uint32 partitionsRemaining = (1 << partitionOrder); - for (;;) { - drflac_uint8 riceParam = 0; - if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE) { - if (!drflac__read_uint8(bs, 4, &riceParam)) { - return DRFLAC_FALSE; - } - if (riceParam == 15) { - riceParam = 0xFF; - } - } else if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) { - if (!drflac__read_uint8(bs, 5, &riceParam)) { - return DRFLAC_FALSE; - } - if (riceParam == 31) { - riceParam = 0xFF; + /* + Pre-loading the coefficients and prior samples is annoying because we need to ensure we don't try reading more than + what's available in the input buffers. It would be convenient to use a fall-through switch to do this, but this results + in strict aliasing warnings with GCC. To work around this I'm just doing something hacky. This feels a bit convoluted + so I think there's opportunity for this to be simplified. + */ +#if 1 + { + int runningOrder = order; + + /* 0 - 3. 
*/ + if (runningOrder >= 4) { + coefficients128_0 = _mm_loadu_si128((const __m128i*)(coefficients + 0)); + samples128_0 = _mm_loadu_si128((const __m128i*)(pSamplesOut - 4)); + runningOrder -= 4; + } else { + switch (runningOrder) { + case 3: coefficients128_0 = _mm_set_epi32(0, coefficients[2], coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], pSamplesOut[-3], 0); break; + case 2: coefficients128_0 = _mm_set_epi32(0, 0, coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], 0, 0); break; + case 1: coefficients128_0 = _mm_set_epi32(0, 0, 0, coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], 0, 0, 0); break; } + runningOrder = 0; } - if (riceParam != 0xFF) { - if (!drflac__decode_samples_with_residual__rice(bs, bitsPerSample, samplesInPartition, riceParam, order, shift, coefficients, pDecodedSamples)) { - return DRFLAC_FALSE; - } + /* 4 - 7 */ + if (runningOrder >= 4) { + coefficients128_4 = _mm_loadu_si128((const __m128i*)(coefficients + 4)); + samples128_4 = _mm_loadu_si128((const __m128i*)(pSamplesOut - 8)); + runningOrder -= 4; } else { - unsigned char unencodedBitsPerSample = 0; - if (!drflac__read_uint8(bs, 5, &unencodedBitsPerSample)) { - return DRFLAC_FALSE; + switch (runningOrder) { + case 3: coefficients128_4 = _mm_set_epi32(0, coefficients[6], coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], pSamplesOut[-7], 0); break; + case 2: coefficients128_4 = _mm_set_epi32(0, 0, coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], 0, 0); break; + case 1: coefficients128_4 = _mm_set_epi32(0, 0, 0, coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], 0, 0, 0); break; } + runningOrder = 0; + } - if (!drflac__decode_samples_with_residual__unencoded(bs, bitsPerSample, samplesInPartition, unencodedBitsPerSample, order, shift, coefficients, pDecodedSamples)) { - return 
DRFLAC_FALSE; + /* 8 - 11 */ + if (runningOrder == 4) { + coefficients128_8 = _mm_loadu_si128((const __m128i*)(coefficients + 8)); + samples128_8 = _mm_loadu_si128((const __m128i*)(pSamplesOut - 12)); + runningOrder -= 4; + } else { + switch (runningOrder) { + case 3: coefficients128_8 = _mm_set_epi32(0, coefficients[10], coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], pSamplesOut[-11], 0); break; + case 2: coefficients128_8 = _mm_set_epi32(0, 0, coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], 0, 0); break; + case 1: coefficients128_8 = _mm_set_epi32(0, 0, 0, coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], 0, 0, 0); break; } + runningOrder = 0; } - pDecodedSamples += samplesInPartition; + /* Coefficients need to be shuffled for our streaming algorithm below to work. Samples are already in the correct order from the loading routine above. */ + coefficients128_0 = _mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(0, 1, 2, 3)); + coefficients128_4 = _mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(0, 1, 2, 3)); + coefficients128_8 = _mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(0, 1, 2, 3)); + } +#else + /* This causes strict-aliasing warnings with GCC. 
*/ + switch (order) + { + case 12: ((drflac_int32*)&coefficients128_8)[0] = coefficients[11]; ((drflac_int32*)&samples128_8)[0] = pDecodedSamples[-12]; + case 11: ((drflac_int32*)&coefficients128_8)[1] = coefficients[10]; ((drflac_int32*)&samples128_8)[1] = pDecodedSamples[-11]; + case 10: ((drflac_int32*)&coefficients128_8)[2] = coefficients[ 9]; ((drflac_int32*)&samples128_8)[2] = pDecodedSamples[-10]; + case 9: ((drflac_int32*)&coefficients128_8)[3] = coefficients[ 8]; ((drflac_int32*)&samples128_8)[3] = pDecodedSamples[- 9]; + case 8: ((drflac_int32*)&coefficients128_4)[0] = coefficients[ 7]; ((drflac_int32*)&samples128_4)[0] = pDecodedSamples[- 8]; + case 7: ((drflac_int32*)&coefficients128_4)[1] = coefficients[ 6]; ((drflac_int32*)&samples128_4)[1] = pDecodedSamples[- 7]; + case 6: ((drflac_int32*)&coefficients128_4)[2] = coefficients[ 5]; ((drflac_int32*)&samples128_4)[2] = pDecodedSamples[- 6]; + case 5: ((drflac_int32*)&coefficients128_4)[3] = coefficients[ 4]; ((drflac_int32*)&samples128_4)[3] = pDecodedSamples[- 5]; + case 4: ((drflac_int32*)&coefficients128_0)[0] = coefficients[ 3]; ((drflac_int32*)&samples128_0)[0] = pDecodedSamples[- 4]; + case 3: ((drflac_int32*)&coefficients128_0)[1] = coefficients[ 2]; ((drflac_int32*)&samples128_0)[1] = pDecodedSamples[- 3]; + case 2: ((drflac_int32*)&coefficients128_0)[2] = coefficients[ 1]; ((drflac_int32*)&samples128_0)[2] = pDecodedSamples[- 2]; + case 1: ((drflac_int32*)&coefficients128_0)[3] = coefficients[ 0]; ((drflac_int32*)&samples128_0)[3] = pDecodedSamples[- 1]; + } +#endif + /* For this version we are doing one sample at a time. 
*/ + while (pDecodedSamples < pDecodedSamplesEnd) { + __m128i prediction128; + __m128i zeroCountPart128; + __m128i riceParamPart128; - if (partitionsRemaining == 1) { - break; + if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts1, &riceParamParts1) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts2, &riceParamParts2) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts3, &riceParamParts3)) { + return DRFLAC_FALSE; } - partitionsRemaining -= 1; + zeroCountPart128 = _mm_set_epi32(zeroCountParts3, zeroCountParts2, zeroCountParts1, zeroCountParts0); + riceParamPart128 = _mm_set_epi32(riceParamParts3, riceParamParts2, riceParamParts1, riceParamParts0); - if (partitionOrder != 0) { - samplesInPartition = blockSize / (1 << partitionOrder); - } - } + riceParamPart128 = _mm_and_si128(riceParamPart128, riceParamMask128); + riceParamPart128 = _mm_or_si128(riceParamPart128, _mm_slli_epi32(zeroCountPart128, riceParam)); + riceParamPart128 = _mm_xor_si128(_mm_srli_epi32(riceParamPart128, 1), _mm_add_epi32(drflac__mm_not_si128(_mm_and_si128(riceParamPart128, _mm_set1_epi32(0x01))), _mm_set1_epi32(0x01))); /* <-- SSE2 compatible */ + /*riceParamPart128 = _mm_xor_si128(_mm_srli_epi32(riceParamPart128, 1), _mm_mullo_epi32(_mm_and_si128(riceParamPart128, _mm_set1_epi32(0x01)), _mm_set1_epi32(0xFFFFFFFF)));*/ /* <-- Only supported from SSE4.1 and is slower in my testing... */ - return DRFLAC_TRUE; -} + if (order <= 4) { + for (i = 0; i < 4; i += 1) { + prediction128 = _mm_mullo_epi32(coefficients128_0, samples128_0); -// Reads and seeks past the residual for the sub-frame the decoder is currently sitting on. This function should be called -// when the decoder is sitting at the very start of the RESIDUAL block. The first <order> residuals will be set to 0. The -// <blockSize> and <order> parameters are used to determine how many residual values need to be decoded. 
-static drflac_bool32 drflac__read_and_seek_residual(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 order) -{ - drflac_assert(bs != NULL); - drflac_assert(blockSize != 0); + /* Horizontal add and shift. */ + prediction128 = drflac__mm_hadd_epi32(prediction128); + prediction128 = _mm_srai_epi32(prediction128, shift); + prediction128 = _mm_add_epi32(riceParamPart128, prediction128); - drflac_uint8 residualMethod; - if (!drflac__read_uint8(bs, 2, &residualMethod)) { - return DRFLAC_FALSE; - } + samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4); + riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4); + } + } else if (order <= 8) { + for (i = 0; i < 4; i += 1) { + prediction128 = _mm_mullo_epi32(coefficients128_4, samples128_4); + prediction128 = _mm_add_epi32(prediction128, _mm_mullo_epi32(coefficients128_0, samples128_0)); + + /* Horizontal add and shift. */ + prediction128 = drflac__mm_hadd_epi32(prediction128); + prediction128 = _mm_srai_epi32(prediction128, shift); + prediction128 = _mm_add_epi32(riceParamPart128, prediction128); + + samples128_4 = _mm_alignr_epi8(samples128_0, samples128_4, 4); + samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4); + riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4); + } + } else { + for (i = 0; i < 4; i += 1) { + prediction128 = _mm_mullo_epi32(coefficients128_8, samples128_8); + prediction128 = _mm_add_epi32(prediction128, _mm_mullo_epi32(coefficients128_4, samples128_4)); + prediction128 = _mm_add_epi32(prediction128, _mm_mullo_epi32(coefficients128_0, samples128_0)); + + /* Horizontal add and shift. 
*/ + prediction128 = drflac__mm_hadd_epi32(prediction128); + prediction128 = _mm_srai_epi32(prediction128, shift); + prediction128 = _mm_add_epi32(riceParamPart128, prediction128); + + samples128_8 = _mm_alignr_epi8(samples128_4, samples128_8, 4); + samples128_4 = _mm_alignr_epi8(samples128_0, samples128_4, 4); + samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4); + riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4); + } + } - if (residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE && residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) { - return DRFLAC_FALSE; // Unknown or unsupported residual coding method. + /* We store samples in groups of 4. */ + _mm_storeu_si128((__m128i*)pDecodedSamples, samples128_0); + pDecodedSamples += 4; } - drflac_uint8 partitionOrder; - if (!drflac__read_uint8(bs, 4, &partitionOrder)) { - return DRFLAC_FALSE; - } + /* Make sure we process the last few samples. */ + i = (count & ~3); + while (i < (int)count) { + /* Rice extraction. */ + if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0)) { + return DRFLAC_FALSE; + } - // From the FLAC spec: - // The Rice partition order in a Rice-coded residual section must be less than or equal to 8. - if (partitionOrder > 8) { - return DRFLAC_FALSE; - } + /* Rice reconstruction. */ + riceParamParts0 &= riceParamMask; + riceParamParts0 |= (zeroCountParts0 << riceParam); + riceParamParts0 = (riceParamParts0 >> 1) ^ t[riceParamParts0 & 0x01]; - // Validation check. - if ((blockSize / (1 << partitionOrder)) <= order) { - return DRFLAC_FALSE; + /* Sample reconstruction. 
*/ + pDecodedSamples[0] = riceParamParts0 + drflac__calculate_prediction_32(order, shift, coefficients, pDecodedSamples); + + i += 1; + pDecodedSamples += 1; } - drflac_uint32 samplesInPartition = (blockSize / (1 << partitionOrder)) - order; - drflac_uint32 partitionsRemaining = (1 << partitionOrder); - for (;;) + return DRFLAC_TRUE; +} + +static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41_64(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) +{ + int i; + drflac_uint32 riceParamMask; + drflac_int32* pDecodedSamples = pSamplesOut; + drflac_int32* pDecodedSamplesEnd = pSamplesOut + (count & ~3); + drflac_uint32 zeroCountParts0; + drflac_uint32 zeroCountParts1; + drflac_uint32 zeroCountParts2; + drflac_uint32 zeroCountParts3; + drflac_uint32 riceParamParts0; + drflac_uint32 riceParamParts1; + drflac_uint32 riceParamParts2; + drflac_uint32 riceParamParts3; + __m128i coefficients128_0; + __m128i coefficients128_4; + __m128i coefficients128_8; + __m128i samples128_0; + __m128i samples128_4; + __m128i samples128_8; + __m128i prediction128; + __m128i riceParamMask128; + + const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF}; + + DRFLAC_ASSERT(order <= 12); + + riceParamMask = (drflac_uint32)~((~0UL) << riceParam); + riceParamMask128 = _mm_set1_epi32(riceParamMask); + + prediction128 = _mm_setzero_si128(); + + /* Pre-load. 
*/ + coefficients128_0 = _mm_setzero_si128(); + coefficients128_4 = _mm_setzero_si128(); + coefficients128_8 = _mm_setzero_si128(); + + samples128_0 = _mm_setzero_si128(); + samples128_4 = _mm_setzero_si128(); + samples128_8 = _mm_setzero_si128(); + +#if 1 { - drflac_uint8 riceParam = 0; - if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE) { - if (!drflac__read_uint8(bs, 4, &riceParam)) { - return DRFLAC_FALSE; - } - if (riceParam == 15) { - riceParam = 0xFF; - } - } else if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) { - if (!drflac__read_uint8(bs, 5, &riceParam)) { - return DRFLAC_FALSE; - } - if (riceParam == 31) { - riceParam = 0xFF; + int runningOrder = order; + + /* 0 - 3. */ + if (runningOrder >= 4) { + coefficients128_0 = _mm_loadu_si128((const __m128i*)(coefficients + 0)); + samples128_0 = _mm_loadu_si128((const __m128i*)(pSamplesOut - 4)); + runningOrder -= 4; + } else { + switch (runningOrder) { + case 3: coefficients128_0 = _mm_set_epi32(0, coefficients[2], coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], pSamplesOut[-3], 0); break; + case 2: coefficients128_0 = _mm_set_epi32(0, 0, coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], 0, 0); break; + case 1: coefficients128_0 = _mm_set_epi32(0, 0, 0, coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], 0, 0, 0); break; } + runningOrder = 0; } - if (riceParam != 0xFF) { - if (!drflac__read_and_seek_residual__rice(bs, samplesInPartition, riceParam)) { - return DRFLAC_FALSE; - } + /* 4 - 7 */ + if (runningOrder >= 4) { + coefficients128_4 = _mm_loadu_si128((const __m128i*)(coefficients + 4)); + samples128_4 = _mm_loadu_si128((const __m128i*)(pSamplesOut - 8)); + runningOrder -= 4; } else { - unsigned char unencodedBitsPerSample = 0; - if (!drflac__read_uint8(bs, 5, &unencodedBitsPerSample)) { - return DRFLAC_FALSE; + switch (runningOrder) { + case 3: 
coefficients128_4 = _mm_set_epi32(0, coefficients[6], coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], pSamplesOut[-7], 0); break; + case 2: coefficients128_4 = _mm_set_epi32(0, 0, coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], 0, 0); break; + case 1: coefficients128_4 = _mm_set_epi32(0, 0, 0, coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], 0, 0, 0); break; } + runningOrder = 0; + } - if (!drflac__seek_bits(bs, unencodedBitsPerSample * samplesInPartition)) { - return DRFLAC_FALSE; + /* 8 - 11 */ + if (runningOrder == 4) { + coefficients128_8 = _mm_loadu_si128((const __m128i*)(coefficients + 8)); + samples128_8 = _mm_loadu_si128((const __m128i*)(pSamplesOut - 12)); + runningOrder -= 4; + } else { + switch (runningOrder) { + case 3: coefficients128_8 = _mm_set_epi32(0, coefficients[10], coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], pSamplesOut[-11], 0); break; + case 2: coefficients128_8 = _mm_set_epi32(0, 0, coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], 0, 0); break; + case 1: coefficients128_8 = _mm_set_epi32(0, 0, 0, coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], 0, 0, 0); break; } + runningOrder = 0; } + /* Coefficients need to be shuffled for our streaming algorithm below to work. Samples are already in the correct order from the loading routine above. 
*/ + coefficients128_0 = _mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(0, 1, 2, 3)); + coefficients128_4 = _mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(0, 1, 2, 3)); + coefficients128_8 = _mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(0, 1, 2, 3)); + } +#else + switch (order) + { + case 12: ((drflac_int32*)&coefficients128_8)[0] = coefficients[11]; ((drflac_int32*)&samples128_8)[0] = pDecodedSamples[-12]; + case 11: ((drflac_int32*)&coefficients128_8)[1] = coefficients[10]; ((drflac_int32*)&samples128_8)[1] = pDecodedSamples[-11]; + case 10: ((drflac_int32*)&coefficients128_8)[2] = coefficients[ 9]; ((drflac_int32*)&samples128_8)[2] = pDecodedSamples[-10]; + case 9: ((drflac_int32*)&coefficients128_8)[3] = coefficients[ 8]; ((drflac_int32*)&samples128_8)[3] = pDecodedSamples[- 9]; + case 8: ((drflac_int32*)&coefficients128_4)[0] = coefficients[ 7]; ((drflac_int32*)&samples128_4)[0] = pDecodedSamples[- 8]; + case 7: ((drflac_int32*)&coefficients128_4)[1] = coefficients[ 6]; ((drflac_int32*)&samples128_4)[1] = pDecodedSamples[- 7]; + case 6: ((drflac_int32*)&coefficients128_4)[2] = coefficients[ 5]; ((drflac_int32*)&samples128_4)[2] = pDecodedSamples[- 6]; + case 5: ((drflac_int32*)&coefficients128_4)[3] = coefficients[ 4]; ((drflac_int32*)&samples128_4)[3] = pDecodedSamples[- 5]; + case 4: ((drflac_int32*)&coefficients128_0)[0] = coefficients[ 3]; ((drflac_int32*)&samples128_0)[0] = pDecodedSamples[- 4]; + case 3: ((drflac_int32*)&coefficients128_0)[1] = coefficients[ 2]; ((drflac_int32*)&samples128_0)[1] = pDecodedSamples[- 3]; + case 2: ((drflac_int32*)&coefficients128_0)[2] = coefficients[ 1]; ((drflac_int32*)&samples128_0)[2] = pDecodedSamples[- 2]; + case 1: ((drflac_int32*)&coefficients128_0)[3] = coefficients[ 0]; ((drflac_int32*)&samples128_0)[3] = pDecodedSamples[- 1]; + } +#endif - if (partitionsRemaining == 1) { - break; + /* For this version we are doing one sample at a time. 
*/ + while (pDecodedSamples < pDecodedSamplesEnd) { + __m128i zeroCountPart128; + __m128i riceParamPart128; + + if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts1, &riceParamParts1) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts2, &riceParamParts2) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts3, &riceParamParts3)) { + return DRFLAC_FALSE; } - partitionsRemaining -= 1; - samplesInPartition = blockSize / (1 << partitionOrder); - } + zeroCountPart128 = _mm_set_epi32(zeroCountParts3, zeroCountParts2, zeroCountParts1, zeroCountParts0); + riceParamPart128 = _mm_set_epi32(riceParamParts3, riceParamParts2, riceParamParts1, riceParamParts0); - return DRFLAC_TRUE; -} + riceParamPart128 = _mm_and_si128(riceParamPart128, riceParamMask128); + riceParamPart128 = _mm_or_si128(riceParamPart128, _mm_slli_epi32(zeroCountPart128, riceParam)); + riceParamPart128 = _mm_xor_si128(_mm_srli_epi32(riceParamPart128, 1), _mm_add_epi32(drflac__mm_not_si128(_mm_and_si128(riceParamPart128, _mm_set1_epi32(1))), _mm_set1_epi32(1))); + for (i = 0; i < 4; i += 1) { + prediction128 = _mm_xor_si128(prediction128, prediction128); /* Reset to 0. */ -static drflac_bool32 drflac__decode_samples__constant(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 bitsPerSample, drflac_int32* pDecodedSamples) -{ - // Only a single sample needs to be decoded here. 
- drflac_int32 sample; - if (!drflac__read_int32(bs, bitsPerSample, &sample)) { - return DRFLAC_FALSE; - } + switch (order) + { + case 12: + case 11: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(1, 1, 0, 0)), _mm_shuffle_epi32(samples128_8, _MM_SHUFFLE(1, 1, 0, 0)))); + case 10: + case 9: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(3, 3, 2, 2)), _mm_shuffle_epi32(samples128_8, _MM_SHUFFLE(3, 3, 2, 2)))); + case 8: + case 7: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(1, 1, 0, 0)), _mm_shuffle_epi32(samples128_4, _MM_SHUFFLE(1, 1, 0, 0)))); + case 6: + case 5: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(3, 3, 2, 2)), _mm_shuffle_epi32(samples128_4, _MM_SHUFFLE(3, 3, 2, 2)))); + case 4: + case 3: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(1, 1, 0, 0)), _mm_shuffle_epi32(samples128_0, _MM_SHUFFLE(1, 1, 0, 0)))); + case 2: + case 1: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(3, 3, 2, 2)), _mm_shuffle_epi32(samples128_0, _MM_SHUFFLE(3, 3, 2, 2)))); + } - // We don't really need to expand this, but it does simplify the process of reading samples. If this becomes a performance issue (unlikely) - // we'll want to look at a more efficient way. - for (drflac_uint32 i = 0; i < blockSize; ++i) { - pDecodedSamples[i] = sample; - } + /* Horizontal add and shift. */ + prediction128 = drflac__mm_hadd_epi64(prediction128); + prediction128 = drflac__mm_srai_epi64(prediction128, shift); + prediction128 = _mm_add_epi32(riceParamPart128, prediction128); - return DRFLAC_TRUE; -} + /* Our value should be sitting in prediction128[0]. We need to combine this with our SSE samples. 
*/ + samples128_8 = _mm_alignr_epi8(samples128_4, samples128_8, 4); + samples128_4 = _mm_alignr_epi8(samples128_0, samples128_4, 4); + samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4); -static drflac_bool32 drflac__decode_samples__verbatim(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 bitsPerSample, drflac_int32* pDecodedSamples) -{ - for (drflac_uint32 i = 0; i < blockSize; ++i) { - drflac_int32 sample; - if (!drflac__read_int32(bs, bitsPerSample, &sample)) { - return DRFLAC_FALSE; + /* Slide our rice parameter down so that the value in position 0 contains the next one to process. */ + riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4); } - pDecodedSamples[i] = sample; + /* We store samples in groups of 4. */ + _mm_storeu_si128((__m128i*)pDecodedSamples, samples128_0); + pDecodedSamples += 4; } - return DRFLAC_TRUE; -} - -static drflac_bool32 drflac__decode_samples__fixed(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 bitsPerSample, drflac_uint8 lpcOrder, drflac_int32* pDecodedSamples) -{ - drflac_int32 lpcCoefficientsTable[5][4] = { - {0, 0, 0, 0}, - {1, 0, 0, 0}, - {2, -1, 0, 0}, - {3, -3, 1, 0}, - {4, -6, 4, -1} - }; - - // Warm up samples and coefficients. - for (drflac_uint32 i = 0; i < lpcOrder; ++i) { - drflac_int32 sample; - if (!drflac__read_int32(bs, bitsPerSample, &sample)) { + /* Make sure we process the last few samples. */ + i = (count & ~3); + while (i < (int)count) { + /* Rice extraction. */ + if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0)) { return DRFLAC_FALSE; } - pDecodedSamples[i] = sample; - } + /* Rice reconstruction. */ + riceParamParts0 &= riceParamMask; + riceParamParts0 |= (zeroCountParts0 << riceParam); + riceParamParts0 = (riceParamParts0 >> 1) ^ t[riceParamParts0 & 0x01]; + /* Sample reconstruction. 
*/ + pDecodedSamples[0] = riceParamParts0 + drflac__calculate_prediction_64(order, shift, coefficients, pDecodedSamples); - if (!drflac__decode_samples_with_residual(bs, bitsPerSample, blockSize, lpcOrder, 0, lpcCoefficientsTable[lpcOrder], pDecodedSamples)) { - return DRFLAC_FALSE; + i += 1; + pDecodedSamples += 1; } return DRFLAC_TRUE; } -static drflac_bool32 drflac__decode_samples__lpc(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 bitsPerSample, drflac_uint8 lpcOrder, drflac_int32* pDecodedSamples) +static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) { - drflac_uint8 i; + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(count > 0); + DRFLAC_ASSERT(pSamplesOut != NULL); - // Warm up samples. - for (i = 0; i < lpcOrder; ++i) { - drflac_int32 sample; - if (!drflac__read_int32(bs, bitsPerSample, &sample)) { - return DRFLAC_FALSE; + /* In my testing the order is rarely > 12, so in this case I'm going to simplify the SSE implementation by only handling order <= 12. */ + if (order > 0 && order <= 12) { + if (bitsPerSample+shift > 32) { + return drflac__decode_samples_with_residual__rice__sse41_64(bs, count, riceParam, order, shift, coefficients, pSamplesOut); + } else { + return drflac__decode_samples_with_residual__rice__sse41_32(bs, count, riceParam, order, shift, coefficients, pSamplesOut); } - - pDecodedSamples[i] = sample; + } else { + return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut); } +} +#endif - drflac_uint8 lpcPrecision; - if (!drflac__read_uint8(bs, 4, &lpcPrecision)) { - return DRFLAC_FALSE; - } - if (lpcPrecision == 15) { - return DRFLAC_FALSE; // Invalid. 
- } - lpcPrecision += 1; +#if defined(DRFLAC_SUPPORT_NEON) +static DRFLAC_INLINE void drflac__vst2q_s32(drflac_int32* p, int32x4x2_t x) +{ + vst1q_s32(p+0, x.val[0]); + vst1q_s32(p+4, x.val[1]); +} +static DRFLAC_INLINE void drflac__vst2q_f32(float* p, float32x4x2_t x) +{ + vst1q_f32(p+0, x.val[0]); + vst1q_f32(p+4, x.val[1]); +} - drflac_int8 lpcShift; - if (!drflac__read_int8(bs, 5, &lpcShift)) { - return DRFLAC_FALSE; - } +static DRFLAC_INLINE void drflac__vst2q_s16(drflac_int16* p, int16x4x2_t x) +{ + vst1q_s16(p, vcombine_s16(x.val[0], x.val[1])); +} +static DRFLAC_INLINE int32x4_t drflac__vdupq_n_s32x4(drflac_int32 x3, drflac_int32 x2, drflac_int32 x1, drflac_int32 x0) +{ + drflac_int32 x[4]; + x[3] = x3; + x[2] = x2; + x[1] = x1; + x[0] = x0; + return vld1q_s32(x); +} - drflac_int32 coefficients[32]; - for (i = 0; i < lpcOrder; ++i) { - if (!drflac__read_int32(bs, lpcPrecision, coefficients + i)) { - return DRFLAC_FALSE; - } - } +static DRFLAC_INLINE int32x4_t drflac__valignrq_s32_1(int32x4_t a, int32x4_t b) +{ + /* Equivalent to SSE's _mm_alignr_epi8(a, b, 4) */ - if (!drflac__decode_samples_with_residual(bs, bitsPerSample, blockSize, lpcOrder, lpcShift, coefficients, pDecodedSamples)) { - return DRFLAC_FALSE; - } + /* Reference */ + /*return drflac__vdupq_n_s32x4( + vgetq_lane_s32(a, 0), + vgetq_lane_s32(b, 3), + vgetq_lane_s32(b, 2), + vgetq_lane_s32(b, 1) + );*/ - return DRFLAC_TRUE; + return vextq_s32(b, a, 1); } +static DRFLAC_INLINE uint32x4_t drflac__valignrq_u32_1(uint32x4_t a, uint32x4_t b) +{ + /* Equivalent to SSE's _mm_alignr_epi8(a, b, 4) */ + + /* Reference */ + /*return drflac__vdupq_n_s32x4( + vgetq_lane_s32(a, 0), + vgetq_lane_s32(b, 3), + vgetq_lane_s32(b, 2), + vgetq_lane_s32(b, 1) + );*/ -static drflac_bool32 drflac__read_next_frame_header(drflac_bs* bs, drflac_uint8 streaminfoBitsPerSample, drflac_frame_header* header) + return vextq_u32(b, a, 1); +} + +static DRFLAC_INLINE int32x2_t drflac__vhaddq_s32(int32x4_t x) { - drflac_assert(bs 
!= NULL); - drflac_assert(header != NULL); + /* The sum must end up in position 0. */ - const drflac_uint32 sampleRateTable[12] = {0, 88200, 176400, 192000, 8000, 16000, 22050, 24000, 32000, 44100, 48000, 96000}; - const drflac_uint8 bitsPerSampleTable[8] = {0, 8, 12, (drflac_uint8)-1, 16, 20, 24, (drflac_uint8)-1}; // -1 = reserved. + /* Reference */ + /*return vdupq_n_s32( + vgetq_lane_s32(x, 3) + + vgetq_lane_s32(x, 2) + + vgetq_lane_s32(x, 1) + + vgetq_lane_s32(x, 0) + );*/ - // Keep looping until we find a valid sync code. - for (;;) { - if (!drflac__find_and_seek_to_next_sync_code(bs)) { - return DRFLAC_FALSE; - } + int32x2_t r = vadd_s32(vget_high_s32(x), vget_low_s32(x)); + return vpadd_s32(r, r); +} - drflac_uint8 crc8 = 0xCE; // 0xCE = drflac_crc8(0, 0x3FFE, 14); +static DRFLAC_INLINE int64x1_t drflac__vhaddq_s64(int64x2_t x) +{ + return vadd_s64(vget_high_s64(x), vget_low_s64(x)); +} - drflac_uint8 reserved = 0; - if (!drflac__read_uint8(bs, 1, &reserved)) { - return DRFLAC_FALSE; - } - if (reserved == 1) { - continue; - } - crc8 = drflac_crc8(crc8, reserved, 1); +static DRFLAC_INLINE int32x4_t drflac__vrevq_s32(int32x4_t x) +{ + /* Reference */ + /*return drflac__vdupq_n_s32x4( + vgetq_lane_s32(x, 0), + vgetq_lane_s32(x, 1), + vgetq_lane_s32(x, 2), + vgetq_lane_s32(x, 3) + );*/ + return vrev64q_s32(vcombine_s32(vget_high_s32(x), vget_low_s32(x))); +} - drflac_uint8 blockingStrategy = 0; - if (!drflac__read_uint8(bs, 1, &blockingStrategy)) { - return DRFLAC_FALSE; - } - crc8 = drflac_crc8(crc8, blockingStrategy, 1); +static DRFLAC_INLINE int32x4_t drflac__vnotq_s32(int32x4_t x) +{ + return veorq_s32(x, vdupq_n_s32(0xFFFFFFFF)); +} +static DRFLAC_INLINE uint32x4_t drflac__vnotq_u32(uint32x4_t x) +{ + return veorq_u32(x, vdupq_n_u32(0xFFFFFFFF)); +} - drflac_uint8 blockSize = 0; - if (!drflac__read_uint8(bs, 4, &blockSize)) { - return DRFLAC_FALSE; - } - if (blockSize == 0) { - continue; - } - crc8 = drflac_crc8(crc8, blockSize, 4); +static drflac_bool32 
drflac__decode_samples_with_residual__rice__neon_32(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) +{ + int i; + drflac_uint32 riceParamMask; + drflac_int32* pDecodedSamples = pSamplesOut; + drflac_int32* pDecodedSamplesEnd = pSamplesOut + (count & ~3); + drflac_uint32 zeroCountParts[4]; + drflac_uint32 riceParamParts[4]; + int32x4_t coefficients128_0; + int32x4_t coefficients128_4; + int32x4_t coefficients128_8; + int32x4_t samples128_0; + int32x4_t samples128_4; + int32x4_t samples128_8; + uint32x4_t riceParamMask128; + int32x4_t riceParam128; + int32x2_t shift64; + uint32x4_t one128; + const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF}; - drflac_uint8 sampleRate = 0; - if (!drflac__read_uint8(bs, 4, &sampleRate)) { - return DRFLAC_FALSE; - } - crc8 = drflac_crc8(crc8, sampleRate, 4); + riceParamMask = ~((~0UL) << riceParam); + riceParamMask128 = vdupq_n_u32(riceParamMask); + riceParam128 = vdupq_n_s32(riceParam); + shift64 = vdup_n_s32(-shift); /* Negate the shift because we'll be doing a variable shift using vshlq_s32(). */ + one128 = vdupq_n_u32(1); - drflac_uint8 channelAssignment = 0; - if (!drflac__read_uint8(bs, 4, &channelAssignment)) { - return DRFLAC_FALSE; - } - if (channelAssignment > 10) { - continue; + /* + Pre-loading the coefficients and prior samples is annoying because we need to ensure we don't try reading more than + what's available in the input buffers. It would be conenient to use a fall-through switch to do this, but this results + in strict aliasing warnings with GCC. To work around this I'm just doing something hacky. This feels a bit convoluted + so I think there's opportunity for this to be simplified. + */ + { + int runningOrder = order; + drflac_int32 tempC[4] = {0, 0, 0, 0}; + drflac_int32 tempS[4] = {0, 0, 0, 0}; + + /* 0 - 3. 
*/ + if (runningOrder >= 4) { + coefficients128_0 = vld1q_s32(coefficients + 0); + samples128_0 = vld1q_s32(pSamplesOut - 4); + runningOrder -= 4; + } else { + switch (runningOrder) { + case 3: tempC[2] = coefficients[2]; tempS[1] = pSamplesOut[-3]; /* fallthrough */ + case 2: tempC[1] = coefficients[1]; tempS[2] = pSamplesOut[-2]; /* fallthrough */ + case 1: tempC[0] = coefficients[0]; tempS[3] = pSamplesOut[-1]; /* fallthrough */ + } + + coefficients128_0 = vld1q_s32(tempC); + samples128_0 = vld1q_s32(tempS); + runningOrder = 0; } - crc8 = drflac_crc8(crc8, channelAssignment, 4); + /* 4 - 7 */ + if (runningOrder >= 4) { + coefficients128_4 = vld1q_s32(coefficients + 4); + samples128_4 = vld1q_s32(pSamplesOut - 8); + runningOrder -= 4; + } else { + switch (runningOrder) { + case 3: tempC[2] = coefficients[6]; tempS[1] = pSamplesOut[-7]; /* fallthrough */ + case 2: tempC[1] = coefficients[5]; tempS[2] = pSamplesOut[-6]; /* fallthrough */ + case 1: tempC[0] = coefficients[4]; tempS[3] = pSamplesOut[-5]; /* fallthrough */ + } - drflac_uint8 bitsPerSample = 0; - if (!drflac__read_uint8(bs, 3, &bitsPerSample)) { - return DRFLAC_FALSE; + coefficients128_4 = vld1q_s32(tempC); + samples128_4 = vld1q_s32(tempS); + runningOrder = 0; } - if (bitsPerSample == 3 || bitsPerSample == 7) { - continue; + + /* 8 - 11 */ + if (runningOrder == 4) { + coefficients128_8 = vld1q_s32(coefficients + 8); + samples128_8 = vld1q_s32(pSamplesOut - 12); + runningOrder -= 4; + } else { + switch (runningOrder) { + case 3: tempC[2] = coefficients[10]; tempS[1] = pSamplesOut[-11]; /* fallthrough */ + case 2: tempC[1] = coefficients[ 9]; tempS[2] = pSamplesOut[-10]; /* fallthrough */ + case 1: tempC[0] = coefficients[ 8]; tempS[3] = pSamplesOut[- 9]; /* fallthrough */ + } + + coefficients128_8 = vld1q_s32(tempC); + samples128_8 = vld1q_s32(tempS); + runningOrder = 0; } - crc8 = drflac_crc8(crc8, bitsPerSample, 3); + /* Coefficients need to be shuffled for our streaming algorithm below to work. 
Samples are already in the correct order from the loading routine above. */ + coefficients128_0 = drflac__vrevq_s32(coefficients128_0); + coefficients128_4 = drflac__vrevq_s32(coefficients128_4); + coefficients128_8 = drflac__vrevq_s32(coefficients128_8); + } - if (!drflac__read_uint8(bs, 1, &reserved)) { + /* For this version we are doing one sample at a time. */ + while (pDecodedSamples < pDecodedSamplesEnd) { + int32x4_t prediction128; + int32x2_t prediction64; + uint32x4_t zeroCountPart128; + uint32x4_t riceParamPart128; + + if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[0], &riceParamParts[0]) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[1], &riceParamParts[1]) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[2], &riceParamParts[2]) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[3], &riceParamParts[3])) { return DRFLAC_FALSE; } - if (reserved == 1) { - continue; - } - crc8 = drflac_crc8(crc8, reserved, 1); + zeroCountPart128 = vld1q_u32(zeroCountParts); + riceParamPart128 = vld1q_u32(riceParamParts); - drflac_bool32 isVariableBlockSize = blockingStrategy == 1; - if (isVariableBlockSize) { - drflac_uint64 sampleNumber; - drflac_result result = drflac__read_utf8_coded_number(bs, &sampleNumber, &crc8); - if (result != DRFLAC_SUCCESS) { - if (result == DRFLAC_END_OF_STREAM) { - return DRFLAC_FALSE; - } else { - continue; - } + riceParamPart128 = vandq_u32(riceParamPart128, riceParamMask128); + riceParamPart128 = vorrq_u32(riceParamPart128, vshlq_u32(zeroCountPart128, riceParam128)); + riceParamPart128 = veorq_u32(vshrq_n_u32(riceParamPart128, 1), vaddq_u32(drflac__vnotq_u32(vandq_u32(riceParamPart128, one128)), one128)); + + if (order <= 4) { + for (i = 0; i < 4; i += 1) { + prediction128 = vmulq_s32(coefficients128_0, samples128_0); + + /* Horizontal add and shift. 
*/ + prediction64 = drflac__vhaddq_s32(prediction128); + prediction64 = vshl_s32(prediction64, shift64); + prediction64 = vadd_s32(prediction64, vget_low_s32(vreinterpretq_s32_u32(riceParamPart128))); + + samples128_0 = drflac__valignrq_s32_1(vcombine_s32(prediction64, vdup_n_s32(0)), samples128_0); + riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128); + } + } else if (order <= 8) { + for (i = 0; i < 4; i += 1) { + prediction128 = vmulq_s32(coefficients128_4, samples128_4); + prediction128 = vmlaq_s32(prediction128, coefficients128_0, samples128_0); + + /* Horizontal add and shift. */ + prediction64 = drflac__vhaddq_s32(prediction128); + prediction64 = vshl_s32(prediction64, shift64); + prediction64 = vadd_s32(prediction64, vget_low_s32(vreinterpretq_s32_u32(riceParamPart128))); + + samples128_4 = drflac__valignrq_s32_1(samples128_0, samples128_4); + samples128_0 = drflac__valignrq_s32_1(vcombine_s32(prediction64, vdup_n_s32(0)), samples128_0); + riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128); } - header->frameNumber = 0; - header->sampleNumber = sampleNumber; } else { - drflac_uint64 frameNumber = 0; - drflac_result result = drflac__read_utf8_coded_number(bs, &frameNumber, &crc8); - if (result != DRFLAC_SUCCESS) { - if (result == DRFLAC_END_OF_STREAM) { - return DRFLAC_FALSE; - } else { - continue; - } + for (i = 0; i < 4; i += 1) { + prediction128 = vmulq_s32(coefficients128_8, samples128_8); + prediction128 = vmlaq_s32(prediction128, coefficients128_4, samples128_4); + prediction128 = vmlaq_s32(prediction128, coefficients128_0, samples128_0); + + /* Horizontal add and shift. 
*/ + prediction64 = drflac__vhaddq_s32(prediction128); + prediction64 = vshl_s32(prediction64, shift64); + prediction64 = vadd_s32(prediction64, vget_low_s32(vreinterpretq_s32_u32(riceParamPart128))); + + samples128_8 = drflac__valignrq_s32_1(samples128_4, samples128_8); + samples128_4 = drflac__valignrq_s32_1(samples128_0, samples128_4); + samples128_0 = drflac__valignrq_s32_1(vcombine_s32(prediction64, vdup_n_s32(0)), samples128_0); + riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128); } - header->frameNumber = (drflac_uint32)frameNumber; // <-- Safe cast. - header->sampleNumber = 0; } + /* We store samples in groups of 4. */ + vst1q_s32(pDecodedSamples, samples128_0); + pDecodedSamples += 4; + } - if (blockSize == 1) { - header->blockSize = 192; - } else if (blockSize >= 2 && blockSize <= 5) { - header->blockSize = 576 * (1 << (blockSize - 2)); - } else if (blockSize == 6) { - if (!drflac__read_uint16(bs, 8, &header->blockSize)) { - return DRFLAC_FALSE; - } - crc8 = drflac_crc8(crc8, header->blockSize, 8); - header->blockSize += 1; - } else if (blockSize == 7) { - if (!drflac__read_uint16(bs, 16, &header->blockSize)) { - return DRFLAC_FALSE; - } - crc8 = drflac_crc8(crc8, header->blockSize, 16); - header->blockSize += 1; - } else { - header->blockSize = 256 * (1 << (blockSize - 8)); + /* Make sure we process the last few samples. */ + i = (count & ~3); + while (i < (int)count) { + /* Rice extraction. */ + if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[0], &riceParamParts[0])) { + return DRFLAC_FALSE; } + /* Rice reconstruction. 
*/ + riceParamParts[0] &= riceParamMask; + riceParamParts[0] |= (zeroCountParts[0] << riceParam); + riceParamParts[0] = (riceParamParts[0] >> 1) ^ t[riceParamParts[0] & 0x01]; - if (sampleRate <= 11) { - header->sampleRate = sampleRateTable[sampleRate]; - } else if (sampleRate == 12) { - if (!drflac__read_uint32(bs, 8, &header->sampleRate)) { - return DRFLAC_FALSE; - } - crc8 = drflac_crc8(crc8, header->sampleRate, 8); - header->sampleRate *= 1000; - } else if (sampleRate == 13) { - if (!drflac__read_uint32(bs, 16, &header->sampleRate)) { - return DRFLAC_FALSE; - } - crc8 = drflac_crc8(crc8, header->sampleRate, 16); - } else if (sampleRate == 14) { - if (!drflac__read_uint32(bs, 16, &header->sampleRate)) { - return DRFLAC_FALSE; - } - crc8 = drflac_crc8(crc8, header->sampleRate, 16); - header->sampleRate *= 10; + /* Sample reconstruction. */ + pDecodedSamples[0] = riceParamParts[0] + drflac__calculate_prediction_32(order, shift, coefficients, pDecodedSamples); + + i += 1; + pDecodedSamples += 1; + } + + return DRFLAC_TRUE; +} + +static drflac_bool32 drflac__decode_samples_with_residual__rice__neon_64(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) +{ + int i; + drflac_uint32 riceParamMask; + drflac_int32* pDecodedSamples = pSamplesOut; + drflac_int32* pDecodedSamplesEnd = pSamplesOut + (count & ~3); + drflac_uint32 zeroCountParts[4]; + drflac_uint32 riceParamParts[4]; + int32x4_t coefficients128_0; + int32x4_t coefficients128_4; + int32x4_t coefficients128_8; + int32x4_t samples128_0; + int32x4_t samples128_4; + int32x4_t samples128_8; + uint32x4_t riceParamMask128; + int32x4_t riceParam128; + int64x1_t shift64; + uint32x4_t one128; + + const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF}; + + riceParamMask = ~((~0UL) << riceParam); + riceParamMask128 = vdupq_n_u32(riceParamMask); + + riceParam128 = vdupq_n_s32(riceParam); + shift64 = vdup_n_s64(-shift); 
/* Negate the shift because we'll be doing a variable shift using vshlq_s32(). */ + one128 = vdupq_n_u32(1); + + /* + Pre-loading the coefficients and prior samples is annoying because we need to ensure we don't try reading more than + what's available in the input buffers. It would be conenient to use a fall-through switch to do this, but this results + in strict aliasing warnings with GCC. To work around this I'm just doing something hacky. This feels a bit convoluted + so I think there's opportunity for this to be simplified. + */ + { + int runningOrder = order; + drflac_int32 tempC[4] = {0, 0, 0, 0}; + drflac_int32 tempS[4] = {0, 0, 0, 0}; + + /* 0 - 3. */ + if (runningOrder >= 4) { + coefficients128_0 = vld1q_s32(coefficients + 0); + samples128_0 = vld1q_s32(pSamplesOut - 4); + runningOrder -= 4; } else { - continue; // Invalid. Assume an invalid block. + switch (runningOrder) { + case 3: tempC[2] = coefficients[2]; tempS[1] = pSamplesOut[-3]; /* fallthrough */ + case 2: tempC[1] = coefficients[1]; tempS[2] = pSamplesOut[-2]; /* fallthrough */ + case 1: tempC[0] = coefficients[0]; tempS[3] = pSamplesOut[-1]; /* fallthrough */ + } + + coefficients128_0 = vld1q_s32(tempC); + samples128_0 = vld1q_s32(tempS); + runningOrder = 0; } + /* 4 - 7 */ + if (runningOrder >= 4) { + coefficients128_4 = vld1q_s32(coefficients + 4); + samples128_4 = vld1q_s32(pSamplesOut - 8); + runningOrder -= 4; + } else { + switch (runningOrder) { + case 3: tempC[2] = coefficients[6]; tempS[1] = pSamplesOut[-7]; /* fallthrough */ + case 2: tempC[1] = coefficients[5]; tempS[2] = pSamplesOut[-6]; /* fallthrough */ + case 1: tempC[0] = coefficients[4]; tempS[3] = pSamplesOut[-5]; /* fallthrough */ + } + + coefficients128_4 = vld1q_s32(tempC); + samples128_4 = vld1q_s32(tempS); + runningOrder = 0; + } - header->channelAssignment = channelAssignment; + /* 8 - 11 */ + if (runningOrder == 4) { + coefficients128_8 = vld1q_s32(coefficients + 8); + samples128_8 = vld1q_s32(pSamplesOut - 12); + 
runningOrder -= 4; + } else { + switch (runningOrder) { + case 3: tempC[2] = coefficients[10]; tempS[1] = pSamplesOut[-11]; /* fallthrough */ + case 2: tempC[1] = coefficients[ 9]; tempS[2] = pSamplesOut[-10]; /* fallthrough */ + case 1: tempC[0] = coefficients[ 8]; tempS[3] = pSamplesOut[- 9]; /* fallthrough */ + } - header->bitsPerSample = bitsPerSampleTable[bitsPerSample]; - if (header->bitsPerSample == 0) { - header->bitsPerSample = streaminfoBitsPerSample; + coefficients128_8 = vld1q_s32(tempC); + samples128_8 = vld1q_s32(tempS); + runningOrder = 0; } - if (!drflac__read_uint8(bs, 8, &header->crc8)) { + /* Coefficients need to be shuffled for our streaming algorithm below to work. Samples are already in the correct order from the loading routine above. */ + coefficients128_0 = drflac__vrevq_s32(coefficients128_0); + coefficients128_4 = drflac__vrevq_s32(coefficients128_4); + coefficients128_8 = drflac__vrevq_s32(coefficients128_8); + } + + /* For this version we are doing one sample at a time. */ + while (pDecodedSamples < pDecodedSamplesEnd) { + int64x2_t prediction128; + uint32x4_t zeroCountPart128; + uint32x4_t riceParamPart128; + + if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[0], &riceParamParts[0]) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[1], &riceParamParts[1]) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[2], &riceParamParts[2]) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[3], &riceParamParts[3])) { return DRFLAC_FALSE; } -#ifndef DR_FLAC_NO_CRC - if (header->crc8 != crc8) { - continue; // CRC mismatch. Loop back to the top and find the next sync code. 
- } -#endif - return DRFLAC_TRUE; - } -} + zeroCountPart128 = vld1q_u32(zeroCountParts); + riceParamPart128 = vld1q_u32(riceParamParts); -static drflac_bool32 drflac__read_subframe_header(drflac_bs* bs, drflac_subframe* pSubframe) -{ - drflac_uint8 header; - if (!drflac__read_uint8(bs, 8, &header)) { - return DRFLAC_FALSE; - } + riceParamPart128 = vandq_u32(riceParamPart128, riceParamMask128); + riceParamPart128 = vorrq_u32(riceParamPart128, vshlq_u32(zeroCountPart128, riceParam128)); + riceParamPart128 = veorq_u32(vshrq_n_u32(riceParamPart128, 1), vaddq_u32(drflac__vnotq_u32(vandq_u32(riceParamPart128, one128)), one128)); - // First bit should always be 0. - if ((header & 0x80) != 0) { - return DRFLAC_FALSE; - } + for (i = 0; i < 4; i += 1) { + int64x1_t prediction64; - int type = (header & 0x7E) >> 1; - if (type == 0) { - pSubframe->subframeType = DRFLAC_SUBFRAME_CONSTANT; - } else if (type == 1) { - pSubframe->subframeType = DRFLAC_SUBFRAME_VERBATIM; - } else { - if ((type & 0x20) != 0) { - pSubframe->subframeType = DRFLAC_SUBFRAME_LPC; - pSubframe->lpcOrder = (type & 0x1F) + 1; - } else if ((type & 0x08) != 0) { - pSubframe->subframeType = DRFLAC_SUBFRAME_FIXED; - pSubframe->lpcOrder = (type & 0x07); - if (pSubframe->lpcOrder > 4) { - pSubframe->subframeType = DRFLAC_SUBFRAME_RESERVED; - pSubframe->lpcOrder = 0; + prediction128 = veorq_s64(prediction128, prediction128); /* Reset to 0. 
*/ + switch (order) + { + case 12: + case 11: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_low_s32(coefficients128_8), vget_low_s32(samples128_8))); + case 10: + case 9: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_high_s32(coefficients128_8), vget_high_s32(samples128_8))); + case 8: + case 7: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_low_s32(coefficients128_4), vget_low_s32(samples128_4))); + case 6: + case 5: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_high_s32(coefficients128_4), vget_high_s32(samples128_4))); + case 4: + case 3: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_low_s32(coefficients128_0), vget_low_s32(samples128_0))); + case 2: + case 1: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_high_s32(coefficients128_0), vget_high_s32(samples128_0))); } - } else { - pSubframe->subframeType = DRFLAC_SUBFRAME_RESERVED; + + /* Horizontal add and shift. */ + prediction64 = drflac__vhaddq_s64(prediction128); + prediction64 = vshl_s64(prediction64, shift64); + prediction64 = vadd_s64(prediction64, vdup_n_s64(vgetq_lane_u32(riceParamPart128, 0))); + + /* Our value should be sitting in prediction64[0]. We need to combine this with our SSE samples. */ + samples128_8 = drflac__valignrq_s32_1(samples128_4, samples128_8); + samples128_4 = drflac__valignrq_s32_1(samples128_0, samples128_4); + samples128_0 = drflac__valignrq_s32_1(vcombine_s32(vreinterpret_s32_s64(prediction64), vdup_n_s32(0)), samples128_0); + + /* Slide our rice parameter down so that the value in position 0 contains the next one to process. */ + riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128); } - } - if (pSubframe->subframeType == DRFLAC_SUBFRAME_RESERVED) { - return DRFLAC_FALSE; + /* We store samples in groups of 4. */ + vst1q_s32(pDecodedSamples, samples128_0); + pDecodedSamples += 4; } - // Wasted bits per sample. 
- pSubframe->wastedBitsPerSample = 0; - if ((header & 0x01) == 1) { - unsigned int wastedBitsPerSample; - if (!drflac__seek_past_next_set_bit(bs, &wastedBitsPerSample)) { + /* Make sure we process the last few samples. */ + i = (count & ~3); + while (i < (int)count) { + /* Rice extraction. */ + if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[0], &riceParamParts[0])) { return DRFLAC_FALSE; } - pSubframe->wastedBitsPerSample = (unsigned char)wastedBitsPerSample + 1; + + /* Rice reconstruction. */ + riceParamParts[0] &= riceParamMask; + riceParamParts[0] |= (zeroCountParts[0] << riceParam); + riceParamParts[0] = (riceParamParts[0] >> 1) ^ t[riceParamParts[0] & 0x01]; + + /* Sample reconstruction. */ + pDecodedSamples[0] = riceParamParts[0] + drflac__calculate_prediction_64(order, shift, coefficients, pDecodedSamples); + + i += 1; + pDecodedSamples += 1; } return DRFLAC_TRUE; } -static drflac_bool32 drflac__decode_subframe(drflac_bs* bs, drflac_frame* frame, int subframeIndex, drflac_int32* pDecodedSamplesOut) +static drflac_bool32 drflac__decode_samples_with_residual__rice__neon(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) { - drflac_assert(bs != NULL); - drflac_assert(frame != NULL); - - drflac_subframe* pSubframe = frame->subframes + subframeIndex; - if (!drflac__read_subframe_header(bs, pSubframe)) { - return DRFLAC_FALSE; - } - - // Side channels require an extra bit per sample. Took a while to figure that one out... 
- pSubframe->bitsPerSample = frame->header.bitsPerSample; - if ((frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE || frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE) && subframeIndex == 1) { - pSubframe->bitsPerSample += 1; - } else if (frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE && subframeIndex == 0) { - pSubframe->bitsPerSample += 1; - } + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(count > 0); + DRFLAC_ASSERT(pSamplesOut != NULL); - // Need to handle wasted bits per sample. - if (pSubframe->wastedBitsPerSample >= pSubframe->bitsPerSample) { - return DRFLAC_FALSE; + /* In my testing the order is rarely > 12, so in this case I'm going to simplify the NEON implementation by only handling order <= 12. */ + if (order > 0 && order <= 12) { + if (bitsPerSample+shift > 32) { + return drflac__decode_samples_with_residual__rice__neon_64(bs, count, riceParam, order, shift, coefficients, pSamplesOut); + } else { + return drflac__decode_samples_with_residual__rice__neon_32(bs, count, riceParam, order, shift, coefficients, pSamplesOut); + } + } else { + return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut); } - pSubframe->bitsPerSample -= pSubframe->wastedBitsPerSample; - pSubframe->pDecodedSamples = pDecodedSamplesOut; +} +#endif - switch (pSubframe->subframeType) +static drflac_bool32 drflac__decode_samples_with_residual__rice(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) +{ +#if defined(DRFLAC_SUPPORT_SSE41) + if (drflac__gIsSSE41Supported) { + return drflac__decode_samples_with_residual__rice__sse41(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut); + } else +#elif defined(DRFLAC_SUPPORT_NEON) + if (drflac__gIsNEONSupported) { + return 
drflac__decode_samples_with_residual__rice__neon(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut); + } else +#endif { - case DRFLAC_SUBFRAME_CONSTANT: - { - drflac__decode_samples__constant(bs, frame->header.blockSize, pSubframe->bitsPerSample, pSubframe->pDecodedSamples); - } break; - - case DRFLAC_SUBFRAME_VERBATIM: - { - drflac__decode_samples__verbatim(bs, frame->header.blockSize, pSubframe->bitsPerSample, pSubframe->pDecodedSamples); - } break; + /* Scalar fallback. */ + #if 0 + return drflac__decode_samples_with_residual__rice__reference(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut); + #else + return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut); + #endif + } +} - case DRFLAC_SUBFRAME_FIXED: - { - drflac__decode_samples__fixed(bs, frame->header.blockSize, pSubframe->bitsPerSample, pSubframe->lpcOrder, pSubframe->pDecodedSamples); - } break; +/* Reads and seeks past a string of residual values as Rice codes. The decoder should be sitting on the first bit of the Rice codes. 
*/ +static drflac_bool32 drflac__read_and_seek_residual__rice(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam) +{ + drflac_uint32 i; - case DRFLAC_SUBFRAME_LPC: - { - drflac__decode_samples__lpc(bs, frame->header.blockSize, pSubframe->bitsPerSample, pSubframe->lpcOrder, pSubframe->pDecodedSamples); - } break; + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(count > 0); - default: return DRFLAC_FALSE; + for (i = 0; i < count; ++i) { + if (!drflac__seek_rice_parts(bs, riceParam)) { + return DRFLAC_FALSE; + } } return DRFLAC_TRUE; } -static drflac_bool32 drflac__seek_subframe(drflac_bs* bs, drflac_frame* frame, int subframeIndex) +static drflac_bool32 drflac__decode_samples_with_residual__unencoded(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 unencodedBitsPerSample, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) { - drflac_assert(bs != NULL); - drflac_assert(frame != NULL); + drflac_uint32 i; - drflac_subframe* pSubframe = frame->subframes + subframeIndex; - if (!drflac__read_subframe_header(bs, pSubframe)) { + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(count > 0); + DRFLAC_ASSERT(unencodedBitsPerSample <= 31); /* <-- unencodedBitsPerSample is a 5 bit number, so cannot exceed 31. */ + DRFLAC_ASSERT(pSamplesOut != NULL); + + for (i = 0; i < count; ++i) { + if (unencodedBitsPerSample > 0) { + if (!drflac__read_int32(bs, unencodedBitsPerSample, pSamplesOut + i)) { + return DRFLAC_FALSE; + } + } else { + pSamplesOut[i] = 0; + } + + if (bitsPerSample >= 24) { + pSamplesOut[i] += drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + i); + } else { + pSamplesOut[i] += drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + i); + } + } + + return DRFLAC_TRUE; +} + + +/* +Reads and decodes the residual for the sub-frame the decoder is currently sitting on. 
This function should be called +when the decoder is sitting at the very start of the RESIDUAL block. The first <order> residuals will be ignored. The +<blockSize> and <order> parameters are used to determine how many residual values need to be decoded. +*/ +static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 blockSize, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples) +{ + drflac_uint8 residualMethod; + drflac_uint8 partitionOrder; + drflac_uint32 samplesInPartition; + drflac_uint32 partitionsRemaining; + + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(blockSize != 0); + DRFLAC_ASSERT(pDecodedSamples != NULL); /* <-- Should we allow NULL, in which case we just seek past the residual rather than do a full decode? */ + + if (!drflac__read_uint8(bs, 2, &residualMethod)) { return DRFLAC_FALSE; } - // Side channels require an extra bit per sample. Took a while to figure that one out... - pSubframe->bitsPerSample = frame->header.bitsPerSample; - if ((frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE || frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE) && subframeIndex == 1) { - pSubframe->bitsPerSample += 1; - } else if (frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE && subframeIndex == 0) { - pSubframe->bitsPerSample += 1; + if (residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE && residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) { + return DRFLAC_FALSE; /* Unknown or unsupported residual coding method. */ } - // Need to handle wasted bits per sample. - if (pSubframe->wastedBitsPerSample >= pSubframe->bitsPerSample) { + /* Ignore the first <order> values. 
*/ + pDecodedSamples += order; + + if (!drflac__read_uint8(bs, 4, &partitionOrder)) { return DRFLAC_FALSE; } - pSubframe->bitsPerSample -= pSubframe->wastedBitsPerSample; - pSubframe->pDecodedSamples = NULL; - switch (pSubframe->subframeType) - { - case DRFLAC_SUBFRAME_CONSTANT: - { - if (!drflac__seek_bits(bs, pSubframe->bitsPerSample)) { - return DRFLAC_FALSE; - } - } break; + /* + From the FLAC spec: + The Rice partition order in a Rice-coded residual section must be less than or equal to 8. + */ + if (partitionOrder > 8) { + return DRFLAC_FALSE; + } - case DRFLAC_SUBFRAME_VERBATIM: - { - unsigned int bitsToSeek = frame->header.blockSize * pSubframe->bitsPerSample; - if (!drflac__seek_bits(bs, bitsToSeek)) { + /* Validation check. */ + if ((blockSize / (1 << partitionOrder)) <= order) { + return DRFLAC_FALSE; + } + + samplesInPartition = (blockSize / (1 << partitionOrder)) - order; + partitionsRemaining = (1 << partitionOrder); + for (;;) { + drflac_uint8 riceParam = 0; + if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE) { + if (!drflac__read_uint8(bs, 4, &riceParam)) { return DRFLAC_FALSE; } - } break; - - case DRFLAC_SUBFRAME_FIXED: - { - unsigned int bitsToSeek = pSubframe->lpcOrder * pSubframe->bitsPerSample; - if (!drflac__seek_bits(bs, bitsToSeek)) { + if (riceParam == 15) { + riceParam = 0xFF; + } + } else if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) { + if (!drflac__read_uint8(bs, 5, &riceParam)) { return DRFLAC_FALSE; } + if (riceParam == 31) { + riceParam = 0xFF; + } + } - if (!drflac__read_and_seek_residual(bs, frame->header.blockSize, pSubframe->lpcOrder)) { + if (riceParam != 0xFF) { + if (!drflac__decode_samples_with_residual__rice(bs, bitsPerSample, samplesInPartition, riceParam, order, shift, coefficients, pDecodedSamples)) { return DRFLAC_FALSE; } - } break; - - case DRFLAC_SUBFRAME_LPC: - { - unsigned int bitsToSeek = pSubframe->lpcOrder * pSubframe->bitsPerSample; - if (!drflac__seek_bits(bs, 
bitsToSeek)) { + } else { + unsigned char unencodedBitsPerSample = 0; + if (!drflac__read_uint8(bs, 5, &unencodedBitsPerSample)) { return DRFLAC_FALSE; } - unsigned char lpcPrecision; - if (!drflac__read_uint8(bs, 4, &lpcPrecision)) { + if (!drflac__decode_samples_with_residual__unencoded(bs, bitsPerSample, samplesInPartition, unencodedBitsPerSample, order, shift, coefficients, pDecodedSamples)) { return DRFLAC_FALSE; } - if (lpcPrecision == 15) { - return DRFLAC_FALSE; // Invalid. - } - lpcPrecision += 1; + } + pDecodedSamples += samplesInPartition; - bitsToSeek = (pSubframe->lpcOrder * lpcPrecision) + 5; // +5 for shift. - if (!drflac__seek_bits(bs, bitsToSeek)) { - return DRFLAC_FALSE; - } + if (partitionsRemaining == 1) { + break; + } - if (!drflac__read_and_seek_residual(bs, frame->header.blockSize, pSubframe->lpcOrder)) { - return DRFLAC_FALSE; - } - } break; + partitionsRemaining -= 1; - default: return DRFLAC_FALSE; + if (partitionOrder != 0) { + samplesInPartition = blockSize / (1 << partitionOrder); + } } return DRFLAC_TRUE; } - -static DRFLAC_INLINE drflac_uint8 drflac__get_channel_count_from_channel_assignment(drflac_int8 channelAssignment) +/* +Reads and seeks past the residual for the sub-frame the decoder is currently sitting on. This function should be called +when the decoder is sitting at the very start of the RESIDUAL block. The first <order> residuals will be set to 0. The +<blockSize> and <order> parameters are used to determine how many residual values need to be decoded. 
+*/ +static drflac_bool32 drflac__read_and_seek_residual(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 order) { - drflac_assert(channelAssignment <= 10); - - drflac_uint8 lookup[] = {1, 2, 3, 4, 5, 6, 7, 8, 2, 2, 2}; - return lookup[channelAssignment]; -} + drflac_uint8 residualMethod; + drflac_uint8 partitionOrder; + drflac_uint32 samplesInPartition; + drflac_uint32 partitionsRemaining; -static drflac_result drflac__decode_frame(drflac* pFlac) -{ - // This function should be called while the stream is sitting on the first byte after the frame header. - drflac_zero_memory(pFlac->currentFrame.subframes, sizeof(pFlac->currentFrame.subframes)); + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(blockSize != 0); - // The frame block size must never be larger than the maximum block size defined by the FLAC stream. - if (pFlac->currentFrame.header.blockSize > pFlac->maxBlockSize) { - return DRFLAC_ERROR; + if (!drflac__read_uint8(bs, 2, &residualMethod)) { + return DRFLAC_FALSE; } - // The number of channels in the frame must match the channel count from the STREAMINFO block. - int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFrame.header.channelAssignment); - if (channelCount != (int)pFlac->channels) { - return DRFLAC_ERROR; + if (residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE && residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) { + return DRFLAC_FALSE; /* Unknown or unsupported residual coding method. 
*/ } - for (int i = 0; i < channelCount; ++i) { - if (!drflac__decode_subframe(&pFlac->bs, &pFlac->currentFrame, i, pFlac->pDecodedSamples + (pFlac->currentFrame.header.blockSize * i))) { - return DRFLAC_ERROR; - } + if (!drflac__read_uint8(bs, 4, &partitionOrder)) { + return DRFLAC_FALSE; } - drflac_uint8 paddingSizeInBits = DRFLAC_CACHE_L1_BITS_REMAINING(&pFlac->bs) & 7; - if (paddingSizeInBits > 0) { - drflac_uint8 padding = 0; - if (!drflac__read_uint8(&pFlac->bs, paddingSizeInBits, &padding)) { - return DRFLAC_END_OF_STREAM; - } + /* + From the FLAC spec: + The Rice partition order in a Rice-coded residual section must be less than or equal to 8. + */ + if (partitionOrder > 8) { + return DRFLAC_FALSE; } -#ifndef DR_FLAC_NO_CRC - drflac_uint16 actualCRC16 = drflac__flush_crc16(&pFlac->bs); -#endif - drflac_uint16 desiredCRC16; - if (!drflac__read_uint16(&pFlac->bs, 16, &desiredCRC16)) { - return DRFLAC_END_OF_STREAM; + /* Validation check. */ + if ((blockSize / (1 << partitionOrder)) <= order) { + return DRFLAC_FALSE; } -#ifndef DR_FLAC_NO_CRC - if (actualCRC16 != desiredCRC16) { - return DRFLAC_CRC_MISMATCH; // CRC mismatch. 
- } -#endif + samplesInPartition = (blockSize / (1 << partitionOrder)) - order; + partitionsRemaining = (1 << partitionOrder); + for (;;) + { + drflac_uint8 riceParam = 0; + if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE) { + if (!drflac__read_uint8(bs, 4, &riceParam)) { + return DRFLAC_FALSE; + } + if (riceParam == 15) { + riceParam = 0xFF; + } + } else if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) { + if (!drflac__read_uint8(bs, 5, &riceParam)) { + return DRFLAC_FALSE; + } + if (riceParam == 31) { + riceParam = 0xFF; + } + } - pFlac->currentFrame.samplesRemaining = pFlac->currentFrame.header.blockSize * channelCount; + if (riceParam != 0xFF) { + if (!drflac__read_and_seek_residual__rice(bs, samplesInPartition, riceParam)) { + return DRFLAC_FALSE; + } + } else { + unsigned char unencodedBitsPerSample = 0; + if (!drflac__read_uint8(bs, 5, &unencodedBitsPerSample)) { + return DRFLAC_FALSE; + } - return DRFLAC_SUCCESS; -} + if (!drflac__seek_bits(bs, unencodedBitsPerSample * samplesInPartition)) { + return DRFLAC_FALSE; + } + } -static drflac_result drflac__seek_frame(drflac* pFlac) -{ - int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFrame.header.channelAssignment); - for (int i = 0; i < channelCount; ++i) { - if (!drflac__seek_subframe(&pFlac->bs, &pFlac->currentFrame, i)) { - return DRFLAC_ERROR; + + if (partitionsRemaining == 1) { + break; } - } - // Padding. - if (!drflac__seek_bits(&pFlac->bs, DRFLAC_CACHE_L1_BITS_REMAINING(&pFlac->bs) & 7)) { - return DRFLAC_ERROR; + partitionsRemaining -= 1; + samplesInPartition = blockSize / (1 << partitionOrder); } - // CRC. 
-#ifndef DR_FLAC_NO_CRC - drflac_uint16 actualCRC16 = drflac__flush_crc16(&pFlac->bs); -#endif - drflac_uint16 desiredCRC16; - if (!drflac__read_uint16(&pFlac->bs, 16, &desiredCRC16)) { - return DRFLAC_END_OF_STREAM; + return DRFLAC_TRUE; +} + + +static drflac_bool32 drflac__decode_samples__constant(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 subframeBitsPerSample, drflac_int32* pDecodedSamples) +{ + drflac_uint32 i; + + /* Only a single sample needs to be decoded here. */ + drflac_int32 sample; + if (!drflac__read_int32(bs, subframeBitsPerSample, &sample)) { + return DRFLAC_FALSE; } -#ifndef DR_FLAC_NO_CRC - if (actualCRC16 != desiredCRC16) { - return DRFLAC_CRC_MISMATCH; // CRC mismatch. + /* + We don't really need to expand this, but it does simplify the process of reading samples. If this becomes a performance issue (unlikely) + we'll want to look at a more efficient way. + */ + for (i = 0; i < blockSize; ++i) { + pDecodedSamples[i] = sample; } -#endif - return DRFLAC_SUCCESS; + return DRFLAC_TRUE; } -static drflac_bool32 drflac__read_and_decode_next_frame(drflac* pFlac) +static drflac_bool32 drflac__decode_samples__verbatim(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 subframeBitsPerSample, drflac_int32* pDecodedSamples) { - drflac_assert(pFlac != NULL); + drflac_uint32 i; - for (;;) { - if (!drflac__read_next_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) { + for (i = 0; i < blockSize; ++i) { + drflac_int32 sample; + if (!drflac__read_int32(bs, subframeBitsPerSample, &sample)) { return DRFLAC_FALSE; } - drflac_result result = drflac__decode_frame(pFlac); - if (result != DRFLAC_SUCCESS) { - if (result == DRFLAC_CRC_MISMATCH) { - continue; // CRC mismatch. Skip to the next frame. 
- } else { - return DRFLAC_FALSE; - } - } - - return DRFLAC_TRUE; + pDecodedSamples[i] = sample; } -} + return DRFLAC_TRUE; +} -static void drflac__get_current_frame_sample_range(drflac* pFlac, drflac_uint64* pFirstSampleInFrameOut, drflac_uint64* pLastSampleInFrameOut) +static drflac_bool32 drflac__decode_samples__fixed(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 subframeBitsPerSample, drflac_uint8 lpcOrder, drflac_int32* pDecodedSamples) { - drflac_assert(pFlac != NULL); + drflac_uint32 i; - unsigned int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFrame.header.channelAssignment); + static drflac_int32 lpcCoefficientsTable[5][4] = { + {0, 0, 0, 0}, + {1, 0, 0, 0}, + {2, -1, 0, 0}, + {3, -3, 1, 0}, + {4, -6, 4, -1} + }; + + /* Warm up samples and coefficients. */ + for (i = 0; i < lpcOrder; ++i) { + drflac_int32 sample; + if (!drflac__read_int32(bs, subframeBitsPerSample, &sample)) { + return DRFLAC_FALSE; + } - drflac_uint64 firstSampleInFrame = pFlac->currentFrame.header.sampleNumber; - if (firstSampleInFrame == 0) { - firstSampleInFrame = pFlac->currentFrame.header.frameNumber * pFlac->maxBlockSize*channelCount; + pDecodedSamples[i] = sample; } - drflac_uint64 lastSampleInFrame = firstSampleInFrame + (pFlac->currentFrame.header.blockSize*channelCount); - if (lastSampleInFrame > 0) { - lastSampleInFrame -= 1; // Needs to be zero based. 
+ if (!drflac__decode_samples_with_residual(bs, subframeBitsPerSample, blockSize, lpcOrder, 0, lpcCoefficientsTable[lpcOrder], pDecodedSamples)) { + return DRFLAC_FALSE; } - if (pFirstSampleInFrameOut) *pFirstSampleInFrameOut = firstSampleInFrame; - if (pLastSampleInFrameOut) *pLastSampleInFrameOut = lastSampleInFrame; + return DRFLAC_TRUE; } -static drflac_bool32 drflac__seek_to_first_frame(drflac* pFlac) +static drflac_bool32 drflac__decode_samples__lpc(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 bitsPerSample, drflac_uint8 lpcOrder, drflac_int32* pDecodedSamples) { - drflac_assert(pFlac != NULL); - - drflac_bool32 result = drflac__seek_to_byte(&pFlac->bs, pFlac->firstFramePos); + drflac_uint8 i; + drflac_uint8 lpcPrecision; + drflac_int8 lpcShift; + drflac_int32 coefficients[32]; - drflac_zero_memory(&pFlac->currentFrame, sizeof(pFlac->currentFrame)); - pFlac->currentSample = 0; + /* Warm up samples. */ + for (i = 0; i < lpcOrder; ++i) { + drflac_int32 sample; + if (!drflac__read_int32(bs, bitsPerSample, &sample)) { + return DRFLAC_FALSE; + } - return result; -} + pDecodedSamples[i] = sample; + } -static DRFLAC_INLINE drflac_result drflac__seek_to_next_frame(drflac* pFlac) -{ - // This function should only ever be called while the decoder is sitting on the first byte past the FRAME_HEADER section. - drflac_assert(pFlac != NULL); - return drflac__seek_frame(pFlac); -} - -static drflac_bool32 drflac__seek_to_sample__brute_force(drflac* pFlac, drflac_uint64 sampleIndex) -{ - drflac_assert(pFlac != NULL); - - drflac_bool32 isMidFrame = DRFLAC_FALSE; - - // If we are seeking forward we start from the current position. Otherwise we need to start all the way from the start of the file. - drflac_uint64 runningSampleCount; - if (sampleIndex >= pFlac->currentSample) { - // Seeking forward. Need to seek from the current position. - runningSampleCount = pFlac->currentSample; - - // The frame header for the first frame may not yet have been read. 
We need to do that if necessary. - if (pFlac->currentSample == 0 && pFlac->currentFrame.samplesRemaining == 0) { - if (!drflac__read_next_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) { - return DRFLAC_FALSE; - } - } else { - isMidFrame = DRFLAC_TRUE; - } - } else { - // Seeking backwards. Need to seek from the start of the file. - runningSampleCount = 0; + if (!drflac__read_uint8(bs, 4, &lpcPrecision)) { + return DRFLAC_FALSE; + } + if (lpcPrecision == 15) { + return DRFLAC_FALSE; /* Invalid. */ + } + lpcPrecision += 1; - // Move back to the start. - if (!drflac__seek_to_first_frame(pFlac)) { - return DRFLAC_FALSE; - } + if (!drflac__read_int8(bs, 5, &lpcShift)) { + return DRFLAC_FALSE; + } - // Decode the first frame in preparation for sample-exact seeking below. - if (!drflac__read_next_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) { + DRFLAC_ZERO_MEMORY(coefficients, sizeof(coefficients)); + for (i = 0; i < lpcOrder; ++i) { + if (!drflac__read_int32(bs, lpcPrecision, coefficients + i)) { return DRFLAC_FALSE; } } - // We need to as quickly as possible find the frame that contains the target sample. To do this, we iterate over each frame and inspect its - // header. If based on the header we can determine that the frame contains the sample, we do a full decode of that frame. - for (;;) { - drflac_uint64 firstSampleInFrame = 0; - drflac_uint64 lastSampleInFrame = 0; - drflac__get_current_frame_sample_range(pFlac, &firstSampleInFrame, &lastSampleInFrame); - - drflac_uint64 sampleCountInThisFrame = (lastSampleInFrame - firstSampleInFrame) + 1; - if (sampleIndex < (runningSampleCount + sampleCountInThisFrame)) { - // The sample should be in this frame. We need to fully decode it, however if it's an invalid frame (a CRC mismatch), we need to pretend - // it never existed and keep iterating. 
- drflac_uint64 samplesToDecode = sampleIndex - runningSampleCount; - - if (!isMidFrame) { - drflac_result result = drflac__decode_frame(pFlac); - if (result == DRFLAC_SUCCESS) { - // The frame is valid. We just need to skip over some samples to ensure it's sample-exact. - return drflac_read_s32(pFlac, samplesToDecode, NULL) == samplesToDecode; // <-- If this fails, something bad has happened (it should never fail). - } else { - if (result == DRFLAC_CRC_MISMATCH) { - goto next_iteration; // CRC mismatch. Pretend this frame never existed. - } else { - return DRFLAC_FALSE; - } - } - } else { - // We started seeking mid-frame which means we need to skip the frame decoding part. - return drflac_read_s32(pFlac, samplesToDecode, NULL) == samplesToDecode; - } - } else { - // It's not in this frame. We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this - // frame never existed and leave the running sample count untouched. - if (!isMidFrame) { - drflac_result result = drflac__seek_to_next_frame(pFlac); - if (result == DRFLAC_SUCCESS) { - runningSampleCount += sampleCountInThisFrame; - } else { - if (result == DRFLAC_CRC_MISMATCH) { - goto next_iteration; // CRC mismatch. Pretend this frame never existed. - } else { - return DRFLAC_FALSE; - } - } - } else { - // We started seeking mid-frame which means we need to seek by reading to the end of the frame instead of with - // drflac__seek_to_next_frame() which only works if the decoder is sitting on the byte just after the frame header. - runningSampleCount += pFlac->currentFrame.samplesRemaining; - pFlac->currentFrame.samplesRemaining = 0; - isMidFrame = DRFLAC_FALSE; - } - } - - next_iteration: - // Grab the next frame in preparation for the next iteration. 
- if (!drflac__read_next_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) { - return DRFLAC_FALSE; - } + if (!drflac__decode_samples_with_residual(bs, bitsPerSample, blockSize, lpcOrder, lpcShift, coefficients, pDecodedSamples)) { + return DRFLAC_FALSE; } + + return DRFLAC_TRUE; } -static drflac_bool32 drflac__seek_to_sample__seek_table(drflac* pFlac, drflac_uint64 sampleIndex) +static drflac_bool32 drflac__read_next_flac_frame_header(drflac_bs* bs, drflac_uint8 streaminfoBitsPerSample, drflac_frame_header* header) { - drflac_assert(pFlac != NULL); + const drflac_uint32 sampleRateTable[12] = {0, 88200, 176400, 192000, 8000, 16000, 22050, 24000, 32000, 44100, 48000, 96000}; + const drflac_uint8 bitsPerSampleTable[8] = {0, 8, 12, (drflac_uint8)-1, 16, 20, 24, (drflac_uint8)-1}; /* -1 = reserved. */ - if (pFlac->pSeekpoints == NULL || pFlac->seekpointCount == 0) { - return DRFLAC_FALSE; - } + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(header != NULL); + /* Keep looping until we find a valid sync code. 
*/ + for (;;) { + drflac_uint8 crc8 = 0xCE; /* 0xCE = drflac_crc8(0, 0x3FFE, 14); */ + drflac_uint8 reserved = 0; + drflac_uint8 blockingStrategy = 0; + drflac_uint8 blockSize = 0; + drflac_uint8 sampleRate = 0; + drflac_uint8 channelAssignment = 0; + drflac_uint8 bitsPerSample = 0; + drflac_bool32 isVariableBlockSize; - drflac_uint32 iClosestSeekpoint = 0; - for (drflac_uint32 iSeekpoint = 0; iSeekpoint < pFlac->seekpointCount; ++iSeekpoint) { - if (pFlac->pSeekpoints[iSeekpoint].firstSample*pFlac->channels >= sampleIndex) { - break; + if (!drflac__find_and_seek_to_next_sync_code(bs)) { + return DRFLAC_FALSE; } - iClosestSeekpoint = iSeekpoint; - } - + if (!drflac__read_uint8(bs, 1, &reserved)) { + return DRFLAC_FALSE; + } + if (reserved == 1) { + continue; + } + crc8 = drflac_crc8(crc8, reserved, 1); - drflac_bool32 isMidFrame = DRFLAC_FALSE; + if (!drflac__read_uint8(bs, 1, &blockingStrategy)) { + return DRFLAC_FALSE; + } + crc8 = drflac_crc8(crc8, blockingStrategy, 1); - // At this point we should have found the seekpoint closest to our sample. If we are seeking forward and the closest seekpoint is _before_ the current sample, we - // just seek forward from where we are. Otherwise we start seeking from the seekpoint's first sample. - drflac_uint64 runningSampleCount; - if ((sampleIndex >= pFlac->currentSample) && (pFlac->pSeekpoints[iClosestSeekpoint].firstSample*pFlac->channels <= pFlac->currentSample)) { - // Optimized case. Just seek forward from where we are. - runningSampleCount = pFlac->currentSample; + if (!drflac__read_uint8(bs, 4, &blockSize)) { + return DRFLAC_FALSE; + } + if (blockSize == 0) { + continue; + } + crc8 = drflac_crc8(crc8, blockSize, 4); - // The frame header for the first frame may not yet have been read. We need to do that if necessary. 
- if (pFlac->currentSample == 0 && pFlac->currentFrame.samplesRemaining == 0) { - if (!drflac__read_next_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) { - return DRFLAC_FALSE; - } - } else { - isMidFrame = DRFLAC_TRUE; + if (!drflac__read_uint8(bs, 4, &sampleRate)) { + return DRFLAC_FALSE; } - } else { - // Slower case. Seek to the start of the seekpoint and then seek forward from there. - runningSampleCount = pFlac->pSeekpoints[iClosestSeekpoint].firstSample*pFlac->channels; + crc8 = drflac_crc8(crc8, sampleRate, 4); - if (!drflac__seek_to_byte(&pFlac->bs, pFlac->firstFramePos + pFlac->pSeekpoints[iClosestSeekpoint].frameOffset)) { + if (!drflac__read_uint8(bs, 4, &channelAssignment)) { return DRFLAC_FALSE; } + if (channelAssignment > 10) { + continue; + } + crc8 = drflac_crc8(crc8, channelAssignment, 4); - // Grab the frame the seekpoint is sitting on in preparation for the sample-exact seeking below. - if (!drflac__read_next_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) { + if (!drflac__read_uint8(bs, 3, &bitsPerSample)) { return DRFLAC_FALSE; } - } + if (bitsPerSample == 3 || bitsPerSample == 7) { + continue; + } + crc8 = drflac_crc8(crc8, bitsPerSample, 3); - for (;;) { - drflac_uint64 firstSampleInFrame = 0; - drflac_uint64 lastSampleInFrame = 0; - drflac__get_current_frame_sample_range(pFlac, &firstSampleInFrame, &lastSampleInFrame); - drflac_uint64 sampleCountInThisFrame = (lastSampleInFrame - firstSampleInFrame) + 1; - if (sampleIndex < (runningSampleCount + sampleCountInThisFrame)) { - // The sample should be in this frame. We need to fully decode it, but if it's an invalid frame (a CRC mismatch) we need to pretend - // it never existed and keep iterating. 
- drflac_uint64 samplesToDecode = sampleIndex - runningSampleCount; + if (!drflac__read_uint8(bs, 1, &reserved)) { + return DRFLAC_FALSE; + } + if (reserved == 1) { + continue; + } + crc8 = drflac_crc8(crc8, reserved, 1); + - if (!isMidFrame) { - drflac_result result = drflac__decode_frame(pFlac); - if (result == DRFLAC_SUCCESS) { - // The frame is valid. We just need to skip over some samples to ensure it's sample-exact. - return drflac_read_s32(pFlac, samplesToDecode, NULL) == samplesToDecode; // <-- If this fails, something bad has happened (it should never fail). + isVariableBlockSize = blockingStrategy == 1; + if (isVariableBlockSize) { + drflac_uint64 pcmFrameNumber; + drflac_result result = drflac__read_utf8_coded_number(bs, &pcmFrameNumber, &crc8); + if (result != DRFLAC_SUCCESS) { + if (result == DRFLAC_END_OF_STREAM) { + return DRFLAC_FALSE; } else { - if (result == DRFLAC_CRC_MISMATCH) { - goto next_iteration; // CRC mismatch. Pretend this frame never existed. - } else { - return DRFLAC_FALSE; - } + continue; } - } else { - // We started seeking mid-frame which means we need to skip the frame decoding part. - return drflac_read_s32(pFlac, samplesToDecode, NULL) == samplesToDecode; } + header->flacFrameNumber = 0; + header->pcmFrameNumber = pcmFrameNumber; } else { - // It's not in this frame. We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this - // frame never existed and leave the running sample count untouched. - if (!isMidFrame) { - drflac_result result = drflac__seek_to_next_frame(pFlac); - if (result == DRFLAC_SUCCESS) { - runningSampleCount += sampleCountInThisFrame; + drflac_uint64 flacFrameNumber = 0; + drflac_result result = drflac__read_utf8_coded_number(bs, &flacFrameNumber, &crc8); + if (result != DRFLAC_SUCCESS) { + if (result == DRFLAC_END_OF_STREAM) { + return DRFLAC_FALSE; } else { - if (result == DRFLAC_CRC_MISMATCH) { - goto next_iteration; // CRC mismatch. Pretend this frame never existed. 
- } else { - return DRFLAC_FALSE; - } + continue; } - } else { - // We started seeking mid-frame which means we need to seek by reading to the end of the frame instead of with - // drflac__seek_to_next_frame() which only works if the decoder is sitting on the byte just after the frame header. - runningSampleCount += pFlac->currentFrame.samplesRemaining; - pFlac->currentFrame.samplesRemaining = 0; - isMidFrame = DRFLAC_FALSE; } + header->flacFrameNumber = (drflac_uint32)flacFrameNumber; /* <-- Safe cast. */ + header->pcmFrameNumber = 0; } - next_iteration: - // Grab the next frame in preparation for the next iteration. - if (!drflac__read_next_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) { - return DRFLAC_FALSE; + + if (blockSize == 1) { + header->blockSizeInPCMFrames = 192; + } else if (blockSize >= 2 && blockSize <= 5) { + header->blockSizeInPCMFrames = 576 * (1 << (blockSize - 2)); + } else if (blockSize == 6) { + if (!drflac__read_uint16(bs, 8, &header->blockSizeInPCMFrames)) { + return DRFLAC_FALSE; + } + crc8 = drflac_crc8(crc8, header->blockSizeInPCMFrames, 8); + header->blockSizeInPCMFrames += 1; + } else if (blockSize == 7) { + if (!drflac__read_uint16(bs, 16, &header->blockSizeInPCMFrames)) { + return DRFLAC_FALSE; + } + crc8 = drflac_crc8(crc8, header->blockSizeInPCMFrames, 16); + header->blockSizeInPCMFrames += 1; + } else { + header->blockSizeInPCMFrames = 256 * (1 << (blockSize - 8)); } - } -} -#ifndef DR_FLAC_NO_OGG -typedef struct -{ - drflac_uint8 capturePattern[4]; // Should be "OggS" - drflac_uint8 structureVersion; // Always 0. 
- drflac_uint8 headerType; - drflac_uint64 granulePosition; - drflac_uint32 serialNumber; - drflac_uint32 sequenceNumber; - drflac_uint32 checksum; - drflac_uint8 segmentCount; - drflac_uint8 segmentTable[255]; -} drflac_ogg_page_header; -#endif + if (sampleRate <= 11) { + header->sampleRate = sampleRateTable[sampleRate]; + } else if (sampleRate == 12) { + if (!drflac__read_uint32(bs, 8, &header->sampleRate)) { + return DRFLAC_FALSE; + } + crc8 = drflac_crc8(crc8, header->sampleRate, 8); + header->sampleRate *= 1000; + } else if (sampleRate == 13) { + if (!drflac__read_uint32(bs, 16, &header->sampleRate)) { + return DRFLAC_FALSE; + } + crc8 = drflac_crc8(crc8, header->sampleRate, 16); + } else if (sampleRate == 14) { + if (!drflac__read_uint32(bs, 16, &header->sampleRate)) { + return DRFLAC_FALSE; + } + crc8 = drflac_crc8(crc8, header->sampleRate, 16); + header->sampleRate *= 10; + } else { + continue; /* Invalid. Assume an invalid block. */ + } -typedef struct -{ - drflac_read_proc onRead; - drflac_seek_proc onSeek; - drflac_meta_proc onMeta; - drflac_container container; - void* pUserData; - void* pUserDataMD; - drflac_uint32 sampleRate; - drflac_uint8 channels; - drflac_uint8 bitsPerSample; - drflac_uint64 totalSampleCount; - drflac_uint16 maxBlockSize; - drflac_uint64 runningFilePos; - drflac_bool32 hasStreamInfoBlock; - drflac_bool32 hasMetadataBlocks; - drflac_bs bs; // <-- A bit streamer is required for loading data during initialization. - drflac_frame_header firstFrameHeader; // <-- The header of the first frame that was read during relaxed initalization. Only set if there is no STREAMINFO block. 
-#ifndef DR_FLAC_NO_OGG - drflac_uint32 oggSerial; - drflac_uint64 oggFirstBytePos; - drflac_ogg_page_header oggBosHeader; -#endif -} drflac_init_info; + header->channelAssignment = channelAssignment; -static DRFLAC_INLINE void drflac__decode_block_header(drflac_uint32 blockHeader, drflac_uint8* isLastBlock, drflac_uint8* blockType, drflac_uint32* blockSize) -{ - blockHeader = drflac__be2host_32(blockHeader); - *isLastBlock = (blockHeader & (0x01 << 31)) >> 31; - *blockType = (blockHeader & (0x7F << 24)) >> 24; - *blockSize = (blockHeader & 0xFFFFFF); -} + header->bitsPerSample = bitsPerSampleTable[bitsPerSample]; + if (header->bitsPerSample == 0) { + header->bitsPerSample = streaminfoBitsPerSample; + } -static DRFLAC_INLINE drflac_bool32 drflac__read_and_decode_block_header(drflac_read_proc onRead, void* pUserData, drflac_uint8* isLastBlock, drflac_uint8* blockType, drflac_uint32* blockSize) -{ - drflac_uint32 blockHeader; - if (onRead(pUserData, &blockHeader, 4) != 4) { - return DRFLAC_FALSE; - } + if (!drflac__read_uint8(bs, 8, &header->crc8)) { + return DRFLAC_FALSE; + } - drflac__decode_block_header(blockHeader, isLastBlock, blockType, blockSize); - return DRFLAC_TRUE; +#ifndef DR_FLAC_NO_CRC + if (header->crc8 != crc8) { + continue; /* CRC mismatch. Loop back to the top and find the next sync code. */ + } +#endif + return DRFLAC_TRUE; + } } -drflac_bool32 drflac__read_streaminfo(drflac_read_proc onRead, void* pUserData, drflac_streaminfo* pStreamInfo) +static drflac_bool32 drflac__read_subframe_header(drflac_bs* bs, drflac_subframe* pSubframe) { - // min/max block size. - drflac_uint32 blockSizes; - if (onRead(pUserData, &blockSizes, 4) != 4) { - return DRFLAC_FALSE; - } + drflac_uint8 header; + int type; - // min/max frame size. - drflac_uint64 frameSizes = 0; - if (onRead(pUserData, &frameSizes, 6) != 6) { + if (!drflac__read_uint8(bs, 8, &header)) { return DRFLAC_FALSE; } - // Sample rate, channels, bits per sample and total sample count. 
- drflac_uint64 importantProps; - if (onRead(pUserData, &importantProps, 8) != 8) { + /* First bit should always be 0. */ + if ((header & 0x80) != 0) { return DRFLAC_FALSE; } - // MD5 - drflac_uint8 md5[16]; + type = (header & 0x7E) >> 1; + if (type == 0) { + pSubframe->subframeType = DRFLAC_SUBFRAME_CONSTANT; + } else if (type == 1) { + pSubframe->subframeType = DRFLAC_SUBFRAME_VERBATIM; + } else { + if ((type & 0x20) != 0) { + pSubframe->subframeType = DRFLAC_SUBFRAME_LPC; + pSubframe->lpcOrder = (type & 0x1F) + 1; + } else if ((type & 0x08) != 0) { + pSubframe->subframeType = DRFLAC_SUBFRAME_FIXED; + pSubframe->lpcOrder = (type & 0x07); + if (pSubframe->lpcOrder > 4) { + pSubframe->subframeType = DRFLAC_SUBFRAME_RESERVED; + pSubframe->lpcOrder = 0; + } + } else { + pSubframe->subframeType = DRFLAC_SUBFRAME_RESERVED; + } + } + + if (pSubframe->subframeType == DRFLAC_SUBFRAME_RESERVED) { + return DRFLAC_FALSE; + } + + /* Wasted bits per sample. */ + pSubframe->wastedBitsPerSample = 0; + if ((header & 0x01) == 1) { + unsigned int wastedBitsPerSample; + if (!drflac__seek_past_next_set_bit(bs, &wastedBitsPerSample)) { + return DRFLAC_FALSE; + } + pSubframe->wastedBitsPerSample = (unsigned char)wastedBitsPerSample + 1; + } + + return DRFLAC_TRUE; +} + +static drflac_bool32 drflac__decode_subframe(drflac_bs* bs, drflac_frame* frame, int subframeIndex, drflac_int32* pDecodedSamplesOut) +{ + drflac_subframe* pSubframe; + drflac_uint32 subframeBitsPerSample; + + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(frame != NULL); + + pSubframe = frame->subframes + subframeIndex; + if (!drflac__read_subframe_header(bs, pSubframe)) { + return DRFLAC_FALSE; + } + + /* Side channels require an extra bit per sample. Took a while to figure that one out... 
*/ + subframeBitsPerSample = frame->header.bitsPerSample; + if ((frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE || frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE) && subframeIndex == 1) { + subframeBitsPerSample += 1; + } else if (frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE && subframeIndex == 0) { + subframeBitsPerSample += 1; + } + + /* Need to handle wasted bits per sample. */ + if (pSubframe->wastedBitsPerSample >= subframeBitsPerSample) { + return DRFLAC_FALSE; + } + subframeBitsPerSample -= pSubframe->wastedBitsPerSample; + + pSubframe->pSamplesS32 = pDecodedSamplesOut; + + switch (pSubframe->subframeType) + { + case DRFLAC_SUBFRAME_CONSTANT: + { + drflac__decode_samples__constant(bs, frame->header.blockSizeInPCMFrames, subframeBitsPerSample, pSubframe->pSamplesS32); + } break; + + case DRFLAC_SUBFRAME_VERBATIM: + { + drflac__decode_samples__verbatim(bs, frame->header.blockSizeInPCMFrames, subframeBitsPerSample, pSubframe->pSamplesS32); + } break; + + case DRFLAC_SUBFRAME_FIXED: + { + drflac__decode_samples__fixed(bs, frame->header.blockSizeInPCMFrames, subframeBitsPerSample, pSubframe->lpcOrder, pSubframe->pSamplesS32); + } break; + + case DRFLAC_SUBFRAME_LPC: + { + drflac__decode_samples__lpc(bs, frame->header.blockSizeInPCMFrames, subframeBitsPerSample, pSubframe->lpcOrder, pSubframe->pSamplesS32); + } break; + + default: return DRFLAC_FALSE; + } + + return DRFLAC_TRUE; +} + +static drflac_bool32 drflac__seek_subframe(drflac_bs* bs, drflac_frame* frame, int subframeIndex) +{ + drflac_subframe* pSubframe; + drflac_uint32 subframeBitsPerSample; + + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(frame != NULL); + + pSubframe = frame->subframes + subframeIndex; + if (!drflac__read_subframe_header(bs, pSubframe)) { + return DRFLAC_FALSE; + } + + /* Side channels require an extra bit per sample. Took a while to figure that one out... 
*/ + subframeBitsPerSample = frame->header.bitsPerSample; + if ((frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE || frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE) && subframeIndex == 1) { + subframeBitsPerSample += 1; + } else if (frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE && subframeIndex == 0) { + subframeBitsPerSample += 1; + } + + /* Need to handle wasted bits per sample. */ + if (pSubframe->wastedBitsPerSample >= subframeBitsPerSample) { + return DRFLAC_FALSE; + } + subframeBitsPerSample -= pSubframe->wastedBitsPerSample; + + pSubframe->pSamplesS32 = NULL; + + switch (pSubframe->subframeType) + { + case DRFLAC_SUBFRAME_CONSTANT: + { + if (!drflac__seek_bits(bs, subframeBitsPerSample)) { + return DRFLAC_FALSE; + } + } break; + + case DRFLAC_SUBFRAME_VERBATIM: + { + unsigned int bitsToSeek = frame->header.blockSizeInPCMFrames * subframeBitsPerSample; + if (!drflac__seek_bits(bs, bitsToSeek)) { + return DRFLAC_FALSE; + } + } break; + + case DRFLAC_SUBFRAME_FIXED: + { + unsigned int bitsToSeek = pSubframe->lpcOrder * subframeBitsPerSample; + if (!drflac__seek_bits(bs, bitsToSeek)) { + return DRFLAC_FALSE; + } + + if (!drflac__read_and_seek_residual(bs, frame->header.blockSizeInPCMFrames, pSubframe->lpcOrder)) { + return DRFLAC_FALSE; + } + } break; + + case DRFLAC_SUBFRAME_LPC: + { + unsigned char lpcPrecision; + + unsigned int bitsToSeek = pSubframe->lpcOrder * subframeBitsPerSample; + if (!drflac__seek_bits(bs, bitsToSeek)) { + return DRFLAC_FALSE; + } + + if (!drflac__read_uint8(bs, 4, &lpcPrecision)) { + return DRFLAC_FALSE; + } + if (lpcPrecision == 15) { + return DRFLAC_FALSE; /* Invalid. */ + } + lpcPrecision += 1; + + + bitsToSeek = (pSubframe->lpcOrder * lpcPrecision) + 5; /* +5 for shift. 
*/ + if (!drflac__seek_bits(bs, bitsToSeek)) { + return DRFLAC_FALSE; + } + + if (!drflac__read_and_seek_residual(bs, frame->header.blockSizeInPCMFrames, pSubframe->lpcOrder)) { + return DRFLAC_FALSE; + } + } break; + + default: return DRFLAC_FALSE; + } + + return DRFLAC_TRUE; +} + + +static DRFLAC_INLINE drflac_uint8 drflac__get_channel_count_from_channel_assignment(drflac_int8 channelAssignment) +{ + drflac_uint8 lookup[] = {1, 2, 3, 4, 5, 6, 7, 8, 2, 2, 2}; + + DRFLAC_ASSERT(channelAssignment <= 10); + return lookup[channelAssignment]; +} + +static drflac_result drflac__decode_flac_frame(drflac* pFlac) +{ + int channelCount; + int i; + drflac_uint8 paddingSizeInBits; + drflac_uint16 desiredCRC16; +#ifndef DR_FLAC_NO_CRC + drflac_uint16 actualCRC16; +#endif + + /* This function should be called while the stream is sitting on the first byte after the frame header. */ + DRFLAC_ZERO_MEMORY(pFlac->currentFLACFrame.subframes, sizeof(pFlac->currentFLACFrame.subframes)); + + /* The frame block size must never be larger than the maximum block size defined by the FLAC stream. */ + if (pFlac->currentFLACFrame.header.blockSizeInPCMFrames > pFlac->maxBlockSizeInPCMFrames) { + return DRFLAC_ERROR; + } + + /* The number of channels in the frame must match the channel count from the STREAMINFO block. 
*/ + channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment); + if (channelCount != (int)pFlac->channels) { + return DRFLAC_ERROR; + } + + for (i = 0; i < channelCount; ++i) { + if (!drflac__decode_subframe(&pFlac->bs, &pFlac->currentFLACFrame, i, pFlac->pDecodedSamples + (pFlac->currentFLACFrame.header.blockSizeInPCMFrames * i))) { + return DRFLAC_ERROR; + } + } + + paddingSizeInBits = DRFLAC_CACHE_L1_BITS_REMAINING(&pFlac->bs) & 7; + if (paddingSizeInBits > 0) { + drflac_uint8 padding = 0; + if (!drflac__read_uint8(&pFlac->bs, paddingSizeInBits, &padding)) { + return DRFLAC_END_OF_STREAM; + } + } + +#ifndef DR_FLAC_NO_CRC + actualCRC16 = drflac__flush_crc16(&pFlac->bs); +#endif + if (!drflac__read_uint16(&pFlac->bs, 16, &desiredCRC16)) { + return DRFLAC_END_OF_STREAM; + } + +#ifndef DR_FLAC_NO_CRC + if (actualCRC16 != desiredCRC16) { + return DRFLAC_CRC_MISMATCH; /* CRC mismatch. */ + } +#endif + + pFlac->currentFLACFrame.pcmFramesRemaining = pFlac->currentFLACFrame.header.blockSizeInPCMFrames; + + return DRFLAC_SUCCESS; +} + +static drflac_result drflac__seek_flac_frame(drflac* pFlac) +{ + int channelCount; + int i; + drflac_uint16 desiredCRC16; +#ifndef DR_FLAC_NO_CRC + drflac_uint16 actualCRC16; +#endif + + channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment); + for (i = 0; i < channelCount; ++i) { + if (!drflac__seek_subframe(&pFlac->bs, &pFlac->currentFLACFrame, i)) { + return DRFLAC_ERROR; + } + } + + /* Padding. */ + if (!drflac__seek_bits(&pFlac->bs, DRFLAC_CACHE_L1_BITS_REMAINING(&pFlac->bs) & 7)) { + return DRFLAC_ERROR; + } + + /* CRC. */ +#ifndef DR_FLAC_NO_CRC + actualCRC16 = drflac__flush_crc16(&pFlac->bs); +#endif + if (!drflac__read_uint16(&pFlac->bs, 16, &desiredCRC16)) { + return DRFLAC_END_OF_STREAM; + } + +#ifndef DR_FLAC_NO_CRC + if (actualCRC16 != desiredCRC16) { + return DRFLAC_CRC_MISMATCH; /* CRC mismatch. 
*/ + } +#endif + + return DRFLAC_SUCCESS; +} + +static drflac_bool32 drflac__read_and_decode_next_flac_frame(drflac* pFlac) +{ + DRFLAC_ASSERT(pFlac != NULL); + + for (;;) { + drflac_result result; + + if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) { + return DRFLAC_FALSE; + } + + result = drflac__decode_flac_frame(pFlac); + if (result != DRFLAC_SUCCESS) { + if (result == DRFLAC_CRC_MISMATCH) { + continue; /* CRC mismatch. Skip to the next frame. */ + } else { + return DRFLAC_FALSE; + } + } + + return DRFLAC_TRUE; + } +} + +static void drflac__get_pcm_frame_range_of_current_flac_frame(drflac* pFlac, drflac_uint64* pFirstPCMFrame, drflac_uint64* pLastPCMFrame) +{ + drflac_uint64 firstPCMFrame; + drflac_uint64 lastPCMFrame; + + DRFLAC_ASSERT(pFlac != NULL); + + firstPCMFrame = pFlac->currentFLACFrame.header.pcmFrameNumber; + if (firstPCMFrame == 0) { + firstPCMFrame = pFlac->currentFLACFrame.header.flacFrameNumber * pFlac->maxBlockSizeInPCMFrames; + } + + lastPCMFrame = firstPCMFrame + (pFlac->currentFLACFrame.header.blockSizeInPCMFrames); + if (lastPCMFrame > 0) { + lastPCMFrame -= 1; /* Needs to be zero based. */ + } + + if (pFirstPCMFrame) { + *pFirstPCMFrame = firstPCMFrame; + } + if (pLastPCMFrame) { + *pLastPCMFrame = lastPCMFrame; + } +} + +static drflac_bool32 drflac__seek_to_first_frame(drflac* pFlac) +{ + drflac_bool32 result; + + DRFLAC_ASSERT(pFlac != NULL); + + result = drflac__seek_to_byte(&pFlac->bs, pFlac->firstFLACFramePosInBytes); + + DRFLAC_ZERO_MEMORY(&pFlac->currentFLACFrame, sizeof(pFlac->currentFLACFrame)); + pFlac->currentPCMFrame = 0; + + return result; +} + +static DRFLAC_INLINE drflac_result drflac__seek_to_next_flac_frame(drflac* pFlac) +{ + /* This function should only ever be called while the decoder is sitting on the first byte past the FRAME_HEADER section. 
*/ + DRFLAC_ASSERT(pFlac != NULL); + return drflac__seek_flac_frame(pFlac); +} + + +drflac_uint64 drflac__seek_forward_by_pcm_frames(drflac* pFlac, drflac_uint64 pcmFramesToSeek) +{ + drflac_uint64 pcmFramesRead = 0; + while (pcmFramesToSeek > 0) { + if (pFlac->currentFLACFrame.pcmFramesRemaining == 0) { + if (!drflac__read_and_decode_next_flac_frame(pFlac)) { + break; /* Couldn't read the next frame, so just break from the loop and return. */ + } + } else { + if (pFlac->currentFLACFrame.pcmFramesRemaining > pcmFramesToSeek) { + pcmFramesRead += pcmFramesToSeek; + pFlac->currentFLACFrame.pcmFramesRemaining -= (drflac_uint32)pcmFramesToSeek; /* <-- Safe cast. Will always be < currentFrame.pcmFramesRemaining < 65536. */ + pcmFramesToSeek = 0; + } else { + pcmFramesRead += pFlac->currentFLACFrame.pcmFramesRemaining; + pcmFramesToSeek -= pFlac->currentFLACFrame.pcmFramesRemaining; + pFlac->currentFLACFrame.pcmFramesRemaining = 0; + } + } + } + + pFlac->currentPCMFrame += pcmFramesRead; + return pcmFramesRead; +} + + +static drflac_bool32 drflac__seek_to_pcm_frame__brute_force(drflac* pFlac, drflac_uint64 pcmFrameIndex) +{ + drflac_bool32 isMidFrame = DRFLAC_FALSE; + drflac_uint64 runningPCMFrameCount; + + DRFLAC_ASSERT(pFlac != NULL); + + /* If we are seeking forward we start from the current position. Otherwise we need to start all the way from the start of the file. */ + if (pcmFrameIndex >= pFlac->currentPCMFrame) { + /* Seeking forward. Need to seek from the current position. */ + runningPCMFrameCount = pFlac->currentPCMFrame; + + /* The frame header for the first frame may not yet have been read. We need to do that if necessary. */ + if (pFlac->currentPCMFrame == 0 && pFlac->currentFLACFrame.pcmFramesRemaining == 0) { + if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) { + return DRFLAC_FALSE; + } + } else { + isMidFrame = DRFLAC_TRUE; + } + } else { + /* Seeking backwards. 
Need to seek from the start of the file. */ + runningPCMFrameCount = 0; + + /* Move back to the start. */ + if (!drflac__seek_to_first_frame(pFlac)) { + return DRFLAC_FALSE; + } + + /* Decode the first frame in preparation for sample-exact seeking below. */ + if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) { + return DRFLAC_FALSE; + } + } + + /* + We need to as quickly as possible find the frame that contains the target sample. To do this, we iterate over each frame and inspect its + header. If based on the header we can determine that the frame contains the sample, we do a full decode of that frame. + */ + for (;;) { + drflac_uint64 pcmFrameCountInThisFLACFrame; + drflac_uint64 firstPCMFrameInFLACFrame = 0; + drflac_uint64 lastPCMFrameInFLACFrame = 0; + + drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &firstPCMFrameInFLACFrame, &lastPCMFrameInFLACFrame); + + pcmFrameCountInThisFLACFrame = (lastPCMFrameInFLACFrame - firstPCMFrameInFLACFrame) + 1; + if (pcmFrameIndex < (runningPCMFrameCount + pcmFrameCountInThisFLACFrame)) { + /* + The sample should be in this frame. We need to fully decode it, however if it's an invalid frame (a CRC mismatch), we need to pretend + it never existed and keep iterating. + */ + drflac_uint64 pcmFramesToDecode = pcmFrameIndex - runningPCMFrameCount; + + if (!isMidFrame) { + drflac_result result = drflac__decode_flac_frame(pFlac); + if (result == DRFLAC_SUCCESS) { + /* The frame is valid. We just need to skip over some samples to ensure it's sample-exact. */ + return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode; /* <-- If this fails, something bad has happened (it should never fail). */ + } else { + if (result == DRFLAC_CRC_MISMATCH) { + goto next_iteration; /* CRC mismatch. Pretend this frame never existed. 
*/ + } else { + return DRFLAC_FALSE; + } + } + } else { + /* We started seeking mid-frame which means we need to skip the frame decoding part. */ + return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode; + } + } else { + /* + It's not in this frame. We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this + frame never existed and leave the running sample count untouched. + */ + if (!isMidFrame) { + drflac_result result = drflac__seek_to_next_flac_frame(pFlac); + if (result == DRFLAC_SUCCESS) { + runningPCMFrameCount += pcmFrameCountInThisFLACFrame; + } else { + if (result == DRFLAC_CRC_MISMATCH) { + goto next_iteration; /* CRC mismatch. Pretend this frame never existed. */ + } else { + return DRFLAC_FALSE; + } + } + } else { + /* + We started seeking mid-frame which means we need to seek by reading to the end of the frame instead of with + drflac__seek_to_next_flac_frame() which only works if the decoder is sitting on the byte just after the frame header. + */ + runningPCMFrameCount += pFlac->currentFLACFrame.pcmFramesRemaining; + pFlac->currentFLACFrame.pcmFramesRemaining = 0; + isMidFrame = DRFLAC_FALSE; + } + + /* If we are seeking to the end of the file and we've just hit it, we're done. */ + if (pcmFrameIndex == pFlac->totalPCMFrameCount && runningPCMFrameCount == pFlac->totalPCMFrameCount) { + return DRFLAC_TRUE; + } + } + + next_iteration: + /* Grab the next frame in preparation for the next iteration. */ + if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) { + return DRFLAC_FALSE; + } + } +} + + +#if !defined(DR_FLAC_NO_CRC) +/* +We use an average compression ratio to determine our approximate start location. FLAC files are generally about 50%-70% the size of their +uncompressed counterparts so we'll use this as a basis. I'm going to split the middle and use a factor of 0.6 to determine the starting +location. 
+*/ +#define DRFLAC_BINARY_SEARCH_APPROX_COMPRESSION_RATIO 0.6f + +static drflac_bool32 drflac__seek_to_approximate_flac_frame_to_byte(drflac* pFlac, drflac_uint64 targetByte, drflac_uint64 rangeLo, drflac_uint64 rangeHi, drflac_uint64* pLastSuccessfulSeekOffset) +{ + DRFLAC_ASSERT(pFlac != NULL); + DRFLAC_ASSERT(pLastSuccessfulSeekOffset != NULL); + DRFLAC_ASSERT(targetByte >= rangeLo); + DRFLAC_ASSERT(targetByte <= rangeHi); + + *pLastSuccessfulSeekOffset = pFlac->firstFLACFramePosInBytes; + + for (;;) { + /* When seeking to a byte, failure probably means we've attempted to seek beyond the end of the stream. To counter this we just halve it each attempt. */ + if (!drflac__seek_to_byte(&pFlac->bs, targetByte)) { + /* If we couldn't even seek to the first byte in the stream we have a problem. Just abandon the whole thing. */ + if (targetByte == 0) { + drflac__seek_to_first_frame(pFlac); /* Try to recover. */ + return DRFLAC_FALSE; + } + + /* Halve the byte location and continue. */ + targetByte = rangeLo + ((rangeHi - rangeLo)/2); + rangeHi = targetByte; + } else { + /* Getting here should mean that we have seeked to an appropriate byte. */ + + /* Clear the details of the FLAC frame so we don't misreport data. */ + DRFLAC_ZERO_MEMORY(&pFlac->currentFLACFrame, sizeof(pFlac->currentFLACFrame)); + + /* + Now seek to the next FLAC frame. We need to decode the entire frame (not just the header) because it's possible for the header to incorrectly pass the + CRC check and return bad data. We need to decode the entire frame to be more certain. Although this seems unlikely, this has happened to me in testing + to it needs to stay this way for now. + */ +#if 1 + if (!drflac__read_and_decode_next_flac_frame(pFlac)) { + /* Halve the byte location and continue. 
*/ + targetByte = rangeLo + ((rangeHi - rangeLo)/2); + rangeHi = targetByte; + } else { + break; + } +#else + if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) { + /* Halve the byte location and continue. */ + targetByte = rangeLo + ((rangeHi - rangeLo)/2); + rangeHi = targetByte; + } else { + break; + } +#endif + } + } + + /* The current PCM frame needs to be updated based on the frame we just seeked to. */ + drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &pFlac->currentPCMFrame, NULL); + + DRFLAC_ASSERT(targetByte <= rangeHi); + + *pLastSuccessfulSeekOffset = targetByte; + return DRFLAC_TRUE; +} + +static drflac_bool32 drflac__decode_flac_frame_and_seek_forward_by_pcm_frames(drflac* pFlac, drflac_uint64 offset) +{ + /* This section of code would be used if we were only decoding the FLAC frame header when calling drflac__seek_to_approximate_flac_frame_to_byte(). */ +#if 0 + if (drflac__decode_flac_frame(pFlac) != DRFLAC_SUCCESS) { + /* We failed to decode this frame which may be due to it being corrupt. We'll just use the next valid FLAC frame. */ + if (drflac__read_and_decode_next_flac_frame(pFlac) == DRFLAC_FALSE) { + return DRFLAC_FALSE; + } + } +#endif + + return drflac__seek_forward_by_pcm_frames(pFlac, offset) == offset; +} + + +static drflac_bool32 drflac__seek_to_pcm_frame__binary_search_internal(drflac* pFlac, drflac_uint64 pcmFrameIndex, drflac_uint64 byteRangeLo, drflac_uint64 byteRangeHi) +{ + /* This assumes pFlac->currentPCMFrame is sitting on byteRangeLo upon entry. */ + + drflac_uint64 targetByte; + drflac_uint64 pcmRangeLo = pFlac->totalPCMFrameCount; + drflac_uint64 pcmRangeHi = 0; + drflac_uint64 lastSuccessfulSeekOffset = (drflac_uint64)-1; + drflac_uint64 closestSeekOffsetBeforeTargetPCMFrame = byteRangeLo; + drflac_uint32 seekForwardThreshold = (pFlac->maxBlockSizeInPCMFrames != 0) ? 
pFlac->maxBlockSizeInPCMFrames*2 : 4096; + + targetByte = byteRangeLo + (drflac_uint64)((pcmFrameIndex - pFlac->currentPCMFrame) * pFlac->channels * pFlac->bitsPerSample/8 * DRFLAC_BINARY_SEARCH_APPROX_COMPRESSION_RATIO); + if (targetByte > byteRangeHi) { + targetByte = byteRangeHi; + } + + for (;;) { + if (drflac__seek_to_approximate_flac_frame_to_byte(pFlac, targetByte, byteRangeLo, byteRangeHi, &lastSuccessfulSeekOffset)) { + /* We found a FLAC frame. We need to check if it contains the sample we're looking for. */ + drflac_uint64 newPCMRangeLo; + drflac_uint64 newPCMRangeHi; + drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &newPCMRangeLo, &newPCMRangeHi); + + /* If we selected the same frame, it means we should be pretty close. Just decode the rest. */ + if (pcmRangeLo == newPCMRangeLo) { + if (!drflac__seek_to_approximate_flac_frame_to_byte(pFlac, closestSeekOffsetBeforeTargetPCMFrame, closestSeekOffsetBeforeTargetPCMFrame, byteRangeHi, &lastSuccessfulSeekOffset)) { + break; /* Failed to seek to closest frame. */ + } + + if (drflac__decode_flac_frame_and_seek_forward_by_pcm_frames(pFlac, pcmFrameIndex - pFlac->currentPCMFrame)) { + return DRFLAC_TRUE; + } else { + break; /* Failed to seek forward. */ + } + } + + pcmRangeLo = newPCMRangeLo; + pcmRangeHi = newPCMRangeHi; + + if (pcmRangeLo <= pcmFrameIndex && pcmRangeHi >= pcmFrameIndex) { + /* The target PCM frame is in this FLAC frame. */ + if (drflac__decode_flac_frame_and_seek_forward_by_pcm_frames(pFlac, pcmFrameIndex - pFlac->currentPCMFrame) ) { + return DRFLAC_TRUE; + } else { + break; /* Failed to seek to FLAC frame. */ + } + } else { + const float approxCompressionRatio = (lastSuccessfulSeekOffset - pFlac->firstFLACFramePosInBytes) / (pcmRangeLo * pFlac->channels * pFlac->bitsPerSample/8.0f); + + if (pcmRangeLo > pcmFrameIndex) { + /* We seeked too far forward. We need to move our target byte backward and try again. 
*/ + byteRangeHi = lastSuccessfulSeekOffset; + if (byteRangeLo > byteRangeHi) { + byteRangeLo = byteRangeHi; + } + + /*targetByte = lastSuccessfulSeekOffset - (drflac_uint64)((pcmRangeLo-pcmFrameIndex) * pFlac->channels * pFlac->bitsPerSample/8 * approxCompressionRatio);*/ + targetByte = byteRangeLo + ((byteRangeHi - byteRangeLo) / 2); + if (targetByte < byteRangeLo) { + targetByte = byteRangeLo; + } + } else /*if (pcmRangeHi < pcmFrameIndex)*/ { + /* We didn't seek far enough. We need to move our target byte forward and try again. */ + + /* If we're close enough we can just seek forward. */ + if ((pcmFrameIndex - pcmRangeLo) < seekForwardThreshold) { + if (drflac__decode_flac_frame_and_seek_forward_by_pcm_frames(pFlac, pcmFrameIndex - pFlac->currentPCMFrame)) { + return DRFLAC_TRUE; + } else { + break; /* Failed to seek to FLAC frame. */ + } + } else { + byteRangeLo = lastSuccessfulSeekOffset; + if (byteRangeHi < byteRangeLo) { + byteRangeHi = byteRangeLo; + } + + /*targetByte = byteRangeLo + (drflac_uint64)((pcmFrameIndex-pcmRangeLo) * pFlac->channels * pFlac->bitsPerSample/8 * approxCompressionRatio);*/ + targetByte = lastSuccessfulSeekOffset + (drflac_uint64)((pcmFrameIndex-pcmRangeLo) * pFlac->channels * pFlac->bitsPerSample/8 * approxCompressionRatio); + /*targetByte = byteRangeLo + ((byteRangeHi - byteRangeLo) / 2);*/ + + if (targetByte > byteRangeHi) { + targetByte = byteRangeHi; + } + + if (closestSeekOffsetBeforeTargetPCMFrame < lastSuccessfulSeekOffset) { + closestSeekOffsetBeforeTargetPCMFrame = lastSuccessfulSeekOffset; + } + } + } + } + } else { + /* Getting here is really bad. We just recover as best we can, but moving to the first frame in the stream, and then abort. */ + break; + } + } + + drflac__seek_to_first_frame(pFlac); /* <-- Try to recover. 
*/ + return DRFLAC_FALSE; +} + +static drflac_bool32 drflac__seek_to_pcm_frame__binary_search(drflac* pFlac, drflac_uint64 pcmFrameIndex) +{ + drflac_uint64 byteRangeLo; + drflac_uint64 byteRangeHi; + drflac_uint32 seekForwardThreshold = (pFlac->maxBlockSizeInPCMFrames != 0) ? pFlac->maxBlockSizeInPCMFrames*2 : 4096; + + /* Our algorithm currently assumes the PCM frame */ + if (drflac__seek_to_first_frame(pFlac) == DRFLAC_FALSE) { + return DRFLAC_FALSE; + } + + /* If we're close enough to the start, just move to the start and seek forward. */ + if (pcmFrameIndex < seekForwardThreshold) { + return drflac__seek_forward_by_pcm_frames(pFlac, pcmFrameIndex) == pcmFrameIndex; + } + + /* + Our starting byte range is the byte position of the first FLAC frame and the approximate end of the file as if it were completely uncompressed. This ensures + the entire file is included, even though most of the time it'll exceed the end of the actual stream. This is OK as the frame searching logic will handle it. 
+ */ + byteRangeLo = pFlac->firstFLACFramePosInBytes; + byteRangeHi = pFlac->firstFLACFramePosInBytes + (drflac_uint64)(pFlac->totalPCMFrameCount * pFlac->channels * pFlac->bitsPerSample/8); + + return drflac__seek_to_pcm_frame__binary_search_internal(pFlac, pcmFrameIndex, byteRangeLo, byteRangeHi); +} +#endif /* !DR_FLAC_NO_CRC */ + +static drflac_bool32 drflac__seek_to_pcm_frame__seek_table(drflac* pFlac, drflac_uint64 pcmFrameIndex) +{ + drflac_uint32 iClosestSeekpoint = 0; + drflac_bool32 isMidFrame = DRFLAC_FALSE; + drflac_uint64 runningPCMFrameCount; + drflac_uint32 iSeekpoint; + + + DRFLAC_ASSERT(pFlac != NULL); + + if (pFlac->pSeekpoints == NULL || pFlac->seekpointCount == 0) { + return DRFLAC_FALSE; + } + + for (iSeekpoint = 0; iSeekpoint < pFlac->seekpointCount; ++iSeekpoint) { + if (pFlac->pSeekpoints[iSeekpoint].firstPCMFrame >= pcmFrameIndex) { + break; + } + + iClosestSeekpoint = iSeekpoint; + } + +#if !defined(DR_FLAC_NO_CRC) + /* At this point we should know the closest seek point. We can use a binary search for this. We need to know the total sample count for this. */ + if (pFlac->totalPCMFrameCount > 0) { + drflac_uint64 byteRangeLo; + drflac_uint64 byteRangeHi; + + byteRangeHi = pFlac->firstFLACFramePosInBytes + (drflac_uint64)(pFlac->totalPCMFrameCount * pFlac->channels * pFlac->bitsPerSample/8); + byteRangeLo = pFlac->firstFLACFramePosInBytes + pFlac->pSeekpoints[iClosestSeekpoint].flacFrameOffset; + + if (iClosestSeekpoint < pFlac->seekpointCount-1) { + if (pFlac->pSeekpoints[iClosestSeekpoint+1].firstPCMFrame != (((drflac_uint64)0xFFFFFFFF << 32) | 0xFFFFFFFF)) { /* Is it a placeholder seekpoint. */ + byteRangeHi = pFlac->firstFLACFramePosInBytes + pFlac->pSeekpoints[iClosestSeekpoint+1].flacFrameOffset-1; /* Must be zero based. 
*/ + } + } + + if (drflac__seek_to_byte(&pFlac->bs, pFlac->firstFLACFramePosInBytes + pFlac->pSeekpoints[iClosestSeekpoint].flacFrameOffset)) { + if (drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) { + drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &pFlac->currentPCMFrame, NULL); + + if (drflac__seek_to_pcm_frame__binary_search_internal(pFlac, pcmFrameIndex, byteRangeLo, byteRangeHi)) { + return DRFLAC_TRUE; + } + } + } + } +#endif /* !DR_FLAC_NO_CRC */ + + /* Getting here means we need to use a slower algorithm because the binary search method failed or cannot be used. */ + + /* + If we are seeking forward and the closest seekpoint is _before_ the current sample, we just seek forward from where we are. Otherwise we start seeking + from the seekpoint's first sample. + */ + if (pcmFrameIndex >= pFlac->currentPCMFrame && pFlac->pSeekpoints[iClosestSeekpoint].firstPCMFrame <= pFlac->currentPCMFrame) { + /* Optimized case. Just seek forward from where we are. */ + runningPCMFrameCount = pFlac->currentPCMFrame; + + /* The frame header for the first frame may not yet have been read. We need to do that if necessary. */ + if (pFlac->currentPCMFrame == 0 && pFlac->currentFLACFrame.pcmFramesRemaining == 0) { + if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) { + return DRFLAC_FALSE; + } + } else { + isMidFrame = DRFLAC_TRUE; + } + } else { + /* Slower case. Seek to the start of the seekpoint and then seek forward from there. */ + runningPCMFrameCount = pFlac->pSeekpoints[iClosestSeekpoint].firstPCMFrame; + + if (!drflac__seek_to_byte(&pFlac->bs, pFlac->firstFLACFramePosInBytes + pFlac->pSeekpoints[iClosestSeekpoint].flacFrameOffset)) { + return DRFLAC_FALSE; + } + + /* Grab the frame the seekpoint is sitting on in preparation for the sample-exact seeking below. 
*/ + if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) { + return DRFLAC_FALSE; + } + } + + for (;;) { + drflac_uint64 pcmFrameCountInThisFLACFrame; + drflac_uint64 firstPCMFrameInFLACFrame = 0; + drflac_uint64 lastPCMFrameInFLACFrame = 0; + + drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &firstPCMFrameInFLACFrame, &lastPCMFrameInFLACFrame); + + pcmFrameCountInThisFLACFrame = (lastPCMFrameInFLACFrame - firstPCMFrameInFLACFrame) + 1; + if (pcmFrameIndex < (runningPCMFrameCount + pcmFrameCountInThisFLACFrame)) { + /* + The sample should be in this frame. We need to fully decode it, but if it's an invalid frame (a CRC mismatch) we need to pretend + it never existed and keep iterating. + */ + drflac_uint64 pcmFramesToDecode = pcmFrameIndex - runningPCMFrameCount; + + if (!isMidFrame) { + drflac_result result = drflac__decode_flac_frame(pFlac); + if (result == DRFLAC_SUCCESS) { + /* The frame is valid. We just need to skip over some samples to ensure it's sample-exact. */ + return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode; /* <-- If this fails, something bad has happened (it should never fail). */ + } else { + if (result == DRFLAC_CRC_MISMATCH) { + goto next_iteration; /* CRC mismatch. Pretend this frame never existed. */ + } else { + return DRFLAC_FALSE; + } + } + } else { + /* We started seeking mid-frame which means we need to skip the frame decoding part. */ + return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode; + } + } else { + /* + It's not in this frame. We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this + frame never existed and leave the running sample count untouched. 
+ */ + if (!isMidFrame) { + drflac_result result = drflac__seek_to_next_flac_frame(pFlac); + if (result == DRFLAC_SUCCESS) { + runningPCMFrameCount += pcmFrameCountInThisFLACFrame; + } else { + if (result == DRFLAC_CRC_MISMATCH) { + goto next_iteration; /* CRC mismatch. Pretend this frame never existed. */ + } else { + return DRFLAC_FALSE; + } + } + } else { + /* + We started seeking mid-frame which means we need to seek by reading to the end of the frame instead of with + drflac__seek_to_next_flac_frame() which only works if the decoder is sitting on the byte just after the frame header. + */ + runningPCMFrameCount += pFlac->currentFLACFrame.pcmFramesRemaining; + pFlac->currentFLACFrame.pcmFramesRemaining = 0; + isMidFrame = DRFLAC_FALSE; + } + + /* If we are seeking to the end of the file and we've just hit it, we're done. */ + if (pcmFrameIndex == pFlac->totalPCMFrameCount && runningPCMFrameCount == pFlac->totalPCMFrameCount) { + return DRFLAC_TRUE; + } + } + + next_iteration: + /* Grab the next frame in preparation for the next iteration. */ + if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) { + return DRFLAC_FALSE; + } + } +} + + +#ifndef DR_FLAC_NO_OGG +typedef struct +{ + drflac_uint8 capturePattern[4]; /* Should be "OggS" */ + drflac_uint8 structureVersion; /* Always 0. 
*/ + drflac_uint8 headerType; + drflac_uint64 granulePosition; + drflac_uint32 serialNumber; + drflac_uint32 sequenceNumber; + drflac_uint32 checksum; + drflac_uint8 segmentCount; + drflac_uint8 segmentTable[255]; +} drflac_ogg_page_header; +#endif + +typedef struct +{ + drflac_read_proc onRead; + drflac_seek_proc onSeek; + drflac_meta_proc onMeta; + drflac_container container; + void* pUserData; + void* pUserDataMD; + drflac_uint32 sampleRate; + drflac_uint8 channels; + drflac_uint8 bitsPerSample; + drflac_uint64 totalPCMFrameCount; + drflac_uint16 maxBlockSizeInPCMFrames; + drflac_uint64 runningFilePos; + drflac_bool32 hasStreamInfoBlock; + drflac_bool32 hasMetadataBlocks; + drflac_bs bs; /* <-- A bit streamer is required for loading data during initialization. */ + drflac_frame_header firstFrameHeader; /* <-- The header of the first frame that was read during relaxed initalization. Only set if there is no STREAMINFO block. */ + +#ifndef DR_FLAC_NO_OGG + drflac_uint32 oggSerial; + drflac_uint64 oggFirstBytePos; + drflac_ogg_page_header oggBosHeader; +#endif +} drflac_init_info; + +static DRFLAC_INLINE void drflac__decode_block_header(drflac_uint32 blockHeader, drflac_uint8* isLastBlock, drflac_uint8* blockType, drflac_uint32* blockSize) +{ + blockHeader = drflac__be2host_32(blockHeader); + *isLastBlock = (blockHeader & 0x80000000UL) >> 31; + *blockType = (blockHeader & 0x7F000000UL) >> 24; + *blockSize = (blockHeader & 0x00FFFFFFUL); +} + +static DRFLAC_INLINE drflac_bool32 drflac__read_and_decode_block_header(drflac_read_proc onRead, void* pUserData, drflac_uint8* isLastBlock, drflac_uint8* blockType, drflac_uint32* blockSize) +{ + drflac_uint32 blockHeader; + if (onRead(pUserData, &blockHeader, 4) != 4) { + return DRFLAC_FALSE; + } + + drflac__decode_block_header(blockHeader, isLastBlock, blockType, blockSize); + return DRFLAC_TRUE; +} + +drflac_bool32 drflac__read_streaminfo(drflac_read_proc onRead, void* pUserData, drflac_streaminfo* pStreamInfo) +{ + 
drflac_uint32 blockSizes; + drflac_uint64 frameSizes = 0; + drflac_uint64 importantProps; + drflac_uint8 md5[16]; + + /* min/max block size. */ + if (onRead(pUserData, &blockSizes, 4) != 4) { + return DRFLAC_FALSE; + } + + /* min/max frame size. */ + if (onRead(pUserData, &frameSizes, 6) != 6) { + return DRFLAC_FALSE; + } + + /* Sample rate, channels, bits per sample and total sample count. */ + if (onRead(pUserData, &importantProps, 8) != 8) { + return DRFLAC_FALSE; + } + + /* MD5 */ if (onRead(pUserData, md5, sizeof(md5)) != sizeof(md5)) { return DRFLAC_FALSE; } - blockSizes = drflac__be2host_32(blockSizes); - frameSizes = drflac__be2host_64(frameSizes); - importantProps = drflac__be2host_64(importantProps); + blockSizes = drflac__be2host_32(blockSizes); + frameSizes = drflac__be2host_64(frameSizes); + importantProps = drflac__be2host_64(importantProps); + + pStreamInfo->minBlockSizeInPCMFrames = (blockSizes & 0xFFFF0000) >> 16; + pStreamInfo->maxBlockSizeInPCMFrames = (blockSizes & 0x0000FFFF); + pStreamInfo->minFrameSizeInPCMFrames = (drflac_uint32)((frameSizes & (((drflac_uint64)0x00FFFFFF << 16) << 24)) >> 40); + pStreamInfo->maxFrameSizeInPCMFrames = (drflac_uint32)((frameSizes & (((drflac_uint64)0x00FFFFFF << 16) << 0)) >> 16); + pStreamInfo->sampleRate = (drflac_uint32)((importantProps & (((drflac_uint64)0x000FFFFF << 16) << 28)) >> 44); + pStreamInfo->channels = (drflac_uint8 )((importantProps & (((drflac_uint64)0x0000000E << 16) << 24)) >> 41) + 1; + pStreamInfo->bitsPerSample = (drflac_uint8 )((importantProps & (((drflac_uint64)0x0000001F << 16) << 20)) >> 36) + 1; + pStreamInfo->totalPCMFrameCount = ((importantProps & ((((drflac_uint64)0x0000000F << 16) << 16) | 0xFFFFFFFF))); + DRFLAC_COPY_MEMORY(pStreamInfo->md5, md5, sizeof(md5)); + + return DRFLAC_TRUE; +} + + +static void* drflac__malloc_default(size_t sz, void* pUserData) +{ + (void)pUserData; + return DRFLAC_MALLOC(sz); +} + +static void* drflac__realloc_default(void* p, size_t sz, void* 
pUserData) +{ + (void)pUserData; + return DRFLAC_REALLOC(p, sz); +} + +static void drflac__free_default(void* p, void* pUserData) +{ + (void)pUserData; + DRFLAC_FREE(p); +} + + +static void* drflac__malloc_from_callbacks(size_t sz, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + if (pAllocationCallbacks == NULL) { + return NULL; + } + + if (pAllocationCallbacks->onMalloc != NULL) { + return pAllocationCallbacks->onMalloc(sz, pAllocationCallbacks->pUserData); + } + + /* Try using realloc(). */ + if (pAllocationCallbacks->onRealloc != NULL) { + return pAllocationCallbacks->onRealloc(NULL, sz, pAllocationCallbacks->pUserData); + } + + return NULL; +} + +static void* drflac__realloc_from_callbacks(void* p, size_t szNew, size_t szOld, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + if (pAllocationCallbacks == NULL) { + return NULL; + } + + if (pAllocationCallbacks->onRealloc != NULL) { + return pAllocationCallbacks->onRealloc(p, szNew, pAllocationCallbacks->pUserData); + } + + /* Try emulating realloc() in terms of malloc()/free(). 
*/ + if (pAllocationCallbacks->onMalloc != NULL && pAllocationCallbacks->onFree != NULL) { + void* p2; + + p2 = pAllocationCallbacks->onMalloc(szNew, pAllocationCallbacks->pUserData); + if (p2 == NULL) { + return NULL; + } + + DRFLAC_COPY_MEMORY(p2, p, szOld); + pAllocationCallbacks->onFree(p, pAllocationCallbacks->pUserData); + + return p2; + } + + return NULL; +} + +static void drflac__free_from_callbacks(void* p, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + if (p == NULL || pAllocationCallbacks == NULL) { + return; + } + + if (pAllocationCallbacks->onFree != NULL) { + pAllocationCallbacks->onFree(p, pAllocationCallbacks->pUserData); + } +} + + +drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_uint64* pFirstFramePos, drflac_uint64* pSeektablePos, drflac_uint32* pSeektableSize, drflac_allocation_callbacks* pAllocationCallbacks) +{ + /* + We want to keep track of the byte position in the stream of the seektable. At the time of calling this function we know that + we'll be sitting on byte 42. 
+ */ + drflac_uint64 runningFilePos = 42; + drflac_uint64 seektablePos = 0; + drflac_uint32 seektableSize = 0; + + for (;;) { + drflac_metadata metadata; + drflac_uint8 isLastBlock = 0; + drflac_uint8 blockType; + drflac_uint32 blockSize; + if (!drflac__read_and_decode_block_header(onRead, pUserData, &isLastBlock, &blockType, &blockSize)) { + return DRFLAC_FALSE; + } + runningFilePos += 4; + + metadata.type = blockType; + metadata.pRawData = NULL; + metadata.rawDataSize = 0; + + switch (blockType) + { + case DRFLAC_METADATA_BLOCK_TYPE_APPLICATION: + { + if (blockSize < 4) { + return DRFLAC_FALSE; + } + + if (onMeta) { + void* pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks); + if (pRawData == NULL) { + return DRFLAC_FALSE; + } + + if (onRead(pUserData, pRawData, blockSize) != blockSize) { + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + return DRFLAC_FALSE; + } + + metadata.pRawData = pRawData; + metadata.rawDataSize = blockSize; + metadata.data.application.id = drflac__be2host_32(*(drflac_uint32*)pRawData); + metadata.data.application.pData = (const void*)((drflac_uint8*)pRawData + sizeof(drflac_uint32)); + metadata.data.application.dataSize = blockSize - sizeof(drflac_uint32); + onMeta(pUserDataMD, &metadata); + + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + } + } break; + + case DRFLAC_METADATA_BLOCK_TYPE_SEEKTABLE: + { + seektablePos = runningFilePos; + seektableSize = blockSize; + + if (onMeta) { + drflac_uint32 iSeekpoint; + void* pRawData; + + pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks); + if (pRawData == NULL) { + return DRFLAC_FALSE; + } + + if (onRead(pUserData, pRawData, blockSize) != blockSize) { + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + return DRFLAC_FALSE; + } + + metadata.pRawData = pRawData; + metadata.rawDataSize = blockSize; + metadata.data.seektable.seekpointCount = blockSize/sizeof(drflac_seekpoint); + 
metadata.data.seektable.pSeekpoints = (const drflac_seekpoint*)pRawData; + + /* Endian swap. */ + for (iSeekpoint = 0; iSeekpoint < metadata.data.seektable.seekpointCount; ++iSeekpoint) { + drflac_seekpoint* pSeekpoint = (drflac_seekpoint*)pRawData + iSeekpoint; + pSeekpoint->firstPCMFrame = drflac__be2host_64(pSeekpoint->firstPCMFrame); + pSeekpoint->flacFrameOffset = drflac__be2host_64(pSeekpoint->flacFrameOffset); + pSeekpoint->pcmFrameCount = drflac__be2host_16(pSeekpoint->pcmFrameCount); + } + + onMeta(pUserDataMD, &metadata); + + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + } + } break; + + case DRFLAC_METADATA_BLOCK_TYPE_VORBIS_COMMENT: + { + if (blockSize < 8) { + return DRFLAC_FALSE; + } + + if (onMeta) { + void* pRawData; + const char* pRunningData; + const char* pRunningDataEnd; + drflac_uint32 i; + + pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks); + if (pRawData == NULL) { + return DRFLAC_FALSE; + } + + if (onRead(pUserData, pRawData, blockSize) != blockSize) { + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + return DRFLAC_FALSE; + } + + metadata.pRawData = pRawData; + metadata.rawDataSize = blockSize; + + pRunningData = (const char*)pRawData; + pRunningDataEnd = (const char*)pRawData + blockSize; + + metadata.data.vorbis_comment.vendorLength = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; + + /* Need space for the rest of the block */ + if ((pRunningDataEnd - pRunningData) - 4 < (drflac_int64)metadata.data.vorbis_comment.vendorLength) { /* <-- Note the order of operations to avoid overflow to a valid value */ + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + return DRFLAC_FALSE; + } + metadata.data.vorbis_comment.vendor = pRunningData; pRunningData += metadata.data.vorbis_comment.vendorLength; + metadata.data.vorbis_comment.commentCount = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; + + /* Need space for 
'commentCount' comments after the block, which at minimum is a drflac_uint32 per comment */ + if ((pRunningDataEnd - pRunningData) / sizeof(drflac_uint32) < metadata.data.vorbis_comment.commentCount) { /* <-- Note the order of operations to avoid overflow to a valid value */ + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + return DRFLAC_FALSE; + } + metadata.data.vorbis_comment.pComments = pRunningData; + + /* Check that the comments section is valid before passing it to the callback */ + for (i = 0; i < metadata.data.vorbis_comment.commentCount; ++i) { + drflac_uint32 commentLength; + + if (pRunningDataEnd - pRunningData < 4) { + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + return DRFLAC_FALSE; + } + + commentLength = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; + if (pRunningDataEnd - pRunningData < (drflac_int64)commentLength) { /* <-- Note the order of operations to avoid overflow to a valid value */ + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + return DRFLAC_FALSE; + } + pRunningData += commentLength; + } + + onMeta(pUserDataMD, &metadata); + + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + } + } break; + + case DRFLAC_METADATA_BLOCK_TYPE_CUESHEET: + { + if (blockSize < 396) { + return DRFLAC_FALSE; + } + + if (onMeta) { + void* pRawData; + const char* pRunningData; + const char* pRunningDataEnd; + drflac_uint8 iTrack; + drflac_uint8 iIndex; + + pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks); + if (pRawData == NULL) { + return DRFLAC_FALSE; + } + + if (onRead(pUserData, pRawData, blockSize) != blockSize) { + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + return DRFLAC_FALSE; + } + + metadata.pRawData = pRawData; + metadata.rawDataSize = blockSize; + + pRunningData = (const char*)pRawData; + pRunningDataEnd = (const char*)pRawData + blockSize; + + DRFLAC_COPY_MEMORY(metadata.data.cuesheet.catalog, pRunningData, 128); 
pRunningData += 128; + metadata.data.cuesheet.leadInSampleCount = drflac__be2host_64(*(const drflac_uint64*)pRunningData); pRunningData += 8; + metadata.data.cuesheet.isCD = (pRunningData[0] & 0x80) != 0; pRunningData += 259; + metadata.data.cuesheet.trackCount = pRunningData[0]; pRunningData += 1; + metadata.data.cuesheet.pTrackData = pRunningData; + + /* Check that the cuesheet tracks are valid before passing it to the callback */ + for (iTrack = 0; iTrack < metadata.data.cuesheet.trackCount; ++iTrack) { + drflac_uint8 indexCount; + drflac_uint32 indexPointSize; + + if (pRunningDataEnd - pRunningData < 36) { + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + return DRFLAC_FALSE; + } + + /* Skip to the index point count */ + pRunningData += 35; + indexCount = pRunningData[0]; pRunningData += 1; + indexPointSize = indexCount * sizeof(drflac_cuesheet_track_index); + if (pRunningDataEnd - pRunningData < (drflac_int64)indexPointSize) { + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + return DRFLAC_FALSE; + } + + /* Endian swap. 
*/ + for (iIndex = 0; iIndex < indexCount; ++iIndex) { + drflac_cuesheet_track_index* pTrack = (drflac_cuesheet_track_index*)pRunningData; + pRunningData += sizeof(drflac_cuesheet_track_index); + pTrack->offset = drflac__be2host_64(pTrack->offset); + } + } + + onMeta(pUserDataMD, &metadata); + + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + } + } break; + + case DRFLAC_METADATA_BLOCK_TYPE_PICTURE: + { + if (blockSize < 32) { + return DRFLAC_FALSE; + } + + if (onMeta) { + void* pRawData; + const char* pRunningData; + const char* pRunningDataEnd; + + pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks); + if (pRawData == NULL) { + return DRFLAC_FALSE; + } + + if (onRead(pUserData, pRawData, blockSize) != blockSize) { + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + return DRFLAC_FALSE; + } + + metadata.pRawData = pRawData; + metadata.rawDataSize = blockSize; + + pRunningData = (const char*)pRawData; + pRunningDataEnd = (const char*)pRawData + blockSize; + + metadata.data.picture.type = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; + metadata.data.picture.mimeLength = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; + + /* Need space for the rest of the block */ + if ((pRunningDataEnd - pRunningData) - 24 < (drflac_int64)metadata.data.picture.mimeLength) { /* <-- Note the order of operations to avoid overflow to a valid value */ + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + return DRFLAC_FALSE; + } + metadata.data.picture.mime = pRunningData; pRunningData += metadata.data.picture.mimeLength; + metadata.data.picture.descriptionLength = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; + + /* Need space for the rest of the block */ + if ((pRunningDataEnd - pRunningData) - 20 < (drflac_int64)metadata.data.picture.descriptionLength) { /* <-- Note the order of operations to avoid overflow to a valid value */ + 
drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + return DRFLAC_FALSE; + } + metadata.data.picture.description = pRunningData; pRunningData += metadata.data.picture.descriptionLength; + metadata.data.picture.width = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; + metadata.data.picture.height = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; + metadata.data.picture.colorDepth = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; + metadata.data.picture.indexColorCount = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; + metadata.data.picture.pictureDataSize = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; + metadata.data.picture.pPictureData = (const drflac_uint8*)pRunningData; + + /* Need space for the picture after the block */ + if (pRunningDataEnd - pRunningData < (drflac_int64)metadata.data.picture.pictureDataSize) { /* <-- Note the order of operations to avoid overflow to a valid value */ + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + return DRFLAC_FALSE; + } + + onMeta(pUserDataMD, &metadata); + + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + } + } break; + + case DRFLAC_METADATA_BLOCK_TYPE_PADDING: + { + if (onMeta) { + metadata.data.padding.unused = 0; + + /* Padding doesn't have anything meaningful in it, so just skip over it, but make sure the caller is aware of it by firing the callback. */ + if (!onSeek(pUserData, blockSize, drflac_seek_origin_current)) { + isLastBlock = DRFLAC_TRUE; /* An error occurred while seeking. Attempt to recover by treating this as the last block which will in turn terminate the loop. */ + } else { + onMeta(pUserDataMD, &metadata); + } + } + } break; + + case DRFLAC_METADATA_BLOCK_TYPE_INVALID: + { + /* Invalid chunk. Just skip over this one. 
*/ + if (onMeta) { + if (!onSeek(pUserData, blockSize, drflac_seek_origin_current)) { + isLastBlock = DRFLAC_TRUE; /* An error occurred while seeking. Attempt to recover by treating this as the last block which will in turn terminate the loop. */ + } + } + } break; + + default: + { + /* + It's an unknown chunk, but not necessarily invalid. There's a chance more metadata blocks might be defined later on, so we + can at the very least report the chunk to the application and let it look at the raw data. + */ + if (onMeta) { + void* pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks); + if (pRawData == NULL) { + return DRFLAC_FALSE; + } + + if (onRead(pUserData, pRawData, blockSize) != blockSize) { + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + return DRFLAC_FALSE; + } + + metadata.pRawData = pRawData; + metadata.rawDataSize = blockSize; + onMeta(pUserDataMD, &metadata); + + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + } + } break; + } + + /* If we're not handling metadata, just skip over the block. If we are, it will have been handled earlier in the switch statement above. */ + if (onMeta == NULL && blockSize > 0) { + if (!onSeek(pUserData, blockSize, drflac_seek_origin_current)) { + isLastBlock = DRFLAC_TRUE; + } + } + + runningFilePos += blockSize; + if (isLastBlock) { + break; + } + } + + *pSeektablePos = seektablePos; + *pSeektableSize = seektableSize; + *pFirstFramePos = runningFilePos; + + return DRFLAC_TRUE; +} + +drflac_bool32 drflac__init_private__native(drflac_init_info* pInit, drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_bool32 relaxed) +{ + /* Pre Condition: The bit stream should be sitting just past the 4-byte id header. 
*/ + + drflac_uint8 isLastBlock; + drflac_uint8 blockType; + drflac_uint32 blockSize; + + (void)onSeek; + + pInit->container = drflac_container_native; + + /* The first metadata block should be the STREAMINFO block. */ + if (!drflac__read_and_decode_block_header(onRead, pUserData, &isLastBlock, &blockType, &blockSize)) { + return DRFLAC_FALSE; + } + + if (blockType != DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO || blockSize != 34) { + if (!relaxed) { + /* We're opening in strict mode and the first block is not the STREAMINFO block. Error. */ + return DRFLAC_FALSE; + } else { + /* + Relaxed mode. To open from here we need to just find the first frame and set the sample rate, etc. to whatever is defined + for that frame. + */ + pInit->hasStreamInfoBlock = DRFLAC_FALSE; + pInit->hasMetadataBlocks = DRFLAC_FALSE; + + if (!drflac__read_next_flac_frame_header(&pInit->bs, 0, &pInit->firstFrameHeader)) { + return DRFLAC_FALSE; /* Couldn't find a frame. */ + } + + if (pInit->firstFrameHeader.bitsPerSample == 0) { + return DRFLAC_FALSE; /* Failed to initialize because the first frame depends on the STREAMINFO block, which does not exist. 
*/ + } + + pInit->sampleRate = pInit->firstFrameHeader.sampleRate; + pInit->channels = drflac__get_channel_count_from_channel_assignment(pInit->firstFrameHeader.channelAssignment); + pInit->bitsPerSample = pInit->firstFrameHeader.bitsPerSample; + pInit->maxBlockSizeInPCMFrames = 65535; /* <-- See notes here: https://xiph.org/flac/format.html#metadata_block_streaminfo */ + return DRFLAC_TRUE; + } + } else { + drflac_streaminfo streaminfo; + if (!drflac__read_streaminfo(onRead, pUserData, &streaminfo)) { + return DRFLAC_FALSE; + } + + pInit->hasStreamInfoBlock = DRFLAC_TRUE; + pInit->sampleRate = streaminfo.sampleRate; + pInit->channels = streaminfo.channels; + pInit->bitsPerSample = streaminfo.bitsPerSample; + pInit->totalPCMFrameCount = streaminfo.totalPCMFrameCount; + pInit->maxBlockSizeInPCMFrames = streaminfo.maxBlockSizeInPCMFrames; /* Don't care about the min block size - only the max (used for determining the size of the memory allocation). */ + pInit->hasMetadataBlocks = !isLastBlock; + + if (onMeta) { + drflac_metadata metadata; + metadata.type = DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO; + metadata.pRawData = NULL; + metadata.rawDataSize = 0; + metadata.data.streaminfo = streaminfo; + onMeta(pUserDataMD, &metadata); + } + + return DRFLAC_TRUE; + } +} + +#ifndef DR_FLAC_NO_OGG +#define DRFLAC_OGG_MAX_PAGE_SIZE 65307 +#define DRFLAC_OGG_CAPTURE_PATTERN_CRC32 1605413199 /* CRC-32 of "OggS". 
*/ + +typedef enum +{ + drflac_ogg_recover_on_crc_mismatch, + drflac_ogg_fail_on_crc_mismatch +} drflac_ogg_crc_mismatch_recovery; + +#ifndef DR_FLAC_NO_CRC +static drflac_uint32 drflac__crc32_table[] = { + 0x00000000L, 0x04C11DB7L, 0x09823B6EL, 0x0D4326D9L, + 0x130476DCL, 0x17C56B6BL, 0x1A864DB2L, 0x1E475005L, + 0x2608EDB8L, 0x22C9F00FL, 0x2F8AD6D6L, 0x2B4BCB61L, + 0x350C9B64L, 0x31CD86D3L, 0x3C8EA00AL, 0x384FBDBDL, + 0x4C11DB70L, 0x48D0C6C7L, 0x4593E01EL, 0x4152FDA9L, + 0x5F15ADACL, 0x5BD4B01BL, 0x569796C2L, 0x52568B75L, + 0x6A1936C8L, 0x6ED82B7FL, 0x639B0DA6L, 0x675A1011L, + 0x791D4014L, 0x7DDC5DA3L, 0x709F7B7AL, 0x745E66CDL, + 0x9823B6E0L, 0x9CE2AB57L, 0x91A18D8EL, 0x95609039L, + 0x8B27C03CL, 0x8FE6DD8BL, 0x82A5FB52L, 0x8664E6E5L, + 0xBE2B5B58L, 0xBAEA46EFL, 0xB7A96036L, 0xB3687D81L, + 0xAD2F2D84L, 0xA9EE3033L, 0xA4AD16EAL, 0xA06C0B5DL, + 0xD4326D90L, 0xD0F37027L, 0xDDB056FEL, 0xD9714B49L, + 0xC7361B4CL, 0xC3F706FBL, 0xCEB42022L, 0xCA753D95L, + 0xF23A8028L, 0xF6FB9D9FL, 0xFBB8BB46L, 0xFF79A6F1L, + 0xE13EF6F4L, 0xE5FFEB43L, 0xE8BCCD9AL, 0xEC7DD02DL, + 0x34867077L, 0x30476DC0L, 0x3D044B19L, 0x39C556AEL, + 0x278206ABL, 0x23431B1CL, 0x2E003DC5L, 0x2AC12072L, + 0x128E9DCFL, 0x164F8078L, 0x1B0CA6A1L, 0x1FCDBB16L, + 0x018AEB13L, 0x054BF6A4L, 0x0808D07DL, 0x0CC9CDCAL, + 0x7897AB07L, 0x7C56B6B0L, 0x71159069L, 0x75D48DDEL, + 0x6B93DDDBL, 0x6F52C06CL, 0x6211E6B5L, 0x66D0FB02L, + 0x5E9F46BFL, 0x5A5E5B08L, 0x571D7DD1L, 0x53DC6066L, + 0x4D9B3063L, 0x495A2DD4L, 0x44190B0DL, 0x40D816BAL, + 0xACA5C697L, 0xA864DB20L, 0xA527FDF9L, 0xA1E6E04EL, + 0xBFA1B04BL, 0xBB60ADFCL, 0xB6238B25L, 0xB2E29692L, + 0x8AAD2B2FL, 0x8E6C3698L, 0x832F1041L, 0x87EE0DF6L, + 0x99A95DF3L, 0x9D684044L, 0x902B669DL, 0x94EA7B2AL, + 0xE0B41DE7L, 0xE4750050L, 0xE9362689L, 0xEDF73B3EL, + 0xF3B06B3BL, 0xF771768CL, 0xFA325055L, 0xFEF34DE2L, + 0xC6BCF05FL, 0xC27DEDE8L, 0xCF3ECB31L, 0xCBFFD686L, + 0xD5B88683L, 0xD1799B34L, 0xDC3ABDEDL, 0xD8FBA05AL, + 0x690CE0EEL, 0x6DCDFD59L, 0x608EDB80L, 0x644FC637L, + 
0x7A089632L, 0x7EC98B85L, 0x738AAD5CL, 0x774BB0EBL, + 0x4F040D56L, 0x4BC510E1L, 0x46863638L, 0x42472B8FL, + 0x5C007B8AL, 0x58C1663DL, 0x558240E4L, 0x51435D53L, + 0x251D3B9EL, 0x21DC2629L, 0x2C9F00F0L, 0x285E1D47L, + 0x36194D42L, 0x32D850F5L, 0x3F9B762CL, 0x3B5A6B9BL, + 0x0315D626L, 0x07D4CB91L, 0x0A97ED48L, 0x0E56F0FFL, + 0x1011A0FAL, 0x14D0BD4DL, 0x19939B94L, 0x1D528623L, + 0xF12F560EL, 0xF5EE4BB9L, 0xF8AD6D60L, 0xFC6C70D7L, + 0xE22B20D2L, 0xE6EA3D65L, 0xEBA91BBCL, 0xEF68060BL, + 0xD727BBB6L, 0xD3E6A601L, 0xDEA580D8L, 0xDA649D6FL, + 0xC423CD6AL, 0xC0E2D0DDL, 0xCDA1F604L, 0xC960EBB3L, + 0xBD3E8D7EL, 0xB9FF90C9L, 0xB4BCB610L, 0xB07DABA7L, + 0xAE3AFBA2L, 0xAAFBE615L, 0xA7B8C0CCL, 0xA379DD7BL, + 0x9B3660C6L, 0x9FF77D71L, 0x92B45BA8L, 0x9675461FL, + 0x8832161AL, 0x8CF30BADL, 0x81B02D74L, 0x857130C3L, + 0x5D8A9099L, 0x594B8D2EL, 0x5408ABF7L, 0x50C9B640L, + 0x4E8EE645L, 0x4A4FFBF2L, 0x470CDD2BL, 0x43CDC09CL, + 0x7B827D21L, 0x7F436096L, 0x7200464FL, 0x76C15BF8L, + 0x68860BFDL, 0x6C47164AL, 0x61043093L, 0x65C52D24L, + 0x119B4BE9L, 0x155A565EL, 0x18197087L, 0x1CD86D30L, + 0x029F3D35L, 0x065E2082L, 0x0B1D065BL, 0x0FDC1BECL, + 0x3793A651L, 0x3352BBE6L, 0x3E119D3FL, 0x3AD08088L, + 0x2497D08DL, 0x2056CD3AL, 0x2D15EBE3L, 0x29D4F654L, + 0xC5A92679L, 0xC1683BCEL, 0xCC2B1D17L, 0xC8EA00A0L, + 0xD6AD50A5L, 0xD26C4D12L, 0xDF2F6BCBL, 0xDBEE767CL, + 0xE3A1CBC1L, 0xE760D676L, 0xEA23F0AFL, 0xEEE2ED18L, + 0xF0A5BD1DL, 0xF464A0AAL, 0xF9278673L, 0xFDE69BC4L, + 0x89B8FD09L, 0x8D79E0BEL, 0x803AC667L, 0x84FBDBD0L, + 0x9ABC8BD5L, 0x9E7D9662L, 0x933EB0BBL, 0x97FFAD0CL, + 0xAFB010B1L, 0xAB710D06L, 0xA6322BDFL, 0xA2F33668L, + 0xBCB4666DL, 0xB8757BDAL, 0xB5365D03L, 0xB1F740B4L +}; +#endif + +static DRFLAC_INLINE drflac_uint32 drflac_crc32_byte(drflac_uint32 crc32, drflac_uint8 data) +{ +#ifndef DR_FLAC_NO_CRC + return (crc32 << 8) ^ drflac__crc32_table[(drflac_uint8)((crc32 >> 24) & 0xFF) ^ data]; +#else + (void)data; + return crc32; +#endif +} + +#if 0 +static DRFLAC_INLINE drflac_uint32 
drflac_crc32_uint32(drflac_uint32 crc32, drflac_uint32 data) +{ + crc32 = drflac_crc32_byte(crc32, (drflac_uint8)((data >> 24) & 0xFF)); + crc32 = drflac_crc32_byte(crc32, (drflac_uint8)((data >> 16) & 0xFF)); + crc32 = drflac_crc32_byte(crc32, (drflac_uint8)((data >> 8) & 0xFF)); + crc32 = drflac_crc32_byte(crc32, (drflac_uint8)((data >> 0) & 0xFF)); + return crc32; +} + +static DRFLAC_INLINE drflac_uint32 drflac_crc32_uint64(drflac_uint32 crc32, drflac_uint64 data) +{ + crc32 = drflac_crc32_uint32(crc32, (drflac_uint32)((data >> 32) & 0xFFFFFFFF)); + crc32 = drflac_crc32_uint32(crc32, (drflac_uint32)((data >> 0) & 0xFFFFFFFF)); + return crc32; +} +#endif + +static DRFLAC_INLINE drflac_uint32 drflac_crc32_buffer(drflac_uint32 crc32, drflac_uint8* pData, drflac_uint32 dataSize) +{ + /* This can be optimized. */ + drflac_uint32 i; + for (i = 0; i < dataSize; ++i) { + crc32 = drflac_crc32_byte(crc32, pData[i]); + } + return crc32; +} + + +static DRFLAC_INLINE drflac_bool32 drflac_ogg__is_capture_pattern(drflac_uint8 pattern[4]) +{ + return pattern[0] == 'O' && pattern[1] == 'g' && pattern[2] == 'g' && pattern[3] == 'S'; +} + +static DRFLAC_INLINE drflac_uint32 drflac_ogg__get_page_header_size(drflac_ogg_page_header* pHeader) +{ + return 27 + pHeader->segmentCount; +} + +static DRFLAC_INLINE drflac_uint32 drflac_ogg__get_page_body_size(drflac_ogg_page_header* pHeader) +{ + drflac_uint32 pageBodySize = 0; + int i; + + for (i = 0; i < pHeader->segmentCount; ++i) { + pageBodySize += pHeader->segmentTable[i]; + } + + return pageBodySize; +} + +drflac_result drflac_ogg__read_page_header_after_capture_pattern(drflac_read_proc onRead, void* pUserData, drflac_ogg_page_header* pHeader, drflac_uint32* pBytesRead, drflac_uint32* pCRC32) +{ + drflac_uint8 data[23]; + drflac_uint32 i; + + DRFLAC_ASSERT(*pCRC32 == DRFLAC_OGG_CAPTURE_PATTERN_CRC32); + + if (onRead(pUserData, data, 23) != 23) { + return DRFLAC_END_OF_STREAM; + } + *pBytesRead += 23; + + pHeader->structureVersion = 
data[0]; + pHeader->headerType = data[1]; + DRFLAC_COPY_MEMORY(&pHeader->granulePosition, &data[ 2], 8); + DRFLAC_COPY_MEMORY(&pHeader->serialNumber, &data[10], 4); + DRFLAC_COPY_MEMORY(&pHeader->sequenceNumber, &data[14], 4); + DRFLAC_COPY_MEMORY(&pHeader->checksum, &data[18], 4); + pHeader->segmentCount = data[22]; + + /* Calculate the CRC. Note that for the calculation the checksum part of the page needs to be set to 0. */ + data[18] = 0; + data[19] = 0; + data[20] = 0; + data[21] = 0; + + for (i = 0; i < 23; ++i) { + *pCRC32 = drflac_crc32_byte(*pCRC32, data[i]); + } + + + if (onRead(pUserData, pHeader->segmentTable, pHeader->segmentCount) != pHeader->segmentCount) { + return DRFLAC_END_OF_STREAM; + } + *pBytesRead += pHeader->segmentCount; + + for (i = 0; i < pHeader->segmentCount; ++i) { + *pCRC32 = drflac_crc32_byte(*pCRC32, pHeader->segmentTable[i]); + } + + return DRFLAC_SUCCESS; +} + +drflac_result drflac_ogg__read_page_header(drflac_read_proc onRead, void* pUserData, drflac_ogg_page_header* pHeader, drflac_uint32* pBytesRead, drflac_uint32* pCRC32) +{ + drflac_uint8 id[4]; + + *pBytesRead = 0; + + if (onRead(pUserData, id, 4) != 4) { + return DRFLAC_END_OF_STREAM; + } + *pBytesRead += 4; + + /* We need to read byte-by-byte until we find the OggS capture pattern. */ + for (;;) { + if (drflac_ogg__is_capture_pattern(id)) { + drflac_result result; + + *pCRC32 = DRFLAC_OGG_CAPTURE_PATTERN_CRC32; + + result = drflac_ogg__read_page_header_after_capture_pattern(onRead, pUserData, pHeader, pBytesRead, pCRC32); + if (result == DRFLAC_SUCCESS) { + return DRFLAC_SUCCESS; + } else { + if (result == DRFLAC_CRC_MISMATCH) { + continue; + } else { + return result; + } + } + } else { + /* The first 4 bytes did not equal the capture pattern. Read the next byte and try again. 
/*
The main part of the Ogg encapsulation is the conversion from the physical Ogg bitstream to the native FLAC bitstream. It works
in three general stages: Ogg Physical Bitstream -> Ogg/FLAC Logical Bitstream -> FLAC Native Bitstream. dr_flac is designed
in such a way that the core sections assume everything is delivered in native format. Therefore, for each encapsulation type
dr_flac is supporting there needs to be a layer sitting on top of the onRead and onSeek callbacks that ensures the bits read from
the physical Ogg bitstream are converted and delivered in native FLAC format.
*/
typedef struct
{
    drflac_read_proc onRead;                /* The original onRead callback from drflac_open() and family. */
    drflac_seek_proc onSeek;                /* The original onSeek callback from drflac_open() and family. */
    void* pUserData;                        /* The user data passed on onRead and onSeek. This is the user data that was passed on drflac_open() and family. */
    drflac_uint64 currentBytePos;           /* The position of the byte we are sitting on in the physical byte stream. Used for efficient seeking. */
    drflac_uint64 firstBytePos;             /* The position of the first byte in the physical bitstream. Points to the start of the "OggS" identifier of the FLAC bos page. */
    drflac_uint32 serialNumber;             /* The serial number of the FLAC audio pages. This is determined by the initial header page that was read during initialization. */
    drflac_ogg_page_header bosPageHeader;   /* Used for seeking. */
    drflac_ogg_page_header currentPageHeader;   /* Header of the page whose body is held in pageData. Updated by drflac_oggbs__goto_next_page() when a page is accepted. */
    drflac_uint32 bytesRemainingInPage;     /* Number of bytes at the end of pageData that the read/seek callbacks have not consumed yet. */
    drflac_uint32 pageDataSize;             /* Number of valid bytes in pageData, i.e. the body size of the page that was last read. */
    drflac_uint8 pageData[DRFLAC_OGG_MAX_PAGE_SIZE];    /* Body of the page that was last read by drflac_oggbs__goto_next_page(). */
} drflac_oggbs; /* oggbs = Ogg Bitstream */
*/ + drflac_ogg_page_header currentPageHeader; + drflac_uint32 bytesRemainingInPage; + drflac_uint32 pageDataSize; + drflac_uint8 pageData[DRFLAC_OGG_MAX_PAGE_SIZE]; +} drflac_oggbs; /* oggbs = Ogg Bitstream */ + +static size_t drflac_oggbs__read_physical(drflac_oggbs* oggbs, void* bufferOut, size_t bytesToRead) +{ + size_t bytesActuallyRead = oggbs->onRead(oggbs->pUserData, bufferOut, bytesToRead); + oggbs->currentBytePos += bytesActuallyRead; + + return bytesActuallyRead; +} + +static drflac_bool32 drflac_oggbs__seek_physical(drflac_oggbs* oggbs, drflac_uint64 offset, drflac_seek_origin origin) +{ + if (origin == drflac_seek_origin_start) { + if (offset <= 0x7FFFFFFF) { + if (!oggbs->onSeek(oggbs->pUserData, (int)offset, drflac_seek_origin_start)) { + return DRFLAC_FALSE; + } + oggbs->currentBytePos = offset; + + return DRFLAC_TRUE; + } else { + if (!oggbs->onSeek(oggbs->pUserData, 0x7FFFFFFF, drflac_seek_origin_start)) { + return DRFLAC_FALSE; + } + oggbs->currentBytePos = offset; + + return drflac_oggbs__seek_physical(oggbs, offset - 0x7FFFFFFF, drflac_seek_origin_current); + } + } else { + while (offset > 0x7FFFFFFF) { + if (!oggbs->onSeek(oggbs->pUserData, 0x7FFFFFFF, drflac_seek_origin_current)) { + return DRFLAC_FALSE; + } + oggbs->currentBytePos += 0x7FFFFFFF; + offset -= 0x7FFFFFFF; + } + + if (!oggbs->onSeek(oggbs->pUserData, (int)offset, drflac_seek_origin_current)) { /* <-- Safe cast thanks to the loop above. 
*/ + return DRFLAC_FALSE; + } + oggbs->currentBytePos += offset; + + return DRFLAC_TRUE; + } +} + +static drflac_bool32 drflac_oggbs__goto_next_page(drflac_oggbs* oggbs, drflac_ogg_crc_mismatch_recovery recoveryMethod) +{ + drflac_ogg_page_header header; + for (;;) { + drflac_uint32 crc32 = 0; + drflac_uint32 bytesRead; + drflac_uint32 pageBodySize; +#ifndef DR_FLAC_NO_CRC + drflac_uint32 actualCRC32; +#endif + + if (drflac_ogg__read_page_header(oggbs->onRead, oggbs->pUserData, &header, &bytesRead, &crc32) != DRFLAC_SUCCESS) { + return DRFLAC_FALSE; + } + oggbs->currentBytePos += bytesRead; + + pageBodySize = drflac_ogg__get_page_body_size(&header); + if (pageBodySize > DRFLAC_OGG_MAX_PAGE_SIZE) { + continue; /* Invalid page size. Assume it's corrupted and just move to the next page. */ + } + + if (header.serialNumber != oggbs->serialNumber) { + /* It's not a FLAC page. Skip it. */ + if (pageBodySize > 0 && !drflac_oggbs__seek_physical(oggbs, pageBodySize, drflac_seek_origin_current)) { + return DRFLAC_FALSE; + } + continue; + } + + + /* We need to read the entire page and then do a CRC check on it. If there's a CRC mismatch we need to skip this page. */ + if (drflac_oggbs__read_physical(oggbs, oggbs->pageData, pageBodySize) != pageBodySize) { + return DRFLAC_FALSE; + } + oggbs->pageDataSize = pageBodySize; + +#ifndef DR_FLAC_NO_CRC + actualCRC32 = drflac_crc32_buffer(crc32, oggbs->pageData, oggbs->pageDataSize); + if (actualCRC32 != header.checksum) { + if (recoveryMethod == drflac_ogg_recover_on_crc_mismatch) { + continue; /* CRC mismatch. Skip this page. */ + } else { + /* + Even though we are failing on a CRC mismatch, we still want our stream to be in a good state. Therefore we + go to the next valid page to ensure we're in a good state, but return false to let the caller know that the + seek did not fully complete. 
+ */ + drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch); + return DRFLAC_FALSE; + } + } +#else + (void)recoveryMethod; /* <-- Silence a warning. */ +#endif + + oggbs->currentPageHeader = header; + oggbs->bytesRemainingInPage = pageBodySize; + return DRFLAC_TRUE; + } +} + +/* Function below is unused at the moment, but I might be re-adding it later. */ +#if 0 +static drflac_uint8 drflac_oggbs__get_current_segment_index(drflac_oggbs* oggbs, drflac_uint8* pBytesRemainingInSeg) +{ + drflac_uint32 bytesConsumedInPage = drflac_ogg__get_page_body_size(&oggbs->currentPageHeader) - oggbs->bytesRemainingInPage; + drflac_uint8 iSeg = 0; + drflac_uint32 iByte = 0; + while (iByte < bytesConsumedInPage) { + drflac_uint8 segmentSize = oggbs->currentPageHeader.segmentTable[iSeg]; + if (iByte + segmentSize > bytesConsumedInPage) { + break; + } else { + iSeg += 1; + iByte += segmentSize; + } + } + + *pBytesRemainingInSeg = oggbs->currentPageHeader.segmentTable[iSeg] - (drflac_uint8)(bytesConsumedInPage - iByte); + return iSeg; +} + +static drflac_bool32 drflac_oggbs__seek_to_next_packet(drflac_oggbs* oggbs) +{ + /* The current packet ends when we get to the segment with a lacing value of < 255 which is not at the end of a page. */ + for (;;) { + drflac_bool32 atEndOfPage = DRFLAC_FALSE; + + drflac_uint8 bytesRemainingInSeg; + drflac_uint8 iFirstSeg = drflac_oggbs__get_current_segment_index(oggbs, &bytesRemainingInSeg); + + drflac_uint32 bytesToEndOfPacketOrPage = bytesRemainingInSeg; + for (drflac_uint8 iSeg = iFirstSeg; iSeg < oggbs->currentPageHeader.segmentCount; ++iSeg) { + drflac_uint8 segmentSize = oggbs->currentPageHeader.segmentTable[iSeg]; + if (segmentSize < 255) { + if (iSeg == oggbs->currentPageHeader.segmentCount-1) { + atEndOfPage = DRFLAC_TRUE; + } + + break; + } + + bytesToEndOfPacketOrPage += segmentSize; + } + + /* + At this point we will have found either the packet or the end of the page. 
If were at the end of the page we'll + want to load the next page and keep searching for the end of the packet. + */ + drflac_oggbs__seek_physical(oggbs, bytesToEndOfPacketOrPage, drflac_seek_origin_current); + oggbs->bytesRemainingInPage -= bytesToEndOfPacketOrPage; + + if (atEndOfPage) { + /* + We're potentially at the next packet, but we need to check the next page first to be sure because the packet may + straddle pages. + */ + if (!drflac_oggbs__goto_next_page(oggbs)) { + return DRFLAC_FALSE; + } + + /* If it's a fresh packet it most likely means we're at the next packet. */ + if ((oggbs->currentPageHeader.headerType & 0x01) == 0) { + return DRFLAC_TRUE; + } + } else { + /* We're at the next packet. */ + return DRFLAC_TRUE; + } + } +} + +static drflac_bool32 drflac_oggbs__seek_to_next_frame(drflac_oggbs* oggbs) +{ + /* The bitstream should be sitting on the first byte just after the header of the frame. */ + + /* What we're actually doing here is seeking to the start of the next packet. */ + return drflac_oggbs__seek_to_next_packet(oggbs); +} +#endif + +static size_t drflac__on_read_ogg(void* pUserData, void* bufferOut, size_t bytesToRead) +{ + drflac_oggbs* oggbs = (drflac_oggbs*)pUserData; + drflac_uint8* pRunningBufferOut = (drflac_uint8*)bufferOut; + size_t bytesRead = 0; + + DRFLAC_ASSERT(oggbs != NULL); + DRFLAC_ASSERT(pRunningBufferOut != NULL); + + /* Reading is done page-by-page. If we've run out of bytes in the page we need to move to the next one. */ + while (bytesRead < bytesToRead) { + size_t bytesRemainingToRead = bytesToRead - bytesRead; + + if (oggbs->bytesRemainingInPage >= bytesRemainingToRead) { + DRFLAC_COPY_MEMORY(pRunningBufferOut, oggbs->pageData + (oggbs->pageDataSize - oggbs->bytesRemainingInPage), bytesRemainingToRead); + bytesRead += bytesRemainingToRead; + oggbs->bytesRemainingInPage -= (drflac_uint32)bytesRemainingToRead; + break; + } + + /* If we get here it means some of the requested data is contained in the next pages. 
*/ + if (oggbs->bytesRemainingInPage > 0) { + DRFLAC_COPY_MEMORY(pRunningBufferOut, oggbs->pageData + (oggbs->pageDataSize - oggbs->bytesRemainingInPage), oggbs->bytesRemainingInPage); + bytesRead += oggbs->bytesRemainingInPage; + pRunningBufferOut += oggbs->bytesRemainingInPage; + oggbs->bytesRemainingInPage = 0; + } + + DRFLAC_ASSERT(bytesRemainingToRead > 0); + if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch)) { + break; /* Failed to go to the next page. Might have simply hit the end of the stream. */ + } + } + + return bytesRead; +} + +static drflac_bool32 drflac__on_seek_ogg(void* pUserData, int offset, drflac_seek_origin origin) +{ + drflac_oggbs* oggbs = (drflac_oggbs*)pUserData; + int bytesSeeked = 0; + + DRFLAC_ASSERT(oggbs != NULL); + DRFLAC_ASSERT(offset >= 0); /* <-- Never seek backwards. */ + + /* Seeking is always forward which makes things a lot simpler. */ + if (origin == drflac_seek_origin_start) { + if (!drflac_oggbs__seek_physical(oggbs, (int)oggbs->firstBytePos, drflac_seek_origin_start)) { + return DRFLAC_FALSE; + } + + if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_fail_on_crc_mismatch)) { + return DRFLAC_FALSE; + } + + return drflac__on_seek_ogg(pUserData, offset, drflac_seek_origin_current); + } + + DRFLAC_ASSERT(origin == drflac_seek_origin_current); + + while (bytesSeeked < offset) { + int bytesRemainingToSeek = offset - bytesSeeked; + DRFLAC_ASSERT(bytesRemainingToSeek >= 0); + + if (oggbs->bytesRemainingInPage >= (size_t)bytesRemainingToSeek) { + bytesSeeked += bytesRemainingToSeek; + (void)bytesSeeked; /* <-- Silence a dead store warning emitted by Clang Static Analyzer. */ + oggbs->bytesRemainingInPage -= bytesRemainingToSeek; + break; + } + + /* If we get here it means some of the requested data is contained in the next pages. 
*/ + if (oggbs->bytesRemainingInPage > 0) { + bytesSeeked += (int)oggbs->bytesRemainingInPage; + oggbs->bytesRemainingInPage = 0; + } + + DRFLAC_ASSERT(bytesRemainingToSeek > 0); + if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_fail_on_crc_mismatch)) { + /* Failed to go to the next page. We either hit the end of the stream or had a CRC mismatch. */ + return DRFLAC_FALSE; + } + } + + return DRFLAC_TRUE; +} + + +drflac_bool32 drflac_ogg__seek_to_pcm_frame(drflac* pFlac, drflac_uint64 pcmFrameIndex) +{ + drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs; + drflac_uint64 originalBytePos; + drflac_uint64 runningGranulePosition; + drflac_uint64 runningFrameBytePos; + drflac_uint64 runningPCMFrameCount; + + DRFLAC_ASSERT(oggbs != NULL); + + originalBytePos = oggbs->currentBytePos; /* For recovery. Points to the OggS identifier. */ + + /* First seek to the first frame. */ + if (!drflac__seek_to_byte(&pFlac->bs, pFlac->firstFLACFramePosInBytes)) { + return DRFLAC_FALSE; + } + oggbs->bytesRemainingInPage = 0; + + runningGranulePosition = 0; + for (;;) { + if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch)) { + drflac_oggbs__seek_physical(oggbs, originalBytePos, drflac_seek_origin_start); + return DRFLAC_FALSE; /* Never did find that sample... */ + } + + runningFrameBytePos = oggbs->currentBytePos - drflac_ogg__get_page_header_size(&oggbs->currentPageHeader) - oggbs->pageDataSize; + if (oggbs->currentPageHeader.granulePosition >= pcmFrameIndex) { + break; /* The sample is somewhere in the previous page. */ + } + + /* + At this point we know the sample is not in the previous page. It could possibly be in this page. For simplicity we + disregard any pages that do not begin a fresh packet. + */ + if ((oggbs->currentPageHeader.headerType & 0x01) == 0) { /* <-- Is it a fresh page? 
*/ + if (oggbs->currentPageHeader.segmentTable[0] >= 2) { + drflac_uint8 firstBytesInPage[2]; + firstBytesInPage[0] = oggbs->pageData[0]; + firstBytesInPage[1] = oggbs->pageData[1]; + + if ((firstBytesInPage[0] == 0xFF) && (firstBytesInPage[1] & 0xFC) == 0xF8) { /* <-- Does the page begin with a frame's sync code? */ + runningGranulePosition = oggbs->currentPageHeader.granulePosition; + } + + continue; + } + } + } + + /* + We found the page that that is closest to the sample, so now we need to find it. The first thing to do is seek to the + start of that page. In the loop above we checked that it was a fresh page which means this page is also the start of + a new frame. This property means that after we've seeked to the page we can immediately start looping over frames until + we find the one containing the target sample. + */ + if (!drflac_oggbs__seek_physical(oggbs, runningFrameBytePos, drflac_seek_origin_start)) { + return DRFLAC_FALSE; + } + if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch)) { + return DRFLAC_FALSE; + } + + /* + At this point we'll be sitting on the first byte of the frame header of the first frame in the page. We just keep + looping over these frames until we find the one containing the sample we're after. + */ + runningPCMFrameCount = runningGranulePosition; + for (;;) { + /* + There are two ways to find the sample and seek past irrelevant frames: + 1) Use the native FLAC decoder. + 2) Use Ogg's framing system. + + Both of these options have their own pros and cons. Using the native FLAC decoder is slower because it needs to + do a full decode of the frame. Using Ogg's framing system is faster, but more complicated and involves some code + duplication for the decoding of frame headers. + + Another thing to consider is that using the Ogg framing system will perform direct seeking of the physical Ogg + bitstream. 
This is important to consider because it means we cannot read data from the drflac_bs object using the + standard drflac__*() APIs because that will read in extra data for its own internal caching which in turn breaks + the positioning of the read pointer of the physical Ogg bitstream. Therefore, anything that would normally be read + using the native FLAC decoding APIs, such as drflac__read_next_flac_frame_header(), need to be re-implemented so as to + avoid the use of the drflac_bs object. + + Considering these issues, I have decided to use the slower native FLAC decoding method for the following reasons: + 1) Seeking is already partially accelerated using Ogg's paging system in the code block above. + 2) Seeking in an Ogg encapsulated FLAC stream is probably quite uncommon. + 3) Simplicity. + */ + drflac_uint64 firstPCMFrameInFLACFrame = 0; + drflac_uint64 lastPCMFrameInFLACFrame = 0; + drflac_uint64 pcmFrameCountInThisFrame; + + if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) { + return DRFLAC_FALSE; + } + + drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &firstPCMFrameInFLACFrame, &lastPCMFrameInFLACFrame); + + pcmFrameCountInThisFrame = (lastPCMFrameInFLACFrame - firstPCMFrameInFLACFrame) + 1; + + /* If we are seeking to the end of the file and we've just hit it, we're done. */ + if (pcmFrameIndex == pFlac->totalPCMFrameCount && (runningPCMFrameCount + pcmFrameCountInThisFrame) == pFlac->totalPCMFrameCount) { + drflac_result result = drflac__decode_flac_frame(pFlac); + if (result == DRFLAC_SUCCESS) { + pFlac->currentPCMFrame = pcmFrameIndex; + pFlac->currentFLACFrame.pcmFramesRemaining = 0; + return DRFLAC_TRUE; + } else { + return DRFLAC_FALSE; + } + } + + if (pcmFrameIndex < (runningPCMFrameCount + pcmFrameCountInThisFrame)) { + /* + The sample should be in this FLAC frame. 
We need to fully decode it, however if it's an invalid frame (a CRC mismatch), we need to pretend + it never existed and keep iterating. + */ + drflac_result result = drflac__decode_flac_frame(pFlac); + if (result == DRFLAC_SUCCESS) { + /* The frame is valid. We just need to skip over some samples to ensure it's sample-exact. */ + drflac_uint64 pcmFramesToDecode = (size_t)(pcmFrameIndex - runningPCMFrameCount); /* <-- Safe cast because the maximum number of samples in a frame is 65535. */ + if (pcmFramesToDecode == 0) { + return DRFLAC_TRUE; + } + + pFlac->currentPCMFrame = runningPCMFrameCount; + + return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode; /* <-- If this fails, something bad has happened (it should never fail). */ + } else { + if (result == DRFLAC_CRC_MISMATCH) { + continue; /* CRC mismatch. Pretend this frame never existed. */ + } else { + return DRFLAC_FALSE; + } + } + } else { + /* + It's not in this frame. We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this + frame never existed and leave the running sample count untouched. + */ + drflac_result result = drflac__seek_to_next_flac_frame(pFlac); + if (result == DRFLAC_SUCCESS) { + runningPCMFrameCount += pcmFrameCountInThisFrame; + } else { + if (result == DRFLAC_CRC_MISMATCH) { + continue; /* CRC mismatch. Pretend this frame never existed. */ + } else { + return DRFLAC_FALSE; + } + } + } + } +} + + + +drflac_bool32 drflac__init_private__ogg(drflac_init_info* pInit, drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_bool32 relaxed) +{ + drflac_ogg_page_header header; + drflac_uint32 crc32 = DRFLAC_OGG_CAPTURE_PATTERN_CRC32; + drflac_uint32 bytesRead = 0; + + /* Pre Condition: The bit stream should be sitting just past the 4-byte OggS capture pattern. 
*/ + (void)relaxed; + + pInit->container = drflac_container_ogg; + pInit->oggFirstBytePos = 0; + + /* + We'll get here if the first 4 bytes of the stream were the OggS capture pattern, however it doesn't necessarily mean the + stream includes FLAC encoded audio. To check for this we need to scan the beginning-of-stream page markers and check if + any match the FLAC specification. Important to keep in mind that the stream may be multiplexed. + */ + if (drflac_ogg__read_page_header_after_capture_pattern(onRead, pUserData, &header, &bytesRead, &crc32) != DRFLAC_SUCCESS) { + return DRFLAC_FALSE; + } + pInit->runningFilePos += bytesRead; + + for (;;) { + int pageBodySize; + + /* Break if we're past the beginning of stream page. */ + if ((header.headerType & 0x02) == 0) { + return DRFLAC_FALSE; + } + + /* Check if it's a FLAC header. */ + pageBodySize = drflac_ogg__get_page_body_size(&header); + if (pageBodySize == 51) { /* 51 = the lacing value of the FLAC header packet. */ + /* It could be a FLAC page... */ + drflac_uint32 bytesRemainingInPage = pageBodySize; + drflac_uint8 packetType; + + if (onRead(pUserData, &packetType, 1) != 1) { + return DRFLAC_FALSE; + } + + bytesRemainingInPage -= 1; + if (packetType == 0x7F) { + /* Increasingly more likely to be a FLAC page... */ + drflac_uint8 sig[4]; + if (onRead(pUserData, sig, 4) != 4) { + return DRFLAC_FALSE; + } + + bytesRemainingInPage -= 4; + if (sig[0] == 'F' && sig[1] == 'L' && sig[2] == 'A' && sig[3] == 'C') { + /* Almost certainly a FLAC page... */ + drflac_uint8 mappingVersion[2]; + if (onRead(pUserData, mappingVersion, 2) != 2) { + return DRFLAC_FALSE; + } + + if (mappingVersion[0] != 1) { + return DRFLAC_FALSE; /* Only supporting version 1.x of the Ogg mapping. */ + } + + /* + The next 2 bytes are the non-audio packets, not including this one. We don't care about this because we're going to + be handling it in a generic way based on the serial number and packet types. 
+ */ + if (!onSeek(pUserData, 2, drflac_seek_origin_current)) { + return DRFLAC_FALSE; + } + + /* Expecting the native FLAC signature "fLaC". */ + if (onRead(pUserData, sig, 4) != 4) { + return DRFLAC_FALSE; + } + + if (sig[0] == 'f' && sig[1] == 'L' && sig[2] == 'a' && sig[3] == 'C') { + /* The remaining data in the page should be the STREAMINFO block. */ + drflac_streaminfo streaminfo; + drflac_uint8 isLastBlock; + drflac_uint8 blockType; + drflac_uint32 blockSize; + if (!drflac__read_and_decode_block_header(onRead, pUserData, &isLastBlock, &blockType, &blockSize)) { + return DRFLAC_FALSE; + } + + if (blockType != DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO || blockSize != 34) { + return DRFLAC_FALSE; /* Invalid block type. First block must be the STREAMINFO block. */ + } + + if (drflac__read_streaminfo(onRead, pUserData, &streaminfo)) { + /* Success! */ + pInit->hasStreamInfoBlock = DRFLAC_TRUE; + pInit->sampleRate = streaminfo.sampleRate; + pInit->channels = streaminfo.channels; + pInit->bitsPerSample = streaminfo.bitsPerSample; + pInit->totalPCMFrameCount = streaminfo.totalPCMFrameCount; + pInit->maxBlockSizeInPCMFrames = streaminfo.maxBlockSizeInPCMFrames; + pInit->hasMetadataBlocks = !isLastBlock; + + if (onMeta) { + drflac_metadata metadata; + metadata.type = DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO; + metadata.pRawData = NULL; + metadata.rawDataSize = 0; + metadata.data.streaminfo = streaminfo; + onMeta(pUserDataMD, &metadata); + } + + pInit->runningFilePos += pageBodySize; + pInit->oggFirstBytePos = pInit->runningFilePos - 79; /* Subtracting 79 will place us right on top of the "OggS" identifier of the FLAC bos page. */ + pInit->oggSerial = header.serialNumber; + pInit->oggBosHeader = header; + break; + } else { + /* Failed to read STREAMINFO block. Aww, so close... */ + return DRFLAC_FALSE; + } + } else { + /* Invalid file. */ + return DRFLAC_FALSE; + } + } else { + /* Not a FLAC header. Skip it. 
*/ + if (!onSeek(pUserData, bytesRemainingInPage, drflac_seek_origin_current)) { + return DRFLAC_FALSE; + } + } + } else { + /* Not a FLAC header. Seek past the entire page and move on to the next. */ + if (!onSeek(pUserData, bytesRemainingInPage, drflac_seek_origin_current)) { + return DRFLAC_FALSE; + } + } + } else { + if (!onSeek(pUserData, pageBodySize, drflac_seek_origin_current)) { + return DRFLAC_FALSE; + } + } + + pInit->runningFilePos += pageBodySize; + + + /* Read the header of the next page. */ + if (drflac_ogg__read_page_header(onRead, pUserData, &header, &bytesRead, &crc32) != DRFLAC_SUCCESS) { + return DRFLAC_FALSE; + } + pInit->runningFilePos += bytesRead; + } + + /* + If we get here it means we found a FLAC audio stream. We should be sitting on the first byte of the header of the next page. The next + packets in the FLAC logical stream contain the metadata. The only thing left to do in the initialization phase for Ogg is to create the + Ogg bistream object. + */ + pInit->hasMetadataBlocks = DRFLAC_TRUE; /* <-- Always have at least VORBIS_COMMENT metadata block. */ + return DRFLAC_TRUE; +} +#endif + +drflac_bool32 drflac__init_private(drflac_init_info* pInit, drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, void* pUserDataMD) +{ + drflac_bool32 relaxed; + drflac_uint8 id[4]; + + if (pInit == NULL || onRead == NULL || onSeek == NULL) { + return DRFLAC_FALSE; + } + + DRFLAC_ZERO_MEMORY(pInit, sizeof(*pInit)); + pInit->onRead = onRead; + pInit->onSeek = onSeek; + pInit->onMeta = onMeta; + pInit->container = container; + pInit->pUserData = pUserData; + pInit->pUserDataMD = pUserDataMD; + + pInit->bs.onRead = onRead; + pInit->bs.onSeek = onSeek; + pInit->bs.pUserData = pUserData; + drflac__reset_cache(&pInit->bs); + + + /* If the container is explicitly defined then we can try opening in relaxed mode. */ + relaxed = container != drflac_container_unknown; + + /* Skip over any ID3 tags. 
*/ + for (;;) { + if (onRead(pUserData, id, 4) != 4) { + return DRFLAC_FALSE; /* Ran out of data. */ + } + pInit->runningFilePos += 4; + + if (id[0] == 'I' && id[1] == 'D' && id[2] == '3') { + drflac_uint8 header[6]; + drflac_uint8 flags; + drflac_uint32 headerSize; + + if (onRead(pUserData, header, 6) != 6) { + return DRFLAC_FALSE; /* Ran out of data. */ + } + pInit->runningFilePos += 6; + + flags = header[1]; + + DRFLAC_COPY_MEMORY(&headerSize, header+2, 4); + headerSize = drflac__unsynchsafe_32(drflac__be2host_32(headerSize)); + if (flags & 0x10) { + headerSize += 10; + } + + if (!onSeek(pUserData, headerSize, drflac_seek_origin_current)) { + return DRFLAC_FALSE; /* Failed to seek past the tag. */ + } + pInit->runningFilePos += headerSize; + } else { + break; + } + } + + if (id[0] == 'f' && id[1] == 'L' && id[2] == 'a' && id[3] == 'C') { + return drflac__init_private__native(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed); + } +#ifndef DR_FLAC_NO_OGG + if (id[0] == 'O' && id[1] == 'g' && id[2] == 'g' && id[3] == 'S') { + return drflac__init_private__ogg(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed); + } +#endif + + /* If we get here it means we likely don't have a header. Try opening in relaxed mode, if applicable. */ + if (relaxed) { + if (container == drflac_container_native) { + return drflac__init_private__native(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed); + } +#ifndef DR_FLAC_NO_OGG + if (container == drflac_container_ogg) { + return drflac__init_private__ogg(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed); + } +#endif + } + + /* Unsupported container. 
*/ + return DRFLAC_FALSE; +} + +void drflac__init_from_info(drflac* pFlac, drflac_init_info* pInit) +{ + DRFLAC_ASSERT(pFlac != NULL); + DRFLAC_ASSERT(pInit != NULL); + + DRFLAC_ZERO_MEMORY(pFlac, sizeof(*pFlac)); + pFlac->bs = pInit->bs; + pFlac->onMeta = pInit->onMeta; + pFlac->pUserDataMD = pInit->pUserDataMD; + pFlac->maxBlockSizeInPCMFrames = pInit->maxBlockSizeInPCMFrames; + pFlac->sampleRate = pInit->sampleRate; + pFlac->channels = (drflac_uint8)pInit->channels; + pFlac->bitsPerSample = (drflac_uint8)pInit->bitsPerSample; + pFlac->totalPCMFrameCount = pInit->totalPCMFrameCount; + pFlac->container = pInit->container; +} + + +drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, void* pUserDataMD, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + drflac_init_info init; + drflac_uint32 allocationSize; + drflac_uint32 wholeSIMDVectorCountPerChannel; + drflac_uint32 decodedSamplesAllocationSize; +#ifndef DR_FLAC_NO_OGG + drflac_oggbs oggbs; +#endif + drflac_uint64 firstFramePos; + drflac_uint64 seektablePos; + drflac_uint32 seektableSize; + drflac_allocation_callbacks allocationCallbacks; + drflac* pFlac; + + /* CPU support first. */ + drflac__init_cpu_caps(); + + if (!drflac__init_private(&init, onRead, onSeek, onMeta, container, pUserData, pUserDataMD)) { + return NULL; + } + + if (pAllocationCallbacks != NULL) { + allocationCallbacks = *pAllocationCallbacks; + if (allocationCallbacks.onFree == NULL || (allocationCallbacks.onMalloc == NULL && allocationCallbacks.onRealloc == NULL)) { + return NULL; /* Invalid allocation callbacks. 
*/ + } + } else { + allocationCallbacks.pUserData = NULL; + allocationCallbacks.onMalloc = drflac__malloc_default; + allocationCallbacks.onRealloc = drflac__realloc_default; + allocationCallbacks.onFree = drflac__free_default; + } + + + /* + The size of the allocation for the drflac object needs to be large enough to fit the following: + 1) The main members of the drflac structure + 2) A block of memory large enough to store the decoded samples of the largest frame in the stream + 3) If the container is Ogg, a drflac_oggbs object + + The complicated part of the allocation is making sure there's enough room the decoded samples, taking into consideration + the different SIMD instruction sets. + */ + allocationSize = sizeof(drflac); + + /* + The allocation size for decoded frames depends on the number of 32-bit integers that fit inside the largest SIMD vector + we are supporting. + */ + if ((init.maxBlockSizeInPCMFrames % (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32))) == 0) { + wholeSIMDVectorCountPerChannel = (init.maxBlockSizeInPCMFrames / (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32))); + } else { + wholeSIMDVectorCountPerChannel = (init.maxBlockSizeInPCMFrames / (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32))) + 1; + } + + decodedSamplesAllocationSize = wholeSIMDVectorCountPerChannel * DRFLAC_MAX_SIMD_VECTOR_SIZE * init.channels; + + allocationSize += decodedSamplesAllocationSize; + allocationSize += DRFLAC_MAX_SIMD_VECTOR_SIZE; /* Allocate extra bytes to ensure we have enough for alignment. */ + +#ifndef DR_FLAC_NO_OGG + /* There's additional data required for Ogg streams. 
*/ + if (init.container == drflac_container_ogg) { + allocationSize += sizeof(drflac_oggbs); + } + + DRFLAC_ZERO_MEMORY(&oggbs, sizeof(oggbs)); + if (init.container == drflac_container_ogg) { + oggbs.onRead = onRead; + oggbs.onSeek = onSeek; + oggbs.pUserData = pUserData; + oggbs.currentBytePos = init.oggFirstBytePos; + oggbs.firstBytePos = init.oggFirstBytePos; + oggbs.serialNumber = init.oggSerial; + oggbs.bosPageHeader = init.oggBosHeader; + oggbs.bytesRemainingInPage = 0; + } +#endif + + /* + This part is a bit awkward. We need to load the seektable so that it can be referenced in-memory, but I want the drflac object to + consist of only a single heap allocation. To this, the size of the seek table needs to be known, which we determine when reading + and decoding the metadata. + */ + firstFramePos = 42; /* <-- We know we are at byte 42 at this point. */ + seektablePos = 0; + seektableSize = 0; + if (init.hasMetadataBlocks) { + drflac_read_proc onReadOverride = onRead; + drflac_seek_proc onSeekOverride = onSeek; + void* pUserDataOverride = pUserData; + +#ifndef DR_FLAC_NO_OGG + if (init.container == drflac_container_ogg) { + onReadOverride = drflac__on_read_ogg; + onSeekOverride = drflac__on_seek_ogg; + pUserDataOverride = (void*)&oggbs; + } +#endif + + if (!drflac__read_and_decode_metadata(onReadOverride, onSeekOverride, onMeta, pUserDataOverride, pUserDataMD, &firstFramePos, &seektablePos, &seektableSize, &allocationCallbacks)) { + return NULL; + } + + allocationSize += seektableSize; + } + + + pFlac = (drflac*)drflac__malloc_from_callbacks(allocationSize, &allocationCallbacks); + drflac__init_from_info(pFlac, &init); + pFlac->allocationCallbacks = allocationCallbacks; + pFlac->pDecodedSamples = (drflac_int32*)drflac_align((size_t)pFlac->pExtraData, DRFLAC_MAX_SIMD_VECTOR_SIZE); + +#ifndef DR_FLAC_NO_OGG + if (init.container == drflac_container_ogg) { + drflac_oggbs* pInternalOggbs = (drflac_oggbs*)((drflac_uint8*)pFlac->pDecodedSamples + 
decodedSamplesAllocationSize + seektableSize); + *pInternalOggbs = oggbs; + + /* The Ogg bistream needs to be layered on top of the original bitstream. */ + pFlac->bs.onRead = drflac__on_read_ogg; + pFlac->bs.onSeek = drflac__on_seek_ogg; + pFlac->bs.pUserData = (void*)pInternalOggbs; + pFlac->_oggbs = (void*)pInternalOggbs; + } +#endif + + pFlac->firstFLACFramePosInBytes = firstFramePos; + + /* NOTE: Seektables are not currently compatible with Ogg encapsulation (Ogg has its own accelerated seeking system). I may change this later, so I'm leaving this here for now. */ +#ifndef DR_FLAC_NO_OGG + if (init.container == drflac_container_ogg) + { + pFlac->pSeekpoints = NULL; + pFlac->seekpointCount = 0; + } + else +#endif + { + /* If we have a seektable we need to load it now, making sure we move back to where we were previously. */ + if (seektablePos != 0) { + pFlac->seekpointCount = seektableSize / sizeof(*pFlac->pSeekpoints); + pFlac->pSeekpoints = (drflac_seekpoint*)((drflac_uint8*)pFlac->pDecodedSamples + decodedSamplesAllocationSize); + + /* Seek to the seektable, then just read directly into our seektable buffer. */ + if (pFlac->bs.onSeek(pFlac->bs.pUserData, (int)seektablePos, drflac_seek_origin_start)) { + if (pFlac->bs.onRead(pFlac->bs.pUserData, pFlac->pSeekpoints, seektableSize) == seektableSize) { + /* Endian swap. */ + drflac_uint32 iSeekpoint; + for (iSeekpoint = 0; iSeekpoint < pFlac->seekpointCount; ++iSeekpoint) { + pFlac->pSeekpoints[iSeekpoint].firstPCMFrame = drflac__be2host_64(pFlac->pSeekpoints[iSeekpoint].firstPCMFrame); + pFlac->pSeekpoints[iSeekpoint].flacFrameOffset = drflac__be2host_64(pFlac->pSeekpoints[iSeekpoint].flacFrameOffset); + pFlac->pSeekpoints[iSeekpoint].pcmFrameCount = drflac__be2host_16(pFlac->pSeekpoints[iSeekpoint].pcmFrameCount); + } + } else { + /* Failed to read the seektable. Pretend we don't have one. */ + pFlac->pSeekpoints = NULL; + pFlac->seekpointCount = 0; + } + + /* We need to seek back to where we were. 
If this fails it's a critical error. */ + if (!pFlac->bs.onSeek(pFlac->bs.pUserData, (int)pFlac->firstFLACFramePosInBytes, drflac_seek_origin_start)) { + drflac__free_from_callbacks(pFlac, &allocationCallbacks); + return NULL; + } + } else { + /* Failed to seek to the seektable. Ominous sign, but for now we can just pretend we don't have one. */ + pFlac->pSeekpoints = NULL; + pFlac->seekpointCount = 0; + } + } + } + + + /* + If we get here, but don't have a STREAMINFO block, it means we've opened the stream in relaxed mode and need to decode + the first frame. + */ + if (!init.hasStreamInfoBlock) { + pFlac->currentFLACFrame.header = init.firstFrameHeader; + do + { + drflac_result result = drflac__decode_flac_frame(pFlac); + if (result == DRFLAC_SUCCESS) { + break; + } else { + if (result == DRFLAC_CRC_MISMATCH) { + if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) { + drflac__free_from_callbacks(pFlac, &allocationCallbacks); + return NULL; + } + continue; + } else { + drflac__free_from_callbacks(pFlac, &allocationCallbacks); + return NULL; + } + } + } while (1); + } + + return pFlac; +} + + + +#ifndef DR_FLAC_NO_STDIO +#include <stdio.h> + +static size_t drflac__on_read_stdio(void* pUserData, void* bufferOut, size_t bytesToRead) +{ + return fread(bufferOut, 1, bytesToRead, (FILE*)pUserData); +} + +static drflac_bool32 drflac__on_seek_stdio(void* pUserData, int offset, drflac_seek_origin origin) +{ + DRFLAC_ASSERT(offset >= 0); /* <-- Never seek backwards. */ + + return fseek((FILE*)pUserData, offset, (origin == drflac_seek_origin_current) ? 
SEEK_CUR : SEEK_SET) == 0; +} + +static FILE* drflac__fopen(const char* filename) +{ + FILE* pFile; +#if defined(_MSC_VER) && _MSC_VER >= 1400 + if (fopen_s(&pFile, filename, "rb") != 0) { + return NULL; + } +#else + pFile = fopen(filename, "rb"); + if (pFile == NULL) { + return NULL; + } +#endif + + return pFile; +} + + +drflac* drflac_open_file(const char* filename, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + drflac* pFlac; + FILE* pFile; + + pFile = drflac__fopen(filename); + if (pFile == NULL) { + return NULL; + } + + pFlac = drflac_open(drflac__on_read_stdio, drflac__on_seek_stdio, (void*)pFile, pAllocationCallbacks); + if (pFlac == NULL) { + fclose(pFile); + return NULL; + } + + return pFlac; +} + +drflac* drflac_open_file_with_metadata(const char* filename, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + drflac* pFlac; + FILE* pFile; + + pFile = drflac__fopen(filename); + if (pFile == NULL) { + return NULL; + } + + pFlac = drflac_open_with_metadata_private(drflac__on_read_stdio, drflac__on_seek_stdio, onMeta, drflac_container_unknown, (void*)pFile, pUserData, pAllocationCallbacks); + if (pFlac == NULL) { + fclose(pFile); + return pFlac; + } + + return pFlac; +} +#endif /* DR_FLAC_NO_STDIO */ + +static size_t drflac__on_read_memory(void* pUserData, void* bufferOut, size_t bytesToRead) +{ + drflac__memory_stream* memoryStream = (drflac__memory_stream*)pUserData; + size_t bytesRemaining; + + DRFLAC_ASSERT(memoryStream != NULL); + DRFLAC_ASSERT(memoryStream->dataSize >= memoryStream->currentReadPos); + + bytesRemaining = memoryStream->dataSize - memoryStream->currentReadPos; + if (bytesToRead > bytesRemaining) { + bytesToRead = bytesRemaining; + } + + if (bytesToRead > 0) { + DRFLAC_COPY_MEMORY(bufferOut, memoryStream->data + memoryStream->currentReadPos, bytesToRead); + memoryStream->currentReadPos += bytesToRead; + } + + return bytesToRead; +} + +static drflac_bool32 
drflac__on_seek_memory(void* pUserData, int offset, drflac_seek_origin origin) +{ + drflac__memory_stream* memoryStream = (drflac__memory_stream*)pUserData; + + DRFLAC_ASSERT(memoryStream != NULL); + DRFLAC_ASSERT(offset >= 0); /* <-- Never seek backwards. */ + + if (offset > (drflac_int64)memoryStream->dataSize) { + return DRFLAC_FALSE; + } + + if (origin == drflac_seek_origin_current) { + if (memoryStream->currentReadPos + offset <= memoryStream->dataSize) { + memoryStream->currentReadPos += offset; + } else { + return DRFLAC_FALSE; /* Trying to seek too far forward. */ + } + } else { + if ((drflac_uint32)offset <= memoryStream->dataSize) { + memoryStream->currentReadPos = offset; + } else { + return DRFLAC_FALSE; /* Trying to seek too far forward. */ + } + } + + return DRFLAC_TRUE; +} + +drflac* drflac_open_memory(const void* data, size_t dataSize, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + drflac__memory_stream memoryStream; + drflac* pFlac; + + memoryStream.data = (const unsigned char*)data; + memoryStream.dataSize = dataSize; + memoryStream.currentReadPos = 0; + pFlac = drflac_open(drflac__on_read_memory, drflac__on_seek_memory, &memoryStream, pAllocationCallbacks); + if (pFlac == NULL) { + return NULL; + } + + pFlac->memoryStream = memoryStream; + + /* This is an awful hack... 
*/ +#ifndef DR_FLAC_NO_OGG + if (pFlac->container == drflac_container_ogg) + { + drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs; + oggbs->pUserData = &pFlac->memoryStream; + } + else +#endif + { + pFlac->bs.pUserData = &pFlac->memoryStream; + } + + return pFlac; +} + +drflac* drflac_open_memory_with_metadata(const void* data, size_t dataSize, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + drflac__memory_stream memoryStream; + drflac* pFlac; + + memoryStream.data = (const unsigned char*)data; + memoryStream.dataSize = dataSize; + memoryStream.currentReadPos = 0; + pFlac = drflac_open_with_metadata_private(drflac__on_read_memory, drflac__on_seek_memory, onMeta, drflac_container_unknown, &memoryStream, pUserData, pAllocationCallbacks); + if (pFlac == NULL) { + return NULL; + } + + pFlac->memoryStream = memoryStream; + + /* This is an awful hack... */ +#ifndef DR_FLAC_NO_OGG + if (pFlac->container == drflac_container_ogg) + { + drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs; + oggbs->pUserData = &pFlac->memoryStream; + } + else +#endif + { + pFlac->bs.pUserData = &pFlac->memoryStream; + } + + return pFlac; +} + + + +drflac* drflac_open(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + return drflac_open_with_metadata_private(onRead, onSeek, NULL, drflac_container_unknown, pUserData, pUserData, pAllocationCallbacks); +} +drflac* drflac_open_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + return drflac_open_with_metadata_private(onRead, onSeek, NULL, container, pUserData, pUserData, pAllocationCallbacks); +} + +drflac* drflac_open_with_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + return 
drflac_open_with_metadata_private(onRead, onSeek, onMeta, drflac_container_unknown, pUserData, pUserData, pAllocationCallbacks); +} +drflac* drflac_open_with_metadata_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + return drflac_open_with_metadata_private(onRead, onSeek, onMeta, container, pUserData, pUserData, pAllocationCallbacks); +} + +void drflac_close(drflac* pFlac) +{ + if (pFlac == NULL) { + return; + } + +#ifndef DR_FLAC_NO_STDIO + /* + If we opened the file with drflac_open_file() we will want to close the file handle. We can know whether or not drflac_open_file() + was used by looking at the callbacks. + */ + if (pFlac->bs.onRead == drflac__on_read_stdio) { + fclose((FILE*)pFlac->bs.pUserData); + } + +#ifndef DR_FLAC_NO_OGG + /* Need to clean up Ogg streams a bit differently due to the way the bit streaming is chained. */ + if (pFlac->container == drflac_container_ogg) { + drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs; + DRFLAC_ASSERT(pFlac->bs.onRead == drflac__on_read_ogg); + + if (oggbs->onRead == drflac__on_read_stdio) { + fclose((FILE*)oggbs->pUserData); + } + } +#endif +#endif + + drflac__free_from_callbacks(pFlac, &pFlac->allocationCallbacks); +} + + +#if 0 +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ + drflac_uint64 i; + for (i = 0; i < frameCount; ++i) { + drflac_int32 left = pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + drflac_int32 side = pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); + drflac_int32 right = left - side; + + pOutputSamples[i*2+0] = left; + 
pOutputSamples[i*2+1] = right; + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + + drflac_int32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_int32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + for (i = 0; i < frameCount4; ++i) { + drflac_int32 left0 = pInputSamples0[i*4+0] << shift0; + drflac_int32 left1 = pInputSamples0[i*4+1] << shift0; + drflac_int32 left2 = pInputSamples0[i*4+2] << shift0; + drflac_int32 left3 = pInputSamples0[i*4+3] << shift0; + + drflac_int32 side0 = pInputSamples1[i*4+0] << shift1; + drflac_int32 side1 = pInputSamples1[i*4+1] << shift1; + drflac_int32 side2 = pInputSamples1[i*4+2] << shift1; + drflac_int32 side3 = pInputSamples1[i*4+3] << shift1; + + drflac_int32 right0 = left0 - side0; + drflac_int32 right1 = left1 - side1; + drflac_int32 right2 = left2 - side2; + drflac_int32 right3 = left3 - side3; + + pOutputSamples[i*8+0] = left0; + pOutputSamples[i*8+1] = right0; + pOutputSamples[i*8+2] = left1; + pOutputSamples[i*8+3] = right1; + pOutputSamples[i*8+4] = left2; + pOutputSamples[i*8+5] = right2; + pOutputSamples[i*8+6] = left3; + pOutputSamples[i*8+7] = right3; + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + int left = pInputSamples0[i] << shift0; + int side = pInputSamples1[i] << shift1; + int right = left - side; + + pOutputSamples[i*2+0] = left; + pOutputSamples[i*2+1] = right; + } +} + +#if defined(DRFLAC_SUPPORT_SSE2) +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* 
pInputSamples1, drflac_int32* pOutputSamples) +{ + drflac_uint64 frameCount4; + drflac_int32 shift0; + drflac_int32 shift1; + drflac_uint64 i; + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); + + frameCount4 = frameCount >> 2; + + shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); + + for (i = 0; i < frameCount4; ++i) { + __m128i left = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0); + __m128i side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1); + __m128i right = _mm_sub_epi32(left, side); + + _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right)); + _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_int32 left = pInputSamples0[i] << shift0; + drflac_int32 side = pInputSamples1[i] << shift1; + drflac_int32 right = left - side; + + pOutputSamples[i*2+0] = left; + pOutputSamples[i*2+1] = right; + } +} +#endif + +#if defined(DRFLAC_SUPPORT_NEON) +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ + drflac_uint64 frameCount4; + drflac_int32 shift0; + drflac_int32 shift1; + drflac_uint64 i; + int32x4_t shift0_4; + int32x4_t shift1_4; + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); + + frameCount4 = frameCount >> 2; + + shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); + + shift0_4 = vdupq_n_s32(shift0); + shift1_4 = vdupq_n_s32(shift1); + + for (i = 0; i < frameCount4; ++i) { + int32x4_t left; + int32x4_t side; + 
int32x4_t right; + + left = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), shift0_4); + side = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), shift1_4); + right = vsubq_s32(left, side); + + drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_int32 left = pInputSamples0[i] << shift0; + drflac_int32 side = pInputSamples1[i] << shift1; + drflac_int32 right = left - side; + + pOutputSamples[i*2+0] = left; + pOutputSamples[i*2+1] = right; + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ +#if defined(DRFLAC_SUPPORT_SSE2) + if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s32__decode_left_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#elif defined(DRFLAC_SUPPORT_NEON) + if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s32__decode_left_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#endif + { + /* Scalar fallback. 
*/ +#if 0 + drflac_read_pcm_frames_s32__decode_left_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#else + drflac_read_pcm_frames_s32__decode_left_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#endif + } +} + + +#if 0 +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ + drflac_uint64 i; + for (i = 0; i < frameCount; ++i) { + drflac_int32 side = pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + drflac_int32 right = pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); + drflac_int32 left = right + side; + + pOutputSamples[i*2+0] = left; + pOutputSamples[i*2+1] = right; + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + + drflac_int32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_int32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + for (i = 0; i < frameCount4; ++i) { + drflac_int32 side0 = pInputSamples0[i*4+0] << shift0; + drflac_int32 side1 = pInputSamples0[i*4+1] << shift0; + drflac_int32 side2 = pInputSamples0[i*4+2] << shift0; + drflac_int32 side3 = pInputSamples0[i*4+3] << shift0; + + drflac_int32 right0 = pInputSamples1[i*4+0] << shift1; + drflac_int32 right1 = pInputSamples1[i*4+1] << shift1; + drflac_int32 right2 = pInputSamples1[i*4+2] << shift1; + 
drflac_int32 right3 = pInputSamples1[i*4+3] << shift1; + + drflac_int32 left0 = right0 + side0; + drflac_int32 left1 = right1 + side1; + drflac_int32 left2 = right2 + side2; + drflac_int32 left3 = right3 + side3; + + pOutputSamples[i*8+0] = left0; + pOutputSamples[i*8+1] = right0; + pOutputSamples[i*8+2] = left1; + pOutputSamples[i*8+3] = right1; + pOutputSamples[i*8+4] = left2; + pOutputSamples[i*8+5] = right2; + pOutputSamples[i*8+6] = left3; + pOutputSamples[i*8+7] = right3; + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_int32 side = pInputSamples0[i] << shift0; + drflac_int32 right = pInputSamples1[i] << shift1; + drflac_int32 left = right + side; + + pOutputSamples[i*2+0] = left; + pOutputSamples[i*2+1] = right; + } +} + +#if defined(DRFLAC_SUPPORT_SSE2) +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ + drflac_uint64 frameCount4; + drflac_int32 shift0; + drflac_int32 shift1; + drflac_uint64 i; + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); + + frameCount4 = frameCount >> 2; + + shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); + + for (i = 0; i < frameCount4; ++i) { + __m128i side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0); + __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1); + __m128i left = _mm_add_epi32(right, side); + + _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right)); + _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_int32 side = pInputSamples0[i] << shift0; + drflac_int32 
right = pInputSamples1[i] << shift1; + drflac_int32 left = right + side; + + pOutputSamples[i*2+0] = left; + pOutputSamples[i*2+1] = right; + } +} +#endif - pStreamInfo->minBlockSize = (blockSizes & 0xFFFF0000) >> 16; - pStreamInfo->maxBlockSize = blockSizes & 0x0000FFFF; - pStreamInfo->minFrameSize = (drflac_uint32)((frameSizes & (drflac_uint64)0xFFFFFF0000000000) >> 40); - pStreamInfo->maxFrameSize = (drflac_uint32)((frameSizes & (drflac_uint64)0x000000FFFFFF0000) >> 16); - pStreamInfo->sampleRate = (drflac_uint32)((importantProps & (drflac_uint64)0xFFFFF00000000000) >> 44); - pStreamInfo->channels = (drflac_uint8 )((importantProps & (drflac_uint64)0x00000E0000000000) >> 41) + 1; - pStreamInfo->bitsPerSample = (drflac_uint8 )((importantProps & (drflac_uint64)0x000001F000000000) >> 36) + 1; - pStreamInfo->totalSampleCount = (importantProps & (drflac_uint64)0x0000000FFFFFFFFF) * pStreamInfo->channels; - drflac_copy_memory(pStreamInfo->md5, md5, sizeof(md5)); +#if defined(DRFLAC_SUPPORT_NEON) +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ + drflac_uint64 frameCount4; + drflac_int32 shift0; + drflac_int32 shift1; + drflac_uint64 i; + int32x4_t shift0_4; + int32x4_t shift1_4; - return DRFLAC_TRUE; + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); + + frameCount4 = frameCount >> 2; + + shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); + + shift0_4 = vdupq_n_s32(shift0); + shift1_4 = vdupq_n_s32(shift1); + + for (i = 0; i < frameCount4; ++i) { + int32x4_t side; + int32x4_t right; + int32x4_t left; + + side = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), shift0_4); + right = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), shift1_4); + left = 
vaddq_s32(right, side); + + drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_int32 side = pInputSamples0[i] << shift0; + drflac_int32 right = pInputSamples1[i] << shift1; + drflac_int32 left = right + side; + + pOutputSamples[i*2+0] = left; + pOutputSamples[i*2+1] = right; + } } +#endif -drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_uint64* pFirstFramePos, drflac_uint64* pSeektablePos, drflac_uint32* pSeektableSize) +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) { - // We want to keep track of the byte position in the stream of the seektable. At the time of calling this function we know that - // we'll be sitting on byte 42. - drflac_uint64 runningFilePos = 42; - drflac_uint64 seektablePos = 0; - drflac_uint32 seektableSize = 0; +#if defined(DRFLAC_SUPPORT_SSE2) + if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s32__decode_right_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#elif defined(DRFLAC_SUPPORT_NEON) + if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s32__decode_right_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#endif + { + /* Scalar fallback. 
*/ +#if 0 + drflac_read_pcm_frames_s32__decode_right_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#else + drflac_read_pcm_frames_s32__decode_right_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#endif + } +} - for (;;) { - drflac_uint8 isLastBlock = 0; - drflac_uint8 blockType; - drflac_uint32 blockSize; - if (!drflac__read_and_decode_block_header(onRead, pUserData, &isLastBlock, &blockType, &blockSize)) { - return DRFLAC_FALSE; + +#if 0 +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ + for (drflac_uint64 i = 0; i < frameCount; ++i) { + int mid = pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + int side = pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid = (((drflac_uint32)mid) << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = ((mid + side) >> 1) << unusedBitsPerSample; + pOutputSamples[i*2+1] = ((mid - side) >> 1) << unusedBitsPerSample; + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + + drflac_int32 shift = unusedBitsPerSample; + if (shift > 0) { + shift -= 1; + for (i = 0; i < frameCount4; ++i) { + drflac_int32 temp0L; + drflac_int32 temp1L; + drflac_int32 temp2L; + drflac_int32 temp3L; + drflac_int32 temp0R; + drflac_int32 temp1R; + drflac_int32 temp2R; + drflac_int32 temp3R; + + drflac_int32 mid0 = pInputSamples0[i*4+0] << 
pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_int32 mid1 = pInputSamples0[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_int32 mid2 = pInputSamples0[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_int32 mid3 = pInputSamples0[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + + drflac_int32 side0 = pInputSamples1[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_int32 side1 = pInputSamples1[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_int32 side2 = pInputSamples1[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_int32 side3 = pInputSamples1[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid0 = (((drflac_uint32)mid0) << 1) | (side0 & 0x01); + mid1 = (((drflac_uint32)mid1) << 1) | (side1 & 0x01); + mid2 = (((drflac_uint32)mid2) << 1) | (side2 & 0x01); + mid3 = (((drflac_uint32)mid3) << 1) | (side3 & 0x01); + + temp0L = ((mid0 + side0) << shift); + temp1L = ((mid1 + side1) << shift); + temp2L = ((mid2 + side2) << shift); + temp3L = ((mid3 + side3) << shift); + + temp0R = ((mid0 - side0) << shift); + temp1R = ((mid1 - side1) << shift); + temp2R = ((mid2 - side2) << shift); + temp3R = ((mid3 - side3) << shift); + + pOutputSamples[i*8+0] = temp0L; + pOutputSamples[i*8+1] = temp0R; + pOutputSamples[i*8+2] = temp1L; + pOutputSamples[i*8+3] = temp1R; + pOutputSamples[i*8+4] = temp2L; + pOutputSamples[i*8+5] = temp2R; + pOutputSamples[i*8+6] = temp3L; + pOutputSamples[i*8+7] = temp3R; } - runningFilePos += 4; + } else { + for (i = 0; i < frameCount4; ++i) { + drflac_int32 temp0L; + drflac_int32 temp1L; + drflac_int32 temp2L; + drflac_int32 temp3L; + drflac_int32 temp0R; + drflac_int32 temp1R; + drflac_int32 temp2R; + drflac_int32 temp3R; + drflac_int32 mid0 = pInputSamples0[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_int32 mid1 = 
pInputSamples0[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_int32 mid2 = pInputSamples0[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_int32 mid3 = pInputSamples0[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; - drflac_metadata metadata; - metadata.type = blockType; - metadata.pRawData = NULL; - metadata.rawDataSize = 0; + drflac_int32 side0 = pInputSamples1[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_int32 side1 = pInputSamples1[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_int32 side2 = pInputSamples1[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_int32 side3 = pInputSamples1[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; - switch (blockType) - { - case DRFLAC_METADATA_BLOCK_TYPE_APPLICATION: - { - if (blockSize < 4) { - return DRFLAC_FALSE; - } + mid0 = (((drflac_uint32)mid0) << 1) | (side0 & 0x01); + mid1 = (((drflac_uint32)mid1) << 1) | (side1 & 0x01); + mid2 = (((drflac_uint32)mid2) << 1) | (side2 & 0x01); + mid3 = (((drflac_uint32)mid3) << 1) | (side3 & 0x01); - if (onMeta) { - void* pRawData = DRFLAC_MALLOC(blockSize); - if (pRawData == NULL) { - return DRFLAC_FALSE; - } + temp0L = ((mid0 + side0) >> 1); + temp1L = ((mid1 + side1) >> 1); + temp2L = ((mid2 + side2) >> 1); + temp3L = ((mid3 + side3) >> 1); - if (onRead(pUserData, pRawData, blockSize) != blockSize) { - DRFLAC_FREE(pRawData); - return DRFLAC_FALSE; - } + temp0R = ((mid0 - side0) >> 1); + temp1R = ((mid1 - side1) >> 1); + temp2R = ((mid2 - side2) >> 1); + temp3R = ((mid3 - side3) >> 1); - metadata.pRawData = pRawData; - metadata.rawDataSize = blockSize; - metadata.data.application.id = drflac__be2host_32(*(drflac_uint32*)pRawData); - metadata.data.application.pData = (const void*)((drflac_uint8*)pRawData + sizeof(drflac_uint32)); - metadata.data.application.dataSize = blockSize - 
sizeof(drflac_uint32); - onMeta(pUserDataMD, &metadata); + pOutputSamples[i*8+0] = temp0L; + pOutputSamples[i*8+1] = temp0R; + pOutputSamples[i*8+2] = temp1L; + pOutputSamples[i*8+3] = temp1R; + pOutputSamples[i*8+4] = temp2L; + pOutputSamples[i*8+5] = temp2R; + pOutputSamples[i*8+6] = temp3L; + pOutputSamples[i*8+7] = temp3R; + } + } - DRFLAC_FREE(pRawData); - } - } break; + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_int32 mid = pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_int32 side = pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; - case DRFLAC_METADATA_BLOCK_TYPE_SEEKTABLE: - { - seektablePos = runningFilePos; - seektableSize = blockSize; + mid = (((drflac_uint32)mid) << 1) | (side & 0x01); - if (onMeta) { - void* pRawData = DRFLAC_MALLOC(blockSize); - if (pRawData == NULL) { - return DRFLAC_FALSE; - } + pOutputSamples[i*2+0] = ((mid + side) >> 1) << unusedBitsPerSample; + pOutputSamples[i*2+1] = ((mid - side) >> 1) << unusedBitsPerSample; + } +} - if (onRead(pUserData, pRawData, blockSize) != blockSize) { - DRFLAC_FREE(pRawData); - return DRFLAC_FALSE; - } +#if defined(DRFLAC_SUPPORT_SSE2) +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4; + int shift; - metadata.pRawData = pRawData; - metadata.rawDataSize = blockSize; - metadata.data.seektable.seekpointCount = blockSize/sizeof(drflac_seekpoint); - metadata.data.seektable.pSeekpoints = (const drflac_seekpoint*)pRawData; + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); - // Endian swap. 
- for (drflac_uint32 iSeekpoint = 0; iSeekpoint < metadata.data.seektable.seekpointCount; ++iSeekpoint) { - drflac_seekpoint* pSeekpoint = (drflac_seekpoint*)pRawData + iSeekpoint; - pSeekpoint->firstSample = drflac__be2host_64(pSeekpoint->firstSample); - pSeekpoint->frameOffset = drflac__be2host_64(pSeekpoint->frameOffset); - pSeekpoint->sampleCount = drflac__be2host_16(pSeekpoint->sampleCount); - } + frameCount4 = frameCount >> 2; - onMeta(pUserDataMD, &metadata); + shift = unusedBitsPerSample; + if (shift == 0) { + for (i = 0; i < frameCount4; ++i) { + __m128i mid; + __m128i side; + __m128i left; + __m128i right; - DRFLAC_FREE(pRawData); - } - } break; + mid = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); - case DRFLAC_METADATA_BLOCK_TYPE_VORBIS_COMMENT: - { - if (blockSize < 8) { - return DRFLAC_FALSE; - } + mid = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01))); - if (onMeta) { - void* pRawData = DRFLAC_MALLOC(blockSize); - if (pRawData == NULL) { - return DRFLAC_FALSE; - } + left = _mm_srai_epi32(_mm_add_epi32(mid, side), 1); + right = _mm_srai_epi32(_mm_sub_epi32(mid, side), 1); - if (onRead(pUserData, pRawData, blockSize) != blockSize) { - DRFLAC_FREE(pRawData); - return DRFLAC_FALSE; - } + _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right)); + _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right)); + } - metadata.pRawData = pRawData; - metadata.rawDataSize = blockSize; + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_int32 mid = pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_int32 side = pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; - const char* pRunningData 
= (const char*)pRawData; - const char* const pRunningDataEnd = (const char*)pRawData + blockSize; + mid = (((drflac_uint32)mid) << 1) | (side & 0x01); - metadata.data.vorbis_comment.vendorLength = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; + pOutputSamples[i*2+0] = ((mid + side) >> 1); + pOutputSamples[i*2+1] = ((mid - side) >> 1); + } + } else { + shift -= 1; + for (i = 0; i < frameCount4; ++i) { + __m128i mid; + __m128i side; + __m128i left; + __m128i right; - // Need space for the rest of the block - if ((pRunningDataEnd - pRunningData) - 4 < (drflac_int64)metadata.data.vorbis_comment.vendorLength) { // <-- Note the order of operations to avoid overflow to a valid value - DRFLAC_FREE(pRawData); - return DRFLAC_FALSE; - } - metadata.data.vorbis_comment.vendor = pRunningData; pRunningData += metadata.data.vorbis_comment.vendorLength; - metadata.data.vorbis_comment.commentCount = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; + mid = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); - // Need space for 'commentCount' comments after the block, which at minimum is a drflac_uint32 per comment - if ((pRunningDataEnd - pRunningData) / sizeof(drflac_uint32) < metadata.data.vorbis_comment.commentCount) { // <-- Note the order of operations to avoid overflow to a valid value - DRFLAC_FREE(pRawData); - return DRFLAC_FALSE; - } - metadata.data.vorbis_comment.pComments = pRunningData; + mid = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01))); - // Check that the comments section is valid before passing it to the callback - for (drflac_uint32 i = 0; i < metadata.data.vorbis_comment.commentCount; ++i) { - if (pRunningDataEnd - pRunningData < 4) { - DRFLAC_FREE(pRawData); - 
return DRFLAC_FALSE; - } - const drflac_uint32 commentLength = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; - if (pRunningDataEnd - pRunningData < (drflac_int64)commentLength) { // <-- Note the order of operations to avoid overflow to a valid value - DRFLAC_FREE(pRawData); - return DRFLAC_FALSE; - } - pRunningData += commentLength; - } + left = _mm_slli_epi32(_mm_add_epi32(mid, side), shift); + right = _mm_slli_epi32(_mm_sub_epi32(mid, side), shift); - onMeta(pUserDataMD, &metadata); + _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right)); + _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right)); + } - DRFLAC_FREE(pRawData); - } - } break; + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_int32 mid = pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_int32 side = pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; - case DRFLAC_METADATA_BLOCK_TYPE_CUESHEET: - { - if (blockSize < 396) { - return DRFLAC_FALSE; - } + mid = (((drflac_uint32)mid) << 1) | (side & 0x01); - if (onMeta) { - void* pRawData = DRFLAC_MALLOC(blockSize); - if (pRawData == NULL) { - return DRFLAC_FALSE; - } + pOutputSamples[i*2+0] = ((mid + side) << shift); + pOutputSamples[i*2+1] = ((mid - side) << shift); + } + } +} +#endif - if (onRead(pUserData, pRawData, blockSize) != blockSize) { - DRFLAC_FREE(pRawData); - return DRFLAC_FALSE; - } +#if defined(DRFLAC_SUPPORT_NEON) +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4; + int shift; + int32x4_t wbpsShift0_4; /* wbps = Wasted Bits Per Sample */ + int32x4_t wbpsShift1_4; /* wbps = Wasted Bits Per Sample */ + int32x4_t one4; - 
metadata.pRawData = pRawData; - metadata.rawDataSize = blockSize; + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); - char* pRunningData = (char*)pRawData; - const char* const pRunningDataEnd = (const char*)pRawData + blockSize; + frameCount4 = frameCount >> 2; - drflac_copy_memory(metadata.data.cuesheet.catalog, pRunningData, 128); pRunningData += 128; - metadata.data.cuesheet.leadInSampleCount = drflac__be2host_64(*(const drflac_uint64*)pRunningData); pRunningData += 8; - metadata.data.cuesheet.isCD = (pRunningData[0] & 0x80) != 0; pRunningData += 259; - metadata.data.cuesheet.trackCount = pRunningData[0]; pRunningData += 1; - metadata.data.cuesheet.pTrackData = pRunningData; + wbpsShift0_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + wbpsShift1_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); + one4 = vdupq_n_s32(1); - // Check that the cuesheet tracks are valid before passing it to the callback - for (drflac_uint8 i = 0; i < metadata.data.cuesheet.trackCount; ++i) { - if (pRunningDataEnd - pRunningData < 36) { - DRFLAC_FREE(pRawData); - return DRFLAC_FALSE; - } + shift = unusedBitsPerSample; + if (shift == 0) { + for (i = 0; i < frameCount4; ++i) { + int32x4_t mid; + int32x4_t side; + int32x4_t left; + int32x4_t right; - // Skip to the index point count - pRunningData += 35; - const drflac_uint8 indexCount = pRunningData[0]; pRunningData += 1; - const drflac_uint32 indexPointSize = indexCount * sizeof(drflac_cuesheet_track_index); - if (pRunningDataEnd - pRunningData < (drflac_int64)indexPointSize) { - DRFLAC_FREE(pRawData); - return DRFLAC_FALSE; - } + mid = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), wbpsShift0_4); + side = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), wbpsShift1_4); - // Endian swap. 
- for (drflac_uint8 index = 0; index < indexCount; ++index) { - drflac_cuesheet_track_index* pTrack = (drflac_cuesheet_track_index*)pRunningData; - pRunningData += sizeof(drflac_cuesheet_track_index); - pTrack->offset = drflac__be2host_64(pTrack->offset); - } - } + mid = vorrq_s32(vshlq_n_s32(mid, 1), vandq_s32(side, one4)); - onMeta(pUserDataMD, &metadata); + left = vshrq_n_s32(vaddq_s32(mid, side), 1); + right = vshrq_n_s32(vsubq_s32(mid, side), 1); - DRFLAC_FREE(pRawData); - } - } break; + drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right)); + } - case DRFLAC_METADATA_BLOCK_TYPE_PICTURE: - { - if (blockSize < 32) { - return DRFLAC_FALSE; - } + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_int32 mid = pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_int32 side = pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; - if (onMeta) { - void* pRawData = DRFLAC_MALLOC(blockSize); - if (pRawData == NULL) { - return DRFLAC_FALSE; - } + mid = (((drflac_uint32)mid) << 1) | (side & 0x01); - if (onRead(pUserData, pRawData, blockSize) != blockSize) { - DRFLAC_FREE(pRawData); - return DRFLAC_FALSE; - } + pOutputSamples[i*2+0] = ((mid + side) >> 1); + pOutputSamples[i*2+1] = ((mid - side) >> 1); + } + } else { + int32x4_t shift4; - metadata.pRawData = pRawData; - metadata.rawDataSize = blockSize; + shift -= 1; + shift4 = vdupq_n_s32(shift); - const char* pRunningData = (const char*)pRawData; - const char* const pRunningDataEnd = (const char*)pRawData + blockSize; + for (i = 0; i < frameCount4; ++i) { + int32x4_t mid; + int32x4_t side; + int32x4_t left; + int32x4_t right; - metadata.data.picture.type = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; - metadata.data.picture.mimeLength = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; + mid = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), wbpsShift0_4); + side = 
vshlq_s32(vld1q_s32(pInputSamples1 + i*4), wbpsShift1_4); - // Need space for the rest of the block - if ((pRunningDataEnd - pRunningData) - 24 < (drflac_int64)metadata.data.picture.mimeLength) { // <-- Note the order of operations to avoid overflow to a valid value - DRFLAC_FREE(pRawData); - return DRFLAC_FALSE; - } - metadata.data.picture.mime = pRunningData; pRunningData += metadata.data.picture.mimeLength; - metadata.data.picture.descriptionLength = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; + mid = vorrq_s32(vshlq_n_s32(mid, 1), vandq_s32(side, one4)); - // Need space for the rest of the block - if ((pRunningDataEnd - pRunningData) - 20 < (drflac_int64)metadata.data.picture.descriptionLength) { // <-- Note the order of operations to avoid overflow to a valid value - DRFLAC_FREE(pRawData); - return DRFLAC_FALSE; - } - metadata.data.picture.description = pRunningData; pRunningData += metadata.data.picture.descriptionLength; - metadata.data.picture.width = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; - metadata.data.picture.height = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; - metadata.data.picture.colorDepth = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; - metadata.data.picture.indexColorCount = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; - metadata.data.picture.pictureDataSize = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; - metadata.data.picture.pPictureData = (const drflac_uint8*)pRunningData; - - // Need space for the picture after the block - if (pRunningDataEnd - pRunningData < (drflac_int64)metadata.data.picture.pictureDataSize) { // <-- Note the order of operations to avoid overflow to a valid value - DRFLAC_FREE(pRawData); - return DRFLAC_FALSE; - } + left = vshlq_s32(vaddq_s32(mid, side), shift4); + right = vshlq_s32(vsubq_s32(mid, side), shift4); - 
onMeta(pUserDataMD, &metadata); + drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right)); + } - DRFLAC_FREE(pRawData); - } - } break; + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_int32 mid = pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_int32 side = pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; - case DRFLAC_METADATA_BLOCK_TYPE_PADDING: - { - if (onMeta) { - metadata.data.padding.unused = 0; + mid = (((drflac_uint32)mid) << 1) | (side & 0x01); - // Padding doesn't have anything meaningful in it, so just skip over it, but make sure the caller is aware of it by firing the callback. - if (!onSeek(pUserData, blockSize, drflac_seek_origin_current)) { - isLastBlock = DRFLAC_TRUE; // An error occurred while seeking. Attempt to recover by treating this as the last block which will in turn terminate the loop. - } else { - onMeta(pUserDataMD, &metadata); - } - } - } break; + pOutputSamples[i*2+0] = ((mid + side) << shift); + pOutputSamples[i*2+1] = ((mid - side) << shift); + } + } +} +#endif - case DRFLAC_METADATA_BLOCK_TYPE_INVALID: - { - // Invalid chunk. Just skip over this one. - if (onMeta) { - if (!onSeek(pUserData, blockSize, drflac_seek_origin_current)) { - isLastBlock = DRFLAC_TRUE; // An error occurred while seeking. Attempt to recover by treating this as the last block which will in turn terminate the loop. 
- } - } - } break; +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ +#if defined(DRFLAC_SUPPORT_SSE2) + if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s32__decode_mid_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#elif defined(DRFLAC_SUPPORT_NEON) + if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s32__decode_mid_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#endif + { + /* Scalar fallback. */ +#if 0 + drflac_read_pcm_frames_s32__decode_mid_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#else + drflac_read_pcm_frames_s32__decode_mid_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#endif + } +} - default: - { - // It's an unknown chunk, but not necessarily invalid. There's a chance more metadata blocks might be defined later on, so we - // can at the very least report the chunk to the application and let it look at the raw data. 
- if (onMeta) { - void* pRawData = DRFLAC_MALLOC(blockSize); - if (pRawData == NULL) { - return DRFLAC_FALSE; - } - if (onRead(pUserData, pRawData, blockSize) != blockSize) { - DRFLAC_FREE(pRawData); - return DRFLAC_FALSE; - } +#if 0 +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ + for (drflac_uint64 i = 0; i < frameCount; ++i) { + pOutputSamples[i*2+0] = (pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample)); + pOutputSamples[i*2+1] = (pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample)); + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; - metadata.pRawData = pRawData; - metadata.rawDataSize = blockSize; - onMeta(pUserDataMD, &metadata); + drflac_int32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + drflac_int32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); - DRFLAC_FREE(pRawData); - } - } break; - } + for (i = 0; i < frameCount4; ++i) { + drflac_int32 tempL0 = pInputSamples0[i*4+0] << shift0; + drflac_int32 tempL1 = pInputSamples0[i*4+1] << shift0; + drflac_int32 tempL2 = pInputSamples0[i*4+2] << shift0; + drflac_int32 tempL3 = pInputSamples0[i*4+3] << shift0; - // If we're not handling metadata, just skip over the block. If we are, it will have been handled earlier in the switch statement above. 
- if (onMeta == NULL && blockSize > 0) { - if (!onSeek(pUserData, blockSize, drflac_seek_origin_current)) { - isLastBlock = DRFLAC_TRUE; - } - } + drflac_int32 tempR0 = pInputSamples1[i*4+0] << shift1; + drflac_int32 tempR1 = pInputSamples1[i*4+1] << shift1; + drflac_int32 tempR2 = pInputSamples1[i*4+2] << shift1; + drflac_int32 tempR3 = pInputSamples1[i*4+3] << shift1; - runningFilePos += blockSize; - if (isLastBlock) { - break; - } + pOutputSamples[i*8+0] = tempL0; + pOutputSamples[i*8+1] = tempR0; + pOutputSamples[i*8+2] = tempL1; + pOutputSamples[i*8+3] = tempR1; + pOutputSamples[i*8+4] = tempL2; + pOutputSamples[i*8+5] = tempR2; + pOutputSamples[i*8+6] = tempL3; + pOutputSamples[i*8+7] = tempR3; } - *pSeektablePos = seektablePos; - *pSeektableSize = seektableSize; - *pFirstFramePos = runningFilePos; - - return DRFLAC_TRUE; + for (i = (frameCount4 << 2); i < frameCount; ++i) { + pOutputSamples[i*2+0] = (pInputSamples0[i] << shift0); + pOutputSamples[i*2+1] = (pInputSamples1[i] << shift1); + } } -drflac_bool32 drflac__init_private__native(drflac_init_info* pInit, drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_bool32 relaxed) +#if defined(DRFLAC_SUPPORT_SSE2) +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) { - (void)onSeek; + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; - // Pre: The bit stream should be sitting just past the 4-byte id header. 
+ int shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + int shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); - pInit->container = drflac_container_native; + for (i = 0; i < frameCount4; ++i) { + __m128i left = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0); + __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1); - // The first metadata block should be the STREAMINFO block. - drflac_uint8 isLastBlock; - drflac_uint8 blockType; - drflac_uint32 blockSize; - if (!drflac__read_and_decode_block_header(onRead, pUserData, &isLastBlock, &blockType, &blockSize)) { - return DRFLAC_FALSE; + _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right)); + _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right)); } - if (blockType != DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO || blockSize != 34) { - if (!relaxed) { - // We're opening in strict mode and the first block is not the STREAMINFO block. Error. - return DRFLAC_FALSE; - } else { - // Relaxed mode. To open from here we need to just find the first frame and set the sample rate, etc. to whatever is defined - // for that frame. - pInit->hasStreamInfoBlock = DRFLAC_FALSE; - pInit->hasMetadataBlocks = DRFLAC_FALSE; + for (i = (frameCount4 << 2); i < frameCount; ++i) { + pOutputSamples[i*2+0] = (pInputSamples0[i] << shift0); + pOutputSamples[i*2+1] = (pInputSamples1[i] << shift1); + } +} +#endif - if (!drflac__read_next_frame_header(&pInit->bs, 0, &pInit->firstFrameHeader)) { - return DRFLAC_FALSE; // Couldn't find a frame. 
- } +#if defined(DRFLAC_SUPPORT_NEON) +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; - if (pInit->firstFrameHeader.bitsPerSample == 0) { - return DRFLAC_FALSE; // Failed to initialize because the first frame depends on the STREAMINFO block, which does not exist. - } + int shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + int shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); - pInit->sampleRate = pInit->firstFrameHeader.sampleRate; - pInit->channels = drflac__get_channel_count_from_channel_assignment(pInit->firstFrameHeader.channelAssignment); - pInit->bitsPerSample = pInit->firstFrameHeader.bitsPerSample; - pInit->maxBlockSize = 65535; // <-- See notes here: https://xiph.org/flac/format.html#metadata_block_streaminfo - return DRFLAC_TRUE; - } - } else { - drflac_streaminfo streaminfo; - if (!drflac__read_streaminfo(onRead, pUserData, &streaminfo)) { - return DRFLAC_FALSE; - } + int32x4_t shift4_0 = vdupq_n_s32(shift0); + int32x4_t shift4_1 = vdupq_n_s32(shift1); - pInit->hasStreamInfoBlock = DRFLAC_TRUE; - pInit->sampleRate = streaminfo.sampleRate; - pInit->channels = streaminfo.channels; - pInit->bitsPerSample = streaminfo.bitsPerSample; - pInit->totalSampleCount = streaminfo.totalSampleCount; - pInit->maxBlockSize = streaminfo.maxBlockSize; // Don't care about the min block size - only the max (used for determining the size of the memory allocation). 
- pInit->hasMetadataBlocks = !isLastBlock; + for (i = 0; i < frameCount4; ++i) { + int32x4_t left; + int32x4_t right; - if (onMeta) { - drflac_metadata metadata; - metadata.type = DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO; - metadata.pRawData = NULL; - metadata.rawDataSize = 0; - metadata.data.streaminfo = streaminfo; - onMeta(pUserDataMD, &metadata); - } + left = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), shift4_0); + right = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), shift4_1); - return DRFLAC_TRUE; + drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + pOutputSamples[i*2+0] = (pInputSamples0[i] << shift0); + pOutputSamples[i*2+1] = (pInputSamples1[i] << shift1); } } +#endif -#ifndef DR_FLAC_NO_OGG -#define DRFLAC_OGG_MAX_PAGE_SIZE 65307 -#define DRFLAC_OGG_CAPTURE_PATTERN_CRC32 1605413199 // CRC-32 of "OggS". +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ +#if defined(DRFLAC_SUPPORT_SSE2) + if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s32__decode_independent_stereo__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#elif defined(DRFLAC_SUPPORT_NEON) + if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s32__decode_independent_stereo__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#endif + { + /* Scalar fallback. 
*/ +#if 0 + drflac_read_pcm_frames_s32__decode_independent_stereo__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#else + drflac_read_pcm_frames_s32__decode_independent_stereo__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#endif + } +} -typedef enum + +drflac_uint64 drflac_read_pcm_frames_s32(drflac* pFlac, drflac_uint64 framesToRead, drflac_int32* pBufferOut) { - drflac_ogg_recover_on_crc_mismatch, - drflac_ogg_fail_on_crc_mismatch -} drflac_ogg_crc_mismatch_recovery; + drflac_uint64 framesRead; + drflac_int32 unusedBitsPerSample; + if (pFlac == NULL || framesToRead == 0) { + return 0; + } -static drflac_uint32 drflac__crc32_table[] = { - 0x00000000L, 0x04C11DB7L, 0x09823B6EL, 0x0D4326D9L, - 0x130476DCL, 0x17C56B6BL, 0x1A864DB2L, 0x1E475005L, - 0x2608EDB8L, 0x22C9F00FL, 0x2F8AD6D6L, 0x2B4BCB61L, - 0x350C9B64L, 0x31CD86D3L, 0x3C8EA00AL, 0x384FBDBDL, - 0x4C11DB70L, 0x48D0C6C7L, 0x4593E01EL, 0x4152FDA9L, - 0x5F15ADACL, 0x5BD4B01BL, 0x569796C2L, 0x52568B75L, - 0x6A1936C8L, 0x6ED82B7FL, 0x639B0DA6L, 0x675A1011L, - 0x791D4014L, 0x7DDC5DA3L, 0x709F7B7AL, 0x745E66CDL, - 0x9823B6E0L, 0x9CE2AB57L, 0x91A18D8EL, 0x95609039L, - 0x8B27C03CL, 0x8FE6DD8BL, 0x82A5FB52L, 0x8664E6E5L, - 0xBE2B5B58L, 0xBAEA46EFL, 0xB7A96036L, 0xB3687D81L, - 0xAD2F2D84L, 0xA9EE3033L, 0xA4AD16EAL, 0xA06C0B5DL, - 0xD4326D90L, 0xD0F37027L, 0xDDB056FEL, 0xD9714B49L, - 0xC7361B4CL, 0xC3F706FBL, 0xCEB42022L, 0xCA753D95L, - 0xF23A8028L, 0xF6FB9D9FL, 0xFBB8BB46L, 0xFF79A6F1L, - 0xE13EF6F4L, 0xE5FFEB43L, 0xE8BCCD9AL, 0xEC7DD02DL, - 0x34867077L, 0x30476DC0L, 0x3D044B19L, 0x39C556AEL, - 0x278206ABL, 0x23431B1CL, 0x2E003DC5L, 0x2AC12072L, - 0x128E9DCFL, 0x164F8078L, 0x1B0CA6A1L, 0x1FCDBB16L, - 0x018AEB13L, 0x054BF6A4L, 0x0808D07DL, 0x0CC9CDCAL, - 0x7897AB07L, 0x7C56B6B0L, 0x71159069L, 0x75D48DDEL, - 0x6B93DDDBL, 0x6F52C06CL, 0x6211E6B5L, 0x66D0FB02L, - 0x5E9F46BFL, 0x5A5E5B08L, 0x571D7DD1L, 0x53DC6066L, - 
0x4D9B3063L, 0x495A2DD4L, 0x44190B0DL, 0x40D816BAL, - 0xACA5C697L, 0xA864DB20L, 0xA527FDF9L, 0xA1E6E04EL, - 0xBFA1B04BL, 0xBB60ADFCL, 0xB6238B25L, 0xB2E29692L, - 0x8AAD2B2FL, 0x8E6C3698L, 0x832F1041L, 0x87EE0DF6L, - 0x99A95DF3L, 0x9D684044L, 0x902B669DL, 0x94EA7B2AL, - 0xE0B41DE7L, 0xE4750050L, 0xE9362689L, 0xEDF73B3EL, - 0xF3B06B3BL, 0xF771768CL, 0xFA325055L, 0xFEF34DE2L, - 0xC6BCF05FL, 0xC27DEDE8L, 0xCF3ECB31L, 0xCBFFD686L, - 0xD5B88683L, 0xD1799B34L, 0xDC3ABDEDL, 0xD8FBA05AL, - 0x690CE0EEL, 0x6DCDFD59L, 0x608EDB80L, 0x644FC637L, - 0x7A089632L, 0x7EC98B85L, 0x738AAD5CL, 0x774BB0EBL, - 0x4F040D56L, 0x4BC510E1L, 0x46863638L, 0x42472B8FL, - 0x5C007B8AL, 0x58C1663DL, 0x558240E4L, 0x51435D53L, - 0x251D3B9EL, 0x21DC2629L, 0x2C9F00F0L, 0x285E1D47L, - 0x36194D42L, 0x32D850F5L, 0x3F9B762CL, 0x3B5A6B9BL, - 0x0315D626L, 0x07D4CB91L, 0x0A97ED48L, 0x0E56F0FFL, - 0x1011A0FAL, 0x14D0BD4DL, 0x19939B94L, 0x1D528623L, - 0xF12F560EL, 0xF5EE4BB9L, 0xF8AD6D60L, 0xFC6C70D7L, - 0xE22B20D2L, 0xE6EA3D65L, 0xEBA91BBCL, 0xEF68060BL, - 0xD727BBB6L, 0xD3E6A601L, 0xDEA580D8L, 0xDA649D6FL, - 0xC423CD6AL, 0xC0E2D0DDL, 0xCDA1F604L, 0xC960EBB3L, - 0xBD3E8D7EL, 0xB9FF90C9L, 0xB4BCB610L, 0xB07DABA7L, - 0xAE3AFBA2L, 0xAAFBE615L, 0xA7B8C0CCL, 0xA379DD7BL, - 0x9B3660C6L, 0x9FF77D71L, 0x92B45BA8L, 0x9675461FL, - 0x8832161AL, 0x8CF30BADL, 0x81B02D74L, 0x857130C3L, - 0x5D8A9099L, 0x594B8D2EL, 0x5408ABF7L, 0x50C9B640L, - 0x4E8EE645L, 0x4A4FFBF2L, 0x470CDD2BL, 0x43CDC09CL, - 0x7B827D21L, 0x7F436096L, 0x7200464FL, 0x76C15BF8L, - 0x68860BFDL, 0x6C47164AL, 0x61043093L, 0x65C52D24L, - 0x119B4BE9L, 0x155A565EL, 0x18197087L, 0x1CD86D30L, - 0x029F3D35L, 0x065E2082L, 0x0B1D065BL, 0x0FDC1BECL, - 0x3793A651L, 0x3352BBE6L, 0x3E119D3FL, 0x3AD08088L, - 0x2497D08DL, 0x2056CD3AL, 0x2D15EBE3L, 0x29D4F654L, - 0xC5A92679L, 0xC1683BCEL, 0xCC2B1D17L, 0xC8EA00A0L, - 0xD6AD50A5L, 0xD26C4D12L, 0xDF2F6BCBL, 0xDBEE767CL, - 0xE3A1CBC1L, 0xE760D676L, 0xEA23F0AFL, 0xEEE2ED18L, - 0xF0A5BD1DL, 0xF464A0AAL, 0xF9278673L, 0xFDE69BC4L, - 
0x89B8FD09L, 0x8D79E0BEL, 0x803AC667L, 0x84FBDBD0L, - 0x9ABC8BD5L, 0x9E7D9662L, 0x933EB0BBL, 0x97FFAD0CL, - 0xAFB010B1L, 0xAB710D06L, 0xA6322BDFL, 0xA2F33668L, - 0xBCB4666DL, 0xB8757BDAL, 0xB5365D03L, 0xB1F740B4L -}; + if (pBufferOut == NULL) { + return drflac__seek_forward_by_pcm_frames(pFlac, framesToRead); + } -static DRFLAC_INLINE drflac_uint32 drflac_crc32_byte(drflac_uint32 crc32, drflac_uint8 data) -{ -#ifndef DR_FLAC_NO_CRC - return (crc32 << 8) ^ drflac__crc32_table[(drflac_uint8)((crc32 >> 24) & 0xFF) ^ data]; -#else - (void)data; - return crc32; -#endif + unusedBitsPerSample = 32 - pFlac->bitsPerSample; + + framesRead = 0; + while (framesToRead > 0) { + /* If we've run out of samples in this frame, go to the next. */ + if (pFlac->currentFLACFrame.pcmFramesRemaining == 0) { + if (!drflac__read_and_decode_next_flac_frame(pFlac)) { + break; /* Couldn't read the next frame, so just break from the loop and return. */ + } + } else { + unsigned int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment); + drflac_uint64 iFirstPCMFrame = pFlac->currentFLACFrame.header.blockSizeInPCMFrames - pFlac->currentFLACFrame.pcmFramesRemaining; + drflac_uint64 frameCountThisIteration = framesToRead; + + if (frameCountThisIteration > pFlac->currentFLACFrame.pcmFramesRemaining) { + frameCountThisIteration = pFlac->currentFLACFrame.pcmFramesRemaining; + } + + if (channelCount == 2) { + const drflac_int32* pDecodedSamples0 = pFlac->currentFLACFrame.subframes[0].pSamplesS32 + iFirstPCMFrame; + const drflac_int32* pDecodedSamples1 = pFlac->currentFLACFrame.subframes[1].pSamplesS32 + iFirstPCMFrame; + + switch (pFlac->currentFLACFrame.header.channelAssignment) + { + case DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE: + { + drflac_read_pcm_frames_s32__decode_left_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); + } break; + + case DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE: + { + 
drflac_read_pcm_frames_s32__decode_right_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); + } break; + + case DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE: + { + drflac_read_pcm_frames_s32__decode_mid_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); + } break; + + case DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT: + default: + { + drflac_read_pcm_frames_s32__decode_independent_stereo(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); + } break; + } + } else { + /* Generic interleaving. */ + drflac_uint64 i; + for (i = 0; i < frameCountThisIteration; ++i) { + unsigned int j; + for (j = 0; j < channelCount; ++j) { + pBufferOut[(i*channelCount)+j] = (drflac_int32)((drflac_uint32)(pFlac->currentFLACFrame.subframes[j].pSamplesS32[iFirstPCMFrame + i]) << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[j].wastedBitsPerSample)); + } + } + } + + framesRead += frameCountThisIteration; + pBufferOut += frameCountThisIteration * channelCount; + framesToRead -= frameCountThisIteration; + pFlac->currentPCMFrame += frameCountThisIteration; + pFlac->currentFLACFrame.pcmFramesRemaining -= (drflac_uint32)frameCountThisIteration; + } + } + + return framesRead; } + #if 0 -static DRFLAC_INLINE drflac_uint32 drflac_crc32_uint32(drflac_uint32 crc32, drflac_uint32 data) +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) { - crc32 = drflac_crc32_byte(crc32, (drflac_uint8)((data >> 24) & 0xFF)); - crc32 = drflac_crc32_byte(crc32, (drflac_uint8)((data >> 16) & 0xFF)); - crc32 = drflac_crc32_byte(crc32, (drflac_uint8)((data >> 8) & 0xFF)); - crc32 = drflac_crc32_byte(crc32, (drflac_uint8)((data >> 0) & 0xFF)); - return crc32; -} + 
drflac_uint64 i; + for (i = 0; i < frameCount; ++i) { + drflac_int32 left = pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + drflac_int32 side = pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); + drflac_int32 right = left - side; -static DRFLAC_INLINE drflac_uint32 drflac_crc32_uint64(drflac_uint32 crc32, drflac_uint64 data) -{ - crc32 = drflac_crc32_uint32(crc32, (drflac_uint32)((data >> 32) & 0xFFFFFFFF)); - crc32 = drflac_crc32_uint32(crc32, (drflac_uint32)((data >> 0) & 0xFFFFFFFF)); - return crc32; + left >>= 16; + right >>= 16; + + pOutputSamples[i*2+0] = (drflac_int16)left; + pOutputSamples[i*2+1] = (drflac_int16)right; + } } #endif -static DRFLAC_INLINE drflac_uint32 drflac_crc32_buffer(drflac_uint32 crc32, drflac_uint8* pData, drflac_uint32 dataSize) +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) { - // This can be optimized. 
- for (drflac_uint32 i = 0; i < dataSize; ++i) { - crc32 = drflac_crc32_byte(crc32, pData[i]); + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + + drflac_int32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_int32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + for (i = 0; i < frameCount4; ++i) { + drflac_int32 left0 = pInputSamples0[i*4+0] << shift0; + drflac_int32 left1 = pInputSamples0[i*4+1] << shift0; + drflac_int32 left2 = pInputSamples0[i*4+2] << shift0; + drflac_int32 left3 = pInputSamples0[i*4+3] << shift0; + + drflac_int32 side0 = pInputSamples1[i*4+0] << shift1; + drflac_int32 side1 = pInputSamples1[i*4+1] << shift1; + drflac_int32 side2 = pInputSamples1[i*4+2] << shift1; + drflac_int32 side3 = pInputSamples1[i*4+3] << shift1; + + drflac_int32 right0 = left0 - side0; + drflac_int32 right1 = left1 - side1; + drflac_int32 right2 = left2 - side2; + drflac_int32 right3 = left3 - side3; + + left0 >>= 16; + left1 >>= 16; + left2 >>= 16; + left3 >>= 16; + + right0 >>= 16; + right1 >>= 16; + right2 >>= 16; + right3 >>= 16; + + pOutputSamples[i*8+0] = (drflac_int16)left0; + pOutputSamples[i*8+1] = (drflac_int16)right0; + pOutputSamples[i*8+2] = (drflac_int16)left1; + pOutputSamples[i*8+3] = (drflac_int16)right1; + pOutputSamples[i*8+4] = (drflac_int16)left2; + pOutputSamples[i*8+5] = (drflac_int16)right2; + pOutputSamples[i*8+6] = (drflac_int16)left3; + pOutputSamples[i*8+7] = (drflac_int16)right3; } - return crc32; -} + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_int32 left = pInputSamples0[i] << shift0; + drflac_int32 side = pInputSamples1[i] << shift1; + drflac_int32 right = left - side; -static DRFLAC_INLINE drflac_bool32 drflac_ogg__is_capture_pattern(drflac_uint8 pattern[4]) -{ - return pattern[0] == 'O' && pattern[1] == 'g' && pattern[2] == 'g' && pattern[3] == 'S'; -} + left >>= 16; + right >>= 16; -static DRFLAC_INLINE 
drflac_uint32 drflac_ogg__get_page_header_size(drflac_ogg_page_header* pHeader) -{ - return 27 + pHeader->segmentCount; + pOutputSamples[i*2+0] = (drflac_int16)left; + pOutputSamples[i*2+1] = (drflac_int16)right; + } } -static DRFLAC_INLINE drflac_uint32 drflac_ogg__get_page_body_size(drflac_ogg_page_header* pHeader) +#if defined(DRFLAC_SUPPORT_SSE2) +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) { - drflac_uint32 pageBodySize = 0; - for (int i = 0; i < pHeader->segmentCount; ++i) { - pageBodySize += pHeader->segmentTable[i]; + drflac_uint64 frameCount4; + drflac_int32 shift0; + drflac_int32 shift1; + drflac_uint64 i; + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); + + frameCount4 = frameCount >> 2; + + shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); + + for (i = 0; i < frameCount4; ++i) { + __m128i left = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0); + __m128i side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1); + __m128i right = _mm_sub_epi32(left, side); + + left = _mm_srai_epi32(left, 16); + right = _mm_srai_epi32(right, 16); + + _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right)); } - return pageBodySize; + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_int32 left = pInputSamples0[i] << shift0; + drflac_int32 side = pInputSamples1[i] << shift1; + drflac_int32 right = left - side; + + left >>= 16; + right >>= 16; + + pOutputSamples[i*2+0] = (drflac_int16)left; + pOutputSamples[i*2+1] = (drflac_int16)right; + } } +#endif -drflac_result drflac_ogg__read_page_header_after_capture_pattern(drflac_read_proc 
onRead, void* pUserData, drflac_ogg_page_header* pHeader, drflac_uint32* pBytesRead, drflac_uint32* pCRC32) +#if defined(DRFLAC_SUPPORT_NEON) +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) { - drflac_assert(*pCRC32 == DRFLAC_OGG_CAPTURE_PATTERN_CRC32); + drflac_uint64 frameCount4; + drflac_int32 shift0; + drflac_int32 shift1; + drflac_uint64 i; + int32x4_t shift0_4; + int32x4_t shift1_4; - drflac_uint8 data[23]; - if (onRead(pUserData, data, 23) != 23) { - return DRFLAC_END_OF_STREAM; + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); + + frameCount4 = frameCount >> 2; + + shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); + + shift0_4 = vdupq_n_s32(shift0); + shift1_4 = vdupq_n_s32(shift1); + + for (i = 0; i < frameCount4; ++i) { + int32x4_t left; + int32x4_t side; + int32x4_t right; + + left = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), shift0_4); + side = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), shift1_4); + right = vsubq_s32(left, side); + + left = vshrq_n_s32(left, 16); + right = vshrq_n_s32(right, 16); + + drflac__vst2q_s16(pOutputSamples + i*8, vzip_s16(vmovn_s32(left), vmovn_s32(right))); } - *pBytesRead += 23; - pHeader->structureVersion = data[0]; - pHeader->headerType = data[1]; - drflac_copy_memory(&pHeader->granulePosition, &data[ 2], 8); - drflac_copy_memory(&pHeader->serialNumber, &data[10], 4); - drflac_copy_memory(&pHeader->sequenceNumber, &data[14], 4); - drflac_copy_memory(&pHeader->checksum, &data[18], 4); - pHeader->segmentCount = data[22]; + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_int32 left = pInputSamples0[i] << shift0; + drflac_int32 side = pInputSamples1[i] << shift1; + drflac_int32 right 
= left - side; - // Calculate the CRC. Note that for the calculation the checksum part of the page needs to be set to 0. - data[18] = 0; - data[19] = 0; - data[20] = 0; - data[21] = 0; + left >>= 16; + right >>= 16; - drflac_uint32 i; - for (i = 0; i < 23; ++i) { - *pCRC32 = drflac_crc32_byte(*pCRC32, data[i]); + pOutputSamples[i*2+0] = (drflac_int16)left; + pOutputSamples[i*2+1] = (drflac_int16)right; + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) +{ +#if defined(DRFLAC_SUPPORT_SSE2) + if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s16__decode_left_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#elif defined(DRFLAC_SUPPORT_NEON) + if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s16__decode_left_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#endif + { + /* Scalar fallback. 
*/ +#if 0 + drflac_read_pcm_frames_s16__decode_left_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#else + drflac_read_pcm_frames_s16__decode_left_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#endif } +} - if (onRead(pUserData, pHeader->segmentTable, pHeader->segmentCount) != pHeader->segmentCount) { - return DRFLAC_END_OF_STREAM; - } - *pBytesRead += pHeader->segmentCount; +#if 0 +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) +{ + drflac_uint64 i; + for (i = 0; i < frameCount; ++i) { + drflac_int32 side = pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + drflac_int32 right = pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); + drflac_int32 left = right + side; - for (i = 0; i < pHeader->segmentCount; ++i) { - *pCRC32 = drflac_crc32_byte(*pCRC32, pHeader->segmentTable[i]); - } + left >>= 16; + right >>= 16; - return DRFLAC_SUCCESS; + pOutputSamples[i*2+0] = (drflac_int16)left; + pOutputSamples[i*2+1] = (drflac_int16)right; + } } +#endif -drflac_result drflac_ogg__read_page_header(drflac_read_proc onRead, void* pUserData, drflac_ogg_page_header* pHeader, drflac_uint32* pBytesRead, drflac_uint32* pCRC32) +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) { - *pBytesRead = 0; + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; - drflac_uint8 id[4]; - if (onRead(pUserData, id, 4) != 4) { - return DRFLAC_END_OF_STREAM; + 
drflac_int32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_int32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + for (i = 0; i < frameCount4; ++i) { + drflac_int32 side0 = pInputSamples0[i*4+0] << shift0; + drflac_int32 side1 = pInputSamples0[i*4+1] << shift0; + drflac_int32 side2 = pInputSamples0[i*4+2] << shift0; + drflac_int32 side3 = pInputSamples0[i*4+3] << shift0; + + drflac_int32 right0 = pInputSamples1[i*4+0] << shift1; + drflac_int32 right1 = pInputSamples1[i*4+1] << shift1; + drflac_int32 right2 = pInputSamples1[i*4+2] << shift1; + drflac_int32 right3 = pInputSamples1[i*4+3] << shift1; + + drflac_int32 left0 = right0 + side0; + drflac_int32 left1 = right1 + side1; + drflac_int32 left2 = right2 + side2; + drflac_int32 left3 = right3 + side3; + + left0 >>= 16; + left1 >>= 16; + left2 >>= 16; + left3 >>= 16; + + right0 >>= 16; + right1 >>= 16; + right2 >>= 16; + right3 >>= 16; + + pOutputSamples[i*8+0] = (drflac_int16)left0; + pOutputSamples[i*8+1] = (drflac_int16)right0; + pOutputSamples[i*8+2] = (drflac_int16)left1; + pOutputSamples[i*8+3] = (drflac_int16)right1; + pOutputSamples[i*8+4] = (drflac_int16)left2; + pOutputSamples[i*8+5] = (drflac_int16)right2; + pOutputSamples[i*8+6] = (drflac_int16)left3; + pOutputSamples[i*8+7] = (drflac_int16)right3; } - *pBytesRead += 4; - // We need to read byte-by-byte until we find the OggS capture pattern. 
- for (;;) { - if (drflac_ogg__is_capture_pattern(id)) { - *pCRC32 = DRFLAC_OGG_CAPTURE_PATTERN_CRC32; + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_int32 side = pInputSamples0[i] << shift0; + drflac_int32 right = pInputSamples1[i] << shift1; + drflac_int32 left = right + side; - drflac_result result = drflac_ogg__read_page_header_after_capture_pattern(onRead, pUserData, pHeader, pBytesRead, pCRC32); - if (result == DRFLAC_SUCCESS) { - return DRFLAC_SUCCESS; - } else { - if (result == DRFLAC_CRC_MISMATCH) { - continue; - } else { - return result; - } - } - } else { - // The first 4 bytes did not equal the capture pattern. Read the next byte and try again. - id[0] = id[1]; - id[1] = id[2]; - id[2] = id[3]; - if (onRead(pUserData, &id[3], 1) != 1) { - return DRFLAC_END_OF_STREAM; - } - *pBytesRead += 1; - } + left >>= 16; + right >>= 16; + + pOutputSamples[i*2+0] = (drflac_int16)left; + pOutputSamples[i*2+1] = (drflac_int16)right; } } - -// The main part of the Ogg encapsulation is the conversion from the physical Ogg bitstream to the native FLAC bitstream. It works -// in three general stages: Ogg Physical Bitstream -> Ogg/FLAC Logical Bitstream -> FLAC Native Bitstream. dr_flac is designed -// in such a way that the core sections assume everything is delivered in native format. Therefore, for each encapsulation type -// dr_flac is supporting there needs to be a layer sitting on top of the onRead and onSeek callbacks that ensures the bits read from -// the physical Ogg bitstream are converted and delivered in native FLAC format. -typedef struct +#if defined(DRFLAC_SUPPORT_SSE2) +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) { - drflac_read_proc onRead; // The original onRead callback from drflac_open() and family. 
- drflac_seek_proc onSeek; // The original onSeek callback from drflac_open() and family. - void* pUserData; // The user data passed on onRead and onSeek. This is the user data that was passed on drflac_open() and family. - drflac_uint64 currentBytePos; // The position of the byte we are sitting on in the physical byte stream. Used for efficient seeking. - drflac_uint64 firstBytePos; // The position of the first byte in the physical bitstream. Points to the start of the "OggS" identifier of the FLAC bos page. - drflac_uint32 serialNumber; // The serial number of the FLAC audio pages. This is determined by the initial header page that was read during initialization. - drflac_ogg_page_header bosPageHeader; // Used for seeking. - drflac_ogg_page_header currentPageHeader; - drflac_uint32 bytesRemainingInPage; - drflac_uint32 pageDataSize; - drflac_uint8 pageData[DRFLAC_OGG_MAX_PAGE_SIZE]; -} drflac_oggbs; // oggbs = Ogg Bitstream + drflac_uint64 frameCount4; + drflac_int32 shift0; + drflac_int32 shift1; + drflac_uint64 i; -static size_t drflac_oggbs__read_physical(drflac_oggbs* oggbs, void* bufferOut, size_t bytesToRead) -{ - size_t bytesActuallyRead = oggbs->onRead(oggbs->pUserData, bufferOut, bytesToRead); - oggbs->currentBytePos += bytesActuallyRead; + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); - return bytesActuallyRead; -} + frameCount4 = frameCount >> 2; -static drflac_bool32 drflac_oggbs__seek_physical(drflac_oggbs* oggbs, drflac_uint64 offset, drflac_seek_origin origin) -{ - if (origin == drflac_seek_origin_start) { - if (offset <= 0x7FFFFFFF) { - if (!oggbs->onSeek(oggbs->pUserData, (int)offset, drflac_seek_origin_start)) { - return DRFLAC_FALSE; - } - oggbs->currentBytePos = offset; + shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); - return DRFLAC_TRUE; - } else { - if (!oggbs->onSeek(oggbs->pUserData, 0x7FFFFFFF, 
drflac_seek_origin_start)) { - return DRFLAC_FALSE; - } - oggbs->currentBytePos = offset; + for (i = 0; i < frameCount4; ++i) { + __m128i side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0); + __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1); + __m128i left = _mm_add_epi32(right, side); - return drflac_oggbs__seek_physical(oggbs, offset - 0x7FFFFFFF, drflac_seek_origin_current); - } - } else { - while (offset > 0x7FFFFFFF) { - if (!oggbs->onSeek(oggbs->pUserData, 0x7FFFFFFF, drflac_seek_origin_current)) { - return DRFLAC_FALSE; - } - oggbs->currentBytePos += 0x7FFFFFFF; - offset -= 0x7FFFFFFF; - } + left = _mm_srai_epi32(left, 16); + right = _mm_srai_epi32(right, 16); - if (!oggbs->onSeek(oggbs->pUserData, (int)offset, drflac_seek_origin_current)) { // <-- Safe cast thanks to the loop above. - return DRFLAC_FALSE; - } - oggbs->currentBytePos += offset; + _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right)); + } - return DRFLAC_TRUE; + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_int32 side = pInputSamples0[i] << shift0; + drflac_int32 right = pInputSamples1[i] << shift1; + drflac_int32 left = right + side; + + left >>= 16; + right >>= 16; + + pOutputSamples[i*2+0] = (drflac_int16)left; + pOutputSamples[i*2+1] = (drflac_int16)right; } } +#endif -static drflac_bool32 drflac_oggbs__goto_next_page(drflac_oggbs* oggbs, drflac_ogg_crc_mismatch_recovery recoveryMethod) +#if defined(DRFLAC_SUPPORT_NEON) +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) { - drflac_ogg_page_header header; - for (;;) { - drflac_uint32 crc32 = 0; - drflac_uint32 bytesRead; - if (drflac_ogg__read_page_header(oggbs->onRead, oggbs->pUserData, &header, 
&bytesRead, &crc32) != DRFLAC_SUCCESS) { - return DRFLAC_FALSE; - } - oggbs->currentBytePos += bytesRead; + drflac_uint64 frameCount4; + drflac_int32 shift0; + drflac_int32 shift1; + drflac_uint64 i; + int32x4_t shift0_4; + int32x4_t shift1_4; - drflac_uint32 pageBodySize = drflac_ogg__get_page_body_size(&header); - if (pageBodySize > DRFLAC_OGG_MAX_PAGE_SIZE) { - continue; // Invalid page size. Assume it's corrupted and just move to the next page. - } + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); - if (header.serialNumber != oggbs->serialNumber) { - // It's not a FLAC page. Skip it. - if (pageBodySize > 0 && !drflac_oggbs__seek_physical(oggbs, pageBodySize, drflac_seek_origin_current)) { - return DRFLAC_FALSE; - } - continue; - } + frameCount4 = frameCount >> 2; + shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); - // We need to read the entire page and then do a CRC check on it. If there's a CRC mismatch we need to skip this page. - if (drflac_oggbs__read_physical(oggbs, oggbs->pageData, pageBodySize) != pageBodySize) { - return DRFLAC_FALSE; - } - oggbs->pageDataSize = pageBodySize; + shift0_4 = vdupq_n_s32(shift0); + shift1_4 = vdupq_n_s32(shift1); -#ifndef DR_FLAC_NO_CRC - drflac_uint32 actualCRC32 = drflac_crc32_buffer(crc32, oggbs->pageData, oggbs->pageDataSize); - if (actualCRC32 != header.checksum) { - if (recoveryMethod == drflac_ogg_recover_on_crc_mismatch) { - continue; // CRC mismatch. Skip this page. - } else { - // Even though we are failing on a CRC mismatch, we still want our stream to be in a good state. Therefore we - // go to the next valid page to ensure we're in a good state, but return false to let the caller know that the - // seek did not fully complete. 
- drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch); - return DRFLAC_FALSE; - } - } -#else - (void)recoveryMethod; // <-- Silence a warning. + for (i = 0; i < frameCount4; ++i) { + int32x4_t side; + int32x4_t right; + int32x4_t left; + + side = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), shift0_4); + right = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), shift1_4); + left = vaddq_s32(right, side); + + left = vshrq_n_s32(left, 16); + right = vshrq_n_s32(right, 16); + + drflac__vst2q_s16(pOutputSamples + i*8, vzip_s16(vmovn_s32(left), vmovn_s32(right))); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_int32 side = pInputSamples0[i] << shift0; + drflac_int32 right = pInputSamples1[i] << shift1; + drflac_int32 left = right + side; + + left >>= 16; + right >>= 16; + + pOutputSamples[i*2+0] = (drflac_int16)left; + pOutputSamples[i*2+1] = (drflac_int16)right; + } +} #endif - oggbs->currentPageHeader = header; - oggbs->bytesRemainingInPage = pageBodySize; - return DRFLAC_TRUE; +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) +{ +#if defined(DRFLAC_SUPPORT_SSE2) + if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s16__decode_right_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#elif defined(DRFLAC_SUPPORT_NEON) + if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s16__decode_right_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#endif + { + /* Scalar fallback. 
*/ +#if 0 + drflac_read_pcm_frames_s16__decode_right_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#else + drflac_read_pcm_frames_s16__decode_right_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#endif } } -// Function below is unused at the moment, but I might be re-adding it later. + #if 0 -static drflac_uint8 drflac_oggbs__get_current_segment_index(drflac_oggbs* oggbs, drflac_uint8* pBytesRemainingInSeg) +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) { - drflac_uint32 bytesConsumedInPage = drflac_ogg__get_page_body_size(&oggbs->currentPageHeader) - oggbs->bytesRemainingInPage; - drflac_uint8 iSeg = 0; - drflac_uint32 iByte = 0; - while (iByte < bytesConsumedInPage) { - drflac_uint8 segmentSize = oggbs->currentPageHeader.segmentTable[iSeg]; - if (iByte + segmentSize > bytesConsumedInPage) { - break; - } else { - iSeg += 1; - iByte += segmentSize; - } - } + for (drflac_uint64 i = 0; i < frameCount; ++i) { + drflac_int32 mid = pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_int32 side = pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; - *pBytesRemainingInSeg = oggbs->currentPageHeader.segmentTable[iSeg] - (drflac_uint8)(bytesConsumedInPage - iByte); - return iSeg; + mid = (((drflac_uint32)mid) << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = (drflac_int16)((((mid + side) >> 1) << unusedBitsPerSample) >> 16); + pOutputSamples[i*2+1] = (drflac_int16)((((mid - side) >> 1) << unusedBitsPerSample) >> 16); + } } +#endif -static drflac_bool32 drflac_oggbs__seek_to_next_packet(drflac_oggbs* oggbs) -{ - // The current packet ends when we get to the segment with a lacing value of < 
255 which is not at the end of a page. - for (;;) { - drflac_bool32 atEndOfPage = DRFLAC_FALSE; +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + + int shift = unusedBitsPerSample; + if (shift > 0) { + shift -= 1; + for (i = 0; i < frameCount4; ++i) { + drflac_int32 temp0L; + drflac_int32 temp1L; + drflac_int32 temp2L; + drflac_int32 temp3L; + drflac_int32 temp0R; + drflac_int32 temp1R; + drflac_int32 temp2R; + drflac_int32 temp3R; + + drflac_int32 mid0 = pInputSamples0[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_int32 mid1 = pInputSamples0[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_int32 mid2 = pInputSamples0[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_int32 mid3 = pInputSamples0[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + + drflac_int32 side0 = pInputSamples1[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_int32 side1 = pInputSamples1[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_int32 side2 = pInputSamples1[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_int32 side3 = pInputSamples1[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid0 = (((drflac_uint32)mid0) << 1) | (side0 & 0x01); + mid1 = (((drflac_uint32)mid1) << 1) | (side1 & 0x01); + mid2 = (((drflac_uint32)mid2) << 1) | (side2 & 0x01); + mid3 = (((drflac_uint32)mid3) << 1) | (side3 & 0x01); + + temp0L = ((mid0 + side0) << shift); + temp1L = ((mid1 + side1) << shift); + temp2L = ((mid2 + side2) << shift); + temp3L = ((mid3 + side3) << shift); + + temp0R = ((mid0 - side0) << shift); + temp1R = ((mid1 
- side1) << shift); + temp2R = ((mid2 - side2) << shift); + temp3R = ((mid3 - side3) << shift); + + temp0L >>= 16; + temp1L >>= 16; + temp2L >>= 16; + temp3L >>= 16; + + temp0R >>= 16; + temp1R >>= 16; + temp2R >>= 16; + temp3R >>= 16; + + pOutputSamples[i*8+0] = (drflac_int16)temp0L; + pOutputSamples[i*8+1] = (drflac_int16)temp0R; + pOutputSamples[i*8+2] = (drflac_int16)temp1L; + pOutputSamples[i*8+3] = (drflac_int16)temp1R; + pOutputSamples[i*8+4] = (drflac_int16)temp2L; + pOutputSamples[i*8+5] = (drflac_int16)temp2R; + pOutputSamples[i*8+6] = (drflac_int16)temp3L; + pOutputSamples[i*8+7] = (drflac_int16)temp3R; + } + } else { + for (i = 0; i < frameCount4; ++i) { + drflac_int32 temp0L; + drflac_int32 temp1L; + drflac_int32 temp2L; + drflac_int32 temp3L; + drflac_int32 temp0R; + drflac_int32 temp1R; + drflac_int32 temp2R; + drflac_int32 temp3R; - drflac_uint8 bytesRemainingInSeg; - drflac_uint8 iFirstSeg = drflac_oggbs__get_current_segment_index(oggbs, &bytesRemainingInSeg); + drflac_int32 mid0 = pInputSamples0[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_int32 mid1 = pInputSamples0[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_int32 mid2 = pInputSamples0[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_int32 mid3 = pInputSamples0[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; - drflac_uint32 bytesToEndOfPacketOrPage = bytesRemainingInSeg; - for (drflac_uint8 iSeg = iFirstSeg; iSeg < oggbs->currentPageHeader.segmentCount; ++iSeg) { - drflac_uint8 segmentSize = oggbs->currentPageHeader.segmentTable[iSeg]; - if (segmentSize < 255) { - if (iSeg == oggbs->currentPageHeader.segmentCount-1) { - atEndOfPage = DRFLAC_TRUE; - } + drflac_int32 side0 = pInputSamples1[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_int32 side1 = pInputSamples1[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_int32 side2 = 
pInputSamples1[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_int32 side3 = pInputSamples1[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; - break; - } + mid0 = (((drflac_uint32)mid0) << 1) | (side0 & 0x01); + mid1 = (((drflac_uint32)mid1) << 1) | (side1 & 0x01); + mid2 = (((drflac_uint32)mid2) << 1) | (side2 & 0x01); + mid3 = (((drflac_uint32)mid3) << 1) | (side3 & 0x01); - bytesToEndOfPacketOrPage += segmentSize; - } + temp0L = ((mid0 + side0) >> 1); + temp1L = ((mid1 + side1) >> 1); + temp2L = ((mid2 + side2) >> 1); + temp3L = ((mid3 + side3) >> 1); - // At this point we will have found either the packet or the end of the page. If were at the end of the page we'll - // want to load the next page and keep searching for the end of the packet. - drflac_oggbs__seek_physical(oggbs, bytesToEndOfPacketOrPage, drflac_seek_origin_current); - oggbs->bytesRemainingInPage -= bytesToEndOfPacketOrPage; + temp0R = ((mid0 - side0) >> 1); + temp1R = ((mid1 - side1) >> 1); + temp2R = ((mid2 - side2) >> 1); + temp3R = ((mid3 - side3) >> 1); - if (atEndOfPage) { - // We're potentially at the next packet, but we need to check the next page first to be sure because the packet may - // straddle pages. - if (!drflac_oggbs__goto_next_page(oggbs)) { - return DRFLAC_FALSE; - } + temp0L >>= 16; + temp1L >>= 16; + temp2L >>= 16; + temp3L >>= 16; - // If it's a fresh packet it most likely means we're at the next packet. - if ((oggbs->currentPageHeader.headerType & 0x01) == 0) { - return DRFLAC_TRUE; - } - } else { - // We're at the next packet. 
- return DRFLAC_TRUE; + temp0R >>= 16; + temp1R >>= 16; + temp2R >>= 16; + temp3R >>= 16; + + pOutputSamples[i*8+0] = (drflac_int16)temp0L; + pOutputSamples[i*8+1] = (drflac_int16)temp0R; + pOutputSamples[i*8+2] = (drflac_int16)temp1L; + pOutputSamples[i*8+3] = (drflac_int16)temp1R; + pOutputSamples[i*8+4] = (drflac_int16)temp2L; + pOutputSamples[i*8+5] = (drflac_int16)temp2R; + pOutputSamples[i*8+6] = (drflac_int16)temp3L; + pOutputSamples[i*8+7] = (drflac_int16)temp3R; } } -} -static drflac_bool32 drflac_oggbs__seek_to_next_frame(drflac_oggbs* oggbs) -{ - // The bitstream should be sitting on the first byte just after the header of the frame. + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_int32 mid = pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_int32 side = pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; - // What we're actually doing here is seeking to the start of the next packet. - return drflac_oggbs__seek_to_next_packet(oggbs); + mid = (((drflac_uint32)mid) << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = (drflac_int16)((((mid + side) >> 1) << unusedBitsPerSample) >> 16); + pOutputSamples[i*2+1] = (drflac_int16)((((mid - side) >> 1) << unusedBitsPerSample) >> 16); + } } -#endif -static size_t drflac__on_read_ogg(void* pUserData, void* bufferOut, size_t bytesToRead) +#if defined(DRFLAC_SUPPORT_SSE2) +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) { - drflac_oggbs* oggbs = (drflac_oggbs*)pUserData; - drflac_assert(oggbs != NULL); + drflac_uint64 i; + drflac_uint64 frameCount4; + drflac_int32 shift; - drflac_uint8* pRunningBufferOut = (drflac_uint8*)bufferOut; + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); - // Reading is done page-by-page. 
If we've run out of bytes in the page we need to move to the next one. - size_t bytesRead = 0; - while (bytesRead < bytesToRead) { - size_t bytesRemainingToRead = bytesToRead - bytesRead; + frameCount4 = frameCount >> 2; - if (oggbs->bytesRemainingInPage >= bytesRemainingToRead) { - drflac_copy_memory(pRunningBufferOut, oggbs->pageData + (oggbs->pageDataSize - oggbs->bytesRemainingInPage), bytesRemainingToRead); - bytesRead += bytesRemainingToRead; - oggbs->bytesRemainingInPage -= (drflac_uint32)bytesRemainingToRead; - break; - } + shift = unusedBitsPerSample; + if (shift == 0) { + for (i = 0; i < frameCount4; ++i) { + __m128i mid; + __m128i side; + __m128i left; + __m128i right; - // If we get here it means some of the requested data is contained in the next pages. - if (oggbs->bytesRemainingInPage > 0) { - drflac_copy_memory(pRunningBufferOut, oggbs->pageData + (oggbs->pageDataSize - oggbs->bytesRemainingInPage), oggbs->bytesRemainingInPage); - bytesRead += oggbs->bytesRemainingInPage; - pRunningBufferOut += oggbs->bytesRemainingInPage; - oggbs->bytesRemainingInPage = 0; - } + mid = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); - drflac_assert(bytesRemainingToRead > 0); - if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch)) { - break; // Failed to go to the next page. Might have simply hit the end of the stream. 
- } - } + mid = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01))); - return bytesRead; -} + left = _mm_srai_epi32(_mm_add_epi32(mid, side), 1); + right = _mm_srai_epi32(_mm_sub_epi32(mid, side), 1); -static drflac_bool32 drflac__on_seek_ogg(void* pUserData, int offset, drflac_seek_origin origin) -{ - drflac_oggbs* oggbs = (drflac_oggbs*)pUserData; - drflac_assert(oggbs != NULL); - drflac_assert(offset >= 0); // <-- Never seek backwards. + left = _mm_srai_epi32(left, 16); + right = _mm_srai_epi32(right, 16); - // Seeking is always forward which makes things a lot simpler. - if (origin == drflac_seek_origin_start) { - if (!drflac_oggbs__seek_physical(oggbs, (int)oggbs->firstBytePos, drflac_seek_origin_start)) { - return DRFLAC_FALSE; + _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right)); } - if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_fail_on_crc_mismatch)) { - return DRFLAC_FALSE; + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_int32 mid = pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_int32 side = pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid = (((drflac_uint32)mid) << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = (drflac_int16)(((mid + side) >> 1) >> 16); + pOutputSamples[i*2+1] = (drflac_int16)(((mid - side) >> 1) >> 16); } + } else { + shift -= 1; + for (i = 0; i < frameCount4; ++i) { + __m128i mid; + __m128i side; + __m128i left; + __m128i right; - return drflac__on_seek_ogg(pUserData, offset, drflac_seek_origin_current); - } + mid = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); + mid = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01))); 
- drflac_assert(origin == drflac_seek_origin_current); + left = _mm_slli_epi32(_mm_add_epi32(mid, side), shift); + right = _mm_slli_epi32(_mm_sub_epi32(mid, side), shift); - int bytesSeeked = 0; - while (bytesSeeked < offset) { - int bytesRemainingToSeek = offset - bytesSeeked; - drflac_assert(bytesRemainingToSeek >= 0); + left = _mm_srai_epi32(left, 16); + right = _mm_srai_epi32(right, 16); - if (oggbs->bytesRemainingInPage >= (size_t)bytesRemainingToSeek) { - bytesSeeked += bytesRemainingToSeek; - oggbs->bytesRemainingInPage -= bytesRemainingToSeek; - break; + _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right)); } - // If we get here it means some of the requested data is contained in the next pages. - if (oggbs->bytesRemainingInPage > 0) { - bytesSeeked += (int)oggbs->bytesRemainingInPage; - oggbs->bytesRemainingInPage = 0; - } + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_int32 mid = pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_int32 side = pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; - drflac_assert(bytesRemainingToSeek > 0); - if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_fail_on_crc_mismatch)) { - // Failed to go to the next page. We either hit the end of the stream or had a CRC mismatch. 
- return DRFLAC_FALSE; + mid = (((drflac_uint32)mid) << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = (drflac_int16)(((mid + side) << shift) >> 16); + pOutputSamples[i*2+1] = (drflac_int16)(((mid - side) << shift) >> 16); } } +} +#endif + +#if defined(DRFLAC_SUPPORT_NEON) +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4; + int shift; + int32x4_t wbpsShift0_4; /* wbps = Wasted Bits Per Sample */ + int32x4_t wbpsShift1_4; /* wbps = Wasted Bits Per Sample */ + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); + + frameCount4 = frameCount >> 2; - return DRFLAC_TRUE; -} + wbpsShift0_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + wbpsShift1_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); -drflac_bool32 drflac_ogg__seek_to_sample(drflac* pFlac, drflac_uint64 sampleIndex) -{ - drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs; + shift = unusedBitsPerSample; + if (shift == 0) { + for (i = 0; i < frameCount4; ++i) { + int32x4_t mid; + int32x4_t side; + int32x4_t left; + int32x4_t right; - drflac_uint64 originalBytePos = oggbs->currentBytePos; // For recovery. + mid = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), wbpsShift0_4); + side = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), wbpsShift1_4); - // First seek to the first frame. - if (!drflac__seek_to_byte(&pFlac->bs, pFlac->firstFramePos)) { - return DRFLAC_FALSE; - } - oggbs->bytesRemainingInPage = 0; + mid = vorrq_s32(vshlq_n_s32(mid, 1), vandq_s32(side, vdupq_n_s32(1))); - drflac_uint64 runningGranulePosition = 0; - drflac_uint64 runningFrameBytePos = oggbs->currentBytePos; // <-- Points to the OggS identifier. 
- for (;;) { - if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch)) { - drflac_oggbs__seek_physical(oggbs, originalBytePos, drflac_seek_origin_start); - return DRFLAC_FALSE; // Never did find that sample... - } + left = vshrq_n_s32(vaddq_s32(mid, side), 1); + right = vshrq_n_s32(vsubq_s32(mid, side), 1); - runningFrameBytePos = oggbs->currentBytePos - drflac_ogg__get_page_header_size(&oggbs->currentPageHeader) - oggbs->pageDataSize; - if (oggbs->currentPageHeader.granulePosition*pFlac->channels >= sampleIndex) { - break; // The sample is somewhere in the previous page. - } + left = vshrq_n_s32(left, 16); + right = vshrq_n_s32(right, 16); + drflac__vst2q_s16(pOutputSamples + i*8, vzip_s16(vmovn_s32(left), vmovn_s32(right))); + } - // At this point we know the sample is not in the previous page. It could possibly be in this page. For simplicity we - // disregard any pages that do not begin a fresh packet. - if ((oggbs->currentPageHeader.headerType & 0x01) == 0) { // <-- Is it a fresh page? - if (oggbs->currentPageHeader.segmentTable[0] >= 2) { - drflac_uint8 firstBytesInPage[2]; - firstBytesInPage[0] = oggbs->pageData[0]; - firstBytesInPage[1] = oggbs->pageData[1]; + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_int32 mid = pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_int32 side = pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; - if ((firstBytesInPage[0] == 0xFF) && (firstBytesInPage[1] & 0xFC) == 0xF8) { // <-- Does the page begin with a frame's sync code? 
- runningGranulePosition = oggbs->currentPageHeader.granulePosition*pFlac->channels; - } + mid = (((drflac_uint32)mid) << 1) | (side & 0x01); - continue; - } + pOutputSamples[i*2+0] = (drflac_int16)(((mid + side) >> 1) >> 16); + pOutputSamples[i*2+1] = (drflac_int16)(((mid - side) >> 1) >> 16); } - } + } else { + int32x4_t shift4; + shift -= 1; + shift4 = vdupq_n_s32(shift); - // We found the page that that is closest to the sample, so now we need to find it. The first thing to do is seek to the - // start of that page. In the loop above we checked that it was a fresh page which means this page is also the start of - // a new frame. This property means that after we've seeked to the page we can immediately start looping over frames until - // we find the one containing the target sample. - if (!drflac_oggbs__seek_physical(oggbs, runningFrameBytePos, drflac_seek_origin_start)) { - return DRFLAC_FALSE; - } - if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch)) { - return DRFLAC_FALSE; - } + for (i = 0; i < frameCount4; ++i) { + int32x4_t mid; + int32x4_t side; + int32x4_t left; + int32x4_t right; + mid = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), wbpsShift0_4); + side = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), wbpsShift1_4); - // At this point we'll be sitting on the first byte of the frame header of the first frame in the page. We just keep - // looping over these frames until we find the one containing the sample we're after. - drflac_uint64 runningSampleCount = runningGranulePosition; - for (;;) { - // There are two ways to find the sample and seek past irrelevant frames: - // 1) Use the native FLAC decoder. - // 2) Use Ogg's framing system. - // - // Both of these options have their own pros and cons. Using the native FLAC decoder is slower because it needs to - // do a full decode of the frame. Using Ogg's framing system is faster, but more complicated and involves some code - // duplication for the decoding of frame headers. 
- // - // Another thing to consider is that using the Ogg framing system will perform direct seeking of the physical Ogg - // bitstream. This is important to consider because it means we cannot read data from the drflac_bs object using the - // standard drflac__*() APIs because that will read in extra data for its own internal caching which in turn breaks - // the positioning of the read pointer of the physical Ogg bitstream. Therefore, anything that would normally be read - // using the native FLAC decoding APIs, such as drflac__read_next_frame_header(), need to be re-implemented so as to - // avoid the use of the drflac_bs object. - // - // Considering these issues, I have decided to use the slower native FLAC decoding method for the following reasons: - // 1) Seeking is already partially accelerated using Ogg's paging system in the code block above. - // 2) Seeking in an Ogg encapsulated FLAC stream is probably quite uncommon. - // 3) Simplicity. - if (!drflac__read_next_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) { - return DRFLAC_FALSE; + mid = vorrq_s32(vshlq_n_s32(mid, 1), vandq_s32(side, vdupq_n_s32(1))); + + left = vshlq_s32(vaddq_s32(mid, side), shift4); + right = vshlq_s32(vsubq_s32(mid, side), shift4); + + left = vshrq_n_s32(left, 16); + right = vshrq_n_s32(right, 16); + + drflac__vst2q_s16(pOutputSamples + i*8, vzip_s16(vmovn_s32(left), vmovn_s32(right))); } - drflac_uint64 firstSampleInFrame = 0; - drflac_uint64 lastSampleInFrame = 0; - drflac__get_current_frame_sample_range(pFlac, &firstSampleInFrame, &lastSampleInFrame); + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_int32 mid = pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_int32 side = pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; - drflac_uint64 sampleCountInThisFrame = (lastSampleInFrame - firstSampleInFrame) + 1; - if (sampleIndex < (runningSampleCount + 
sampleCountInThisFrame)) { - // The sample should be in this frame. We need to fully decode it, however if it's an invalid frame (a CRC mismatch), we need to pretend - // it never existed and keep iterating. - drflac_result result = drflac__decode_frame(pFlac); - if (result == DRFLAC_SUCCESS) { - // The frame is valid. We just need to skip over some samples to ensure it's sample-exact. - drflac_uint64 samplesToDecode = (size_t)(sampleIndex - runningSampleCount); // <-- Safe cast because the maximum number of samples in a frame is 65535. - if (samplesToDecode == 0) { - return DRFLAC_TRUE; - } - return drflac_read_s32(pFlac, samplesToDecode, NULL) != 0; // <-- If this fails, something bad has happened (it should never fail). - } else { - if (result == DRFLAC_CRC_MISMATCH) { - continue; // CRC mismatch. Pretend this frame never existed. - } else { - return DRFLAC_FALSE; - } - } - } else { - // It's not in this frame. We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this - // frame never existed and leave the running sample count untouched. - drflac_result result = drflac__seek_to_next_frame(pFlac); - if (result == DRFLAC_SUCCESS) { - runningSampleCount += sampleCountInThisFrame; - } else { - if (result == DRFLAC_CRC_MISMATCH) { - continue; // CRC mismatch. Pretend this frame never existed. 
- } else { - return DRFLAC_FALSE; - } - } + mid = (((drflac_uint32)mid) << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = (drflac_int16)(((mid + side) << shift) >> 16); + pOutputSamples[i*2+1] = (drflac_int16)(((mid - side) << shift) >> 16); } } } +#endif +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) +{ +#if defined(DRFLAC_SUPPORT_SSE2) + if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s16__decode_mid_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#elif defined(DRFLAC_SUPPORT_NEON) + if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s16__decode_mid_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#endif + { + /* Scalar fallback. */ +#if 0 + drflac_read_pcm_frames_s16__decode_mid_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#else + drflac_read_pcm_frames_s16__decode_mid_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#endif + } +} -drflac_bool32 drflac__init_private__ogg(drflac_init_info* pInit, drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_bool32 relaxed) + +#if 0 +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) { - // Pre: The bit stream should be sitting just past the 4-byte OggS capture pattern. 
- (void)relaxed; + for (drflac_uint64 i = 0; i < frameCount; ++i) { + pOutputSamples[i*2+0] = (drflac_int16)((pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample)) >> 16); + pOutputSamples[i*2+1] = (drflac_int16)((pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample)) >> 16); + } +} +#endif - pInit->container = drflac_container_ogg; - pInit->oggFirstBytePos = 0; +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; - // We'll get here if the first 4 bytes of the stream were the OggS capture pattern, however it doesn't necessarily mean the - // stream includes FLAC encoded audio. To check for this we need to scan the beginning-of-stream page markers and check if - // any match the FLAC specification. Important to keep in mind that the stream may be multiplexed. - drflac_ogg_page_header header; + int shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + int shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); - drflac_uint32 crc32 = DRFLAC_OGG_CAPTURE_PATTERN_CRC32; - drflac_uint32 bytesRead = 0; - if (drflac_ogg__read_page_header_after_capture_pattern(onRead, pUserData, &header, &bytesRead, &crc32) != DRFLAC_SUCCESS) { - return DRFLAC_FALSE; - } - pInit->runningFilePos += bytesRead; + for (i = 0; i < frameCount4; ++i) { + drflac_int32 tempL0 = pInputSamples0[i*4+0] << shift0; + drflac_int32 tempL1 = pInputSamples0[i*4+1] << shift0; + drflac_int32 tempL2 = pInputSamples0[i*4+2] << shift0; + drflac_int32 tempL3 = pInputSamples0[i*4+3] << shift0; - for (;;) { - // Break if we're past the beginning of stream page. 
- if ((header.headerType & 0x02) == 0) { - return DRFLAC_FALSE; - } + drflac_int32 tempR0 = pInputSamples1[i*4+0] << shift1; + drflac_int32 tempR1 = pInputSamples1[i*4+1] << shift1; + drflac_int32 tempR2 = pInputSamples1[i*4+2] << shift1; + drflac_int32 tempR3 = pInputSamples1[i*4+3] << shift1; + tempL0 >>= 16; + tempL1 >>= 16; + tempL2 >>= 16; + tempL3 >>= 16; - // Check if it's a FLAC header. - int pageBodySize = drflac_ogg__get_page_body_size(&header); - if (pageBodySize == 51) { // 51 = the lacing value of the FLAC header packet. - // It could be a FLAC page... - drflac_uint32 bytesRemainingInPage = pageBodySize; + tempR0 >>= 16; + tempR1 >>= 16; + tempR2 >>= 16; + tempR3 >>= 16; - drflac_uint8 packetType; - if (onRead(pUserData, &packetType, 1) != 1) { - return DRFLAC_FALSE; - } + pOutputSamples[i*8+0] = (drflac_int16)tempL0; + pOutputSamples[i*8+1] = (drflac_int16)tempR0; + pOutputSamples[i*8+2] = (drflac_int16)tempL1; + pOutputSamples[i*8+3] = (drflac_int16)tempR1; + pOutputSamples[i*8+4] = (drflac_int16)tempL2; + pOutputSamples[i*8+5] = (drflac_int16)tempR2; + pOutputSamples[i*8+6] = (drflac_int16)tempL3; + pOutputSamples[i*8+7] = (drflac_int16)tempR3; + } - bytesRemainingInPage -= 1; - if (packetType == 0x7F) { - // Increasingly more likely to be a FLAC page... - drflac_uint8 sig[4]; - if (onRead(pUserData, sig, 4) != 4) { - return DRFLAC_FALSE; - } + for (i = (frameCount4 << 2); i < frameCount; ++i) { + pOutputSamples[i*2+0] = (drflac_int16)((pInputSamples0[i] << shift0) >> 16); + pOutputSamples[i*2+1] = (drflac_int16)((pInputSamples1[i] << shift1) >> 16); + } +} - bytesRemainingInPage -= 4; - if (sig[0] == 'F' && sig[1] == 'L' && sig[2] == 'A' && sig[3] == 'C') { - // Almost certainly a FLAC page... 
- drflac_uint8 mappingVersion[2]; - if (onRead(pUserData, mappingVersion, 2) != 2) { - return DRFLAC_FALSE; - } +#if defined(DRFLAC_SUPPORT_SSE2) +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; - if (mappingVersion[0] != 1) { - return DRFLAC_FALSE; // Only supporting version 1.x of the Ogg mapping. - } + drflac_int32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + drflac_int32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); - // The next 2 bytes are the non-audio packets, not including this one. We don't care about this because we're going to - // be handling it in a generic way based on the serial number and packet types. - if (!onSeek(pUserData, 2, drflac_seek_origin_current)) { - return DRFLAC_FALSE; - } + for (i = 0; i < frameCount4; ++i) { + __m128i left = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0); + __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1); - // Expecting the native FLAC signature "fLaC". - if (onRead(pUserData, sig, 4) != 4) { - return DRFLAC_FALSE; - } + left = _mm_srai_epi32(left, 16); + right = _mm_srai_epi32(right, 16); - if (sig[0] == 'f' && sig[1] == 'L' && sig[2] == 'a' && sig[3] == 'C') { - // The remaining data in the page should be the STREAMINFO block. - drflac_uint8 isLastBlock; - drflac_uint8 blockType; - drflac_uint32 blockSize; - if (!drflac__read_and_decode_block_header(onRead, pUserData, &isLastBlock, &blockType, &blockSize)) { - return DRFLAC_FALSE; - } + /* At this point we have results. 
We can now pack and interleave these into a single __m128i object and then store the in the output buffer. */ + _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right)); + } - if (blockType != DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO || blockSize != 34) { - return DRFLAC_FALSE; // Invalid block type. First block must be the STREAMINFO block. - } + for (i = (frameCount4 << 2); i < frameCount; ++i) { + pOutputSamples[i*2+0] = (drflac_int16)((pInputSamples0[i] << shift0) >> 16); + pOutputSamples[i*2+1] = (drflac_int16)((pInputSamples1[i] << shift1) >> 16); + } +} +#endif - drflac_streaminfo streaminfo; - if (drflac__read_streaminfo(onRead, pUserData, &streaminfo)) { - // Success! - pInit->hasStreamInfoBlock = DRFLAC_TRUE; - pInit->sampleRate = streaminfo.sampleRate; - pInit->channels = streaminfo.channels; - pInit->bitsPerSample = streaminfo.bitsPerSample; - pInit->totalSampleCount = streaminfo.totalSampleCount; - pInit->maxBlockSize = streaminfo.maxBlockSize; - pInit->hasMetadataBlocks = !isLastBlock; +#if defined(DRFLAC_SUPPORT_NEON) +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; - if (onMeta) { - drflac_metadata metadata; - metadata.type = DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO; - metadata.pRawData = NULL; - metadata.rawDataSize = 0; - metadata.data.streaminfo = streaminfo; - onMeta(pUserDataMD, &metadata); - } + drflac_int32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + drflac_int32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); - pInit->runningFilePos += pageBodySize; - pInit->oggFirstBytePos = pInit->runningFilePos - 79; // Subtracting 79 will place us 
right on top of the "OggS" identifier of the FLAC bos page. - pInit->oggSerial = header.serialNumber; - pInit->oggBosHeader = header; - break; - } else { - // Failed to read STREAMINFO block. Aww, so close... - return DRFLAC_FALSE; - } - } else { - // Invalid file. - return DRFLAC_FALSE; - } - } else { - // Not a FLAC header. Skip it. - if (!onSeek(pUserData, bytesRemainingInPage, drflac_seek_origin_current)) { - return DRFLAC_FALSE; - } - } - } else { - // Not a FLAC header. Seek past the entire page and move on to the next. - if (!onSeek(pUserData, bytesRemainingInPage, drflac_seek_origin_current)) { - return DRFLAC_FALSE; - } - } - } else { - if (!onSeek(pUserData, pageBodySize, drflac_seek_origin_current)) { - return DRFLAC_FALSE; - } - } + int32x4_t shift0_4 = vdupq_n_s32(shift0); + int32x4_t shift1_4 = vdupq_n_s32(shift1); - pInit->runningFilePos += pageBodySize; + for (i = 0; i < frameCount4; ++i) { + int32x4_t left; + int32x4_t right; + left = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), shift0_4); + right = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), shift1_4); - // Read the header of the next page. - if (drflac_ogg__read_page_header(onRead, pUserData, &header, &bytesRead, &crc32) != DRFLAC_SUCCESS) { - return DRFLAC_FALSE; - } - pInit->runningFilePos += bytesRead; - } + left = vshrq_n_s32(left, 16); + right = vshrq_n_s32(right, 16); + drflac__vst2q_s16(pOutputSamples + i*8, vzip_s16(vmovn_s32(left), vmovn_s32(right))); + } - // If we get here it means we found a FLAC audio stream. We should be sitting on the first byte of the header of the next page. The next - // packets in the FLAC logical stream contain the metadata. The only thing left to do in the initialization phase for Ogg is to create the - // Ogg bistream object. - pInit->hasMetadataBlocks = DRFLAC_TRUE; // <-- Always have at least VORBIS_COMMENT metadata block. 
- return DRFLAC_TRUE; + for (i = (frameCount4 << 2); i < frameCount; ++i) { + pOutputSamples[i*2+0] = (drflac_int16)((pInputSamples0[i] << shift0) >> 16); + pOutputSamples[i*2+1] = (drflac_int16)((pInputSamples1[i] << shift1) >> 16); + } } #endif -drflac_bool32 drflac__init_private(drflac_init_info* pInit, drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, void* pUserDataMD) +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) { - if (pInit == NULL || onRead == NULL || onSeek == NULL) { - return DRFLAC_FALSE; +#if defined(DRFLAC_SUPPORT_SSE2) + if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s16__decode_independent_stereo__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#elif defined(DRFLAC_SUPPORT_NEON) + if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s16__decode_independent_stereo__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#endif + { + /* Scalar fallback. 
*/ +#if 0 + drflac_read_pcm_frames_s16__decode_independent_stereo__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#else + drflac_read_pcm_frames_s16__decode_independent_stereo__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#endif } +} - drflac_zero_memory(pInit, sizeof(*pInit)); - pInit->onRead = onRead; - pInit->onSeek = onSeek; - pInit->onMeta = onMeta; - pInit->container = container; - pInit->pUserData = pUserData; - pInit->pUserDataMD = pUserDataMD; - - pInit->bs.onRead = onRead; - pInit->bs.onSeek = onSeek; - pInit->bs.pUserData = pUserData; - drflac__reset_cache(&pInit->bs); - +drflac_uint64 drflac_read_pcm_frames_s16(drflac* pFlac, drflac_uint64 framesToRead, drflac_int16* pBufferOut) +{ + drflac_uint64 framesRead; + drflac_int32 unusedBitsPerSample; - // If the container is explicitly defined then we can try opening in relaxed mode. - drflac_bool32 relaxed = container != drflac_container_unknown; + if (pFlac == NULL || framesToRead == 0) { + return 0; + } - drflac_uint8 id[4]; + if (pBufferOut == NULL) { + return drflac__seek_forward_by_pcm_frames(pFlac, framesToRead); + } - // Skip over any ID3 tags. - for (;;) { - if (onRead(pUserData, id, 4) != 4) { - return DRFLAC_FALSE; // Ran out of data. - } - pInit->runningFilePos += 4; + unusedBitsPerSample = 32 - pFlac->bitsPerSample; - if (id[0] == 'I' && id[1] == 'D' && id[2] == '3') { - drflac_uint8 header[6]; - if (onRead(pUserData, header, 6) != 6) { - return DRFLAC_FALSE; // Ran out of data. + framesRead = 0; + while (framesToRead > 0) { + /* If we've run out of samples in this frame, go to the next. */ + if (pFlac->currentFLACFrame.pcmFramesRemaining == 0) { + if (!drflac__read_and_decode_next_flac_frame(pFlac)) { + break; /* Couldn't read the next frame, so just break from the loop and return. 
*/ } - pInit->runningFilePos += 6; + } else { + unsigned int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment); + drflac_uint64 iFirstPCMFrame = pFlac->currentFLACFrame.header.blockSizeInPCMFrames - pFlac->currentFLACFrame.pcmFramesRemaining; + drflac_uint64 frameCountThisIteration = framesToRead; - drflac_uint8 flags = header[1]; - drflac_uint32 headerSize; - drflac_copy_memory(&headerSize, header+2, 4); - headerSize = drflac__unsynchsafe_32(drflac__be2host_32(headerSize)); - if (flags & 0x10) { - headerSize += 10; + if (frameCountThisIteration > pFlac->currentFLACFrame.pcmFramesRemaining) { + frameCountThisIteration = pFlac->currentFLACFrame.pcmFramesRemaining; } - if (!onSeek(pUserData, headerSize, drflac_seek_origin_current)) { - return DRFLAC_FALSE; // Failed to seek past the tag. + if (channelCount == 2) { + const drflac_int32* pDecodedSamples0 = pFlac->currentFLACFrame.subframes[0].pSamplesS32 + iFirstPCMFrame; + const drflac_int32* pDecodedSamples1 = pFlac->currentFLACFrame.subframes[1].pSamplesS32 + iFirstPCMFrame; + + switch (pFlac->currentFLACFrame.header.channelAssignment) + { + case DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE: + { + drflac_read_pcm_frames_s16__decode_left_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); + } break; + + case DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE: + { + drflac_read_pcm_frames_s16__decode_right_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); + } break; + + case DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE: + { + drflac_read_pcm_frames_s16__decode_mid_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); + } break; + + case DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT: + default: + { + drflac_read_pcm_frames_s16__decode_independent_stereo(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, 
pDecodedSamples1, pBufferOut); + } break; + } + } else { + /* Generic interleaving. */ + drflac_uint64 i; + for (i = 0; i < frameCountThisIteration; ++i) { + unsigned int j; + for (j = 0; j < channelCount; ++j) { + drflac_int32 sampleS32 = (drflac_int32)((drflac_uint32)(pFlac->currentFLACFrame.subframes[j].pSamplesS32[iFirstPCMFrame + i]) << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[j].wastedBitsPerSample)); + pBufferOut[(i*channelCount)+j] = (drflac_int16)(sampleS32 >> 16); + } + } } - pInit->runningFilePos += headerSize; - } else { - break; + + framesRead += frameCountThisIteration; + pBufferOut += frameCountThisIteration * channelCount; + framesToRead -= frameCountThisIteration; + pFlac->currentPCMFrame += frameCountThisIteration; + pFlac->currentFLACFrame.pcmFramesRemaining -= (drflac_uint32)frameCountThisIteration; } } - if (id[0] == 'f' && id[1] == 'L' && id[2] == 'a' && id[3] == 'C') { - return drflac__init_private__native(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed); - } -#ifndef DR_FLAC_NO_OGG - if (id[0] == 'O' && id[1] == 'g' && id[2] == 'g' && id[3] == 'S') { - return drflac__init_private__ogg(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed); - } -#endif + return framesRead; +} - // If we get here it means we likely don't have a header. Try opening in relaxed mode, if applicable. - if (relaxed) { - if (container == drflac_container_native) { - return drflac__init_private__native(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed); - } -#ifndef DR_FLAC_NO_OGG - if (container == drflac_container_ogg) { - return drflac__init_private__ogg(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed); - } -#endif - } - // Unsupported container. 
- return DRFLAC_FALSE; +#if 0 +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + drflac_uint64 i; + for (i = 0; i < frameCount; ++i) { + drflac_int32 left = pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + drflac_int32 side = pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); + drflac_int32 right = left - side; + + pOutputSamples[i*2+0] = (float)(left / 2147483648.0); + pOutputSamples[i*2+1] = (float)(right / 2147483648.0); + } } +#endif -void drflac__init_from_info(drflac* pFlac, drflac_init_info* pInit) +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) { - drflac_assert(pFlac != NULL); - drflac_assert(pInit != NULL); + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; - drflac_zero_memory(pFlac, sizeof(*pFlac)); - pFlac->bs = pInit->bs; - pFlac->onMeta = pInit->onMeta; - pFlac->pUserDataMD = pInit->pUserDataMD; - pFlac->maxBlockSize = pInit->maxBlockSize; - pFlac->sampleRate = pInit->sampleRate; - pFlac->channels = (drflac_uint8)pInit->channels; - pFlac->bitsPerSample = (drflac_uint8)pInit->bitsPerSample; - pFlac->totalSampleCount = pInit->totalSampleCount; - pFlac->container = pInit->container; -} + float factor = 1 / 2147483648.0; -drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, void* pUserDataMD) -{ -#ifndef DRFLAC_NO_CPUID - // CPU support first. 
- drflac__init_cpu_caps(); -#endif + drflac_int32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_int32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + for (i = 0; i < frameCount4; ++i) { + drflac_int32 left0 = pInputSamples0[i*4+0] << shift0; + drflac_int32 left1 = pInputSamples0[i*4+1] << shift0; + drflac_int32 left2 = pInputSamples0[i*4+2] << shift0; + drflac_int32 left3 = pInputSamples0[i*4+3] << shift0; - drflac_init_info init; - if (!drflac__init_private(&init, onRead, onSeek, onMeta, container, pUserData, pUserDataMD)) { - return NULL; + drflac_int32 side0 = pInputSamples1[i*4+0] << shift1; + drflac_int32 side1 = pInputSamples1[i*4+1] << shift1; + drflac_int32 side2 = pInputSamples1[i*4+2] << shift1; + drflac_int32 side3 = pInputSamples1[i*4+3] << shift1; + + drflac_int32 right0 = left0 - side0; + drflac_int32 right1 = left1 - side1; + drflac_int32 right2 = left2 - side2; + drflac_int32 right3 = left3 - side3; + + pOutputSamples[i*8+0] = left0 * factor; + pOutputSamples[i*8+1] = right0 * factor; + pOutputSamples[i*8+2] = left1 * factor; + pOutputSamples[i*8+3] = right1 * factor; + pOutputSamples[i*8+4] = left2 * factor; + pOutputSamples[i*8+5] = right2 * factor; + pOutputSamples[i*8+6] = left3 * factor; + pOutputSamples[i*8+7] = right3 * factor; } - // The size of the allocation for the drflac object needs to be large enough to fit the following: - // 1) The main members of the drflac structure - // 2) A block of memory large enough to store the decoded samples of the largest frame in the stream - // 3) If the container is Ogg, a drflac_oggbs object - // - // The complicated part of the allocation is making sure there's enough room the decoded samples, taking into consideration - // the different SIMD instruction sets. 
- drflac_uint32 allocationSize = sizeof(drflac); + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_int32 left = pInputSamples0[i] << shift0; + drflac_int32 side = pInputSamples1[i] << shift1; + drflac_int32 right = left - side; - // The allocation size for decoded frames depends on the number of 32-bit integers that fit inside the largest SIMD vector - // we are supporting. - drflac_uint32 wholeSIMDVectorCountPerChannel; - if ((init.maxBlockSize % (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32))) == 0) { - wholeSIMDVectorCountPerChannel = (init.maxBlockSize / (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32))); - } else { - wholeSIMDVectorCountPerChannel = (init.maxBlockSize / (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32))) + 1; + pOutputSamples[i*2+0] = (float)(left * factor); + pOutputSamples[i*2+1] = (float)(right * factor); } +} - drflac_uint32 decodedSamplesAllocationSize = wholeSIMDVectorCountPerChannel * DRFLAC_MAX_SIMD_VECTOR_SIZE * init.channels; +#if defined(DRFLAC_SUPPORT_SSE2) +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + drflac_uint64 frameCount4; + drflac_int32 shift0; + drflac_int32 shift1; + drflac_uint64 i; + __m128 factor; - allocationSize += decodedSamplesAllocationSize; - allocationSize += DRFLAC_MAX_SIMD_VECTOR_SIZE; // Allocate extra bytes to ensure we have enough for alignment. + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); -#ifndef DR_FLAC_NO_OGG - // There's additional data required for Ogg streams. 
- drflac_uint32 oggbsAllocationSize = 0; - if (init.container == drflac_container_ogg) { - oggbsAllocationSize = sizeof(drflac_oggbs); - allocationSize += oggbsAllocationSize; + frameCount4 = frameCount >> 2; + + factor = _mm_set1_ps(1.0f / 8388608.0f); + shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8; + shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8; + + for (i = 0; i < frameCount4; ++i) { + __m128i left = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0); + __m128i side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1); + __m128i right = _mm_sub_epi32(left, side); + __m128 leftf = _mm_mul_ps(_mm_cvtepi32_ps(left), factor); + __m128 rightf = _mm_mul_ps(_mm_cvtepi32_ps(right), factor); + + _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf)); + _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf)); } - drflac_oggbs oggbs; - drflac_zero_memory(&oggbs, sizeof(oggbs)); - if (init.container == drflac_container_ogg) { - oggbs.onRead = onRead; - oggbs.onSeek = onSeek; - oggbs.pUserData = pUserData; - oggbs.currentBytePos = init.oggFirstBytePos; - oggbs.firstBytePos = init.oggFirstBytePos; - oggbs.serialNumber = init.oggSerial; - oggbs.bosPageHeader = init.oggBosHeader; - oggbs.bytesRemainingInPage = 0; + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_int32 left = pInputSamples0[i] << shift0; + drflac_int32 side = pInputSamples1[i] << shift1; + drflac_int32 right = left - side; + + pOutputSamples[i*2+0] = (float)(left / 8388608.0f); + pOutputSamples[i*2+1] = (float)(right / 8388608.0f); } +} #endif - // This part is a bit awkward. We need to load the seektable so that it can be referenced in-memory, but I want the drflac object to - // consist of only a single heap allocation. 
To this, the size of the seek table needs to be known, which we determine when reading - // and decoding the metadata. - drflac_uint64 firstFramePos = 42; // <-- We know we are at byte 42 at this point. - drflac_uint64 seektablePos = 0; - drflac_uint32 seektableSize = 0; - if (init.hasMetadataBlocks) { - drflac_read_proc onReadOverride = onRead; - drflac_seek_proc onSeekOverride = onSeek; - void* pUserDataOverride = pUserData; +#if defined(DRFLAC_SUPPORT_NEON) +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + drflac_uint64 frameCount4; + drflac_int32 shift0; + drflac_int32 shift1; + drflac_uint64 i; + float32x4_t factor4; + int32x4_t shift0_4; + int32x4_t shift1_4; -#ifndef DR_FLAC_NO_OGG - if (init.container == drflac_container_ogg) { - onReadOverride = drflac__on_read_ogg; - onSeekOverride = drflac__on_seek_ogg; - pUserDataOverride = (void*)&oggbs; - } -#endif + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); - if (!drflac__read_and_decode_metadata(onReadOverride, onSeekOverride, onMeta, pUserDataOverride, pUserDataMD, &firstFramePos, &seektablePos, &seektableSize)) { - return NULL; - } + frameCount4 = frameCount >> 2; - allocationSize += seektableSize; - } + factor4 = vdupq_n_f32(1.0f / 8388608.0f); + shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8; + shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8; - drflac* pFlac = (drflac*)DRFLAC_MALLOC(allocationSize); - drflac__init_from_info(pFlac, &init); - pFlac->pDecodedSamples = (drflac_int32*)drflac_align((size_t)pFlac->pExtraData, DRFLAC_MAX_SIMD_VECTOR_SIZE); + shift0_4 = vdupq_n_s32(shift0); + shift1_4 = vdupq_n_s32(shift1); -#ifndef DR_FLAC_NO_OGG - if (init.container == drflac_container_ogg) { - drflac_oggbs* pInternalOggbs = 
(drflac_oggbs*)((drflac_uint8*)pFlac->pDecodedSamples + decodedSamplesAllocationSize + seektableSize); - *pInternalOggbs = oggbs; + for (i = 0; i < frameCount4; ++i) { + int32x4_t left; + int32x4_t side; + int32x4_t right; + float32x4_t leftf; + float32x4_t rightf; - // The Ogg bistream needs to be layered on top of the original bitstream. - pFlac->bs.onRead = drflac__on_read_ogg; - pFlac->bs.onSeek = drflac__on_seek_ogg; - pFlac->bs.pUserData = (void*)pInternalOggbs; - pFlac->_oggbs = (void*)pInternalOggbs; + left = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), shift0_4); + side = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), shift1_4); + right = vsubq_s32(left, side); + leftf = vmulq_f32(vcvtq_f32_s32(left), factor4); + rightf = vmulq_f32(vcvtq_f32_s32(right), factor4); + + drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf)); } -#endif - pFlac->firstFramePos = firstFramePos; + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_int32 left = pInputSamples0[i] << shift0; + drflac_int32 side = pInputSamples1[i] << shift1; + drflac_int32 right = left - side; - // NOTE: Seektables are not currently compatible with Ogg encapsulation (Ogg has its own accelerated seeking system). I may change this later, so I'm leaving this here for now. 
-#ifndef DR_FLAC_NO_OGG - if (init.container == drflac_container_ogg) - { - pFlac->pSeekpoints = NULL; - pFlac->seekpointCount = 0; + pOutputSamples[i*2+0] = (float)(left / 8388608.0f); + pOutputSamples[i*2+1] = (float)(right / 8388608.0f); } - else +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ +#if defined(DRFLAC_SUPPORT_SSE2) + if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_f32__decode_left_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#elif defined(DRFLAC_SUPPORT_NEON) + if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_f32__decode_left_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else #endif { - // If we have a seektable we need to load it now, making sure we move back to where we were previously. - if (seektablePos != 0) { - pFlac->seekpointCount = seektableSize / sizeof(*pFlac->pSeekpoints); - pFlac->pSeekpoints = (drflac_seekpoint*)((drflac_uint8*)pFlac->pDecodedSamples + decodedSamplesAllocationSize); + /* Scalar fallback. */ +#if 0 + drflac_read_pcm_frames_f32__decode_left_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#else + drflac_read_pcm_frames_f32__decode_left_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#endif + } +} - // Seek to the seektable, then just read directly into our seektable buffer. - if (pFlac->bs.onSeek(pFlac->bs.pUserData, (int)seektablePos, drflac_seek_origin_start)) { - if (pFlac->bs.onRead(pFlac->bs.pUserData, pFlac->pSeekpoints, seektableSize) == seektableSize) { - // Endian swap. 
- for (drflac_uint32 iSeekpoint = 0; iSeekpoint < pFlac->seekpointCount; ++iSeekpoint) { - pFlac->pSeekpoints[iSeekpoint].firstSample = drflac__be2host_64(pFlac->pSeekpoints[iSeekpoint].firstSample); - pFlac->pSeekpoints[iSeekpoint].frameOffset = drflac__be2host_64(pFlac->pSeekpoints[iSeekpoint].frameOffset); - pFlac->pSeekpoints[iSeekpoint].sampleCount = drflac__be2host_16(pFlac->pSeekpoints[iSeekpoint].sampleCount); - } - } else { - // Failed to read the seektable. Pretend we don't have one. - pFlac->pSeekpoints = NULL; - pFlac->seekpointCount = 0; - } - // We need to seek back to where we were. If this fails it's a critical error. - if (!pFlac->bs.onSeek(pFlac->bs.pUserData, (int)pFlac->firstFramePos, drflac_seek_origin_start)) { - DRFLAC_FREE(pFlac); - return NULL; - } - } else { - // Failed to seek to the seektable. Ominous sign, but for now we can just pretend we don't have one. - pFlac->pSeekpoints = NULL; - pFlac->seekpointCount = 0; - } - } +#if 0 +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + drflac_uint64 i; + for (i = 0; i < frameCount; ++i) { + drflac_int32 side = pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + drflac_int32 right = pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); + drflac_int32 left = right + side; + + pOutputSamples[i*2+0] = (float)(left / 2147483648.0); + pOutputSamples[i*2+1] = (float)(right / 2147483648.0); } +} +#endif +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 
= frameCount >> 2; + float factor = 1 / 2147483648.0; - // If we get here, but don't have a STREAMINFO block, it means we've opened the stream in relaxed mode and need to decode - // the first frame. - if (!init.hasStreamInfoBlock) { - pFlac->currentFrame.header = init.firstFrameHeader; - do - { - drflac_result result = drflac__decode_frame(pFlac); - if (result == DRFLAC_SUCCESS) { - break; - } else { - if (result == DRFLAC_CRC_MISMATCH) { - if (!drflac__read_next_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) { - DRFLAC_FREE(pFlac); - return NULL; - } - continue; - } else { - DRFLAC_FREE(pFlac); - return NULL; - } - } - } while (1); - } + drflac_int32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_int32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + for (i = 0; i < frameCount4; ++i) { + drflac_int32 side0 = pInputSamples0[i*4+0] << shift0; + drflac_int32 side1 = pInputSamples0[i*4+1] << shift0; + drflac_int32 side2 = pInputSamples0[i*4+2] << shift0; + drflac_int32 side3 = pInputSamples0[i*4+3] << shift0; - return pFlac; -} + drflac_int32 right0 = pInputSamples1[i*4+0] << shift1; + drflac_int32 right1 = pInputSamples1[i*4+1] << shift1; + drflac_int32 right2 = pInputSamples1[i*4+2] << shift1; + drflac_int32 right3 = pInputSamples1[i*4+3] << shift1; + drflac_int32 left0 = right0 + side0; + drflac_int32 left1 = right1 + side1; + drflac_int32 left2 = right2 + side2; + drflac_int32 left3 = right3 + side3; + pOutputSamples[i*8+0] = left0 * factor; + pOutputSamples[i*8+1] = right0 * factor; + pOutputSamples[i*8+2] = left1 * factor; + pOutputSamples[i*8+3] = right1 * factor; + pOutputSamples[i*8+4] = left2 * factor; + pOutputSamples[i*8+5] = right2 * factor; + pOutputSamples[i*8+6] = left3 * factor; + pOutputSamples[i*8+7] = right3 * factor; + } -#ifndef DR_FLAC_NO_STDIO -#include <stdio.h> + for (i = (frameCount4 << 2); i < frameCount; ++i) { + 
drflac_int32 side = pInputSamples0[i] << shift0; + drflac_int32 right = pInputSamples1[i] << shift1; + drflac_int32 left = right + side; -static size_t drflac__on_read_stdio(void* pUserData, void* bufferOut, size_t bytesToRead) -{ - return fread(bufferOut, 1, bytesToRead, (FILE*)pUserData); + pOutputSamples[i*2+0] = (float)(left * factor); + pOutputSamples[i*2+1] = (float)(right * factor); + } } -static drflac_bool32 drflac__on_seek_stdio(void* pUserData, int offset, drflac_seek_origin origin) +#if defined(DRFLAC_SUPPORT_SSE2) +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) { - drflac_assert(offset >= 0); // <-- Never seek backwards. + drflac_uint64 frameCount4; + drflac_int32 shift0; + drflac_int32 shift1; + drflac_uint64 i; + __m128 factor; - return fseek((FILE*)pUserData, offset, (origin == drflac_seek_origin_current) ? 
SEEK_CUR : SEEK_SET) == 0; -} + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); -static FILE* drflac__fopen(const char* filename) -{ - FILE* pFile; -#ifdef _MSC_VER - if (fopen_s(&pFile, filename, "rb") != 0) { - return NULL; - } -#else - pFile = fopen(filename, "rb"); - if (pFile == NULL) { - return NULL; - } -#endif + frameCount4 = frameCount >> 2; - return pFile; -} + factor = _mm_set1_ps(1.0f / 8388608.0f); + shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8; + shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8; + for (i = 0; i < frameCount4; ++i) { + __m128i side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0); + __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1); + __m128i left = _mm_add_epi32(right, side); + __m128 leftf = _mm_mul_ps(_mm_cvtepi32_ps(left), factor); + __m128 rightf = _mm_mul_ps(_mm_cvtepi32_ps(right), factor); -drflac* drflac_open_file(const char* filename) -{ - FILE* file = drflac__fopen(filename); - if (file == NULL) { - return NULL; + _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf)); + _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf)); } - drflac* pFlac = drflac_open(drflac__on_read_stdio, drflac__on_seek_stdio, (void*)file); - if (pFlac == NULL) { - fclose(file); - return NULL; - } + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_int32 side = pInputSamples0[i] << shift0; + drflac_int32 right = pInputSamples1[i] << shift1; + drflac_int32 left = right + side; - return pFlac; + pOutputSamples[i*2+0] = (float)(left / 8388608.0f); + pOutputSamples[i*2+1] = (float)(right / 8388608.0f); + } } +#endif -drflac* drflac_open_file_with_metadata(const char* filename, drflac_meta_proc onMeta, void* pUserData) +#if defined(DRFLAC_SUPPORT_NEON) +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__neon(drflac* pFlac, 
drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) { - FILE* file = drflac__fopen(filename); - if (file == NULL) { - return NULL; - } + drflac_uint64 frameCount4; + drflac_int32 shift0; + drflac_int32 shift1; + drflac_uint64 i; + float32x4_t factor4; + int32x4_t shift0_4; + int32x4_t shift1_4; - drflac* pFlac = drflac_open_with_metadata_private(drflac__on_read_stdio, drflac__on_seek_stdio, onMeta, drflac_container_unknown, (void*)file, pUserData); - if (pFlac == NULL) { - fclose(file); - return pFlac; - } + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); - return pFlac; -} -#endif //DR_FLAC_NO_STDIO + frameCount4 = frameCount >> 2; -static size_t drflac__on_read_memory(void* pUserData, void* bufferOut, size_t bytesToRead) -{ - drflac__memory_stream* memoryStream = (drflac__memory_stream*)pUserData; - drflac_assert(memoryStream != NULL); - drflac_assert(memoryStream->dataSize >= memoryStream->currentReadPos); + factor4 = vdupq_n_f32(1.0f / 8388608.0f); - size_t bytesRemaining = memoryStream->dataSize - memoryStream->currentReadPos; - if (bytesToRead > bytesRemaining) { - bytesToRead = bytesRemaining; + shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8; + shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8; + + shift0_4 = vdupq_n_s32(shift0); + shift1_4 = vdupq_n_s32(shift1); + + for (i = 0; i < frameCount4; ++i) { + int32x4_t side; + int32x4_t right; + int32x4_t left; + float32x4_t leftf; + float32x4_t rightf; + + side = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), shift0_4); + right = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), shift1_4); + left = vaddq_s32(right, side); + leftf = vmulq_f32(vcvtq_f32_s32(left), factor4); + rightf = vmulq_f32(vcvtq_f32_s32(right), factor4); + + drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf)); } - if (bytesToRead > 0) { - 
drflac_copy_memory(bufferOut, memoryStream->data + memoryStream->currentReadPos, bytesToRead); - memoryStream->currentReadPos += bytesToRead; + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_int32 side = pInputSamples0[i] << shift0; + drflac_int32 right = pInputSamples1[i] << shift1; + drflac_int32 left = right + side; + + pOutputSamples[i*2+0] = (float)(left / 8388608.0f); + pOutputSamples[i*2+1] = (float)(right / 8388608.0f); } +} +#endif - return bytesToRead; +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ +#if defined(DRFLAC_SUPPORT_SSE2) + if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_f32__decode_right_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#elif defined(DRFLAC_SUPPORT_NEON) + if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_f32__decode_right_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#endif + { + /* Scalar fallback. 
*/ +#if 0 + drflac_read_pcm_frames_f32__decode_right_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#else + drflac_read_pcm_frames_f32__decode_right_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#endif + } } -static drflac_bool32 drflac__on_seek_memory(void* pUserData, int offset, drflac_seek_origin origin) + +#if 0 +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) { - drflac__memory_stream* memoryStream = (drflac__memory_stream*)pUserData; - drflac_assert(memoryStream != NULL); - drflac_assert(offset >= 0); // <-- Never seek backwards. + for (drflac_uint64 i = 0; i < frameCount; ++i) { + drflac_int32 mid = pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_int32 side = pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; - if (offset > (drflac_int64)memoryStream->dataSize) { - return DRFLAC_FALSE; + mid = (((drflac_uint32)mid) << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = (float)((((mid + side) >> 1) << (unusedBitsPerSample)) / 2147483648.0); + pOutputSamples[i*2+1] = (float)((((mid - side) >> 1) << (unusedBitsPerSample)) / 2147483648.0); } +} +#endif - if (origin == drflac_seek_origin_current) { - if (memoryStream->currentReadPos + offset <= memoryStream->dataSize) { - memoryStream->currentReadPos += offset; - } else { - return DRFLAC_FALSE; // Trying to seek too far forward. 
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + + float factor = 1 / 2147483648.0; + + int shift = unusedBitsPerSample; + if (shift > 0) { + shift -= 1; + for (i = 0; i < frameCount4; ++i) { + drflac_int32 temp0L; + drflac_int32 temp1L; + drflac_int32 temp2L; + drflac_int32 temp3L; + drflac_int32 temp0R; + drflac_int32 temp1R; + drflac_int32 temp2R; + drflac_int32 temp3R; + + drflac_int32 mid0 = pInputSamples0[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_int32 mid1 = pInputSamples0[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_int32 mid2 = pInputSamples0[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_int32 mid3 = pInputSamples0[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + + drflac_int32 side0 = pInputSamples1[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_int32 side1 = pInputSamples1[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_int32 side2 = pInputSamples1[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_int32 side3 = pInputSamples1[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid0 = (((drflac_uint32)mid0) << 1) | (side0 & 0x01); + mid1 = (((drflac_uint32)mid1) << 1) | (side1 & 0x01); + mid2 = (((drflac_uint32)mid2) << 1) | (side2 & 0x01); + mid3 = (((drflac_uint32)mid3) << 1) | (side3 & 0x01); + + temp0L = ((mid0 + side0) << shift); + temp1L = ((mid1 + side1) << shift); + temp2L = ((mid2 + side2) << shift); + temp3L = ((mid3 + side3) << shift); + + temp0R = ((mid0 - side0) << shift); + temp1R = ((mid1 - side1) << shift); + temp2R = ((mid2 - side2) << shift); + temp3R 
= ((mid3 - side3) << shift); + + pOutputSamples[i*8+0] = (float)(temp0L * factor); + pOutputSamples[i*8+1] = (float)(temp0R * factor); + pOutputSamples[i*8+2] = (float)(temp1L * factor); + pOutputSamples[i*8+3] = (float)(temp1R * factor); + pOutputSamples[i*8+4] = (float)(temp2L * factor); + pOutputSamples[i*8+5] = (float)(temp2R * factor); + pOutputSamples[i*8+6] = (float)(temp3L * factor); + pOutputSamples[i*8+7] = (float)(temp3R * factor); } } else { - if ((drflac_uint32)offset <= memoryStream->dataSize) { - memoryStream->currentReadPos = offset; - } else { - return DRFLAC_FALSE; // Trying to seek too far forward. + for (i = 0; i < frameCount4; ++i) { + drflac_int32 temp0L; + drflac_int32 temp1L; + drflac_int32 temp2L; + drflac_int32 temp3L; + drflac_int32 temp0R; + drflac_int32 temp1R; + drflac_int32 temp2R; + drflac_int32 temp3R; + + drflac_int32 mid0 = pInputSamples0[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_int32 mid1 = pInputSamples0[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_int32 mid2 = pInputSamples0[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_int32 mid3 = pInputSamples0[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + + drflac_int32 side0 = pInputSamples1[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_int32 side1 = pInputSamples1[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_int32 side2 = pInputSamples1[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_int32 side3 = pInputSamples1[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid0 = (((drflac_uint32)mid0) << 1) | (side0 & 0x01); + mid1 = (((drflac_uint32)mid1) << 1) | (side1 & 0x01); + mid2 = (((drflac_uint32)mid2) << 1) | (side2 & 0x01); + mid3 = (((drflac_uint32)mid3) << 1) | (side3 & 0x01); + + temp0L = ((mid0 + side0) >> 1); + temp1L = ((mid1 + side1) >> 1); + temp2L 
= ((mid2 + side2) >> 1); + temp3L = ((mid3 + side3) >> 1); + + temp0R = ((mid0 - side0) >> 1); + temp1R = ((mid1 - side1) >> 1); + temp2R = ((mid2 - side2) >> 1); + temp3R = ((mid3 - side3) >> 1); + + pOutputSamples[i*8+0] = (float)(temp0L * factor); + pOutputSamples[i*8+1] = (float)(temp0R * factor); + pOutputSamples[i*8+2] = (float)(temp1L * factor); + pOutputSamples[i*8+3] = (float)(temp1R * factor); + pOutputSamples[i*8+4] = (float)(temp2L * factor); + pOutputSamples[i*8+5] = (float)(temp2R * factor); + pOutputSamples[i*8+6] = (float)(temp3L * factor); + pOutputSamples[i*8+7] = (float)(temp3R * factor); } } - return DRFLAC_TRUE; + for (i = (frameCount4 << 2); i < frameCount; ++i) { + int mid = pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + int side = pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid = (((drflac_uint32)mid) << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = (float)((((mid + side) >> 1) << unusedBitsPerSample) * factor); + pOutputSamples[i*2+1] = (float)((((mid - side) >> 1) << unusedBitsPerSample) * factor); + } } -drflac* drflac_open_memory(const void* data, size_t dataSize) +#if defined(DRFLAC_SUPPORT_SSE2) +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) { - drflac__memory_stream memoryStream; - memoryStream.data = (const unsigned char*)data; - memoryStream.dataSize = dataSize; - memoryStream.currentReadPos = 0; - drflac* pFlac = drflac_open(drflac__on_read_memory, drflac__on_seek_memory, &memoryStream); - if (pFlac == NULL) { - return NULL; - } + drflac_uint64 i; + drflac_uint64 frameCount4; + float factor; + drflac_int32 shift; + __m128 factor128; - pFlac->memoryStream = memoryStream; + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); - // This is an awful hack... 
-#ifndef DR_FLAC_NO_OGG - if (pFlac->container == drflac_container_ogg) - { - drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs; - oggbs->pUserData = &pFlac->memoryStream; - } - else -#endif - { - pFlac->bs.pUserData = &pFlac->memoryStream; - } + frameCount4 = frameCount >> 2; - return pFlac; -} + factor = 1.0f / 8388608.0f; + factor128 = _mm_set1_ps(1.0f / 8388608.0f); -drflac* drflac_open_memory_with_metadata(const void* data, size_t dataSize, drflac_meta_proc onMeta, void* pUserData) -{ - drflac__memory_stream memoryStream; - memoryStream.data = (const unsigned char*)data; - memoryStream.dataSize = dataSize; - memoryStream.currentReadPos = 0; - drflac* pFlac = drflac_open_with_metadata_private(drflac__on_read_memory, drflac__on_seek_memory, onMeta, drflac_container_unknown, &memoryStream, pUserData); - if (pFlac == NULL) { - return NULL; - } + shift = unusedBitsPerSample - 8; + if (shift == 0) { + for (i = 0; i < frameCount4; ++i) { + __m128i mid; + __m128i side; + __m128i tempL; + __m128i tempR; + __m128 leftf; + __m128 rightf; - pFlac->memoryStream = memoryStream; + mid = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); - // This is an awful hack... 
-#ifndef DR_FLAC_NO_OGG - if (pFlac->container == drflac_container_ogg) - { - drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs; - oggbs->pUserData = &pFlac->memoryStream; - } - else -#endif - { - pFlac->bs.pUserData = &pFlac->memoryStream; - } + mid = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01))); + + tempL = _mm_srai_epi32(_mm_add_epi32(mid, side), 1); + tempR = _mm_srai_epi32(_mm_sub_epi32(mid, side), 1); + + leftf = _mm_mul_ps(_mm_cvtepi32_ps(tempL), factor128); + rightf = _mm_mul_ps(_mm_cvtepi32_ps(tempR), factor128); + + _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf)); + _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_int32 mid = pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_int32 side = pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid = (((drflac_uint32)mid) << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = (float)(((mid + side) >> 1) * factor); + pOutputSamples[i*2+1] = (float)(((mid - side) >> 1) * factor); + } + } else { + shift -= 1; + for (i = 0; i < frameCount4; ++i) { + __m128i mid; + __m128i side; + __m128i tempL; + __m128i tempR; + __m128 leftf; + __m128 rightf; - return pFlac; -} + mid = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); + mid = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01))); + tempL = _mm_slli_epi32(_mm_add_epi32(mid, side), shift); + tempR = _mm_slli_epi32(_mm_sub_epi32(mid, side), shift); -drflac* drflac_open(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData) -{ - return drflac_open_with_metadata_private(onRead, onSeek, NULL, 
drflac_container_unknown, pUserData, pUserData); -} -drflac* drflac_open_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_container container, void* pUserData) -{ - return drflac_open_with_metadata_private(onRead, onSeek, NULL, container, pUserData, pUserData); -} + leftf = _mm_mul_ps(_mm_cvtepi32_ps(tempL), factor128); + rightf = _mm_mul_ps(_mm_cvtepi32_ps(tempR), factor128); -drflac* drflac_open_with_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData) -{ - return drflac_open_with_metadata_private(onRead, onSeek, onMeta, drflac_container_unknown, pUserData, pUserData); -} -drflac* drflac_open_with_metadata_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData) -{ - return drflac_open_with_metadata_private(onRead, onSeek, onMeta, container, pUserData, pUserData); -} + _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf)); + _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf)); + } -void drflac_close(drflac* pFlac) -{ - if (pFlac == NULL) { - return; - } + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_int32 mid = pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_int32 side = pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; -#ifndef DR_FLAC_NO_STDIO - // If we opened the file with drflac_open_file() we will want to close the file handle. We can know whether or not drflac_open_file() - // was used by looking at the callbacks. - if (pFlac->bs.onRead == drflac__on_read_stdio) { - fclose((FILE*)pFlac->bs.pUserData); - } + mid = (((drflac_uint32)mid) << 1) | (side & 0x01); -#ifndef DR_FLAC_NO_OGG - // Need to clean up Ogg streams a bit differently due to the way the bit streaming is chained. 
- if (pFlac->container == drflac_container_ogg) { - drflac_assert(pFlac->bs.onRead == drflac__on_read_ogg); - drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs; - if (oggbs->onRead == drflac__on_read_stdio) { - fclose((FILE*)oggbs->pUserData); + pOutputSamples[i*2+0] = (float)(((mid + side) << shift) * factor); + pOutputSamples[i*2+1] = (float)(((mid - side) << shift) * factor); } } -#endif -#endif - - DRFLAC_FREE(pFlac); } +#endif -drflac_uint64 drflac__read_s32__misaligned(drflac* pFlac, drflac_uint64 samplesToRead, drflac_int32* bufferOut) +#if defined(DRFLAC_SUPPORT_NEON) +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) { - unsigned int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFrame.header.channelAssignment); + drflac_uint64 i; + drflac_uint64 frameCount4; + float factor; + drflac_int32 shift; + float32x4_t factor4; + int32x4_t shift4; + int32x4_t wbps0_4; /* Wasted Bits Per Sample */ + int32x4_t wbps1_4; /* Wasted Bits Per Sample */ - // We should never be calling this when the number of samples to read is >= the sample count. 
- drflac_assert(samplesToRead < channelCount); - drflac_assert(pFlac->currentFrame.samplesRemaining > 0 && samplesToRead <= pFlac->currentFrame.samplesRemaining); + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); + frameCount4 = frameCount >> 2; - drflac_uint64 samplesRead = 0; - while (samplesToRead > 0) { - drflac_uint64 totalSamplesInFrame = pFlac->currentFrame.header.blockSize * channelCount; - drflac_uint64 samplesReadFromFrameSoFar = totalSamplesInFrame - pFlac->currentFrame.samplesRemaining; - drflac_uint64 channelIndex = samplesReadFromFrameSoFar % channelCount; + factor = 1.0f / 8388608.0f; + factor4 = vdupq_n_f32(factor); - drflac_uint64 nextSampleInFrame = samplesReadFromFrameSoFar / channelCount; + wbps0_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + wbps1_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); - int decodedSample = 0; - switch (pFlac->currentFrame.header.channelAssignment) - { - case DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE: - { - if (channelIndex == 0) { - decodedSample = pFlac->currentFrame.subframes[channelIndex + 0].pDecodedSamples[nextSampleInFrame] << pFlac->currentFrame.subframes[channelIndex + 0].wastedBitsPerSample; - } else { - int side = pFlac->currentFrame.subframes[channelIndex + 0].pDecodedSamples[nextSampleInFrame] << pFlac->currentFrame.subframes[channelIndex + 0].wastedBitsPerSample; - int left = pFlac->currentFrame.subframes[channelIndex - 1].pDecodedSamples[nextSampleInFrame] << pFlac->currentFrame.subframes[channelIndex - 1].wastedBitsPerSample; - decodedSample = left - side; - } - } break; + shift = unusedBitsPerSample - 8; + if (shift == 0) { + for (i = 0; i < frameCount4; ++i) { + int32x4_t lefti; + int32x4_t righti; + float32x4_t leftf; + float32x4_t rightf; - case DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE: - { - if (channelIndex == 0) { - int side = pFlac->currentFrame.subframes[channelIndex + 0].pDecodedSamples[nextSampleInFrame] << pFlac->currentFrame.subframes[channelIndex + 
0].wastedBitsPerSample; - int right = pFlac->currentFrame.subframes[channelIndex + 1].pDecodedSamples[nextSampleInFrame] << pFlac->currentFrame.subframes[channelIndex + 1].wastedBitsPerSample; - decodedSample = side + right; - } else { - decodedSample = pFlac->currentFrame.subframes[channelIndex + 0].pDecodedSamples[nextSampleInFrame] << pFlac->currentFrame.subframes[channelIndex + 0].wastedBitsPerSample; - } - } break; + int32x4_t mid = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), wbps0_4); + int32x4_t side = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), wbps1_4); - case DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE: - { - int mid; - int side; - if (channelIndex == 0) { - mid = pFlac->currentFrame.subframes[channelIndex + 0].pDecodedSamples[nextSampleInFrame] << pFlac->currentFrame.subframes[channelIndex + 0].wastedBitsPerSample; - side = pFlac->currentFrame.subframes[channelIndex + 1].pDecodedSamples[nextSampleInFrame] << pFlac->currentFrame.subframes[channelIndex + 1].wastedBitsPerSample; - - mid = (((unsigned int)mid) << 1) | (side & 0x01); - decodedSample = (mid + side) >> 1; - } else { - mid = pFlac->currentFrame.subframes[channelIndex - 1].pDecodedSamples[nextSampleInFrame] << pFlac->currentFrame.subframes[channelIndex - 1].wastedBitsPerSample; - side = pFlac->currentFrame.subframes[channelIndex + 0].pDecodedSamples[nextSampleInFrame] << pFlac->currentFrame.subframes[channelIndex + 0].wastedBitsPerSample; + mid = vorrq_s32(vshlq_n_s32(mid, 1), vandq_s32(side, vdupq_n_s32(1))); - mid = (((unsigned int)mid) << 1) | (side & 0x01); - decodedSample = (mid - side) >> 1; - } - } break; + lefti = vshrq_n_s32(vaddq_s32(mid, side), 1); + righti = vshrq_n_s32(vsubq_s32(mid, side), 1); - case DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT: - default: - { - decodedSample = pFlac->currentFrame.subframes[channelIndex + 0].pDecodedSamples[nextSampleInFrame] << pFlac->currentFrame.subframes[channelIndex + 0].wastedBitsPerSample; - } break; + leftf = vmulq_f32(vcvtq_f32_s32(lefti), factor4); + rightf 
= vmulq_f32(vcvtq_f32_s32(righti), factor4); + + drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf)); } + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_int32 mid = pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_int32 side = pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; - decodedSample <<= (32 - pFlac->bitsPerSample); + mid = (((drflac_uint32)mid) << 1) | (side & 0x01); - if (bufferOut) { - *bufferOut++ = decodedSample; + pOutputSamples[i*2+0] = (float)(((mid + side) >> 1) * factor); + pOutputSamples[i*2+1] = (float)(((mid - side) >> 1) * factor); } + } else { + shift -= 1; + shift4 = vdupq_n_s32(shift); + for (i = 0; i < frameCount4; ++i) { + int32x4_t mid; + int32x4_t side; + int32x4_t lefti; + int32x4_t righti; + float32x4_t leftf; + float32x4_t rightf; - samplesRead += 1; - pFlac->currentFrame.samplesRemaining -= 1; - samplesToRead -= 1; - } + mid = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), wbps0_4); + side = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), wbps1_4); - return samplesRead; -} + mid = vorrq_s32(vshlq_n_s32(mid, 1), vandq_s32(side, vdupq_n_s32(1))); -drflac_uint64 drflac__seek_forward_by_samples(drflac* pFlac, drflac_uint64 samplesToRead) -{ - drflac_uint64 samplesRead = 0; - while (samplesToRead > 0) { - if (pFlac->currentFrame.samplesRemaining == 0) { - if (!drflac__read_and_decode_next_frame(pFlac)) { - break; // Couldn't read the next frame, so just break from the loop and return. - } - } else { - if (pFlac->currentFrame.samplesRemaining > samplesToRead) { - samplesRead += samplesToRead; - pFlac->currentFrame.samplesRemaining -= (drflac_uint32)samplesToRead; // <-- Safe cast. Will always be < currentFrame.samplesRemaining < 65536. 
- samplesToRead = 0; - } else { - samplesRead += pFlac->currentFrame.samplesRemaining; - samplesToRead -= pFlac->currentFrame.samplesRemaining; - pFlac->currentFrame.samplesRemaining = 0; - } + lefti = vshlq_s32(vaddq_s32(mid, side), shift4); + righti = vshlq_s32(vsubq_s32(mid, side), shift4); + + leftf = vmulq_f32(vcvtq_f32_s32(lefti), factor4); + rightf = vmulq_f32(vcvtq_f32_s32(righti), factor4); + + drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf)); } - } - pFlac->currentSample += samplesRead; - return samplesRead; + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_int32 mid = pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_int32 side = pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid = (((drflac_uint32)mid) << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = (float)(((mid + side) << shift) * factor); + pOutputSamples[i*2+1] = (float)(((mid - side) << shift) * factor); + } + } } +#endif -drflac_uint64 drflac_read_s32(drflac* pFlac, drflac_uint64 samplesToRead, drflac_int32* bufferOut) +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) { - // Note that <bufferOut> is allowed to be null, in which case this will act like a seek. 
- if (pFlac == NULL || samplesToRead == 0) { - return 0; +#if defined(DRFLAC_SUPPORT_SSE2) + if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_f32__decode_mid_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#elif defined(DRFLAC_SUPPORT_NEON) + if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_f32__decode_mid_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#endif + { + /* Scalar fallback. */ +#if 0 + drflac_read_pcm_frames_f32__decode_mid_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#else + drflac_read_pcm_frames_f32__decode_mid_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#endif } +} - if (bufferOut == NULL) { - return drflac__seek_forward_by_samples(pFlac, samplesToRead); +#if 0 +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + for (drflac_uint64 i = 0; i < frameCount; ++i) { + pOutputSamples[i*2+0] = (float)((pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample)) / 2147483648.0); + pOutputSamples[i*2+1] = (float)((pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample)) / 2147483648.0); } +} +#endif +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; - drflac_uint64 
samplesRead = 0; - while (samplesToRead > 0) { - // If we've run out of samples in this frame, go to the next. - if (pFlac->currentFrame.samplesRemaining == 0) { - if (!drflac__read_and_decode_next_frame(pFlac)) { - break; // Couldn't read the next frame, so just break from the loop and return. - } - } else { - // Here is where we grab the samples and interleave them. + float factor = 1 / 2147483648.0; - unsigned int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFrame.header.channelAssignment); - drflac_uint64 totalSamplesInFrame = pFlac->currentFrame.header.blockSize * channelCount; - drflac_uint64 samplesReadFromFrameSoFar = totalSamplesInFrame - pFlac->currentFrame.samplesRemaining; + drflac_int32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + drflac_int32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); - drflac_uint64 misalignedSampleCount = samplesReadFromFrameSoFar % channelCount; - if (misalignedSampleCount > 0) { - drflac_uint64 misalignedSamplesRead = drflac__read_s32__misaligned(pFlac, misalignedSampleCount, bufferOut); - samplesRead += misalignedSamplesRead; - samplesReadFromFrameSoFar += misalignedSamplesRead; - bufferOut += misalignedSamplesRead; - samplesToRead -= misalignedSamplesRead; - pFlac->currentSample += misalignedSamplesRead; - } + for (i = 0; i < frameCount4; ++i) { + drflac_int32 tempL0 = pInputSamples0[i*4+0] << shift0; + drflac_int32 tempL1 = pInputSamples0[i*4+1] << shift0; + drflac_int32 tempL2 = pInputSamples0[i*4+2] << shift0; + drflac_int32 tempL3 = pInputSamples0[i*4+3] << shift0; + drflac_int32 tempR0 = pInputSamples1[i*4+0] << shift1; + drflac_int32 tempR1 = pInputSamples1[i*4+1] << shift1; + drflac_int32 tempR2 = pInputSamples1[i*4+2] << shift1; + drflac_int32 tempR3 = pInputSamples1[i*4+3] << shift1; - drflac_uint64 alignedSampleCountPerChannel = samplesToRead / channelCount; - if 
(alignedSampleCountPerChannel > pFlac->currentFrame.samplesRemaining / channelCount) { - alignedSampleCountPerChannel = pFlac->currentFrame.samplesRemaining / channelCount; - } + pOutputSamples[i*8+0] = (float)(tempL0 * factor); + pOutputSamples[i*8+1] = (float)(tempR0 * factor); + pOutputSamples[i*8+2] = (float)(tempL1 * factor); + pOutputSamples[i*8+3] = (float)(tempR1 * factor); + pOutputSamples[i*8+4] = (float)(tempL2 * factor); + pOutputSamples[i*8+5] = (float)(tempR2 * factor); + pOutputSamples[i*8+6] = (float)(tempL3 * factor); + pOutputSamples[i*8+7] = (float)(tempR3 * factor); + } - drflac_uint64 firstAlignedSampleInFrame = samplesReadFromFrameSoFar / channelCount; - unsigned int unusedBitsPerSample = 32 - pFlac->bitsPerSample; + for (i = (frameCount4 << 2); i < frameCount; ++i) { + pOutputSamples[i*2+0] = (float)((pInputSamples0[i] << shift0) * factor); + pOutputSamples[i*2+1] = (float)((pInputSamples1[i] << shift1) * factor); + } +} - switch (pFlac->currentFrame.header.channelAssignment) - { - case DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE: - { - const drflac_int32* pDecodedSamples0 = pFlac->currentFrame.subframes[0].pDecodedSamples + firstAlignedSampleInFrame; - const drflac_int32* pDecodedSamples1 = pFlac->currentFrame.subframes[1].pDecodedSamples + firstAlignedSampleInFrame; +#if defined(DRFLAC_SUPPORT_SSE2) +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; - for (drflac_uint64 i = 0; i < alignedSampleCountPerChannel; ++i) { - int left = pDecodedSamples0[i] << (unusedBitsPerSample + pFlac->currentFrame.subframes[0].wastedBitsPerSample); - int side = pDecodedSamples1[i] << (unusedBitsPerSample + pFlac->currentFrame.subframes[1].wastedBitsPerSample); - int right = left - side; + float 
factor = 1.0f / 8388608.0f; + __m128 factor128 = _mm_set1_ps(1.0f / 8388608.0f); - bufferOut[i*2+0] = left; - bufferOut[i*2+1] = right; - } - } break; + drflac_int32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8; + drflac_int32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8; - case DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE: - { - const drflac_int32* pDecodedSamples0 = pFlac->currentFrame.subframes[0].pDecodedSamples + firstAlignedSampleInFrame; - const drflac_int32* pDecodedSamples1 = pFlac->currentFrame.subframes[1].pDecodedSamples + firstAlignedSampleInFrame; + for (i = 0; i < frameCount4; ++i) { + __m128i lefti; + __m128i righti; + __m128 leftf; + __m128 rightf; - for (drflac_uint64 i = 0; i < alignedSampleCountPerChannel; ++i) { - int side = pDecodedSamples0[i] << (unusedBitsPerSample + pFlac->currentFrame.subframes[0].wastedBitsPerSample); - int right = pDecodedSamples1[i] << (unusedBitsPerSample + pFlac->currentFrame.subframes[1].wastedBitsPerSample); - int left = right + side; + lefti = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0); + righti = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1); - bufferOut[i*2+0] = left; - bufferOut[i*2+1] = right; - } - } break; + leftf = _mm_mul_ps(_mm_cvtepi32_ps(lefti), factor128); + rightf = _mm_mul_ps(_mm_cvtepi32_ps(righti), factor128); - case DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE: - { - const drflac_int32* pDecodedSamples0 = pFlac->currentFrame.subframes[0].pDecodedSamples + firstAlignedSampleInFrame; - const drflac_int32* pDecodedSamples1 = pFlac->currentFrame.subframes[1].pDecodedSamples + firstAlignedSampleInFrame; + _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf)); + _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf)); + } - for (drflac_uint64 i = 0; i < alignedSampleCountPerChannel; ++i) { - int mid = pDecodedSamples0[i] << 
pFlac->currentFrame.subframes[0].wastedBitsPerSample; - int side = pDecodedSamples1[i] << pFlac->currentFrame.subframes[1].wastedBitsPerSample; + for (i = (frameCount4 << 2); i < frameCount; ++i) { + pOutputSamples[i*2+0] = (float)((pInputSamples0[i] << shift0) * factor); + pOutputSamples[i*2+1] = (float)((pInputSamples1[i] << shift1) * factor); + } +} +#endif - mid = (((drflac_uint32)mid) << 1) | (side & 0x01); +#if defined(DRFLAC_SUPPORT_NEON) +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; - bufferOut[i*2+0] = ((mid + side) >> 1) << (unusedBitsPerSample); - bufferOut[i*2+1] = ((mid - side) >> 1) << (unusedBitsPerSample); - } - } break; + float factor = 1.0f / 8388608.0f; + float32x4_t factor4 = vdupq_n_f32(factor); - case DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT: - default: - { - if (pFlac->currentFrame.header.channelAssignment == 1) // 1 = Stereo - { - // Stereo optimized inner loop unroll. - const drflac_int32* pDecodedSamples0 = pFlac->currentFrame.subframes[0].pDecodedSamples + firstAlignedSampleInFrame; - const drflac_int32* pDecodedSamples1 = pFlac->currentFrame.subframes[1].pDecodedSamples + firstAlignedSampleInFrame; + drflac_int32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8; + drflac_int32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8; - for (drflac_uint64 i = 0; i < alignedSampleCountPerChannel; ++i) { - bufferOut[i*2+0] = pDecodedSamples0[i] << (unusedBitsPerSample + pFlac->currentFrame.subframes[0].wastedBitsPerSample); - bufferOut[i*2+1] = pDecodedSamples1[i] << (unusedBitsPerSample + pFlac->currentFrame.subframes[1].wastedBitsPerSample); - } - } - else - { - // Generic interleaving. 
- for (drflac_uint64 i = 0; i < alignedSampleCountPerChannel; ++i) { - for (unsigned int j = 0; j < channelCount; ++j) { - bufferOut[(i*channelCount)+j] = (pFlac->currentFrame.subframes[j].pDecodedSamples[firstAlignedSampleInFrame + i]) << (unusedBitsPerSample + pFlac->currentFrame.subframes[j].wastedBitsPerSample); - } - } - } - } break; - } + int32x4_t shift0_4 = vdupq_n_s32(shift0); + int32x4_t shift1_4 = vdupq_n_s32(shift1); - drflac_uint64 alignedSamplesRead = alignedSampleCountPerChannel * channelCount; - samplesRead += alignedSamplesRead; - samplesReadFromFrameSoFar += alignedSamplesRead; - bufferOut += alignedSamplesRead; - samplesToRead -= alignedSamplesRead; - pFlac->currentSample += alignedSamplesRead; - pFlac->currentFrame.samplesRemaining -= (unsigned int)alignedSamplesRead; + for (i = 0; i < frameCount4; ++i) { + int32x4_t lefti; + int32x4_t righti; + float32x4_t leftf; + float32x4_t rightf; + lefti = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), shift0_4); + righti = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), shift1_4); - // At this point we may still have some excess samples left to read. 
- if (samplesToRead > 0 && pFlac->currentFrame.samplesRemaining > 0) { - drflac_uint64 excessSamplesRead = 0; - if (samplesToRead < pFlac->currentFrame.samplesRemaining) { - excessSamplesRead = drflac__read_s32__misaligned(pFlac, samplesToRead, bufferOut); - } else { - excessSamplesRead = drflac__read_s32__misaligned(pFlac, pFlac->currentFrame.samplesRemaining, bufferOut); - } + leftf = vmulq_f32(vcvtq_f32_s32(lefti), factor4); + rightf = vmulq_f32(vcvtq_f32_s32(righti), factor4); - samplesRead += excessSamplesRead; - samplesReadFromFrameSoFar += excessSamplesRead; - bufferOut += excessSamplesRead; - samplesToRead -= excessSamplesRead; - pFlac->currentSample += excessSamplesRead; - } - } + drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf)); } - return samplesRead; + for (i = (frameCount4 << 2); i < frameCount; ++i) { + pOutputSamples[i*2+0] = (float)((pInputSamples0[i] << shift0) * factor); + pOutputSamples[i*2+1] = (float)((pInputSamples1[i] << shift1) * factor); + } } +#endif -drflac_uint64 drflac_read_s16(drflac* pFlac, drflac_uint64 samplesToRead, drflac_int16* pBufferOut) +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) { - // This reads samples in 2 passes and can probably be optimized. 
- drflac_uint64 totalSamplesRead = 0; +#if defined(DRFLAC_SUPPORT_SSE2) + if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_f32__decode_independent_stereo__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#elif defined(DRFLAC_SUPPORT_NEON) + if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_f32__decode_independent_stereo__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#endif + { + /* Scalar fallback. */ +#if 0 + drflac_read_pcm_frames_f32__decode_independent_stereo__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#else + drflac_read_pcm_frames_f32__decode_independent_stereo__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#endif + } +} - while (samplesToRead > 0) { - drflac_int32 samples32[4096]; - drflac_uint64 samplesJustRead = drflac_read_s32(pFlac, (samplesToRead > 4096) ? 4096 : samplesToRead, samples32); - if (samplesJustRead == 0) { - break; // Reached the end. - } +drflac_uint64 drflac_read_pcm_frames_f32(drflac* pFlac, drflac_uint64 framesToRead, float* pBufferOut) +{ + drflac_uint64 framesRead; + drflac_int32 unusedBitsPerSample; - // s32 -> s16 - for (drflac_uint64 i = 0; i < samplesJustRead; ++i) { - pBufferOut[i] = (drflac_int16)(samples32[i] >> 16); - } + if (pFlac == NULL || framesToRead == 0) { + return 0; + } - totalSamplesRead += samplesJustRead; - samplesToRead -= samplesJustRead; - pBufferOut += samplesJustRead; + if (pBufferOut == NULL) { + return drflac__seek_forward_by_pcm_frames(pFlac, framesToRead); } - return totalSamplesRead; -} + unusedBitsPerSample = 32 - pFlac->bitsPerSample; -drflac_uint64 drflac_read_f32(drflac* pFlac, drflac_uint64 samplesToRead, float* pBufferOut) -{ - // This reads samples in 2 passes and can probably be optimized. 
- drflac_uint64 totalSamplesRead = 0; + framesRead = 0; + while (framesToRead > 0) { + /* If we've run out of samples in this frame, go to the next. */ + if (pFlac->currentFLACFrame.pcmFramesRemaining == 0) { + if (!drflac__read_and_decode_next_flac_frame(pFlac)) { + break; /* Couldn't read the next frame, so just break from the loop and return. */ + } + } else { + unsigned int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment); + drflac_uint64 iFirstPCMFrame = pFlac->currentFLACFrame.header.blockSizeInPCMFrames - pFlac->currentFLACFrame.pcmFramesRemaining; + drflac_uint64 frameCountThisIteration = framesToRead; - while (samplesToRead > 0) { - drflac_int32 samples32[4096]; - drflac_uint64 samplesJustRead = drflac_read_s32(pFlac, (samplesToRead > 4096) ? 4096 : samplesToRead, samples32); - if (samplesJustRead == 0) { - break; // Reached the end. - } + if (frameCountThisIteration > pFlac->currentFLACFrame.pcmFramesRemaining) { + frameCountThisIteration = pFlac->currentFLACFrame.pcmFramesRemaining; + } - // s32 -> f32 - for (drflac_uint64 i = 0; i < samplesJustRead; ++i) { - pBufferOut[i] = (float)(samples32[i] / 2147483648.0); - } + if (channelCount == 2) { + const drflac_int32* pDecodedSamples0 = pFlac->currentFLACFrame.subframes[0].pSamplesS32 + iFirstPCMFrame; + const drflac_int32* pDecodedSamples1 = pFlac->currentFLACFrame.subframes[1].pSamplesS32 + iFirstPCMFrame; + + switch (pFlac->currentFLACFrame.header.channelAssignment) + { + case DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE: + { + drflac_read_pcm_frames_f32__decode_left_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); + } break; + + case DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE: + { + drflac_read_pcm_frames_f32__decode_right_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); + } break; + + case DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE: + { + 
drflac_read_pcm_frames_f32__decode_mid_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); + } break; + + case DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT: + default: + { + drflac_read_pcm_frames_f32__decode_independent_stereo(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); + } break; + } + } else { + /* Generic interleaving. */ + drflac_uint64 i; + for (i = 0; i < frameCountThisIteration; ++i) { + unsigned int j; + for (j = 0; j < channelCount; ++j) { + pBufferOut[(i*channelCount)+j] = (float)(((pFlac->currentFLACFrame.subframes[j].pSamplesS32[iFirstPCMFrame + i]) << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[j].wastedBitsPerSample)) / 2147483648.0); + } + } + } - totalSamplesRead += samplesJustRead; - samplesToRead -= samplesJustRead; - pBufferOut += samplesJustRead; + framesRead += frameCountThisIteration; + pBufferOut += frameCountThisIteration * channelCount; + framesToRead -= frameCountThisIteration; + pFlac->currentPCMFrame += frameCountThisIteration; + pFlac->currentFLACFrame.pcmFramesRemaining -= (unsigned int)frameCountThisIteration; + } } - return totalSamplesRead; + return framesRead; } -drflac_bool32 drflac_seek_to_sample(drflac* pFlac, drflac_uint64 sampleIndex) + +drflac_bool32 drflac_seek_to_pcm_frame(drflac* pFlac, drflac_uint64 pcmFrameIndex) { if (pFlac == NULL) { return DRFLAC_FALSE; } - // If we don't know where the first frame begins then we can't seek. This will happen when the STREAMINFO block was not present - // when the decoder was opened. - if (pFlac->firstFramePos == 0) { + /* Don't do anything if we're already on the seek point. */ + if (pFlac->currentPCMFrame == pcmFrameIndex) { + return DRFLAC_TRUE; + } + + /* + If we don't know where the first frame begins then we can't seek. This will happen when the STREAMINFO block was not present + when the decoder was opened. 
+ */ + if (pFlac->firstFLACFramePosInBytes == 0) { return DRFLAC_FALSE; } - if (sampleIndex == 0) { - pFlac->currentSample = 0; + if (pcmFrameIndex == 0) { + pFlac->currentPCMFrame = 0; return drflac__seek_to_first_frame(pFlac); } else { drflac_bool32 wasSuccessful = DRFLAC_FALSE; - // Clamp the sample to the end. - if (sampleIndex >= pFlac->totalSampleCount) { - sampleIndex = pFlac->totalSampleCount - 1; + /* Clamp the sample to the end. */ + if (pcmFrameIndex > pFlac->totalPCMFrameCount) { + pcmFrameIndex = pFlac->totalPCMFrameCount; } - // If the target sample and the current sample are in the same frame we just move the position forward. - if (sampleIndex > pFlac->currentSample) { - // Forward. - drflac_uint32 offset = (drflac_uint32)(sampleIndex - pFlac->currentSample); - if (pFlac->currentFrame.samplesRemaining > offset) { - pFlac->currentFrame.samplesRemaining -= offset; - pFlac->currentSample = sampleIndex; + /* If the target sample and the current sample are in the same frame we just move the position forward. */ + if (pcmFrameIndex > pFlac->currentPCMFrame) { + /* Forward. */ + drflac_uint32 offset = (drflac_uint32)(pcmFrameIndex - pFlac->currentPCMFrame); + if (pFlac->currentFLACFrame.pcmFramesRemaining > offset) { + pFlac->currentFLACFrame.pcmFramesRemaining -= offset; + pFlac->currentPCMFrame = pcmFrameIndex; return DRFLAC_TRUE; } } else { - // Backward. - drflac_uint32 offsetAbs = (drflac_uint32)(pFlac->currentSample - sampleIndex); - drflac_uint32 currentFrameSampleCount = pFlac->currentFrame.header.blockSize * drflac__get_channel_count_from_channel_assignment(pFlac->currentFrame.header.channelAssignment); - drflac_uint32 currentFrameSamplesConsumed = (drflac_uint32)(currentFrameSampleCount - pFlac->currentFrame.samplesRemaining); - if (currentFrameSamplesConsumed > offsetAbs) { - pFlac->currentFrame.samplesRemaining += offsetAbs; - pFlac->currentSample = sampleIndex; + /* Backward. 
*/ + drflac_uint32 offsetAbs = (drflac_uint32)(pFlac->currentPCMFrame - pcmFrameIndex); + drflac_uint32 currentFLACFramePCMFrameCount = pFlac->currentFLACFrame.header.blockSizeInPCMFrames; + drflac_uint32 currentFLACFramePCMFramesConsumed = currentFLACFramePCMFrameCount - pFlac->currentFLACFrame.pcmFramesRemaining; + if (currentFLACFramePCMFramesConsumed > offsetAbs) { + pFlac->currentFLACFrame.pcmFramesRemaining += offsetAbs; + pFlac->currentPCMFrame = pcmFrameIndex; return DRFLAC_TRUE; } } - // Different techniques depending on encapsulation. Using the native FLAC seektable with Ogg encapsulation is a bit awkward so - // we'll instead use Ogg's natural seeking facility. + /* + Different techniques depending on encapsulation. Using the native FLAC seektable with Ogg encapsulation is a bit awkward so + we'll instead use Ogg's natural seeking facility. + */ #ifndef DR_FLAC_NO_OGG if (pFlac->container == drflac_container_ogg) { - wasSuccessful = drflac_ogg__seek_to_sample(pFlac, sampleIndex); + wasSuccessful = drflac_ogg__seek_to_pcm_frame(pFlac, pcmFrameIndex); } else #endif { - // First try seeking via the seek table. If this fails, fall back to a brute force seek which is much slower. - wasSuccessful = drflac__seek_to_sample__seek_table(pFlac, sampleIndex); - if (!wasSuccessful) { - wasSuccessful = drflac__seek_to_sample__brute_force(pFlac, sampleIndex); + /* First try seeking via the seek table. If this fails, fall back to a brute force seek which is much slower. */ + if (!wasSuccessful && !pFlac->_noSeekTableSeek) { + wasSuccessful = drflac__seek_to_pcm_frame__seek_table(pFlac, pcmFrameIndex); + } + +#if !defined(DR_FLAC_NO_CRC) + /* Fall back to binary search if seek table seeking fails. This requires the length of the stream to be known. 
*/ + if (!wasSuccessful && !pFlac->_noBinarySearchSeek && pFlac->totalPCMFrameCount > 0) { + wasSuccessful = drflac__seek_to_pcm_frame__binary_search(pFlac, pcmFrameIndex); + } +#endif + + /* Fall back to brute force if all else fails. */ + if (!wasSuccessful && !pFlac->_noBruteForceSeek) { + wasSuccessful = drflac__seek_to_pcm_frame__brute_force(pFlac, pcmFrameIndex); } } - pFlac->currentSample = sampleIndex; + pFlac->currentPCMFrame = pcmFrameIndex; return wasSuccessful; } } -//// High Level APIs //// +/* High Level APIs */ #if defined(SIZE_MAX) #define DRFLAC_SIZE_MAX SIZE_MAX @@ -5781,61 +10337,67 @@ drflac_bool32 drflac_seek_to_sample(drflac* pFlac, drflac_uint64 sampleIndex) #endif -// Using a macro as the definition of the drflac__full_decode_and_close_*() API family. Sue me. -#define DRFLAC_DEFINE_FULL_DECODE_AND_CLOSE(extension, type) \ -static type* drflac__full_decode_and_close_ ## extension (drflac* pFlac, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalSampleCountOut)\ +/* Using a macro as the definition of the drflac__full_decode_and_close_*() API family. Sue me. 
*/ +#define DRFLAC_DEFINE_FULL_READ_AND_CLOSE(extension, type) \ +static type* drflac__full_read_and_close_ ## extension (drflac* pFlac, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut)\ { \ - drflac_assert(pFlac != NULL); \ - \ type* pSampleData = NULL; \ - drflac_uint64 totalSampleCount = pFlac->totalSampleCount; \ + drflac_uint64 totalPCMFrameCount; \ \ - if (totalSampleCount == 0) { \ - type buffer[4096]; \ + DRFLAC_ASSERT(pFlac != NULL); \ \ + totalPCMFrameCount = pFlac->totalPCMFrameCount; \ + \ + if (totalPCMFrameCount == 0) { \ + type buffer[4096]; \ + drflac_uint64 pcmFramesRead; \ size_t sampleDataBufferSize = sizeof(buffer); \ - pSampleData = (type*)DRFLAC_MALLOC(sampleDataBufferSize); \ + \ + pSampleData = (type*)drflac__malloc_from_callbacks(sampleDataBufferSize, &pFlac->allocationCallbacks); \ if (pSampleData == NULL) { \ goto on_error; \ } \ \ - drflac_uint64 samplesRead; \ - while ((samplesRead = (drflac_uint64)drflac_read_##extension(pFlac, sizeof(buffer)/sizeof(buffer[0]), buffer)) > 0) { \ - if (((totalSampleCount + samplesRead) * sizeof(type)) > sampleDataBufferSize) { \ - sampleDataBufferSize *= 2; \ - type* pNewSampleData = (type*)DRFLAC_REALLOC(pSampleData, sampleDataBufferSize); \ + while ((pcmFramesRead = (drflac_uint64)drflac_read_pcm_frames_##extension(pFlac, sizeof(buffer)/sizeof(buffer[0])/pFlac->channels, buffer)) > 0) { \ + if (((totalPCMFrameCount + pcmFramesRead) * pFlac->channels * sizeof(type)) > sampleDataBufferSize) { \ + type* pNewSampleData; \ + size_t newSampleDataBufferSize; \ + \ + newSampleDataBufferSize = sampleDataBufferSize * 2; \ + pNewSampleData = (type*)drflac__realloc_from_callbacks(pSampleData, newSampleDataBufferSize, sampleDataBufferSize, &pFlac->allocationCallbacks); \ if (pNewSampleData == NULL) { \ - DRFLAC_FREE(pSampleData); \ + drflac__free_from_callbacks(pSampleData, &pFlac->allocationCallbacks); \ goto on_error; \ } \ \ + sampleDataBufferSize = 
newSampleDataBufferSize; \ pSampleData = pNewSampleData; \ } \ \ - drflac_copy_memory(pSampleData + totalSampleCount, buffer, (size_t)(samplesRead*sizeof(type))); \ - totalSampleCount += samplesRead; \ + DRFLAC_COPY_MEMORY(pSampleData + (totalPCMFrameCount*pFlac->channels), buffer, (size_t)(pcmFramesRead*pFlac->channels*sizeof(type))); \ + totalPCMFrameCount += pcmFramesRead; \ } \ \ /* At this point everything should be decoded, but we just want to fill the unused part buffer with silence - need to \ protect those ears from random noise! */ \ - drflac_zero_memory(pSampleData + totalSampleCount, (size_t)(sampleDataBufferSize - totalSampleCount*sizeof(type))); \ + DRFLAC_ZERO_MEMORY(pSampleData + (totalPCMFrameCount*pFlac->channels), (size_t)(sampleDataBufferSize - totalPCMFrameCount*pFlac->channels*sizeof(type))); \ } else { \ - drflac_uint64 dataSize = totalSampleCount * sizeof(type); \ + drflac_uint64 dataSize = totalPCMFrameCount*pFlac->channels*sizeof(type); \ if (dataSize > DRFLAC_SIZE_MAX) { \ goto on_error; /* The decoded data is too big. */ \ } \ \ - pSampleData = (type*)DRFLAC_MALLOC((size_t)dataSize); /* <-- Safe cast as per the check above. */ \ + pSampleData = (type*)drflac__malloc_from_callbacks((size_t)dataSize, &pFlac->allocationCallbacks); /* <-- Safe cast as per the check above. 
*/ \ if (pSampleData == NULL) { \ goto on_error; \ } \ \ - totalSampleCount = drflac_read_##extension(pFlac, pFlac->totalSampleCount, pSampleData); \ + totalPCMFrameCount = drflac_read_pcm_frames_##extension(pFlac, pFlac->totalPCMFrameCount, pSampleData); \ } \ \ if (sampleRateOut) *sampleRateOut = pFlac->sampleRate; \ if (channelsOut) *channelsOut = pFlac->channels; \ - if (totalSampleCountOut) *totalSampleCountOut = totalSampleCount; \ + if (totalPCMFrameCountOut) *totalPCMFrameCountOut = totalPCMFrameCount; \ \ drflac_close(pFlac); \ return pSampleData; \ @@ -5845,144 +10407,218 @@ on_error: return NULL; \ } -DRFLAC_DEFINE_FULL_DECODE_AND_CLOSE(s32, drflac_int32) -DRFLAC_DEFINE_FULL_DECODE_AND_CLOSE(s16, drflac_int16) -DRFLAC_DEFINE_FULL_DECODE_AND_CLOSE(f32, float) +DRFLAC_DEFINE_FULL_READ_AND_CLOSE(s32, drflac_int32) +DRFLAC_DEFINE_FULL_READ_AND_CLOSE(s16, drflac_int16) +DRFLAC_DEFINE_FULL_READ_AND_CLOSE(f32, float) -drflac_int32* drflac_open_and_decode_s32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount) +drflac_int32* drflac_open_and_read_pcm_frames_s32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut, const drflac_allocation_callbacks* pAllocationCallbacks) { - // Safety. 
- if (sampleRate) *sampleRate = 0; - if (channels) *channels = 0; - if (totalSampleCount) *totalSampleCount = 0; + drflac* pFlac; + + if (channelsOut) { + *channelsOut = 0; + } + if (sampleRateOut) { + *sampleRateOut = 0; + } + if (totalPCMFrameCountOut) { + *totalPCMFrameCountOut = 0; + } - drflac* pFlac = drflac_open(onRead, onSeek, pUserData); + pFlac = drflac_open(onRead, onSeek, pUserData, pAllocationCallbacks); if (pFlac == NULL) { return NULL; } - return drflac__full_decode_and_close_s32(pFlac, channels, sampleRate, totalSampleCount); + return drflac__full_read_and_close_s32(pFlac, channelsOut, sampleRateOut, totalPCMFrameCountOut); } -drflac_int16* drflac_open_and_decode_s16(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount) +drflac_int16* drflac_open_and_read_pcm_frames_s16(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut, const drflac_allocation_callbacks* pAllocationCallbacks) { - // Safety. 
- if (sampleRate) *sampleRate = 0; - if (channels) *channels = 0; - if (totalSampleCount) *totalSampleCount = 0; + drflac* pFlac; - drflac* pFlac = drflac_open(onRead, onSeek, pUserData); + if (channelsOut) { + *channelsOut = 0; + } + if (sampleRateOut) { + *sampleRateOut = 0; + } + if (totalPCMFrameCountOut) { + *totalPCMFrameCountOut = 0; + } + + pFlac = drflac_open(onRead, onSeek, pUserData, pAllocationCallbacks); if (pFlac == NULL) { return NULL; } - return drflac__full_decode_and_close_s16(pFlac, channels, sampleRate, totalSampleCount); + return drflac__full_read_and_close_s16(pFlac, channelsOut, sampleRateOut, totalPCMFrameCountOut); } -float* drflac_open_and_decode_f32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount) +float* drflac_open_and_read_pcm_frames_f32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut, const drflac_allocation_callbacks* pAllocationCallbacks) { - // Safety. 
- if (sampleRate) *sampleRate = 0; - if (channels) *channels = 0; - if (totalSampleCount) *totalSampleCount = 0; + drflac* pFlac; + + if (channelsOut) { + *channelsOut = 0; + } + if (sampleRateOut) { + *sampleRateOut = 0; + } + if (totalPCMFrameCountOut) { + *totalPCMFrameCountOut = 0; + } - drflac* pFlac = drflac_open(onRead, onSeek, pUserData); + pFlac = drflac_open(onRead, onSeek, pUserData, pAllocationCallbacks); if (pFlac == NULL) { return NULL; } - return drflac__full_decode_and_close_f32(pFlac, channels, sampleRate, totalSampleCount); + return drflac__full_read_and_close_f32(pFlac, channelsOut, sampleRateOut, totalPCMFrameCountOut); } #ifndef DR_FLAC_NO_STDIO -drflac_int32* drflac_open_and_decode_file_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount) +drflac_int32* drflac_open_file_and_read_pcm_frames_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks) { - if (sampleRate) *sampleRate = 0; - if (channels) *channels = 0; - if (totalSampleCount) *totalSampleCount = 0; + drflac* pFlac; - drflac* pFlac = drflac_open_file(filename); + if (sampleRate) { + *sampleRate = 0; + } + if (channels) { + *channels = 0; + } + if (totalPCMFrameCount) { + *totalPCMFrameCount = 0; + } + + pFlac = drflac_open_file(filename, pAllocationCallbacks); if (pFlac == NULL) { return NULL; } - return drflac__full_decode_and_close_s32(pFlac, channels, sampleRate, totalSampleCount); + return drflac__full_read_and_close_s32(pFlac, channels, sampleRate, totalPCMFrameCount); } -drflac_int16* drflac_open_and_decode_file_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount) +drflac_int16* drflac_open_file_and_read_pcm_frames_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* 
pAllocationCallbacks) { - if (sampleRate) *sampleRate = 0; - if (channels) *channels = 0; - if (totalSampleCount) *totalSampleCount = 0; + drflac* pFlac; + + if (sampleRate) { + *sampleRate = 0; + } + if (channels) { + *channels = 0; + } + if (totalPCMFrameCount) { + *totalPCMFrameCount = 0; + } - drflac* pFlac = drflac_open_file(filename); + pFlac = drflac_open_file(filename, pAllocationCallbacks); if (pFlac == NULL) { return NULL; } - return drflac__full_decode_and_close_s16(pFlac, channels, sampleRate, totalSampleCount); + return drflac__full_read_and_close_s16(pFlac, channels, sampleRate, totalPCMFrameCount); } -float* drflac_open_and_decode_file_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount) +float* drflac_open_file_and_read_pcm_frames_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks) { - if (sampleRate) *sampleRate = 0; - if (channels) *channels = 0; - if (totalSampleCount) *totalSampleCount = 0; + drflac* pFlac; - drflac* pFlac = drflac_open_file(filename); + if (sampleRate) { + *sampleRate = 0; + } + if (channels) { + *channels = 0; + } + if (totalPCMFrameCount) { + *totalPCMFrameCount = 0; + } + + pFlac = drflac_open_file(filename, pAllocationCallbacks); if (pFlac == NULL) { return NULL; } - return drflac__full_decode_and_close_f32(pFlac, channels, sampleRate, totalSampleCount); + return drflac__full_read_and_close_f32(pFlac, channels, sampleRate, totalPCMFrameCount); } #endif -drflac_int32* drflac_open_and_decode_memory_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount) +drflac_int32* drflac_open_memory_and_read_pcm_frames_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks) { - if 
(sampleRate) *sampleRate = 0; - if (channels) *channels = 0; - if (totalSampleCount) *totalSampleCount = 0; + drflac* pFlac; + + if (sampleRate) { + *sampleRate = 0; + } + if (channels) { + *channels = 0; + } + if (totalPCMFrameCount) { + *totalPCMFrameCount = 0; + } - drflac* pFlac = drflac_open_memory(data, dataSize); + pFlac = drflac_open_memory(data, dataSize, pAllocationCallbacks); if (pFlac == NULL) { return NULL; } - return drflac__full_decode_and_close_s32(pFlac, channels, sampleRate, totalSampleCount); + return drflac__full_read_and_close_s32(pFlac, channels, sampleRate, totalPCMFrameCount); } -drflac_int16* drflac_open_and_decode_memory_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount) +drflac_int16* drflac_open_memory_and_read_pcm_frames_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks) { - if (sampleRate) *sampleRate = 0; - if (channels) *channels = 0; - if (totalSampleCount) *totalSampleCount = 0; + drflac* pFlac; - drflac* pFlac = drflac_open_memory(data, dataSize); + if (sampleRate) { + *sampleRate = 0; + } + if (channels) { + *channels = 0; + } + if (totalPCMFrameCount) { + *totalPCMFrameCount = 0; + } + + pFlac = drflac_open_memory(data, dataSize, pAllocationCallbacks); if (pFlac == NULL) { return NULL; } - return drflac__full_decode_and_close_s16(pFlac, channels, sampleRate, totalSampleCount); + return drflac__full_read_and_close_s16(pFlac, channels, sampleRate, totalPCMFrameCount); } -float* drflac_open_and_decode_memory_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount) +float* drflac_open_memory_and_read_pcm_frames_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* 
pAllocationCallbacks) { - if (sampleRate) *sampleRate = 0; - if (channels) *channels = 0; - if (totalSampleCount) *totalSampleCount = 0; + drflac* pFlac; + + if (sampleRate) { + *sampleRate = 0; + } + if (channels) { + *channels = 0; + } + if (totalPCMFrameCount) { + *totalPCMFrameCount = 0; + } - drflac* pFlac = drflac_open_memory(data, dataSize); + pFlac = drflac_open_memory(data, dataSize, pAllocationCallbacks); if (pFlac == NULL) { return NULL; } - return drflac__full_decode_and_close_f32(pFlac, channels, sampleRate, totalSampleCount); + return drflac__full_read_and_close_f32(pFlac, channels, sampleRate, totalPCMFrameCount); } -void drflac_free(void* pSampleDataReturnedByOpenAndDecode) + +void drflac_free(void* p, const drflac_allocation_callbacks* pAllocationCallbacks) { - DRFLAC_FREE(pSampleDataReturnedByOpenAndDecode); + if (pAllocationCallbacks != NULL) { + drflac__free_from_callbacks(p, pAllocationCallbacks); + } else { + drflac__free_default(p, NULL); + } } @@ -6000,21 +10636,29 @@ void drflac_init_vorbis_comment_iterator(drflac_vorbis_comment_iterator* pIter, const char* drflac_next_vorbis_comment(drflac_vorbis_comment_iterator* pIter, drflac_uint32* pCommentLengthOut) { - // Safety. - if (pCommentLengthOut) *pCommentLengthOut = 0; + drflac_int32 length; + const char* pComment; + + /* Safety. 
*/ + if (pCommentLengthOut) { + *pCommentLengthOut = 0; + } if (pIter == NULL || pIter->countRemaining == 0 || pIter->pRunningData == NULL) { return NULL; } - drflac_uint32 length = drflac__le2host_32(*(const drflac_uint32*)pIter->pRunningData); + length = drflac__le2host_32(*(const drflac_uint32*)pIter->pRunningData); pIter->pRunningData += 4; - const char* pComment = pIter->pRunningData; + pComment = pIter->pRunningData; pIter->pRunningData += length; pIter->countRemaining -= 1; - if (pCommentLengthOut) *pCommentLengthOut = length; + if (pCommentLengthOut) { + *pCommentLengthOut = length; + } + return pComment; } @@ -6033,19 +10677,22 @@ void drflac_init_cuesheet_track_iterator(drflac_cuesheet_track_iterator* pIter, drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterator* pIter, drflac_cuesheet_track* pCuesheetTrack) { + drflac_cuesheet_track cuesheetTrack; + const char* pRunningData; + drflac_uint64 offsetHi; + drflac_uint64 offsetLo; + if (pIter == NULL || pIter->countRemaining == 0 || pIter->pRunningData == NULL) { return DRFLAC_FALSE; } - drflac_cuesheet_track cuesheetTrack; - - const char* pRunningData = pIter->pRunningData; + pRunningData = pIter->pRunningData; - drflac_uint64 offsetHi = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; - drflac_uint64 offsetLo = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; + offsetHi = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; + offsetLo = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; cuesheetTrack.offset = offsetLo | (offsetHi << 32); cuesheetTrack.trackNumber = pRunningData[0]; pRunningData += 1; - drflac_copy_memory(cuesheetTrack.ISRC, pRunningData, sizeof(cuesheetTrack.ISRC)); pRunningData += 12; + DRFLAC_COPY_MEMORY(cuesheetTrack.ISRC, pRunningData, sizeof(cuesheetTrack.ISRC)); pRunningData += 12; cuesheetTrack.isAudio = (pRunningData[0] & 0x80) != 0; cuesheetTrack.preEmphasis = 
(pRunningData[0] & 0x40) != 0; pRunningData += 14; cuesheetTrack.indexCount = pRunningData[0]; pRunningData += 1; @@ -6054,206 +10701,332 @@ drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterator* pIter, pIter->pRunningData = pRunningData; pIter->countRemaining -= 1; - if (pCuesheetTrack) *pCuesheetTrack = cuesheetTrack; + if (pCuesheetTrack) { + *pCuesheetTrack = cuesheetTrack; + } + return DRFLAC_TRUE; } -#endif //DR_FLAC_IMPLEMENTATION - - -// REVISION HISTORY -// -// v0.10.0 - 2018-09-11 -// - Remove the DR_FLAC_NO_WIN32_IO option and the Win32 file IO functionality. If you need to use Win32 file IO you -// need to do it yourself via the callback API. -// - Fix the clang build. -// - Fix undefined behavior. -// - Fix errors with CUESHEET metdata blocks. -// - Add an API for iterating over each cuesheet track in the CUESHEET metadata block. This works the same way as the -// Vorbis comment API. -// - Other miscellaneous bug fixes, mostly relating to invalid FLAC streams. -// - Minor optimizations. -// -// v0.9.11 - 2018-08-29 -// - Fix a bug with sample reconstruction. -// -// v0.9.10 - 2018-08-07 -// - Improve 64-bit detection. -// -// v0.9.9 - 2018-08-05 -// - Fix C++ build on older versions of GCC. -// -// v0.9.8 - 2018-07-24 -// - Fix compilation errors. -// -// v0.9.7 - 2018-07-05 -// - Fix a warning. -// -// v0.9.6 - 2018-06-29 -// - Fix some typos. -// -// v0.9.5 - 2018-06-23 -// - Fix some warnings. -// -// v0.9.4 - 2018-06-14 -// - Optimizations to seeking. -// - Clean up. -// -// v0.9.3 - 2018-05-22 -// - Bug fix. -// -// v0.9.2 - 2018-05-12 -// - Fix a compilation error due to a missing break statement. -// -// v0.9.1 - 2018-04-29 -// - Fix compilation error with Clang. -// -// v0.9 - 2018-04-24 -// - Fix Clang build. -// - Start using major.minor.revision versioning. -// -// v0.8g - 2018-04-19 -// - Fix build on non-x86/x64 architectures. -// -// v0.8f - 2018-02-02 -// - Stop pretending to support changing rate/channels mid stream. 
-// -// v0.8e - 2018-02-01 -// - Fix a crash when the block size of a frame is larger than the maximum block size defined by the FLAC stream. -// - Fix a crash the the Rice partition order is invalid. -// -// v0.8d - 2017-09-22 -// - Add support for decoding streams with ID3 tags. ID3 tags are just skipped. -// -// v0.8c - 2017-09-07 -// - Fix warning on non-x86/x64 architectures. -// -// v0.8b - 2017-08-19 -// - Fix build on non-x86/x64 architectures. -// -// v0.8a - 2017-08-13 -// - A small optimization for the Clang build. -// -// v0.8 - 2017-08-12 -// - API CHANGE: Rename dr_* types to drflac_*. -// - Optimizations. This brings dr_flac back to about the same class of efficiency as the reference implementation. -// - Add support for custom implementations of malloc(), realloc(), etc. -// - Add CRC checking to Ogg encapsulated streams. -// - Fix VC++ 6 build. This is only for the C++ compiler. The C compiler is not currently supported. -// - Bug fixes. -// -// v0.7 - 2017-07-23 -// - Add support for opening a stream without a header block. To do this, use drflac_open_relaxed() / drflac_open_with_metadata_relaxed(). -// -// v0.6 - 2017-07-22 -// - Add support for recovering from invalid frames. With this change, dr_flac will simply skip over invalid frames as if they -// never existed. Frames are checked against their sync code, the CRC-8 of the frame header and the CRC-16 of the whole frame. -// -// v0.5 - 2017-07-16 -// - Fix typos. -// - Change drflac_bool* types to unsigned. -// - Add CRC checking. This makes dr_flac slower, but can be disabled with #define DR_FLAC_NO_CRC. -// -// v0.4f - 2017-03-10 -// - Fix a couple of bugs with the bitstreaming code. -// -// v0.4e - 2017-02-17 -// - Fix some warnings. -// -// v0.4d - 2016-12-26 -// - Add support for 32-bit floating-point PCM decoding. -// - Use drflac_int*/drflac_uint* sized types to improve compiler support. -// - Minor improvements to documentation. 
-// -// v0.4c - 2016-12-26 -// - Add support for signed 16-bit integer PCM decoding. -// -// v0.4b - 2016-10-23 -// - A minor change to drflac_bool8 and drflac_bool32 types. -// -// v0.4a - 2016-10-11 -// - Rename drBool32 to drflac_bool32 for styling consistency. -// -// v0.4 - 2016-09-29 -// - API/ABI CHANGE: Use fixed size 32-bit booleans instead of the built-in bool type. -// - API CHANGE: Rename drflac_open_and_decode*() to drflac_open_and_decode*_s32(). -// - API CHANGE: Swap the order of "channels" and "sampleRate" parameters in drflac_open_and_decode*(). Rationale for this is to -// keep it consistent with drflac_audio. -// -// v0.3f - 2016-09-21 -// - Fix a warning with GCC. -// -// v0.3e - 2016-09-18 -// - Fixed a bug where GCC 4.3+ was not getting properly identified. -// - Fixed a few typos. -// - Changed date formats to ISO 8601 (YYYY-MM-DD). -// -// v0.3d - 2016-06-11 -// - Minor clean up. -// -// v0.3c - 2016-05-28 -// - Fixed compilation error. -// -// v0.3b - 2016-05-16 -// - Fixed Linux/GCC build. -// - Updated documentation. -// -// v0.3a - 2016-05-15 -// - Minor fixes to documentation. -// -// v0.3 - 2016-05-11 -// - Optimizations. Now at about parity with the reference implementation on 32-bit builds. -// - Lots of clean up. -// -// v0.2b - 2016-05-10 -// - Bug fixes. -// -// v0.2a - 2016-05-10 -// - Made drflac_open_and_decode() more robust. -// - Removed an unused debugging variable -// -// v0.2 - 2016-05-09 -// - Added support for Ogg encapsulation. -// - API CHANGE. Have the onSeek callback take a third argument which specifies whether or not the seek -// should be relative to the start or the current position. Also changes the seeking rules such that -// seeking offsets will never be negative. -// - Have drflac_open_and_decode() fail gracefully if the stream has an unknown total sample count. -// -// v0.1b - 2016-05-07 -// - Properly close the file handle in drflac_open_file() and family when the decoder fails to initialize. 
-// - Removed a stale comment. -// -// v0.1a - 2016-05-05 -// - Minor formatting changes. -// - Fixed a warning on the GCC build. -// -// v0.1 - 2016-05-03 -// - Initial versioned release. +#if defined(__GNUC__) + #pragma GCC diagnostic pop +#endif +#endif /* DR_FLAC_IMPLEMENTATION */ + + +/* +REVISION HISTORY +================ +v0.12.2 - 2019-10-07 + - Internal code clean up. + +v0.12.1 - 2019-09-29 + - Fix some Clang Static Analyzer warnings. + - Fix an unused variable warning. + +v0.12.0 - 2019-09-23 + - API CHANGE: Add support for user defined memory allocation routines. This system allows the program to specify their own memory allocation + routines with a user data pointer for client-specific contextual data. This adds an extra parameter to the end of the following APIs: + - drflac_open() + - drflac_open_relaxed() + - drflac_open_with_metadata() + - drflac_open_with_metadata_relaxed() + - drflac_open_file() + - drflac_open_file_with_metadata() + - drflac_open_memory() + - drflac_open_memory_with_metadata() + - drflac_open_and_read_pcm_frames_s32() + - drflac_open_and_read_pcm_frames_s16() + - drflac_open_and_read_pcm_frames_f32() + - drflac_open_file_and_read_pcm_frames_s32() + - drflac_open_file_and_read_pcm_frames_s16() + - drflac_open_file_and_read_pcm_frames_f32() + - drflac_open_memory_and_read_pcm_frames_s32() + - drflac_open_memory_and_read_pcm_frames_s16() + - drflac_open_memory_and_read_pcm_frames_f32() + Set this extra parameter to NULL to use defaults which is the same as the previous behaviour. Setting this NULL will use + DRFLAC_MALLOC, DRFLAC_REALLOC and DRFLAC_FREE. 
+ - Remove deprecated APIs: + - drflac_read_s32() + - drflac_read_s16() + - drflac_read_f32() + - drflac_seek_to_sample() + - drflac_open_and_decode_s32() + - drflac_open_and_decode_s16() + - drflac_open_and_decode_f32() + - drflac_open_and_decode_file_s32() + - drflac_open_and_decode_file_s16() + - drflac_open_and_decode_file_f32() + - drflac_open_and_decode_memory_s32() + - drflac_open_and_decode_memory_s16() + - drflac_open_and_decode_memory_f32() + - Remove drflac.totalSampleCount which is now replaced with drflac.totalPCMFrameCount. You can emulate drflac.totalSampleCount + by doing pFlac->totalPCMFrameCount*pFlac->channels. + - Rename drflac.currentFrame to drflac.currentFLACFrame to remove ambiguity with PCM frames. + - Fix errors when seeking to the end of a stream. + - Optimizations to seeking. + - SSE improvements and optimizations. + - ARM NEON optimizations. + - Optimizations to drflac_read_pcm_frames_s16(). + - Optimizations to drflac_read_pcm_frames_s32(). + +v0.11.10 - 2019-06-26 + - Fix a compiler error. + +v0.11.9 - 2019-06-16 + - Silence some ThreadSanitizer warnings. + +v0.11.8 - 2019-05-21 + - Fix warnings. + +v0.11.7 - 2019-05-06 + - C89 fixes. + +v0.11.6 - 2019-05-05 + - Add support for C89. + - Fix a compiler warning when CRC is disabled. + - Change license to choice of public domain or MIT-0. + +v0.11.5 - 2019-04-19 + - Fix a compiler error with GCC. + +v0.11.4 - 2019-04-17 + - Fix some warnings with GCC when compiling with -std=c99. + +v0.11.3 - 2019-04-07 + - Silence warnings with GCC. + +v0.11.2 - 2019-03-10 + - Fix a warning. + +v0.11.1 - 2019-02-17 + - Fix a potential bug with seeking. + +v0.11.0 - 2018-12-16 + - API CHANGE: Deprecated drflac_read_s32(), drflac_read_s16() and drflac_read_f32() and replaced them with + drflac_read_pcm_frames_s32(), drflac_read_pcm_frames_s16() and drflac_read_pcm_frames_f32(). The new APIs take + and return PCM frame counts instead of sample counts. 
To upgrade you will need to change the input count by + dividing it by the channel count, and then do the same with the return value. + - API CHANGE: Deprecated drflac_seek_to_sample() and replaced with drflac_seek_to_pcm_frame(). Same rules as + the changes to drflac_read_*() apply. + - API CHANGE: Deprecated drflac_open_and_decode_*() and replaced with drflac_open_*_and_read_*(). Same rules as + the changes to drflac_read_*() apply. + - Optimizations. + +v0.10.0 - 2018-09-11 + - Remove the DR_FLAC_NO_WIN32_IO option and the Win32 file IO functionality. If you need to use Win32 file IO you + need to do it yourself via the callback API. + - Fix the clang build. + - Fix undefined behavior. + - Fix errors with CUESHEET metadata blocks. + - Add an API for iterating over each cuesheet track in the CUESHEET metadata block. This works the same way as the + Vorbis comment API. + - Other miscellaneous bug fixes, mostly relating to invalid FLAC streams. + - Minor optimizations. + +v0.9.11 - 2018-08-29 + - Fix a bug with sample reconstruction. + +v0.9.10 - 2018-08-07 + - Improve 64-bit detection. + +v0.9.9 - 2018-08-05 + - Fix C++ build on older versions of GCC. + +v0.9.8 - 2018-07-24 + - Fix compilation errors. + +v0.9.7 - 2018-07-05 + - Fix a warning. + +v0.9.6 - 2018-06-29 + - Fix some typos. + +v0.9.5 - 2018-06-23 + - Fix some warnings. + +v0.9.4 - 2018-06-14 + - Optimizations to seeking. + - Clean up. + +v0.9.3 - 2018-05-22 + - Bug fix. + +v0.9.2 - 2018-05-12 + - Fix a compilation error due to a missing break statement. + +v0.9.1 - 2018-04-29 + - Fix compilation error with Clang. + +v0.9 - 2018-04-24 + - Fix Clang build. + - Start using major.minor.revision versioning. + +v0.8g - 2018-04-19 + - Fix build on non-x86/x64 architectures. + +v0.8f - 2018-02-02 + - Stop pretending to support changing rate/channels mid stream. + +v0.8e - 2018-02-01 + - Fix a crash when the block size of a frame is larger than the maximum block size defined by the FLAC stream. 
+ - Fix a crash when the Rice partition order is invalid. + +v0.8d - 2017-09-22 + - Add support for decoding streams with ID3 tags. ID3 tags are just skipped. + +v0.8c - 2017-09-07 + - Fix warning on non-x86/x64 architectures. + +v0.8b - 2017-08-19 + - Fix build on non-x86/x64 architectures. + +v0.8a - 2017-08-13 + - A small optimization for the Clang build. + +v0.8 - 2017-08-12 + - API CHANGE: Rename dr_* types to drflac_*. + - Optimizations. This brings dr_flac back to about the same class of efficiency as the reference implementation. + - Add support for custom implementations of malloc(), realloc(), etc. + - Add CRC checking to Ogg encapsulated streams. + - Fix VC++ 6 build. This is only for the C++ compiler. The C compiler is not currently supported. + - Bug fixes. + +v0.7 - 2017-07-23 + - Add support for opening a stream without a header block. To do this, use drflac_open_relaxed() / drflac_open_with_metadata_relaxed(). + +v0.6 - 2017-07-22 + - Add support for recovering from invalid frames. With this change, dr_flac will simply skip over invalid frames as if they + never existed. Frames are checked against their sync code, the CRC-8 of the frame header and the CRC-16 of the whole frame. + +v0.5 - 2017-07-16 + - Fix typos. + - Change drflac_bool* types to unsigned. + - Add CRC checking. This makes dr_flac slower, but can be disabled with #define DR_FLAC_NO_CRC. + +v0.4f - 2017-03-10 + - Fix a couple of bugs with the bitstreaming code. + +v0.4e - 2017-02-17 + - Fix some warnings. + +v0.4d - 2016-12-26 + - Add support for 32-bit floating-point PCM decoding. + - Use drflac_int* and drflac_uint* sized types to improve compiler support. + - Minor improvements to documentation. + +v0.4c - 2016-12-26 + - Add support for signed 16-bit integer PCM decoding. + +v0.4b - 2016-10-23 + - A minor change to drflac_bool8 and drflac_bool32 types. + +v0.4a - 2016-10-11 + - Rename drBool32 to drflac_bool32 for styling consistency. 
+ +v0.4 - 2016-09-29 + - API/ABI CHANGE: Use fixed size 32-bit booleans instead of the built-in bool type. + - API CHANGE: Rename drflac_open_and_decode*() to drflac_open_and_decode*_s32(). + - API CHANGE: Swap the order of "channels" and "sampleRate" parameters in drflac_open_and_decode*(). Rationale for this is to + keep it consistent with drflac_audio. + +v0.3f - 2016-09-21 + - Fix a warning with GCC. + +v0.3e - 2016-09-18 + - Fixed a bug where GCC 4.3+ was not getting properly identified. + - Fixed a few typos. + - Changed date formats to ISO 8601 (YYYY-MM-DD). + +v0.3d - 2016-06-11 + - Minor clean up. + +v0.3c - 2016-05-28 + - Fixed compilation error. + +v0.3b - 2016-05-16 + - Fixed Linux/GCC build. + - Updated documentation. + +v0.3a - 2016-05-15 + - Minor fixes to documentation. + +v0.3 - 2016-05-11 + - Optimizations. Now at about parity with the reference implementation on 32-bit builds. + - Lots of clean up. + +v0.2b - 2016-05-10 + - Bug fixes. + +v0.2a - 2016-05-10 + - Made drflac_open_and_decode() more robust. + - Removed an unused debugging variable + +v0.2 - 2016-05-09 + - Added support for Ogg encapsulation. + - API CHANGE. Have the onSeek callback take a third argument which specifies whether or not the seek + should be relative to the start or the current position. Also changes the seeking rules such that + seeking offsets will never be negative. + - Have drflac_open_and_decode() fail gracefully if the stream has an unknown total sample count. + +v0.1b - 2016-05-07 + - Properly close the file handle in drflac_open_file() and family when the decoder fails to initialize. + - Removed a stale comment. + +v0.1a - 2016-05-05 + - Minor formatting changes. + - Fixed a warning on the GCC build. + +v0.1 - 2016-05-03 + - Initial versioned release. +*/ /* +This software is available as a choice of the following licenses. Choose +whichever you prefer. 
+ +=============================================================================== +ALTERNATIVE 1 - Public Domain (www.unlicense.org) +=============================================================================== This is free and unencumbered software released into the public domain. -Anyone is free to copy, modify, publish, use, compile, sell, or -distribute this software, either in source code form or as a compiled -binary, for any purpose, commercial or non-commercial, and by any -means. - -In jurisdictions that recognize copyright laws, the author or authors -of this software dedicate any and all copyright interest in the -software to the public domain. We make this dedication for the benefit -of the public at large and to the detriment of our heirs and -successors. We intend this dedication to be an overt act of -relinquishment in perpetuity of all present and future rights to this -software under copyright law. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -OTHER DEALINGS IN THE SOFTWARE. +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. + +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. 
We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. For more information, please refer to <http://unlicense.org/> + +=============================================================================== +ALTERNATIVE 2 - MIT No Attribution +=============================================================================== +Copyright 2018 David Reid + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. */ -// clang-format on diff --git a/include/kfr/io/dr/dr_mp3.h b/include/kfr/io/dr/dr_mp3.h @@ -0,0 +1,4203 @@ +/* +MP3 audio decoder. Choice of public domain or MIT-0. See license statements at the end of this file. 
+dr_mp3 - v0.5.3 - 2019-11-14 + +David Reid - [email protected] + +Based off minimp3 (https://github.com/lieff/minimp3) which is where the real work was done. See the bottom of this file for +differences between minimp3 and dr_mp3. +*/ + +/* +RELEASE NOTES - v0.5.0 +======================= +Version 0.5.0 has breaking API changes. + +Improved Client-Defined Memory Allocation +----------------------------------------- +The main change with this release is the addition of a more flexible way of implementing custom memory allocation routines. The +existing system of DRMP3_MALLOC, DRMP3_REALLOC and DRMP3_FREE are still in place and will be used by default when no custom +allocation callbacks are specified. + +To use the new system, you pass in a pointer to a drmp3_allocation_callbacks object to drmp3_init() and family, like this: + + void* my_malloc(size_t sz, void* pUserData) + { + return malloc(sz); + } + void* my_realloc(void* p, size_t sz, void* pUserData) + { + return realloc(p, sz); + } + void my_free(void* p, void* pUserData) + { + free(p); + } + + ... + + drmp3_allocation_callbacks allocationCallbacks; + allocationCallbacks.pUserData = &myData; + allocationCallbacks.onMalloc = my_malloc; + allocationCallbacks.onRealloc = my_realloc; + allocationCallbacks.onFree = my_free; + drmp3_init_file(&mp3, "my_file.mp3", NULL, &allocationCallbacks); + +The advantage of this new system is that it allows you to specify user data which will be passed in to the allocation routines. + +Passing in null for the allocation callbacks object will cause dr_mp3 to use defaults which is the same as DRMP3_MALLOC, +DRMP3_REALLOC and DRMP3_FREE and the equivalent of how it worked in previous versions. + +Every API that opens a drmp3 object now takes this extra parameter. 
These include the following: + + drmp3_init() + drmp3_init_file() + drmp3_init_memory() + drmp3_open_and_read_pcm_frames_f32() + drmp3_open_and_read_pcm_frames_s16() + drmp3_open_memory_and_read_pcm_frames_f32() + drmp3_open_memory_and_read_pcm_frames_s16() + drmp3_open_file_and_read_pcm_frames_f32() + drmp3_open_file_and_read_pcm_frames_s16() + +Renamed APIs +------------ +The following APIs have been renamed for consistency with other dr_* libraries and to make it clear that they return PCM frame +counts rather than sample counts. + + drmp3_open_and_read_f32() -> drmp3_open_and_read_pcm_frames_f32() + drmp3_open_and_read_s16() -> drmp3_open_and_read_pcm_frames_s16() + drmp3_open_memory_and_read_f32() -> drmp3_open_memory_and_read_pcm_frames_f32() + drmp3_open_memory_and_read_s16() -> drmp3_open_memory_and_read_pcm_frames_s16() + drmp3_open_file_and_read_f32() -> drmp3_open_file_and_read_pcm_frames_f32() + drmp3_open_file_and_read_s16() -> drmp3_open_file_and_read_pcm_frames_s16() +*/ + +/* +USAGE +===== +dr_mp3 is a single-file library. To use it, do something like the following in one .c file. + #define DR_MP3_IMPLEMENTATION + #include "dr_mp3.h" + +You can then #include this file in other parts of the program as you would with any other header file. To decode audio data, +do something like the following: + + drmp3 mp3; + if (!drmp3_init_file(&mp3, "MySong.mp3", NULL, NULL)) { + // Failed to open file + } + + ... + + drmp3_uint64 framesRead = drmp3_read_pcm_frames_f32(&mp3, framesToRead, pFrames); + +The drmp3 object is transparent so you can get access to the channel count and sample rate like so: + + drmp3_uint32 channels = mp3.channels; + drmp3_uint32 sampleRate = mp3.sampleRate; + +The third parameter of drmp3_init_file() in the example above allows you to control the output channel count and sample rate. It +is a pointer to a drmp3_config object. Setting any of the variables of this object to 0 will cause dr_mp3 to use defaults. The +fourth parameter is a pointer to a drmp3_allocation_callbacks object; passing NULL causes dr_mp3 to use the default allocation routines. 
+ +The example above initializes a decoder from a file, but you can also initialize it from a block of memory and read and seek +callbacks with drmp3_init_memory() and drmp3_init() respectively. + +You do not need to do any annoying memory management when reading PCM frames - this is all managed internally. You can request +any number of PCM frames in each call to drmp3_read_pcm_frames_f32() and it will return as many PCM frames as it can, up to the +requested amount. + +You can also decode an entire file in one go with drmp3_open_and_read_pcm_frames_f32(), drmp3_open_memory_and_read_pcm_frames_f32() and +drmp3_open_file_and_read_pcm_frames_f32(). + + +OPTIONS +======= +#define these options before including this file. + +#define DR_MP3_NO_STDIO + Disable drmp3_init_file(), etc. + +#define DR_MP3_NO_SIMD + Disable SIMD optimizations. +*/ + +#ifndef dr_mp3_h +#define dr_mp3_h + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stddef.h> + +#if defined(_MSC_VER) && _MSC_VER < 1600 +typedef signed char drmp3_int8; +typedef unsigned char drmp3_uint8; +typedef signed short drmp3_int16; +typedef unsigned short drmp3_uint16; +typedef signed int drmp3_int32; +typedef unsigned int drmp3_uint32; +typedef signed __int64 drmp3_int64; +typedef unsigned __int64 drmp3_uint64; +#else +#include <stdint.h> +typedef int8_t drmp3_int8; +typedef uint8_t drmp3_uint8; +typedef int16_t drmp3_int16; +typedef uint16_t drmp3_uint16; +typedef int32_t drmp3_int32; +typedef uint32_t drmp3_uint32; +typedef int64_t drmp3_int64; +typedef uint64_t drmp3_uint64; +#endif +typedef drmp3_uint8 drmp3_bool8; +typedef drmp3_uint32 drmp3_bool32; +#define DRMP3_TRUE 1 +#define DRMP3_FALSE 0 + +#define DRMP3_MAX_PCM_FRAMES_PER_MP3_FRAME 1152 +#define DRMP3_MAX_SAMPLES_PER_FRAME (DRMP3_MAX_PCM_FRAMES_PER_MP3_FRAME*2) + +#ifdef _MSC_VER + #define DRMP3_INLINE __forceinline +#elif defined(__GNUC__) + /* + I've had a bug report where GCC is emitting warnings about functions possibly not being inlineable. 
This warning happens when + the __attribute__((always_inline)) attribute is defined without an "inline" statement. I think therefore there must be some + case where "__inline__" is not always defined, thus the compiler emitting these warnings. When using -std=c89 or -ansi on the + command line, we cannot use the "inline" keyword and instead need to use "__inline__". In an attempt to work around this issue + I am using "__inline__" only when we're compiling in strict ANSI mode. + */ + #if defined(__STRICT_ANSI__) + #define DRMP3_INLINE __inline__ __attribute__((always_inline)) + #else + #define DRMP3_INLINE inline __attribute__((always_inline)) + #endif +#else + #define DRMP3_INLINE +#endif + +/* +Low Level Push API +================== +*/ +typedef struct +{ + int frame_bytes, channels, hz, layer, bitrate_kbps; +} drmp3dec_frame_info; + +typedef struct +{ + float mdct_overlap[2][9*32], qmf_state[15*2*32]; + int reserv, free_format_bytes; + unsigned char header[4], reserv_buf[511]; +} drmp3dec; + +/* Initializes a low level decoder. */ +void drmp3dec_init(drmp3dec *dec); + +/* Reads a frame from a low level decoder. */ +int drmp3dec_decode_frame(drmp3dec *dec, const unsigned char *mp3, int mp3_bytes, void *pcm, drmp3dec_frame_info *info); + +/* Helper for converting between f32 and s16. */ +void drmp3dec_f32_to_s16(const float *in, drmp3_int16 *out, int num_samples); + + + +/* +Main API (Pull API) +=================== +*/ +#ifndef DR_MP3_DEFAULT_CHANNELS +#define DR_MP3_DEFAULT_CHANNELS 2 +#endif +#ifndef DR_MP3_DEFAULT_SAMPLE_RATE +#define DR_MP3_DEFAULT_SAMPLE_RATE 44100 +#endif + +typedef struct drmp3_src drmp3_src; +typedef drmp3_uint64 (* drmp3_src_read_proc)(drmp3_src* pSRC, drmp3_uint64 frameCount, void* pFramesOut, void* pUserData); /* Returns the number of frames that were read. 
*/ + +typedef enum +{ + drmp3_src_algorithm_none, + drmp3_src_algorithm_linear +} drmp3_src_algorithm; + +#define DRMP3_SRC_CACHE_SIZE_IN_FRAMES 512 +typedef struct +{ + drmp3_src* pSRC; + float pCachedFrames[2 * DRMP3_SRC_CACHE_SIZE_IN_FRAMES]; + drmp3_uint32 cachedFrameCount; + drmp3_uint32 iNextFrame; +} drmp3_src_cache; + +typedef struct +{ + drmp3_uint32 sampleRateIn; + drmp3_uint32 sampleRateOut; + drmp3_uint32 channels; + drmp3_src_algorithm algorithm; + drmp3_uint32 cacheSizeInFrames; /* The number of frames to read from the client at a time. */ +} drmp3_src_config; + +struct drmp3_src +{ + drmp3_src_config config; + drmp3_src_read_proc onRead; + void* pUserData; + float bin[256]; + drmp3_src_cache cache; /* <-- For simplifying and optimizing client -> memory reading. */ + union + { + struct + { + double alpha; + drmp3_bool32 isPrevFramesLoaded : 1; + drmp3_bool32 isNextFramesLoaded : 1; + } linear; + } algo; +}; + +typedef enum +{ + drmp3_seek_origin_start, + drmp3_seek_origin_current +} drmp3_seek_origin; + +typedef struct +{ + drmp3_uint64 seekPosInBytes; /* Points to the first byte of an MP3 frame. */ + drmp3_uint64 pcmFrameIndex; /* The index of the PCM frame this seek point targets. */ + drmp3_uint16 mp3FramesToDiscard; /* The number of whole MP3 frames to be discarded before pcmFramesToDiscard. */ + drmp3_uint16 pcmFramesToDiscard; /* The number of leading samples to read and discard. These are discarded after mp3FramesToDiscard. */ +} drmp3_seek_point; + +/* +Callback for when data is read. Return value is the number of bytes actually read. + +pUserData [in] The user data that was passed to drmp3_init(), drmp3_open() and family. +pBufferOut [out] The output buffer. +bytesToRead [in] The number of bytes to read. + +Returns the number of bytes actually read. + +A return value of less than bytesToRead indicates the end of the stream. 
Do _not_ return from this callback until +either the entire bytesToRead is filled or you have reached the end of the stream. +*/ +typedef size_t (* drmp3_read_proc)(void* pUserData, void* pBufferOut, size_t bytesToRead); + +/* +Callback for when data needs to be seeked. + +pUserData [in] The user data that was passed to drmp3_init(), drmp3_open() and family. +offset [in] The number of bytes to move, relative to the origin. Will never be negative. +origin [in] The origin of the seek - the current position or the start of the stream. + +Returns whether or not the seek was successful. + +Whether or not it is relative to the beginning or current position is determined by the "origin" parameter which +will be either drmp3_seek_origin_start or drmp3_seek_origin_current. +*/ +typedef drmp3_bool32 (* drmp3_seek_proc)(void* pUserData, int offset, drmp3_seek_origin origin); + +typedef struct +{ + void* pUserData; + void* (* onMalloc)(size_t sz, void* pUserData); + void* (* onRealloc)(void* p, size_t sz, void* pUserData); + void (* onFree)(void* p, void* pUserData); +} drmp3_allocation_callbacks; + +typedef struct +{ + drmp3_uint32 outputChannels; + drmp3_uint32 outputSampleRate; +} drmp3_config; + +typedef struct +{ + drmp3dec decoder; + drmp3dec_frame_info frameInfo; + drmp3_uint32 channels; + drmp3_uint32 sampleRate; + drmp3_read_proc onRead; + drmp3_seek_proc onSeek; + void* pUserData; + drmp3_allocation_callbacks allocationCallbacks; + drmp3_uint32 mp3FrameChannels; /* The number of channels in the currently loaded MP3 frame. Internal use only. */ + drmp3_uint32 mp3FrameSampleRate; /* The sample rate of the currently loaded MP3 frame. Internal use only. */ + drmp3_uint32 pcmFramesConsumedInMP3Frame; + drmp3_uint32 pcmFramesRemainingInMP3Frame; + drmp3_uint8 pcmFrames[sizeof(float)*DRMP3_MAX_SAMPLES_PER_FRAME]; /* <-- Multipled by sizeof(float) to ensure there's enough room for DR_MP3_FLOAT_OUTPUT. 
*/ + drmp3_uint64 currentPCMFrame; /* The current PCM frame, globally, based on the output sample rate. Mainly used for seeking. */ + drmp3_uint64 streamCursor; /* The current byte the decoder is sitting on in the raw stream. */ + drmp3_src src; + drmp3_seek_point* pSeekPoints; /* NULL by default. Set with drmp3_bind_seek_table(). Memory is owned by the client. dr_mp3 will never attempt to free this pointer. */ + drmp3_uint32 seekPointCount; /* The number of items in pSeekPoints. When set to 0 assumes to no seek table. Defaults to zero. */ + size_t dataSize; + size_t dataCapacity; + drmp3_uint8* pData; + drmp3_bool32 atEnd : 1; + struct + { + const drmp3_uint8* pData; + size_t dataSize; + size_t currentReadPos; + } memory; /* Only used for decoders that were opened against a block of memory. */ +} drmp3; + +/* +Initializes an MP3 decoder. + +onRead [in] The function to call when data needs to be read from the client. +onSeek [in] The function to call when the read position of the client data needs to move. +pUserData [in, optional] A pointer to application defined data that will be passed to onRead and onSeek. + +Returns true if successful; false otherwise. + +Close the loader with drmp3_uninit(). + +See also: drmp3_init_file(), drmp3_init_memory(), drmp3_uninit() +*/ +drmp3_bool32 drmp3_init(drmp3* pMP3, drmp3_read_proc onRead, drmp3_seek_proc onSeek, void* pUserData, const drmp3_config* pConfig, const drmp3_allocation_callbacks* pAllocationCallbacks); + +/* +Initializes an MP3 decoder from a block of memory. + +This does not create a copy of the data. It is up to the application to ensure the buffer remains valid for +the lifetime of the drmp3 object. + +The buffer should contain the contents of the entire MP3 file. +*/ +drmp3_bool32 drmp3_init_memory(drmp3* pMP3, const void* pData, size_t dataSize, const drmp3_config* pConfig, const drmp3_allocation_callbacks* pAllocationCallbacks); + +#ifndef DR_MP3_NO_STDIO +/* +Initializes an MP3 decoder from a file. 
+ +This holds the internal FILE object until drmp3_uninit() is called. Keep this in mind if you're caching drmp3 +objects because the operating system may restrict the number of file handles an application can have open at +any given time. +*/ +drmp3_bool32 drmp3_init_file(drmp3* pMP3, const char* filePath, const drmp3_config* pConfig, const drmp3_allocation_callbacks* pAllocationCallbacks); +#endif + +/* +Uninitializes an MP3 decoder. +*/ +void drmp3_uninit(drmp3* pMP3); + +/* +Reads PCM frames as interleaved 32-bit IEEE floating point PCM. + +Note that framesToRead specifies the number of PCM frames to read, _not_ the number of MP3 frames. +*/ +drmp3_uint64 drmp3_read_pcm_frames_f32(drmp3* pMP3, drmp3_uint64 framesToRead, float* pBufferOut); + +/* +Reads PCM frames as interleaved signed 16-bit integer PCM. + +Note that framesToRead specifies the number of PCM frames to read, _not_ the number of MP3 frames. +*/ +drmp3_uint64 drmp3_read_pcm_frames_s16(drmp3* pMP3, drmp3_uint64 framesToRead, drmp3_int16* pBufferOut); + +/* +Seeks to a specific frame. + +Note that this is _not_ an MP3 frame, but rather a PCM frame. +*/ +drmp3_bool32 drmp3_seek_to_pcm_frame(drmp3* pMP3, drmp3_uint64 frameIndex); + +/* +Calculates the total number of PCM frames in the MP3 stream. Cannot be used for infinite streams such as internet +radio. Runs in linear time. Returns 0 on error. +*/ +drmp3_uint64 drmp3_get_pcm_frame_count(drmp3* pMP3); + +/* +Calculates the total number of MP3 frames in the MP3 stream. Cannot be used for infinite streams such as internet +radio. Runs in linear time. Returns 0 on error. +*/ +drmp3_uint64 drmp3_get_mp3_frame_count(drmp3* pMP3); + +/* +Calculates the total number of MP3 and PCM frames in the MP3 stream. Cannot be used for infinite streams such as internet +radio. Runs in linear time. Returns 0 on error. + +This is equivalent to calling drmp3_get_mp3_frame_count() and drmp3_get_pcm_frame_count() except that it's more efficient. 
+*/ +drmp3_bool32 drmp3_get_mp3_and_pcm_frame_count(drmp3* pMP3, drmp3_uint64* pMP3FrameCount, drmp3_uint64* pPCMFrameCount); + +/* +Calculates the seek points based on PCM frames. This is slow. + +pSeekPointCount is a pointer to a uint32 containing the seek point count. On input it contains the desired count. +On output it contains the actual count. The reason for this design is that the client may request too many +seek points, in which case dr_mp3 will return a corrected count. + +pSeekPoints receives the calculated seek points. +NOTE(review): presumably it must have room for the requested count; confirm against the implementation. + +Note that seek table seeking is not quite sample exact when the MP3 stream contains inconsistent sample rates. +*/ +drmp3_bool32 drmp3_calculate_seek_points(drmp3* pMP3, drmp3_uint32* pSeekPointCount, drmp3_seek_point* pSeekPoints); + +/* +Binds a seek table to the decoder. + +This does _not_ make a copy of pSeekPoints - it only references it. It is up to the application to ensure this +remains valid while it is bound to the decoder. + +Use drmp3_calculate_seek_points() to calculate the seek points. +*/ +drmp3_bool32 drmp3_bind_seek_table(drmp3* pMP3, drmp3_uint32 seekPointCount, drmp3_seek_point* pSeekPoints); + + +/* +Opens and decodes an entire MP3 stream as a single operation. + +pConfig is both an input and output. On input it contains what you want. On output it contains what you got. + +Free the returned pointer with drmp3_free(). 
+*/ +float* drmp3_open_and_read_pcm_frames_f32(drmp3_read_proc onRead, drmp3_seek_proc onSeek, void* pUserData, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks); +drmp3_int16* drmp3_open_and_read_pcm_frames_s16(drmp3_read_proc onRead, drmp3_seek_proc onSeek, void* pUserData, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks); + /* Variants that take a data pointer and a byte size in place of the read/seek callbacks. The _f32 form returns float*, the _s16 form returns drmp3_int16*. */ +float* drmp3_open_memory_and_read_pcm_frames_f32(const void* pData, size_t dataSize, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks); +drmp3_int16* drmp3_open_memory_and_read_pcm_frames_s16(const void* pData, size_t dataSize, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks); + /* Variants that take a file path; they are only declared when DR_MP3_NO_STDIO is not defined. */ +#ifndef DR_MP3_NO_STDIO +float* drmp3_open_file_and_read_pcm_frames_f32(const char* filePath, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks); +drmp3_int16* drmp3_open_file_and_read_pcm_frames_s16(const char* filePath, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks); +#endif + +/* +Frees any memory that was allocated by a public drmp3 API. 
+*/ +void drmp3_free(void* p, const drmp3_allocation_callbacks* pAllocationCallbacks); + +#ifdef __cplusplus +} +#endif +#endif /* dr_mp3_h */ + + +/************************************************************************************************************************************************************ + ************************************************************************************************************************************************************ + + IMPLEMENTATION + + Everything below is compiled only when DR_MP3_IMPLEMENTATION is defined. + + ************************************************************************************************************************************************************ + ************************************************************************************************************************************************************/ +#ifdef DR_MP3_IMPLEMENTATION +#include <stdlib.h> +#include <string.h> +#include <limits.h> /* For INT_MAX */ + +/* Disable SIMD when compiling with TCC for now. */ +#if defined(__TINYC__) +#define DR_MP3_NO_SIMD +#endif + /* Byte-wise pointer offset: casts p to drmp3_uint8*, adds offset, and yields the result as void*. */ +#define DRMP3_OFFSET_PTR(p, offset) ((void*)((drmp3_uint8*)(p) + (offset))) + +#define DRMP3_MAX_FREE_FORMAT_FRAME_SIZE 2304 /* more than ISO spec's */ +#ifndef DRMP3_MAX_FRAME_SYNC_MATCHES +#define DRMP3_MAX_FRAME_SYNC_MATCHES 10 +#endif + +#define DRMP3_MAX_L3_FRAME_PAYLOAD_BYTES DRMP3_MAX_FREE_FORMAT_FRAME_SIZE /* MUST be >= 320000/8/32000*1152 = 1440 */ + +#define DRMP3_MAX_BITRESERVOIR_BYTES 511 +#define DRMP3_SHORT_BLOCK_TYPE 2 +#define DRMP3_STOP_BLOCK_TYPE 3 +#define DRMP3_MODE_MONO 3 +#define DRMP3_MODE_JOINT_STEREO 1 +#define DRMP3_HDR_SIZE 4 /* The DRMP3_HDR_* macros that follow take h, a pointer to the header bytes, and test or extract bit fields of h[1], h[2] and h[3]. */ +#define DRMP3_HDR_IS_MONO(h) (((h[3]) & 0xC0) == 0xC0) +#define DRMP3_HDR_IS_MS_STEREO(h) (((h[3]) & 0xE0) == 0x60) +#define DRMP3_HDR_IS_FREE_FORMAT(h) (((h[2]) & 0xF0) == 0) +#define DRMP3_HDR_IS_CRC(h) (!((h[1]) & 1)) +#define DRMP3_HDR_TEST_PADDING(h) ((h[2]) & 0x2) +#define DRMP3_HDR_TEST_MPEG1(h) ((h[1]) & 0x8) +#define DRMP3_HDR_TEST_NOT_MPEG25(h) ((h[1]) & 0x10) +#define DRMP3_HDR_TEST_I_STEREO(h) 
((h[3]) & 0x10) +#define DRMP3_HDR_TEST_MS_STEREO(h) ((h[3]) & 0x20) +#define DRMP3_HDR_GET_STEREO_MODE(h) (((h[3]) >> 6) & 3) +#define DRMP3_HDR_GET_STEREO_MODE_EXT(h) (((h[3]) >> 4) & 3) +#define DRMP3_HDR_GET_LAYER(h) (((h[1]) >> 1) & 3) +#define DRMP3_HDR_GET_BITRATE(h) ((h[2]) >> 4) +#define DRMP3_HDR_GET_SAMPLE_RATE(h) (((h[2]) >> 2) & 3) +#define DRMP3_HDR_GET_MY_SAMPLE_RATE(h) (DRMP3_HDR_GET_SAMPLE_RATE(h) + (((h[1] >> 3) & 1) + ((h[1] >> 4) & 1))*3) +#define DRMP3_HDR_IS_FRAME_576(h) ((h[1] & 14) == 2) +#define DRMP3_HDR_IS_LAYER_1(h) ((h[1] & 6) == 6) + /* With DRMP3_BITS_DEQUANTIZER_OUT defined as -1, DRMP3_MAX_SCF evaluates to 255 - 4 - 210 = 41 and DRMP3_MAX_SCFI to 44 (41 + 3 with the two low bits cleared). */ +#define DRMP3_BITS_DEQUANTIZER_OUT -1 +#define DRMP3_MAX_SCF (255 + DRMP3_BITS_DEQUANTIZER_OUT*4 - 210) +#define DRMP3_MAX_SCFI ((DRMP3_MAX_SCF + 3) & ~3) + /* DRMP3_MIN and DRMP3_MAX evaluate one of their arguments twice, so the arguments should be free of side effects. */ +#define DRMP3_MIN(a, b) ((a) > (b) ? (b) : (a)) +#define DRMP3_MAX(a, b) ((a) < (b) ? (b) : (a)) + +#if !defined(DR_MP3_NO_SIMD) + +#if !defined(DR_MP3_ONLY_SIMD) && (defined(_M_X64) || defined(_M_ARM64) || defined(__x86_64__) || defined(__aarch64__)) +/* x64 always has SSE2 and arm64 always has NEON, so the generic (non-SIMD) code is not needed there. */ +#define DR_MP3_ONLY_SIMD +#endif + /* SSE2 backend: chosen for MSVC 1400 or newer on x86/x64, or for i386/x86_64 compilers that define __SSE2__. The DRMP3_V* macros map onto __m128 intrinsics: VMAC(a, x, y) is a + x*y, VMSB(a, x, y) is a - x*y, VMUL_S multiplies by a broadcast scalar, VREV reverses the four lanes. */ +#if ((defined(_MSC_VER) && _MSC_VER >= 1400) && (defined(_M_IX86) || defined(_M_X64))) || ((defined(__i386__) || defined(__x86_64__)) && defined(__SSE2__)) +#if defined(_MSC_VER) +#include <intrin.h> +#endif +#include <emmintrin.h> +#define DRMP3_HAVE_SSE 1 +#define DRMP3_HAVE_SIMD 1 +#define DRMP3_VSTORE _mm_storeu_ps +#define DRMP3_VLD _mm_loadu_ps +#define DRMP3_VSET _mm_set1_ps +#define DRMP3_VADD _mm_add_ps +#define DRMP3_VSUB _mm_sub_ps +#define DRMP3_VMUL _mm_mul_ps +#define DRMP3_VMAC(a, x, y) _mm_add_ps(a, _mm_mul_ps(x, y)) +#define DRMP3_VMSB(a, x, y) _mm_sub_ps(a, _mm_mul_ps(x, y)) +#define DRMP3_VMUL_S(x, s) _mm_mul_ps(x, _mm_set1_ps(s)) +#define DRMP3_VREV(x) _mm_shuffle_ps(x, x, _MM_SHUFFLE(0, 1, 2, 3)) +typedef __m128 drmp3_f4; /* drmp3_cpuid is the compiler's __cpuid when building with MSVC or when DR_MP3_ONLY_SIMD is defined; otherwise it is the inline-assembly helper defined next. */ +#if defined(_MSC_VER) || defined(DR_MP3_ONLY_SIMD) +#define drmp3_cpuid __cpuid +#else +static __inline__ __attribute__((always_inline)) void 
drmp3_cpuid(int CPUInfo[], const int InfoType) +{ +#if defined(__PIC__) + __asm__ __volatile__( +#if defined(__x86_64__) + "push %%rbx\n" + "cpuid\n" + "xchgl %%ebx, %1\n" + "pop %%rbx\n" +#else + "xchgl %%ebx, %1\n" + "cpuid\n" + "xchgl %%ebx, %1\n" +#endif + : "=a" (CPUInfo[0]), "=r" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3]) + : "a" (InfoType)); +#else + __asm__ __volatile__( + "cpuid" + : "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3]) + : "a" (InfoType)); +#endif +} +#endif +static int drmp3_have_simd() +{ +#ifdef DR_MP3_ONLY_SIMD + return 1; +#else + static int g_have_simd; + int CPUInfo[4]; +#ifdef MINIMP3_TEST + static int g_counter; + if (g_counter++ > 100) + return 0; +#endif + if (g_have_simd) + goto end; + drmp3_cpuid(CPUInfo, 0); + if (CPUInfo[0] > 0) + { + drmp3_cpuid(CPUInfo, 1); + g_have_simd = (CPUInfo[3] & (1 << 26)) + 1; /* SSE2 */ + return g_have_simd - 1; + } + +end: + return g_have_simd - 1; +#endif +} +#elif defined(__ARM_NEON) || defined(__aarch64__) +#include <arm_neon.h> +#define DRMP3_HAVE_SSE 0 +#define DRMP3_HAVE_SIMD 1 +#define DRMP3_VSTORE vst1q_f32 +#define DRMP3_VLD vld1q_f32 +#define DRMP3_VSET vmovq_n_f32 +#define DRMP3_VADD vaddq_f32 +#define DRMP3_VSUB vsubq_f32 +#define DRMP3_VMUL vmulq_f32 +#define DRMP3_VMAC(a, x, y) vmlaq_f32(a, x, y) +#define DRMP3_VMSB(a, x, y) vmlsq_f32(a, x, y) +#define DRMP3_VMUL_S(x, s) vmulq_f32(x, vmovq_n_f32(s)) +#define DRMP3_VREV(x) vcombine_f32(vget_high_f32(vrev64q_f32(x)), vget_low_f32(vrev64q_f32(x))) +typedef float32x4_t drmp3_f4; +static int drmp3_have_simd() +{ /* TODO: detect neon for !DR_MP3_ONLY_SIMD */ + return 1; +} +#else +#define DRMP3_HAVE_SSE 0 +#define DRMP3_HAVE_SIMD 0 +#ifdef DR_MP3_ONLY_SIMD +#error DR_MP3_ONLY_SIMD used, but SSE/NEON not enabled +#endif +#endif + +#else + +#define DRMP3_HAVE_SIMD 0 + +#endif + +typedef struct +{ + const drmp3_uint8 *buf; + int pos, limit; +} drmp3_bs; + +typedef struct +{ + float scf[3*64]; + drmp3_uint8 
total_bands, stereo_bands, bitalloc[64], scfcod[64]; +} drmp3_L12_scale_info; + /* One segment of a bit-allocation table: tab_offset is an offset into the code table, code_tab_width the number of bits read per code, band_count the number of consecutive bands that use the segment (see drmp3_L12_read_scale_info()). */ +typedef struct +{ + drmp3_uint8 tab_offset, code_tab_width, band_count; +} drmp3_L12_subband_alloc; + /* Side information for one gr[] entry; the fields are filled in by drmp3_L3_read_side_info(). */ +typedef struct +{ + const drmp3_uint8 *sfbtab; + drmp3_uint16 part_23_length, big_values, scalefac_compress; + drmp3_uint8 global_gain, block_type, mixed_block_flag, n_long_sfb, n_short_sfb; + drmp3_uint8 table_select[3], region_count[3], subblock_gain[3]; + drmp3_uint8 preflag, scalefac_scale, count1_table, scfsi; +} drmp3_L3_gr_info; + /* Working buffers for decoding. NOTE(review): the code that uses this struct is outside the part of the file reviewed here. */ +typedef struct +{ + drmp3_bs bs; + drmp3_uint8 maindata[DRMP3_MAX_BITRESERVOIR_BYTES + DRMP3_MAX_L3_FRAME_PAYLOAD_BYTES]; + drmp3_L3_gr_info gr_info[4]; + float grbuf[2][576], scf[40], syn[18 + 15][2*32]; + drmp3_uint8 ist_pos[2][39]; +} drmp3dec_scratch; + /* Points the bit reader at data, rewinds the position to 0 and sets the limit to bytes*8, i.e. the limit is expressed in bits. */ +static void drmp3_bs_init(drmp3_bs *bs, const drmp3_uint8 *data, int bytes) +{ + bs->buf = data; + bs->pos = 0; + bs->limit = bytes*8; +} + /* Reads the next n bits, most significant bit first, and returns them right-aligned. The position is advanced by n even when that moves it past the limit; in that case 0 is returned and the buffer is not read. */ +static drmp3_uint32 drmp3_bs_get_bits(drmp3_bs *bs, int n) +{ + drmp3_uint32 next, cache = 0, s = bs->pos & 7; + int shl = n + s; + const drmp3_uint8 *p = bs->buf + (bs->pos >> 3); + if ((bs->pos += n) > bs->limit) + return 0; + next = *p++ & (255 >> s); + while ((shl -= 8) > 0) + { + cache |= next << shl; + next = *p++; + } + return cache | (next >> -shl); +} + /* Returns non-zero when the 4 header bytes look valid: h[0] is 0xff, h[1] has its top four bits set or equals 0xe2 when bit 0 is ignored, the layer field is not 0, the bitrate field is not 15 and the sample-rate field is not 3. */ +static int drmp3_hdr_valid(const drmp3_uint8 *h) +{ + return h[0] == 0xff && + ((h[1] & 0xF0) == 0xf0 || (h[1] & 0xFE) == 0xe2) && + (DRMP3_HDR_GET_LAYER(h) != 0) && + (DRMP3_HDR_GET_BITRATE(h) != 15) && + (DRMP3_HDR_GET_SAMPLE_RATE(h) != 3); +} + /* Returns non-zero when h2 is a valid header that agrees with h1 in h[1] (bit 0 ignored), in the sample-rate field of h[2] (mask 0x0C) and in whether it is free-format. */ +static int drmp3_hdr_compare(const drmp3_uint8 *h1, const drmp3_uint8 *h2) +{ + return drmp3_hdr_valid(h2) && + ((h1[1] ^ h2[1]) & 0xFE) == 0 && + ((h1[2] ^ h2[2]) & 0x0C) == 0 && + !(DRMP3_HDR_IS_FREE_FORMAT(h1) ^ DRMP3_HDR_IS_FREE_FORMAT(h2)); +} + /* Returns the bitrate in kbps. The table stores half of each value and is indexed by [MPEG1 bit set][layer field - 1][bitrate field]; the result is twice the table entry, so bitrate field 0 yields 0. */ +static unsigned drmp3_hdr_bitrate_kbps(const drmp3_uint8 *h) +{ + static const drmp3_uint8 halfrate[2][3][15] = { + { { 0,4,8,12,16,20,24,28,32,40,48,56,64,72,80 }, { 
0,4,8,12,16,20,24,28,32,40,48,56,64,72,80 }, { 0,16,24,28,32,40,48,56,64,72,80,88,96,112,128 } }, + { { 0,16,20,24,28,32,40,48,56,64,80,96,112,128,160 }, { 0,16,24,28,32,40,48,56,64,80,96,112,128,160,192 }, { 0,16,32,48,64,80,96,112,128,144,160,176,192,208,224 } }, + }; + return 2*halfrate[!!DRMP3_HDR_TEST_MPEG1(h)][DRMP3_HDR_GET_LAYER(h) - 1][DRMP3_HDR_GET_BITRATE(h)]; +} + /* Returns the sample rate in Hz: the g_hz entry selected by the sample-rate field, halved when the MPEG1 bit is clear and halved again when the NOT_MPEG25 bit is clear. */ +static unsigned drmp3_hdr_sample_rate_hz(const drmp3_uint8 *h) +{ + static const unsigned g_hz[3] = { 44100, 48000, 32000 }; + return g_hz[DRMP3_HDR_GET_SAMPLE_RATE(h)] >> (int)!DRMP3_HDR_TEST_MPEG1(h) >> (int)!DRMP3_HDR_TEST_NOT_MPEG25(h); +} + /* Returns the number of samples per frame: 384 for Layer 1, otherwise 1152, or 576 when DRMP3_HDR_IS_FRAME_576(h) holds. */ +static unsigned drmp3_hdr_frame_samples(const drmp3_uint8 *h) +{ + return DRMP3_HDR_IS_LAYER_1(h) ? 384 : (1152 >> (int)DRMP3_HDR_IS_FRAME_576(h)); +} + /* Computes the frame size as samples * kbps * 125 / Hz; for Layer 1 the two low bits are cleared. When the result is 0 (bitrate field 0 gives 0 kbps) the caller-supplied free_format_size is returned instead. */ +static int drmp3_hdr_frame_bytes(const drmp3_uint8 *h, int free_format_size) +{ + int frame_bytes = drmp3_hdr_frame_samples(h)*drmp3_hdr_bitrate_kbps(h)*125/drmp3_hdr_sample_rate_hz(h); + if (DRMP3_HDR_IS_LAYER_1(h)) + { + frame_bytes &= ~3; /* slot align */ + } + return frame_bytes ? frame_bytes : free_format_size; +} + /* Returns the padding amount: 0 when the padding bit is clear, otherwise 4 for Layer 1 and 1 for the other layers. */ +static int drmp3_hdr_padding(const drmp3_uint8 *h) +{ + return DRMP3_HDR_TEST_PADDING(h) ? (DRMP3_HDR_IS_LAYER_1(h) ? 4 : 1) : 0; +} + /* Layer 1/2 support, compiled out when DR_MP3_ONLY_MP3 is defined. drmp3_L12_subband_alloc_table() picks the allocation table for the header and stores total_bands and stereo_bands (never larger than total_bands) in sci. stereo_bands starts as 0 for mono, (mode extension << 2) + 4 for joint stereo and 32 otherwise. */ +#ifndef DR_MP3_ONLY_MP3 +static const drmp3_L12_subband_alloc *drmp3_L12_subband_alloc_table(const drmp3_uint8 *hdr, drmp3_L12_scale_info *sci) +{ + const drmp3_L12_subband_alloc *alloc; + int mode = DRMP3_HDR_GET_STEREO_MODE(hdr); + int nbands, stereo_bands = (mode == DRMP3_MODE_MONO) ? 0 : (mode == DRMP3_MODE_JOINT_STEREO) ? 
(DRMP3_HDR_GET_STEREO_MODE_EXT(hdr) << 2) + 4 : 32; + /* Three cases: Layer 1 (32 bands), Layer 2 without the MPEG1 bit (30 bands), and Layer 2 with the MPEG1 bit, where the table and band count depend on the bitrate (halved unless mono) and on the sample-rate field. */ + if (DRMP3_HDR_IS_LAYER_1(hdr)) + { + static const drmp3_L12_subband_alloc g_alloc_L1[] = { { 76, 4, 32 } }; + alloc = g_alloc_L1; + nbands = 32; + } else if (!DRMP3_HDR_TEST_MPEG1(hdr)) + { + static const drmp3_L12_subband_alloc g_alloc_L2M2[] = { { 60, 4, 4 }, { 44, 3, 7 }, { 44, 2, 19 } }; + alloc = g_alloc_L2M2; + nbands = 30; + } else + { + static const drmp3_L12_subband_alloc g_alloc_L2M1[] = { { 0, 4, 3 }, { 16, 4, 8 }, { 32, 3, 12 }, { 40, 2, 7 } }; + int sample_rate_idx = DRMP3_HDR_GET_SAMPLE_RATE(hdr); + unsigned kbps = drmp3_hdr_bitrate_kbps(hdr) >> (int)(mode != DRMP3_MODE_MONO); + if (!kbps) /* free-format */ + { + kbps = 192; + } + /* Default table with 27 bands; below 56 kbps a low-rate table is used instead, and at 96 kbps or more with a sample-rate field other than 1 the band count is raised to 30. */ + alloc = g_alloc_L2M1; + nbands = 27; + if (kbps < 56) + { + static const drmp3_L12_subband_alloc g_alloc_L2M1_lowrate[] = { { 44, 4, 2 }, { 44, 3, 10 } }; + alloc = g_alloc_L2M1_lowrate; + nbands = sample_rate_idx == 2 ? 12 : 8; + } else if (kbps >= 96 && sample_rate_idx != 1) + { + nbands = 30; + } + } + /* Publish the band counts; stereo_bands is limited to the total. */ + sci->total_bands = (drmp3_uint8)nbands; + sci->stereo_bands = (drmp3_uint8)DRMP3_MIN(stereo_bands, nbands); + + return alloc; +} + /* For each of the `bands` entries, writes three floats to scf. A 3-bit mask derived from the entry's scfcod value decides which of the three are read as a 6-bit index from the stream; where a mask bit is clear the previous value is repeated. Entries whose allocation (*pba) is 0 get three zeros. */ +static void drmp3_L12_read_scalefactors(drmp3_bs *bs, drmp3_uint8 *pba, drmp3_uint8 *scfcod, int bands, float *scf) +{ + static const float g_deq_L12[18*3] = { +#define DRMP3_DQ(x) 9.53674316e-07f/x, 7.56931807e-07f/x, 6.00777173e-07f/x + DRMP3_DQ(3),DRMP3_DQ(7),DRMP3_DQ(15),DRMP3_DQ(31),DRMP3_DQ(63),DRMP3_DQ(127),DRMP3_DQ(255),DRMP3_DQ(511),DRMP3_DQ(1023),DRMP3_DQ(2047),DRMP3_DQ(4095),DRMP3_DQ(8191),DRMP3_DQ(16383),DRMP3_DQ(32767),DRMP3_DQ(65535),DRMP3_DQ(3),DRMP3_DQ(5),DRMP3_DQ(9) + }; + int i, m; + for (i = 0; i < bands; i++) + { + float s = 0; + int ba = *pba++; + int mask = ba ? 
4 + ((19 >> scfcod[i]) & 3) : 0; + for (m = 4; m; m >>= 1) + { + if (mask & m) + { + int b = drmp3_bs_get_bits(bs, 6); + s = g_deq_L12[ba*3 - 6 + b % 3]*(1 << 21 >> b/3); + } + *scf++ = s; + } + } +} + /* Reads the Layer 1/2 scale information into sci: first the bit allocations (two entries per band, at indices 2*i and 2*i + 1), then the scalefactor selection codes, then the scalefactors themselves. For bands at or above stereo_bands the second entry repeats the first while the scalefactors are read (or is 0 when stereo_bands is 0) and is cleared afterwards. */ +static void drmp3_L12_read_scale_info(const drmp3_uint8 *hdr, drmp3_bs *bs, drmp3_L12_scale_info *sci) +{ + static const drmp3_uint8 g_bitalloc_code_tab[] = { + 0,17, 3, 4, 5,6,7, 8,9,10,11,12,13,14,15,16, + 0,17,18, 3,19,4,5, 6,7, 8, 9,10,11,12,13,16, + 0,17,18, 3,19,4,5,16, + 0,17,18,16, + 0,17,18,19, 4,5,6, 7,8, 9,10,11,12,13,14,15, + 0,17,18, 3,19,4,5, 6,7, 8, 9,10,11,12,13,14, + 0, 2, 3, 4, 5,6,7, 8,9,10,11,12,13,14,15,16 + }; + const drmp3_L12_subband_alloc *subband_alloc = drmp3_L12_subband_alloc_table(hdr, sci); + /* k is the band index at which the next allocation segment starts; ba_bits and ba_code_tab describe the segment currently in use. */ + int i, k = 0, ba_bits = 0; + const drmp3_uint8 *ba_code_tab = g_bitalloc_code_tab; + /* Bit allocations: switch to the next segment whenever band i reaches k, then translate each code read from the stream through the code table. */ + for (i = 0; i < sci->total_bands; i++) + { + drmp3_uint8 ba; + if (i == k) + { + k += subband_alloc->band_count; + ba_bits = subband_alloc->code_tab_width; + ba_code_tab = g_bitalloc_code_tab + subband_alloc->tab_offset; + subband_alloc++; + } + ba = ba_code_tab[drmp3_bs_get_bits(bs, ba_bits)]; + sci->bitalloc[2*i] = ba; + if (i < sci->stereo_bands) + { + ba = ba_code_tab[drmp3_bs_get_bits(bs, ba_bits)]; + } + sci->bitalloc[2*i + 1] = sci->stereo_bands ? ba : 0; + } + /* Scalefactor selection codes: 6 when nothing is allocated, 2 for Layer 1, otherwise a 2-bit code read from the stream. */ + for (i = 0; i < 2*sci->total_bands; i++) + { + sci->scfcod[i] = (drmp3_uint8)(sci->bitalloc[i] ? DRMP3_HDR_IS_LAYER_1(hdr) ? 
2 : drmp3_bs_get_bits(bs, 2) : 6); + } + + drmp3_L12_read_scalefactors(bs, sci->bitalloc, sci->scfcod, sci->total_bands*2, sci->scf); + /* Clear the second allocation entry of every band at or above stereo_bands. */ + for (i = sci->stereo_bands; i < sci->total_bands; i++) + { + sci->bitalloc[2*i + 1] = 0; + } +} + /* Reads four groups of samples (j = 0..3) into grbuf, group j starting at grbuf + group_size*j. For every allocation entry that is non-zero, group_size samples are read: allocations below 17 read one ba-bit value per sample and subtract (1 << (ba - 1)) - 1; larger allocations read a single grouped code and split it into base-mod digits (mod is 3, 5 or 9), each reduced by mod/2. The destination alternates between an offset of +576 and -558 (18 - 576), so the two entries of a band lie 576 floats apart and consecutive bands 18 floats apart. Returns group_size*4. */ +static int drmp3_L12_dequantize_granule(float *grbuf, drmp3_bs *bs, drmp3_L12_scale_info *sci, int group_size) +{ + int i, j, k, choff = 576; + for (j = 0; j < 4; j++) + { + float *dst = grbuf + group_size*j; + for (i = 0; i < 2*sci->total_bands; i++) + { + int ba = sci->bitalloc[i]; + if (ba != 0) + { + if (ba < 17) + { + int half = (1 << (ba - 1)) - 1; + for (k = 0; k < group_size; k++) + { + dst[k] = (float)((int)drmp3_bs_get_bits(bs, ba) - half); + } + } else + { + unsigned mod = (2 << (ba - 17)) + 1; /* 3, 5, 9 */ + unsigned code = drmp3_bs_get_bits(bs, mod + 2 - (mod >> 3)); /* 5, 7, 10 */ + for (k = 0; k < group_size; k++, code /= mod) + { + dst[k] = (float)((int)(code % mod - mod/2)); + } + } + } + dst += choff; + choff = 18 - choff; + } + } + return group_size*4; +} + /* Copies the bands at or above stereo_bands from the first region of dst to the region 576 floats later, then scales 12 samples per band: the first region by scf[0] and the second by scf[3]. dst advances by 18 floats and scf by 6 entries per band. */ +static void drmp3_L12_apply_scf_384(drmp3_L12_scale_info *sci, const float *scf, float *dst) +{ + int i, k; + memcpy(dst + 576 + sci->stereo_bands*18, dst + sci->stereo_bands*18, (sci->total_bands - sci->stereo_bands)*18*sizeof(float)); + for (i = 0; i < sci->total_bands; i++, dst += 18, scf += 6) + { + for (k = 0; k < 12; k++) + { + dst[k + 0] *= scf[0]; + dst[k + 576] *= scf[3]; + } + } +} +#endif + /* Parses the Layer 3 side information from bs into consecutive gr[] entries: 1 (mono) or 2 entries, doubled when the MPEG1 bit is set. Returns main_data_begin on success. Returns -1 when a big_values field exceeds 288, when a window-switching entry has block_type 0, or when the summed part_23_length plus the bits consumed so far exceeds the limit plus main_data_begin*8. The tables below are indexed by sr_idx and give the band layout for long, short and mixed blocks. */ +static int drmp3_L3_read_side_info(drmp3_bs *bs, drmp3_L3_gr_info *gr, const drmp3_uint8 *hdr) +{ + static const drmp3_uint8 g_scf_long[8][23] = { + { 6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54,0 }, + { 12,12,12,12,12,12,16,20,24,28,32,40,48,56,64,76,90,2,2,2,2,2,0 }, + { 6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54,0 }, + { 6,6,6,6,6,6,8,10,12,14,16,18,22,26,32,38,46,54,62,70,76,36,0 }, + { 6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54,0 }, + { 4,4,4,4,4,4,6,6,8,8,10,12,16,20,24,28,34,42,50,54,76,158,0 }, + { 
4,4,4,4,4,4,6,6,6,8,10,12,16,18,22,28,34,40,46,54,54,192,0 }, + { 4,4,4,4,4,4,6,6,8,10,12,16,20,24,30,38,46,56,68,84,102,26,0 } + }; + static const drmp3_uint8 g_scf_short[8][40] = { + { 4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 }, + { 8,8,8,8,8,8,8,8,8,12,12,12,16,16,16,20,20,20,24,24,24,28,28,28,36,36,36,2,2,2,2,2,2,2,2,2,26,26,26,0 }, + { 4,4,4,4,4,4,4,4,4,6,6,6,6,6,6,8,8,8,10,10,10,14,14,14,18,18,18,26,26,26,32,32,32,42,42,42,18,18,18,0 }, + { 4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,32,32,32,44,44,44,12,12,12,0 }, + { 4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 }, + { 4,4,4,4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,22,22,22,30,30,30,56,56,56,0 }, + { 4,4,4,4,4,4,4,4,4,4,4,4,6,6,6,6,6,6,10,10,10,12,12,12,14,14,14,16,16,16,20,20,20,26,26,26,66,66,66,0 }, + { 4,4,4,4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,12,12,12,16,16,16,20,20,20,26,26,26,34,34,34,42,42,42,12,12,12,0 } + }; + static const drmp3_uint8 g_scf_mixed[8][40] = { + { 6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 }, + { 12,12,12,4,4,4,8,8,8,12,12,12,16,16,16,20,20,20,24,24,24,28,28,28,36,36,36,2,2,2,2,2,2,2,2,2,26,26,26,0 }, + { 6,6,6,6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,14,14,14,18,18,18,26,26,26,32,32,32,42,42,42,18,18,18,0 }, + { 6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,32,32,32,44,44,44,12,12,12,0 }, + { 6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 }, + { 4,4,4,4,4,4,6,6,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,22,22,22,30,30,30,56,56,56,0 }, + { 4,4,4,4,4,4,6,6,4,4,4,6,6,6,6,6,6,10,10,10,12,12,12,14,14,14,16,16,16,20,20,20,26,26,26,66,66,66,0 }, + { 4,4,4,4,4,4,6,6,4,4,4,6,6,6,8,8,8,12,12,12,16,16,16,20,20,20,26,26,26,34,34,34,42,42,42,12,12,12,0 } + }; + + unsigned tables, scfsi = 0; + int 
main_data_begin, part_23_sum = 0; + int gr_count = DRMP3_HDR_IS_MONO(hdr) ? 1 : 2; + int sr_idx = DRMP3_HDR_GET_MY_SAMPLE_RATE(hdr); sr_idx -= (sr_idx != 0); + /* With the MPEG1 bit set: twice as many entries, a 9-bit main_data_begin, and 7 + gr_count further bits kept in scfsi. Otherwise main_data_begin is what remains of an (8 + gr_count)-bit read after dropping its gr_count low bits, and scfsi stays 0. */ + if (DRMP3_HDR_TEST_MPEG1(hdr)) + { + gr_count *= 2; + main_data_begin = drmp3_bs_get_bits(bs, 9); + scfsi = drmp3_bs_get_bits(bs, 7 + gr_count); + } else + { + main_data_begin = drmp3_bs_get_bits(bs, 8 + gr_count) >> gr_count; + } + /* One iteration per gr[] entry. Each entry starts out configured for long blocks (g_scf_long, 22 long bands, no short bands) and is adjusted below when the window-switching bit is set. */ + do + { + if (DRMP3_HDR_IS_MONO(hdr)) + { + scfsi <<= 4; + } + gr->part_23_length = (drmp3_uint16)drmp3_bs_get_bits(bs, 12); + part_23_sum += gr->part_23_length; + gr->big_values = (drmp3_uint16)drmp3_bs_get_bits(bs, 9); + if (gr->big_values > 288) + { + return -1; + } + gr->global_gain = (drmp3_uint8)drmp3_bs_get_bits(bs, 8); + gr->scalefac_compress = (drmp3_uint16)drmp3_bs_get_bits(bs, DRMP3_HDR_TEST_MPEG1(hdr) ? 4 : 9); + gr->sfbtab = g_scf_long[sr_idx]; + gr->n_long_sfb = 22; + gr->n_short_sfb = 0; + if (drmp3_bs_get_bits(bs, 1)) + { + gr->block_type = (drmp3_uint8)drmp3_bs_get_bits(bs, 2); + if (!gr->block_type) + { + return -1; + } + gr->mixed_block_flag = (drmp3_uint8)drmp3_bs_get_bits(bs, 1); + gr->region_count[0] = 7; + gr->region_count[1] = 255; + if (gr->block_type == DRMP3_SHORT_BLOCK_TYPE) + { + scfsi &= 0x0F0F; + if (!gr->mixed_block_flag) + { + gr->region_count[0] = 8; + gr->sfbtab = g_scf_short[sr_idx]; + gr->n_long_sfb = 0; + gr->n_short_sfb = 39; + } else + { + gr->sfbtab = g_scf_mixed[sr_idx]; + gr->n_long_sfb = DRMP3_HDR_TEST_MPEG1(hdr) ? 
8 : 6; + gr->n_short_sfb = 30; + } + } + tables = drmp3_bs_get_bits(bs, 10); + tables <<= 5; + gr->subblock_gain[0] = (drmp3_uint8)drmp3_bs_get_bits(bs, 3); + gr->subblock_gain[1] = (drmp3_uint8)drmp3_bs_get_bits(bs, 3); + gr->subblock_gain[2] = (drmp3_uint8)drmp3_bs_get_bits(bs, 3); + } else + { + gr->block_type = 0; + gr->mixed_block_flag = 0; + tables = drmp3_bs_get_bits(bs, 15); + gr->region_count[0] = (drmp3_uint8)drmp3_bs_get_bits(bs, 4); + gr->region_count[1] = (drmp3_uint8)drmp3_bs_get_bits(bs, 3); + gr->region_count[2] = 255; + } + gr->table_select[0] = (drmp3_uint8)(tables >> 10); + gr->table_select[1] = (drmp3_uint8)((tables >> 5) & 31); + gr->table_select[2] = (drmp3_uint8)((tables) & 31); + gr->preflag = (drmp3_uint8)(DRMP3_HDR_TEST_MPEG1(hdr) ? drmp3_bs_get_bits(bs, 1) : (gr->scalefac_compress >= 500)); + gr->scalefac_scale = (drmp3_uint8)drmp3_bs_get_bits(bs, 1); + gr->count1_table = (drmp3_uint8)drmp3_bs_get_bits(bs, 1); + gr->scfsi = (drmp3_uint8)((scfsi >> 12) & 15); + scfsi <<= 4; + gr++; + } while(--gr_count); + /* Reject the frame when the summed part_23_length plus the bits consumed so far exceeds the limit plus main_data_begin*8. */ + if (part_23_sum + bs->pos > bs->limit + main_data_begin*8) + { + return -1; + } + + return main_data_begin; +} + /* Fills scf and ist_pos for up to four partitions, stopping at the first partition whose scf_count is 0. When bit 3 of scfsi is set the partition is copied from ist_pos into scf. Otherwise a zero scf_size zeroes both arrays for the partition, and a non-zero size reads cnt values of that many bits into both; if scfsi is negative, a value equal to the all-ones maximum is stored in ist_pos as (drmp3_uint8)-1. scfsi is doubled after each partition, and three zero bytes are written after the last scf entry. */ +static void drmp3_L3_read_scalefactors(drmp3_uint8 *scf, drmp3_uint8 *ist_pos, const drmp3_uint8 *scf_size, const drmp3_uint8 *scf_count, drmp3_bs *bitbuf, int scfsi) +{ + int i, k; + for (i = 0; i < 4 && scf_count[i]; i++, scfsi *= 2) + { + int cnt = scf_count[i]; + if (scfsi & 8) + { + memcpy(scf, ist_pos, cnt); + } else + { + int bits = scf_size[i]; + if (!bits) + { + memset(scf, 0, cnt); + memset(ist_pos, 0, cnt); + } else + { + int max_scf = (scfsi < 0) ? (1 << bits) - 1 : -1; + for (k = 0; k < cnt; k++) + { + int s = drmp3_bs_get_bits(bitbuf, bits); + ist_pos[k] = (drmp3_uint8)(s == max_scf ? 
-1 : s); + scf[k] = (drmp3_uint8)s; + } + } + } + ist_pos += cnt; + scf += cnt; + } + scf[0] = scf[1] = scf[2] = 0; +} + /* Multiplies y by 2^(-exp_q2/4). Each g_expfrac[k] is 2^-30 * 2^(-k/4), and the integer factor (1 << 30 >> (e >> 2)) supplies the whole powers of two. At most 30*4 quarter-steps are applied per loop iteration. NOTE(review): assumes exp_q2 >= 0, since a negative value would produce a negative shift count; confirm against the callers. */ +static float drmp3_L3_ldexp_q2(float y, int exp_q2) +{ + static const float g_expfrac[4] = { 9.31322575e-10f,7.83145814e-10f,6.58544508e-10f,5.53767716e-10f }; + int e; + do + { + e = DRMP3_MIN(30*4, exp_q2); + y *= g_expfrac[e & 3]*(1 << 30 >> (e >> 2)); + } while ((exp_q2 -= e) > 0); + return y; +} + /* Decodes the scalefactors of one gr entry into scf[] as floats. With the MPEG1 bit set, the four partition sizes come from g_scfc_decode indexed by scalefac_compress; otherwise they are derived from scalefac_compress by repeated division using g_mod, and scfsi is forced to -16. After reading the integer scalefactors, the subblock gains (short blocks) or the g_preamp values (preflag) are added, and each entry is converted with drmp3_L3_ldexp_q2() relative to a gain computed from global_gain. */ +static void drmp3_L3_decode_scalefactors(const drmp3_uint8 *hdr, drmp3_uint8 *ist_pos, drmp3_bs *bs, const drmp3_L3_gr_info *gr, float *scf, int ch) +{ + static const drmp3_uint8 g_scf_partitions[3][28] = { + { 6,5,5, 5,6,5,5,5,6,5, 7,3,11,10,0,0, 7, 7, 7,0, 6, 6,6,3, 8, 8,5,0 }, + { 8,9,6,12,6,9,9,9,6,9,12,6,15,18,0,0, 6,15,12,0, 6,12,9,6, 6,18,9,0 }, + { 9,9,6,12,9,9,9,9,9,9,12,6,18,18,0,0,12,12,12,0,12, 9,9,6,15,12,9,0 } + }; + const drmp3_uint8 *scf_partition = g_scf_partitions[!!gr->n_short_sfb + !gr->n_long_sfb]; + drmp3_uint8 scf_size[4], iscf[40]; + int i, scf_shift = gr->scalefac_scale + 1, gain_exp, scfsi = gr->scfsi; + float gain; + /* Work out the bit width of each of the four scalefactor partitions. */ + if (DRMP3_HDR_TEST_MPEG1(hdr)) + { + static const drmp3_uint8 g_scfc_decode[16] = { 0,1,2,3, 12,5,6,7, 9,10,11,13, 14,15,18,19 }; + int part = g_scfc_decode[gr->scalefac_compress]; + scf_size[1] = scf_size[0] = (drmp3_uint8)(part >> 2); + scf_size[3] = scf_size[2] = (drmp3_uint8)(part & 3); + } else + { + static const drmp3_uint8 g_mod[6*4] = { 5,5,4,4,5,5,4,1,4,3,1,1,5,6,6,1,4,4,4,1,4,3,1,1 }; + int k, modprod, sfc, ist = DRMP3_HDR_TEST_I_STEREO(hdr) && ch; + sfc = gr->scalefac_compress >> ist; + for (k = ist*3*4; sfc >= 0; sfc -= modprod, k += 4) + { + for (modprod = 1, i = 3; i >= 0; i--) + { + scf_size[i] = (drmp3_uint8)(sfc / modprod % g_mod[k + i]); + modprod *= g_mod[k + i]; + } + } + scf_partition += k; + scfsi = -16; + } + drmp3_L3_read_scalefactors(iscf, ist_pos, scf_size, scf_partition, bs, scfsi); + /* Short blocks: add the per-window subblock gain, shifted by 3 - scf_shift, to every short-band entry. */ + if (gr->n_short_sfb) + { + int sh = 3 - scf_shift; + for (i = 0; i < gr->n_short_sfb; i += 3) 
+ { + iscf[gr->n_long_sfb + i + 0] += gr->subblock_gain[0] << sh; + iscf[gr->n_long_sfb + i + 1] += gr->subblock_gain[1] << sh; + iscf[gr->n_long_sfb + i + 2] += gr->subblock_gain[2] << sh; + } + } else if (gr->preflag) + { + static const drmp3_uint8 g_preamp[10] = { 1,1,1,1,2,2,3,3,3,2 }; + for (i = 0; i < 10; i++) + { + iscf[11 + i] += g_preamp[i]; + } + } + /* Overall gain exponent in quarter-steps; 2 is subtracted for MS stereo. Each output scalefactor is the gain scaled down by iscf[i] << scf_shift quarter-steps. */ + gain_exp = gr->global_gain + DRMP3_BITS_DEQUANTIZER_OUT*4 - 210 - (DRMP3_HDR_IS_MS_STEREO(hdr) ? 2 : 0); + gain = drmp3_L3_ldexp_q2(1 << (DRMP3_MAX_SCFI/4), DRMP3_MAX_SCFI - gain_exp); + for (i = 0; i < (int)(gr->n_long_sfb + gr->n_short_sfb); i++) + { + scf[i] = drmp3_L3_ldexp_q2(gain, iscf[i] << scf_shift); + } +} + /* Lookup table read by drmp3_L3_pow_43() as g_drmp3_pow43[16 + x]. The first 16 entries are the negated copies of the entries for 0..15; they are followed by the entries for 0..128. The values match x^(4/3), e.g. 8 -> 16, 27 -> 81, 64 -> 256, 125 -> 625. */ +static const float g_drmp3_pow43[129 + 16] = { + 0,-1,-2.519842f,-4.326749f,-6.349604f,-8.549880f,-10.902724f,-13.390518f,-16.000000f,-18.720754f,-21.544347f,-24.463781f,-27.473142f,-30.567351f,-33.741992f,-36.993181f, + 0,1,2.519842f,4.326749f,6.349604f,8.549880f,10.902724f,13.390518f,16.000000f,18.720754f,21.544347f,24.463781f,27.473142f,30.567351f,33.741992f,36.993181f,40.317474f,43.711787f,47.173345f,50.699631f,54.288352f,57.937408f,61.644865f,65.408941f,69.227979f,73.100443f,77.024898f,81.000000f,85.024491f,89.097188f,93.216975f,97.382800f,101.593667f,105.848633f,110.146801f,114.487321f,118.869381f,123.292209f,127.755065f,132.257246f,136.798076f,141.376907f,145.993119f,150.646117f,155.335327f,160.060199f,164.820202f,169.614826f,174.443577f,179.305980f,184.201575f,189.129918f,194.090580f,199.083145f,204.107210f,209.162385f,214.248292f,219.364564f,224.510845f,229.686789f,234.892058f,240.126328f,245.389280f,250.680604f,256.000000f,261.347174f,266.721841f,272.123723f,277.552547f,283.008049f,288.489971f,293.998060f,299.532071f,305.091761f,310.676898f,316.287249f,321.922592f,327.582707f,333.267377f,338.976394f,344.709550f,350.466646f,356.247482f,362.051866f,367.879608f,373.730522f,379.604427f,385.501143f,391.420496f,397.362314f,403.326427f,409.312672f,415.320884f,421.350905f,427.402579f,433.475
750f,439.570269f,445.685987f,451.822757f,457.980436f,464.158883f,470.357960f,476.577530f,482.817459f,489.077615f,495.357868f,501.658090f,507.978156f,514.317941f,520.677324f,527.056184f,533.454404f,539.871867f,546.308458f,552.764065f,559.238575f,565.731879f,572.243870f,578.774440f,585.323483f,591.890898f,598.476581f,605.080431f,611.702349f,618.342238f,625.000000f,631.675540f,638.368763f,645.079578f +}; + /* Returns an approximation of x^(4/3). Values below 129 come straight from g_drmp3_pow43[16 + x]. Larger values are looked up at the nearest multiple of 64 and corrected with the second-order series 1 + (4/3)*frac + (2/9)*frac^2, where frac is the remainder relative to that multiple. For x below 1024, x is first multiplied by 8 and the result scaled by 16; otherwise the result is scaled by 256. NOTE(review): the 16-entry negative prefix of the table suggests x may be as low as -16; confirm against the callers. */ +static float drmp3_L3_pow_43(int x) +{ + float frac; + int sign, mult = 256; + + if (x < 129) + { + return g_drmp3_pow43[16 + x]; + } + + if (x < 1024) + { + mult = 16; + x <<= 3; + } + /* sign is 64 when bit 5 of x is set, so (x + sign) >> 6 rounds x/64 to the nearest integer. */ + sign = 2*x & 64; + frac = (float)((x & 63) - sign) / ((x & ~63) + sign); + return g_drmp3_pow43[16 + ((x + sign) >> 6)]*(1.f + frac*((4.f/3) + frac*(2.f/9)))*mult; +} + +static void drmp3_L3_huffman(float *dst, drmp3_bs *bs, const drmp3_L3_gr_info *gr_info, const float *scf, int layer3gr_limit) +{ + static const drmp3_int16 tabs[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 785,785,785,785,784,784,784,784,513,513,513,513,513,513,513,513,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256, + -255,1313,1298,1282,785,785,785,785,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,290,288, + -255,1313,1298,1282,769,769,769,769,529,529,529,529,529,529,529,529,528,528,528,528,528,528,528,528,512,512,512,512,512,512,512,512,290,288, + -253,-318,-351,-367,785,785,785,785,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,819,818,547,547,275,275,275,275,561,560,515,546,289,274,288,258, + -254,-287,1329,1299,1314,1312,1057,1057,1042,1042,1026,1026,784,784,784,784,529,529,529,529,529,529,529,529,769,769,769,769,768,768,768,768,563,560,306,306,291,259, 
-252,-413,-477,-542,1298,-575,1041,1041,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-383,-399,1107,1092,1106,1061,849,849,789,789,1104,1091,773,773,1076,1075,341,340,325,309,834,804,577,577,532,532,516,516,832,818,803,816,561,561,531,531,515,546,289,289,288,258, + -252,-429,-493,-559,1057,1057,1042,1042,529,529,529,529,529,529,529,529,784,784,784,784,769,769,769,769,512,512,512,512,512,512,512,512,-382,1077,-415,1106,1061,1104,849,849,789,789,1091,1076,1029,1075,834,834,597,581,340,340,339,324,804,833,532,532,832,772,818,803,817,787,816,771,290,290,290,290,288,258, + -253,-349,-414,-447,-463,1329,1299,-479,1314,1312,1057,1057,1042,1042,1026,1026,785,785,785,785,784,784,784,784,769,769,769,769,768,768,768,768,-319,851,821,-335,836,850,805,849,341,340,325,336,533,533,579,579,564,564,773,832,578,548,563,516,321,276,306,291,304,259, + -251,-572,-733,-830,-863,-879,1041,1041,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-511,-527,-543,1396,1351,1381,1366,1395,1335,1380,-559,1334,1138,1138,1063,1063,1350,1392,1031,1031,1062,1062,1364,1363,1120,1120,1333,1348,881,881,881,881,375,374,359,373,343,358,341,325,791,791,1123,1122,-703,1105,1045,-719,865,865,790,790,774,774,1104,1029,338,293,323,308,-799,-815,833,788,772,818,803,816,322,292,307,320,561,531,515,546,289,274,288,258, + -251,-525,-605,-685,-765,-831,-846,1298,1057,1057,1312,1282,785,785,785,785,784,784,784,784,769,769,769,769,512,512,512,512,512,512,512,512,1399,1398,1383,1367,1382,1396,1351,-511,1381,1366,1139,1139,1079,1079,1124,1124,1364,1349,1363,1333,882,882,882,882,807,807,807,807,1094,1094,1136,1136,373,341,535,535,881,775,867,822,774,-591,324,338,-671,849,550,550,866,864,609,609,293,336,534,534,789,835,773,-751,834,804,308,307,833,788,832,772,562,562,547,547,305,275,560,515,290,290, + 
-252,-397,-477,-557,-622,-653,-719,-735,-750,1329,1299,1314,1057,1057,1042,1042,1312,1282,1024,1024,785,785,785,785,784,784,784,784,769,769,769,769,-383,1127,1141,1111,1126,1140,1095,1110,869,869,883,883,1079,1109,882,882,375,374,807,868,838,881,791,-463,867,822,368,263,852,837,836,-543,610,610,550,550,352,336,534,534,865,774,851,821,850,805,593,533,579,564,773,832,578,578,548,548,577,577,307,276,306,291,516,560,259,259, + -250,-2107,-2507,-2764,-2909,-2974,-3007,-3023,1041,1041,1040,1040,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-767,-1052,-1213,-1277,-1358,-1405,-1469,-1535,-1550,-1582,-1614,-1647,-1662,-1694,-1726,-1759,-1774,-1807,-1822,-1854,-1886,1565,-1919,-1935,-1951,-1967,1731,1730,1580,1717,-1983,1729,1564,-1999,1548,-2015,-2031,1715,1595,-2047,1714,-2063,1610,-2079,1609,-2095,1323,1323,1457,1457,1307,1307,1712,1547,1641,1700,1699,1594,1685,1625,1442,1442,1322,1322,-780,-973,-910,1279,1278,1277,1262,1276,1261,1275,1215,1260,1229,-959,974,974,989,989,-943,735,478,478,495,463,506,414,-1039,1003,958,1017,927,942,987,957,431,476,1272,1167,1228,-1183,1256,-1199,895,895,941,941,1242,1227,1212,1135,1014,1014,490,489,503,487,910,1013,985,925,863,894,970,955,1012,847,-1343,831,755,755,984,909,428,366,754,559,-1391,752,486,457,924,997,698,698,983,893,740,740,908,877,739,739,667,667,953,938,497,287,271,271,683,606,590,712,726,574,302,302,738,736,481,286,526,725,605,711,636,724,696,651,589,681,666,710,364,467,573,695,466,466,301,465,379,379,709,604,665,679,316,316,634,633,436,436,464,269,424,394,452,332,438,363,347,408,393,448,331,422,362,407,392,421,346,406,391,376,375,359,1441,1306,-2367,1290,-2383,1337,-2399,-2415,1426,1321,-2431,1411,1336,-2447,-2463,-2479,1169,1169,1049,1049,1424,1289,1412,1352,1319,-2495,1154,1154,1064,1064,1153,1153,416,390,360,404,403,389,344,374,373,343,358,372,327,357,342,311,356,326,1395,1394,1137,1137,1047,1047,1365,1392,1287,1379,1334,1364,1349,1378,1318,1363,792,792,792,792,1152,1152,1032,1032,1121,1
121,1046,1046,1120,1120,1030,1030,-2895,1106,1061,1104,849,849,789,789,1091,1076,1029,1090,1060,1075,833,833,309,324,532,532,832,772,818,803,561,561,531,560,515,546,289,274,288,258, + -250,-1179,-1579,-1836,-1996,-2124,-2253,-2333,-2413,-2477,-2542,-2574,-2607,-2622,-2655,1314,1313,1298,1312,1282,785,785,785,785,1040,1040,1025,1025,768,768,768,768,-766,-798,-830,-862,-895,-911,-927,-943,-959,-975,-991,-1007,-1023,-1039,-1055,-1070,1724,1647,-1103,-1119,1631,1767,1662,1738,1708,1723,-1135,1780,1615,1779,1599,1677,1646,1778,1583,-1151,1777,1567,1737,1692,1765,1722,1707,1630,1751,1661,1764,1614,1736,1676,1763,1750,1645,1598,1721,1691,1762,1706,1582,1761,1566,-1167,1749,1629,767,766,751,765,494,494,735,764,719,749,734,763,447,447,748,718,477,506,431,491,446,476,461,505,415,430,475,445,504,399,460,489,414,503,383,474,429,459,502,502,746,752,488,398,501,473,413,472,486,271,480,270,-1439,-1455,1357,-1471,-1487,-1503,1341,1325,-1519,1489,1463,1403,1309,-1535,1372,1448,1418,1476,1356,1462,1387,-1551,1475,1340,1447,1402,1386,-1567,1068,1068,1474,1461,455,380,468,440,395,425,410,454,364,467,466,464,453,269,409,448,268,432,1371,1473,1432,1417,1308,1460,1355,1446,1459,1431,1083,1083,1401,1416,1458,1445,1067,1067,1370,1457,1051,1051,1291,1430,1385,1444,1354,1415,1400,1443,1082,1082,1173,1113,1186,1066,1185,1050,-1967,1158,1128,1172,1097,1171,1081,-1983,1157,1112,416,266,375,400,1170,1142,1127,1065,793,793,1169,1033,1156,1096,1141,1111,1155,1080,1126,1140,898,898,808,808,897,897,792,792,1095,1152,1032,1125,1110,1139,1079,1124,882,807,838,881,853,791,-2319,867,368,263,822,852,837,866,806,865,-2399,851,352,262,534,534,821,836,594,594,549,549,593,593,533,533,848,773,579,579,564,578,548,563,276,276,577,576,306,291,516,560,305,305,275,259, + 
-251,-892,-2058,-2620,-2828,-2957,-3023,-3039,1041,1041,1040,1040,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-511,-527,-543,-559,1530,-575,-591,1528,1527,1407,1526,1391,1023,1023,1023,1023,1525,1375,1268,1268,1103,1103,1087,1087,1039,1039,1523,-604,815,815,815,815,510,495,509,479,508,463,507,447,431,505,415,399,-734,-782,1262,-815,1259,1244,-831,1258,1228,-847,-863,1196,-879,1253,987,987,748,-767,493,493,462,477,414,414,686,669,478,446,461,445,474,429,487,458,412,471,1266,1264,1009,1009,799,799,-1019,-1276,-1452,-1581,-1677,-1757,-1821,-1886,-1933,-1997,1257,1257,1483,1468,1512,1422,1497,1406,1467,1496,1421,1510,1134,1134,1225,1225,1466,1451,1374,1405,1252,1252,1358,1480,1164,1164,1251,1251,1238,1238,1389,1465,-1407,1054,1101,-1423,1207,-1439,830,830,1248,1038,1237,1117,1223,1148,1236,1208,411,426,395,410,379,269,1193,1222,1132,1235,1221,1116,976,976,1192,1162,1177,1220,1131,1191,963,963,-1647,961,780,-1663,558,558,994,993,437,408,393,407,829,978,813,797,947,-1743,721,721,377,392,844,950,828,890,706,706,812,859,796,960,948,843,934,874,571,571,-1919,690,555,689,421,346,539,539,944,779,918,873,932,842,903,888,570,570,931,917,674,674,-2575,1562,-2591,1609,-2607,1654,1322,1322,1441,1441,1696,1546,1683,1593,1669,1624,1426,1426,1321,1321,1639,1680,1425,1425,1305,1305,1545,1668,1608,1623,1667,1592,1638,1666,1320,1320,1652,1607,1409,1409,1304,1304,1288,1288,1664,1637,1395,1395,1335,1335,1622,1636,1394,1394,1319,1319,1606,1621,1392,1392,1137,1137,1137,1137,345,390,360,375,404,373,1047,-2751,-2767,-2783,1062,1121,1046,-2799,1077,-2815,1106,1061,789,789,1105,1104,263,355,310,340,325,354,352,262,339,324,1091,1076,1029,1090,1060,1075,833,833,788,788,1088,1028,818,818,803,803,561,561,531,531,816,771,546,546,289,274,288,258, + 
-253,-317,-381,-446,-478,-509,1279,1279,-811,-1179,-1451,-1756,-1900,-2028,-2189,-2253,-2333,-2414,-2445,-2511,-2526,1313,1298,-2559,1041,1041,1040,1040,1025,1025,1024,1024,1022,1007,1021,991,1020,975,1019,959,687,687,1018,1017,671,671,655,655,1016,1015,639,639,758,758,623,623,757,607,756,591,755,575,754,559,543,543,1009,783,-575,-621,-685,-749,496,-590,750,749,734,748,974,989,1003,958,988,973,1002,942,987,957,972,1001,926,986,941,971,956,1000,910,985,925,999,894,970,-1071,-1087,-1102,1390,-1135,1436,1509,1451,1374,-1151,1405,1358,1480,1420,-1167,1507,1494,1389,1342,1465,1435,1450,1326,1505,1310,1493,1373,1479,1404,1492,1464,1419,428,443,472,397,736,526,464,464,486,457,442,471,484,482,1357,1449,1434,1478,1388,1491,1341,1490,1325,1489,1463,1403,1309,1477,1372,1448,1418,1433,1476,1356,1462,1387,-1439,1475,1340,1447,1402,1474,1324,1461,1371,1473,269,448,1432,1417,1308,1460,-1711,1459,-1727,1441,1099,1099,1446,1386,1431,1401,-1743,1289,1083,1083,1160,1160,1458,1445,1067,1067,1370,1457,1307,1430,1129,1129,1098,1098,268,432,267,416,266,400,-1887,1144,1187,1082,1173,1113,1186,1066,1050,1158,1128,1143,1172,1097,1171,1081,420,391,1157,1112,1170,1142,1127,1065,1169,1049,1156,1096,1141,1111,1155,1080,1126,1154,1064,1153,1140,1095,1048,-2159,1125,1110,1137,-2175,823,823,1139,1138,807,807,384,264,368,263,868,838,853,791,867,822,852,837,866,806,865,790,-2319,851,821,836,352,262,850,805,849,-2399,533,533,835,820,336,261,578,548,563,577,532,532,832,772,562,562,547,547,305,275,560,515,290,290,288,258 }; + static const drmp3_uint8 tab32[] = { 130,162,193,209,44,28,76,140,9,9,9,9,9,9,9,9,190,254,222,238,126,94,157,157,109,61,173,205}; + static const drmp3_uint8 tab33[] = { 252,236,220,204,188,172,156,140,124,108,92,76,60,44,28,12 }; + static const drmp3_int16 tabindex[2*16] = { 0,32,64,98,0,132,180,218,292,364,426,538,648,746,0,1126,1460,1460,1460,1460,1460,1460,1460,1460,1842,1842,1842,1842,1842,1842,1842,1842 }; + static const drmp3_uint8 g_linbits[] = { 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,3,4,6,8,10,13,4,5,6,7,8,9,11,13 }; + +#define DRMP3_PEEK_BITS(n) (bs_cache >> (32 - n)) +#define DRMP3_FLUSH_BITS(n) { bs_cache <<= (n); bs_sh += (n); } +#define DRMP3_CHECK_BITS while (bs_sh >= 0) { bs_cache |= (drmp3_uint32)*bs_next_ptr++ << bs_sh; bs_sh -= 8; } +#define DRMP3_BSPOS ((bs_next_ptr - bs->buf)*8 - 24 + bs_sh) + + float one = 0.0f; + int ireg = 0, big_val_cnt = gr_info->big_values; + const drmp3_uint8 *sfb = gr_info->sfbtab; + const drmp3_uint8 *bs_next_ptr = bs->buf + bs->pos/8; + drmp3_uint32 bs_cache = (((bs_next_ptr[0]*256u + bs_next_ptr[1])*256u + bs_next_ptr[2])*256u + bs_next_ptr[3]) << (bs->pos & 7); + int pairs_to_decode, np, bs_sh = (bs->pos & 7) - 8; + bs_next_ptr += 4; + + while (big_val_cnt > 0) + { + int tab_num = gr_info->table_select[ireg]; + int sfb_cnt = gr_info->region_count[ireg++]; + const drmp3_int16 *codebook = tabs + tabindex[tab_num]; + int linbits = g_linbits[tab_num]; + if (linbits) + { + do + { + np = *sfb++ / 2; + pairs_to_decode = DRMP3_MIN(big_val_cnt, np); + one = *scf++; + do + { + int j, w = 5; + int leaf = codebook[DRMP3_PEEK_BITS(w)]; + while (leaf < 0) + { + DRMP3_FLUSH_BITS(w); + w = leaf & 7; + leaf = codebook[DRMP3_PEEK_BITS(w) - (leaf >> 3)]; + } + DRMP3_FLUSH_BITS(leaf >> 8); + + for (j = 0; j < 2; j++, dst++, leaf >>= 4) + { + int lsb = leaf & 0x0F; + if (lsb == 15) + { + lsb += DRMP3_PEEK_BITS(linbits); + DRMP3_FLUSH_BITS(linbits); + DRMP3_CHECK_BITS; + *dst = one*drmp3_L3_pow_43(lsb)*((drmp3_int32)bs_cache < 0 ? -1: 1); + } else + { + *dst = g_drmp3_pow43[16 + lsb - 16*(bs_cache >> 31)]*one; + } + DRMP3_FLUSH_BITS(lsb ? 
1 : 0); + } + DRMP3_CHECK_BITS; + } while (--pairs_to_decode); + } while ((big_val_cnt -= np) > 0 && --sfb_cnt >= 0); + } else + { + do + { + np = *sfb++ / 2; + pairs_to_decode = DRMP3_MIN(big_val_cnt, np); + one = *scf++; + do + { + int j, w = 5; + int leaf = codebook[DRMP3_PEEK_BITS(w)]; + while (leaf < 0) + { + DRMP3_FLUSH_BITS(w); + w = leaf & 7; + leaf = codebook[DRMP3_PEEK_BITS(w) - (leaf >> 3)]; + } + DRMP3_FLUSH_BITS(leaf >> 8); + + for (j = 0; j < 2; j++, dst++, leaf >>= 4) + { + int lsb = leaf & 0x0F; + *dst = g_drmp3_pow43[16 + lsb - 16*(bs_cache >> 31)]*one; + DRMP3_FLUSH_BITS(lsb ? 1 : 0); + } + DRMP3_CHECK_BITS; + } while (--pairs_to_decode); + } while ((big_val_cnt -= np) > 0 && --sfb_cnt >= 0); + } + } + + for (np = 1 - big_val_cnt;; dst += 4) + { + const drmp3_uint8 *codebook_count1 = (gr_info->count1_table) ? tab33 : tab32; + int leaf = codebook_count1[DRMP3_PEEK_BITS(4)]; + if (!(leaf & 8)) + { + leaf = codebook_count1[(leaf >> 3) + (bs_cache << 4 >> (32 - (leaf & 3)))]; + } + DRMP3_FLUSH_BITS(leaf & 7); + if (DRMP3_BSPOS > layer3gr_limit) + { + break; + } +#define DRMP3_RELOAD_SCALEFACTOR if (!--np) { np = *sfb++/2; if (!np) break; one = *scf++; } +#define DRMP3_DEQ_COUNT1(s) if (leaf & (128 >> s)) { dst[s] = ((drmp3_int32)bs_cache < 0) ? 
-one : one; DRMP3_FLUSH_BITS(1) } + DRMP3_RELOAD_SCALEFACTOR; + DRMP3_DEQ_COUNT1(0); + DRMP3_DEQ_COUNT1(1); + DRMP3_RELOAD_SCALEFACTOR; + DRMP3_DEQ_COUNT1(2); + DRMP3_DEQ_COUNT1(3); + DRMP3_CHECK_BITS; + } + + bs->pos = layer3gr_limit; +} + +static void drmp3_L3_midside_stereo(float *left, int n) +{ + int i = 0; + float *right = left + 576; +#if DRMP3_HAVE_SIMD + if (drmp3_have_simd()) for (; i < n - 3; i += 4) + { + drmp3_f4 vl = DRMP3_VLD(left + i); + drmp3_f4 vr = DRMP3_VLD(right + i); + DRMP3_VSTORE(left + i, DRMP3_VADD(vl, vr)); + DRMP3_VSTORE(right + i, DRMP3_VSUB(vl, vr)); + } +#endif + for (; i < n; i++) + { + float a = left[i]; + float b = right[i]; + left[i] = a + b; + right[i] = a - b; + } +} + +static void drmp3_L3_intensity_stereo_band(float *left, int n, float kl, float kr) +{ + int i; + for (i = 0; i < n; i++) + { + left[i + 576] = left[i]*kr; + left[i] = left[i]*kl; + } +} + +static void drmp3_L3_stereo_top_band(const float *right, const drmp3_uint8 *sfb, int nbands, int max_band[3]) +{ + int i, k; + + max_band[0] = max_band[1] = max_band[2] = -1; + + for (i = 0; i < nbands; i++) + { + for (k = 0; k < sfb[i]; k += 2) + { + if (right[k] != 0 || right[k + 1] != 0) + { + max_band[i % 3] = i; + break; + } + } + right += sfb[i]; + } +} + +static void drmp3_L3_stereo_process(float *left, const drmp3_uint8 *ist_pos, const drmp3_uint8 *sfb, const drmp3_uint8 *hdr, int max_band[3], int mpeg2_sh) +{ + static const float g_pan[7*2] = { 0,1,0.21132487f,0.78867513f,0.36602540f,0.63397460f,0.5f,0.5f,0.63397460f,0.36602540f,0.78867513f,0.21132487f,1,0 }; + unsigned i, max_pos = DRMP3_HDR_TEST_MPEG1(hdr) ? 7 : 64; + + for (i = 0; sfb[i]; i++) + { + unsigned ipos = ist_pos[i]; + if ((int)i > max_band[i % 3] && ipos < max_pos) + { + float kl, kr, s = DRMP3_HDR_TEST_MS_STEREO(hdr) ? 
1.41421356f : 1; + if (DRMP3_HDR_TEST_MPEG1(hdr)) + { + kl = g_pan[2*ipos]; + kr = g_pan[2*ipos + 1]; + } else + { + kl = 1; + kr = drmp3_L3_ldexp_q2(1, (ipos + 1) >> 1 << mpeg2_sh); + if (ipos & 1) + { + kl = kr; + kr = 1; + } + } + drmp3_L3_intensity_stereo_band(left, sfb[i], kl*s, kr*s); + } else if (DRMP3_HDR_TEST_MS_STEREO(hdr)) + { + drmp3_L3_midside_stereo(left, sfb[i]); + } + left += sfb[i]; + } +} + +static void drmp3_L3_intensity_stereo(float *left, drmp3_uint8 *ist_pos, const drmp3_L3_gr_info *gr, const drmp3_uint8 *hdr) +{ + int max_band[3], n_sfb = gr->n_long_sfb + gr->n_short_sfb; + int i, max_blocks = gr->n_short_sfb ? 3 : 1; + + drmp3_L3_stereo_top_band(left + 576, gr->sfbtab, n_sfb, max_band); + if (gr->n_long_sfb) + { + max_band[0] = max_band[1] = max_band[2] = DRMP3_MAX(DRMP3_MAX(max_band[0], max_band[1]), max_band[2]); + } + for (i = 0; i < max_blocks; i++) + { + int default_pos = DRMP3_HDR_TEST_MPEG1(hdr) ? 3 : 0; + int itop = n_sfb - max_blocks + i; + int prev = itop - max_blocks; + ist_pos[itop] = (drmp3_uint8)(max_band[i] >= prev ? 
default_pos : ist_pos[prev]); + } + drmp3_L3_stereo_process(left, ist_pos, gr->sfbtab, hdr, max_band, gr[1].scalefac_compress & 1); +} + +static void drmp3_L3_reorder(float *grbuf, float *scratch, const drmp3_uint8 *sfb) +{ + int i, len; + float *src = grbuf, *dst = scratch; + + for (;0 != (len = *sfb); sfb += 3, src += 2*len) + { + for (i = 0; i < len; i++, src++) + { + *dst++ = src[0*len]; + *dst++ = src[1*len]; + *dst++ = src[2*len]; + } + } + memcpy(grbuf, scratch, (dst - scratch)*sizeof(float)); +} + +static void drmp3_L3_antialias(float *grbuf, int nbands) +{ + static const float g_aa[2][8] = { + {0.85749293f,0.88174200f,0.94962865f,0.98331459f,0.99551782f,0.99916056f,0.99989920f,0.99999316f}, + {0.51449576f,0.47173197f,0.31337745f,0.18191320f,0.09457419f,0.04096558f,0.01419856f,0.00369997f} + }; + + for (; nbands > 0; nbands--, grbuf += 18) + { + int i = 0; +#if DRMP3_HAVE_SIMD + if (drmp3_have_simd()) for (; i < 8; i += 4) + { + drmp3_f4 vu = DRMP3_VLD(grbuf + 18 + i); + drmp3_f4 vd = DRMP3_VLD(grbuf + 14 - i); + drmp3_f4 vc0 = DRMP3_VLD(g_aa[0] + i); + drmp3_f4 vc1 = DRMP3_VLD(g_aa[1] + i); + vd = DRMP3_VREV(vd); + DRMP3_VSTORE(grbuf + 18 + i, DRMP3_VSUB(DRMP3_VMUL(vu, vc0), DRMP3_VMUL(vd, vc1))); + vd = DRMP3_VADD(DRMP3_VMUL(vu, vc1), DRMP3_VMUL(vd, vc0)); + DRMP3_VSTORE(grbuf + 14 - i, DRMP3_VREV(vd)); + } +#endif +#ifndef DR_MP3_ONLY_SIMD + for(; i < 8; i++) + { + float u = grbuf[18 + i]; + float d = grbuf[17 - i]; + grbuf[18 + i] = u*g_aa[0][i] - d*g_aa[1][i]; + grbuf[17 - i] = u*g_aa[1][i] + d*g_aa[0][i]; + } +#endif + } +} + +static void drmp3_L3_dct3_9(float *y) +{ + float s0, s1, s2, s3, s4, s5, s6, s7, s8, t0, t2, t4; + + s0 = y[0]; s2 = y[2]; s4 = y[4]; s6 = y[6]; s8 = y[8]; + t0 = s0 + s6*0.5f; + s0 -= s6; + t4 = (s4 + s2)*0.93969262f; + t2 = (s8 + s2)*0.76604444f; + s6 = (s4 - s8)*0.17364818f; + s4 += s8 - s2; + + s2 = s0 - s4*0.5f; + y[4] = s4 + s0; + s8 = t0 - t2 + s6; + s0 = t0 - t4 + t2; + s4 = t0 + t4 - s6; + + s1 = y[1]; s3 = y[3]; s5 = 
y[5]; s7 = y[7]; + + s3 *= 0.86602540f; + t0 = (s5 + s1)*0.98480775f; + t4 = (s5 - s7)*0.34202014f; + t2 = (s1 + s7)*0.64278761f; + s1 = (s1 - s5 - s7)*0.86602540f; + + s5 = t0 - s3 - t2; + s7 = t4 - s3 - t0; + s3 = t4 + s3 - t2; + + y[0] = s4 - s7; + y[1] = s2 + s1; + y[2] = s0 - s3; + y[3] = s8 + s5; + y[5] = s8 - s5; + y[6] = s0 + s3; + y[7] = s2 - s1; + y[8] = s4 + s7; +} + +static void drmp3_L3_imdct36(float *grbuf, float *overlap, const float *window, int nbands) +{ + int i, j; + static const float g_twid9[18] = { + 0.73727734f,0.79335334f,0.84339145f,0.88701083f,0.92387953f,0.95371695f,0.97629601f,0.99144486f,0.99904822f,0.67559021f,0.60876143f,0.53729961f,0.46174861f,0.38268343f,0.30070580f,0.21643961f,0.13052619f,0.04361938f + }; + + for (j = 0; j < nbands; j++, grbuf += 18, overlap += 9) + { + float co[9], si[9]; + co[0] = -grbuf[0]; + si[0] = grbuf[17]; + for (i = 0; i < 4; i++) + { + si[8 - 2*i] = grbuf[4*i + 1] - grbuf[4*i + 2]; + co[1 + 2*i] = grbuf[4*i + 1] + grbuf[4*i + 2]; + si[7 - 2*i] = grbuf[4*i + 4] - grbuf[4*i + 3]; + co[2 + 2*i] = -(grbuf[4*i + 3] + grbuf[4*i + 4]); + } + drmp3_L3_dct3_9(co); + drmp3_L3_dct3_9(si); + + si[1] = -si[1]; + si[3] = -si[3]; + si[5] = -si[5]; + si[7] = -si[7]; + + i = 0; + +#if DRMP3_HAVE_SIMD + if (drmp3_have_simd()) for (; i < 8; i += 4) + { + drmp3_f4 vovl = DRMP3_VLD(overlap + i); + drmp3_f4 vc = DRMP3_VLD(co + i); + drmp3_f4 vs = DRMP3_VLD(si + i); + drmp3_f4 vr0 = DRMP3_VLD(g_twid9 + i); + drmp3_f4 vr1 = DRMP3_VLD(g_twid9 + 9 + i); + drmp3_f4 vw0 = DRMP3_VLD(window + i); + drmp3_f4 vw1 = DRMP3_VLD(window + 9 + i); + drmp3_f4 vsum = DRMP3_VADD(DRMP3_VMUL(vc, vr1), DRMP3_VMUL(vs, vr0)); + DRMP3_VSTORE(overlap + i, DRMP3_VSUB(DRMP3_VMUL(vc, vr0), DRMP3_VMUL(vs, vr1))); + DRMP3_VSTORE(grbuf + i, DRMP3_VSUB(DRMP3_VMUL(vovl, vw0), DRMP3_VMUL(vsum, vw1))); + vsum = DRMP3_VADD(DRMP3_VMUL(vovl, vw1), DRMP3_VMUL(vsum, vw0)); + DRMP3_VSTORE(grbuf + 14 - i, DRMP3_VREV(vsum)); + } +#endif + for (; i < 9; i++) + { + float 
ovl = overlap[i]; + float sum = co[i]*g_twid9[9 + i] + si[i]*g_twid9[0 + i]; + overlap[i] = co[i]*g_twid9[0 + i] - si[i]*g_twid9[9 + i]; + grbuf[i] = ovl*window[0 + i] - sum*window[9 + i]; + grbuf[17 - i] = ovl*window[9 + i] + sum*window[0 + i]; + } + } +} + +static void drmp3_L3_idct3(float x0, float x1, float x2, float *dst) +{ + float m1 = x1*0.86602540f; + float a1 = x0 - x2*0.5f; + dst[1] = x0 + x2; + dst[0] = a1 + m1; + dst[2] = a1 - m1; +} + +static void drmp3_L3_imdct12(float *x, float *dst, float *overlap) +{ + static const float g_twid3[6] = { 0.79335334f,0.92387953f,0.99144486f, 0.60876143f,0.38268343f,0.13052619f }; + float co[3], si[3]; + int i; + + drmp3_L3_idct3(-x[0], x[6] + x[3], x[12] + x[9], co); + drmp3_L3_idct3(x[15], x[12] - x[9], x[6] - x[3], si); + si[1] = -si[1]; + + for (i = 0; i < 3; i++) + { + float ovl = overlap[i]; + float sum = co[i]*g_twid3[3 + i] + si[i]*g_twid3[0 + i]; + overlap[i] = co[i]*g_twid3[0 + i] - si[i]*g_twid3[3 + i]; + dst[i] = ovl*g_twid3[2 - i] - sum*g_twid3[5 - i]; + dst[5 - i] = ovl*g_twid3[5 - i] + sum*g_twid3[2 - i]; + } +} + +static void drmp3_L3_imdct_short(float *grbuf, float *overlap, int nbands) +{ + for (;nbands > 0; nbands--, overlap += 9, grbuf += 18) + { + float tmp[18]; + memcpy(tmp, grbuf, sizeof(tmp)); + memcpy(grbuf, overlap, 6*sizeof(float)); + drmp3_L3_imdct12(tmp, grbuf + 6, overlap + 6); + drmp3_L3_imdct12(tmp + 1, grbuf + 12, overlap + 6); + drmp3_L3_imdct12(tmp + 2, overlap, overlap + 6); + } +} + +static void drmp3_L3_change_sign(float *grbuf) +{ + int b, i; + for (b = 0, grbuf += 18; b < 32; b += 2, grbuf += 36) + for (i = 1; i < 18; i += 2) + grbuf[i] = -grbuf[i]; +} + +static void drmp3_L3_imdct_gr(float *grbuf, float *overlap, unsigned block_type, unsigned n_long_bands) +{ + static const float g_mdct_window[2][18] = { + { 
0.99904822f,0.99144486f,0.97629601f,0.95371695f,0.92387953f,0.88701083f,0.84339145f,0.79335334f,0.73727734f,0.04361938f,0.13052619f,0.21643961f,0.30070580f,0.38268343f,0.46174861f,0.53729961f,0.60876143f,0.67559021f }, + { 1,1,1,1,1,1,0.99144486f,0.92387953f,0.79335334f,0,0,0,0,0,0,0.13052619f,0.38268343f,0.60876143f } + }; + if (n_long_bands) + { + drmp3_L3_imdct36(grbuf, overlap, g_mdct_window[0], n_long_bands); + grbuf += 18*n_long_bands; + overlap += 9*n_long_bands; + } + if (block_type == DRMP3_SHORT_BLOCK_TYPE) + drmp3_L3_imdct_short(grbuf, overlap, 32 - n_long_bands); + else + drmp3_L3_imdct36(grbuf, overlap, g_mdct_window[block_type == DRMP3_STOP_BLOCK_TYPE], 32 - n_long_bands); +} + +static void drmp3_L3_save_reservoir(drmp3dec *h, drmp3dec_scratch *s) +{ + int pos = (s->bs.pos + 7)/8u; + int remains = s->bs.limit/8u - pos; + if (remains > DRMP3_MAX_BITRESERVOIR_BYTES) + { + pos += remains - DRMP3_MAX_BITRESERVOIR_BYTES; + remains = DRMP3_MAX_BITRESERVOIR_BYTES; + } + if (remains > 0) + { + memmove(h->reserv_buf, s->maindata + pos, remains); + } + h->reserv = remains; +} + +static int drmp3_L3_restore_reservoir(drmp3dec *h, drmp3_bs *bs, drmp3dec_scratch *s, int main_data_begin) +{ + int frame_bytes = (bs->limit - bs->pos)/8; + int bytes_have = DRMP3_MIN(h->reserv, main_data_begin); + memcpy(s->maindata, h->reserv_buf + DRMP3_MAX(0, h->reserv - main_data_begin), DRMP3_MIN(h->reserv, main_data_begin)); + memcpy(s->maindata + bytes_have, bs->buf + bs->pos/8, frame_bytes); + drmp3_bs_init(&s->bs, s->maindata, bytes_have + frame_bytes); + return h->reserv >= main_data_begin; +} + +static void drmp3_L3_decode(drmp3dec *h, drmp3dec_scratch *s, drmp3_L3_gr_info *gr_info, int nch) +{ + int ch; + + for (ch = 0; ch < nch; ch++) + { + int layer3gr_limit = s->bs.pos + gr_info[ch].part_23_length; + drmp3_L3_decode_scalefactors(h->header, s->ist_pos[ch], &s->bs, gr_info + ch, s->scf, ch); + drmp3_L3_huffman(s->grbuf[ch], &s->bs, gr_info + ch, s->scf, layer3gr_limit); + 
} + + if (DRMP3_HDR_TEST_I_STEREO(h->header)) + { + drmp3_L3_intensity_stereo(s->grbuf[0], s->ist_pos[1], gr_info, h->header); + } else if (DRMP3_HDR_IS_MS_STEREO(h->header)) + { + drmp3_L3_midside_stereo(s->grbuf[0], 576); + } + + for (ch = 0; ch < nch; ch++, gr_info++) + { + int aa_bands = 31; + int n_long_bands = (gr_info->mixed_block_flag ? 2 : 0) << (int)(DRMP3_HDR_GET_MY_SAMPLE_RATE(h->header) == 2); + + if (gr_info->n_short_sfb) + { + aa_bands = n_long_bands - 1; + drmp3_L3_reorder(s->grbuf[ch] + n_long_bands*18, s->syn[0], gr_info->sfbtab + gr_info->n_long_sfb); + } + + drmp3_L3_antialias(s->grbuf[ch], aa_bands); + drmp3_L3_imdct_gr(s->grbuf[ch], h->mdct_overlap[ch], gr_info->block_type, n_long_bands); + drmp3_L3_change_sign(s->grbuf[ch]); + } +} + +static void drmp3d_DCT_II(float *grbuf, int n) +{ + static const float g_sec[24] = { + 10.19000816f,0.50060302f,0.50241929f,3.40760851f,0.50547093f,0.52249861f,2.05778098f,0.51544732f,0.56694406f,1.48416460f,0.53104258f,0.64682180f,1.16943991f,0.55310392f,0.78815460f,0.97256821f,0.58293498f,1.06067765f,0.83934963f,0.62250412f,1.72244716f,0.74453628f,0.67480832f,5.10114861f + }; + int i, k = 0; +#if DRMP3_HAVE_SIMD + if (drmp3_have_simd()) for (; k < n; k += 4) + { + drmp3_f4 t[4][8], *x; + float *y = grbuf + k; + + for (x = t[0], i = 0; i < 8; i++, x++) + { + drmp3_f4 x0 = DRMP3_VLD(&y[i*18]); + drmp3_f4 x1 = DRMP3_VLD(&y[(15 - i)*18]); + drmp3_f4 x2 = DRMP3_VLD(&y[(16 + i)*18]); + drmp3_f4 x3 = DRMP3_VLD(&y[(31 - i)*18]); + drmp3_f4 t0 = DRMP3_VADD(x0, x3); + drmp3_f4 t1 = DRMP3_VADD(x1, x2); + drmp3_f4 t2 = DRMP3_VMUL_S(DRMP3_VSUB(x1, x2), g_sec[3*i + 0]); + drmp3_f4 t3 = DRMP3_VMUL_S(DRMP3_VSUB(x0, x3), g_sec[3*i + 1]); + x[0] = DRMP3_VADD(t0, t1); + x[8] = DRMP3_VMUL_S(DRMP3_VSUB(t0, t1), g_sec[3*i + 2]); + x[16] = DRMP3_VADD(t3, t2); + x[24] = DRMP3_VMUL_S(DRMP3_VSUB(t3, t2), g_sec[3*i + 2]); + } + for (x = t[0], i = 0; i < 4; i++, x += 8) + { + drmp3_f4 x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = 
x[4], x5 = x[5], x6 = x[6], x7 = x[7], xt; + xt = DRMP3_VSUB(x0, x7); x0 = DRMP3_VADD(x0, x7); + x7 = DRMP3_VSUB(x1, x6); x1 = DRMP3_VADD(x1, x6); + x6 = DRMP3_VSUB(x2, x5); x2 = DRMP3_VADD(x2, x5); + x5 = DRMP3_VSUB(x3, x4); x3 = DRMP3_VADD(x3, x4); + x4 = DRMP3_VSUB(x0, x3); x0 = DRMP3_VADD(x0, x3); + x3 = DRMP3_VSUB(x1, x2); x1 = DRMP3_VADD(x1, x2); + x[0] = DRMP3_VADD(x0, x1); + x[4] = DRMP3_VMUL_S(DRMP3_VSUB(x0, x1), 0.70710677f); + x5 = DRMP3_VADD(x5, x6); + x6 = DRMP3_VMUL_S(DRMP3_VADD(x6, x7), 0.70710677f); + x7 = DRMP3_VADD(x7, xt); + x3 = DRMP3_VMUL_S(DRMP3_VADD(x3, x4), 0.70710677f); + x5 = DRMP3_VSUB(x5, DRMP3_VMUL_S(x7, 0.198912367f)); /* rotate by PI/8 */ + x7 = DRMP3_VADD(x7, DRMP3_VMUL_S(x5, 0.382683432f)); + x5 = DRMP3_VSUB(x5, DRMP3_VMUL_S(x7, 0.198912367f)); + x0 = DRMP3_VSUB(xt, x6); xt = DRMP3_VADD(xt, x6); + x[1] = DRMP3_VMUL_S(DRMP3_VADD(xt, x7), 0.50979561f); + x[2] = DRMP3_VMUL_S(DRMP3_VADD(x4, x3), 0.54119611f); + x[3] = DRMP3_VMUL_S(DRMP3_VSUB(x0, x5), 0.60134488f); + x[5] = DRMP3_VMUL_S(DRMP3_VADD(x0, x5), 0.89997619f); + x[6] = DRMP3_VMUL_S(DRMP3_VSUB(x4, x3), 1.30656302f); + x[7] = DRMP3_VMUL_S(DRMP3_VSUB(xt, x7), 2.56291556f); + } + + if (k > n - 3) + { +#if DRMP3_HAVE_SSE +#define DRMP3_VSAVE2(i, v) _mm_storel_pi((__m64 *)(void*)&y[i*18], v) +#else +#define DRMP3_VSAVE2(i, v) vst1_f32((float32_t *)&y[i*18], vget_low_f32(v)) +#endif + for (i = 0; i < 7; i++, y += 4*18) + { + drmp3_f4 s = DRMP3_VADD(t[3][i], t[3][i + 1]); + DRMP3_VSAVE2(0, t[0][i]); + DRMP3_VSAVE2(1, DRMP3_VADD(t[2][i], s)); + DRMP3_VSAVE2(2, DRMP3_VADD(t[1][i], t[1][i + 1])); + DRMP3_VSAVE2(3, DRMP3_VADD(t[2][1 + i], s)); + } + DRMP3_VSAVE2(0, t[0][7]); + DRMP3_VSAVE2(1, DRMP3_VADD(t[2][7], t[3][7])); + DRMP3_VSAVE2(2, t[1][7]); + DRMP3_VSAVE2(3, t[3][7]); + } else + { +#define DRMP3_VSAVE4(i, v) DRMP3_VSTORE(&y[i*18], v) + for (i = 0; i < 7; i++, y += 4*18) + { + drmp3_f4 s = DRMP3_VADD(t[3][i], t[3][i + 1]); + DRMP3_VSAVE4(0, t[0][i]); + DRMP3_VSAVE4(1, 
DRMP3_VADD(t[2][i], s)); + DRMP3_VSAVE4(2, DRMP3_VADD(t[1][i], t[1][i + 1])); + DRMP3_VSAVE4(3, DRMP3_VADD(t[2][1 + i], s)); + } + DRMP3_VSAVE4(0, t[0][7]); + DRMP3_VSAVE4(1, DRMP3_VADD(t[2][7], t[3][7])); + DRMP3_VSAVE4(2, t[1][7]); + DRMP3_VSAVE4(3, t[3][7]); + } + } else +#endif +#ifdef DR_MP3_ONLY_SIMD + {} +#else + for (; k < n; k++) + { + float t[4][8], *x, *y = grbuf + k; + + for (x = t[0], i = 0; i < 8; i++, x++) + { + float x0 = y[i*18]; + float x1 = y[(15 - i)*18]; + float x2 = y[(16 + i)*18]; + float x3 = y[(31 - i)*18]; + float t0 = x0 + x3; + float t1 = x1 + x2; + float t2 = (x1 - x2)*g_sec[3*i + 0]; + float t3 = (x0 - x3)*g_sec[3*i + 1]; + x[0] = t0 + t1; + x[8] = (t0 - t1)*g_sec[3*i + 2]; + x[16] = t3 + t2; + x[24] = (t3 - t2)*g_sec[3*i + 2]; + } + for (x = t[0], i = 0; i < 4; i++, x += 8) + { + float x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4], x5 = x[5], x6 = x[6], x7 = x[7], xt; + xt = x0 - x7; x0 += x7; + x7 = x1 - x6; x1 += x6; + x6 = x2 - x5; x2 += x5; + x5 = x3 - x4; x3 += x4; + x4 = x0 - x3; x0 += x3; + x3 = x1 - x2; x1 += x2; + x[0] = x0 + x1; + x[4] = (x0 - x1)*0.70710677f; + x5 = x5 + x6; + x6 = (x6 + x7)*0.70710677f; + x7 = x7 + xt; + x3 = (x3 + x4)*0.70710677f; + x5 -= x7*0.198912367f; /* rotate by PI/8 */ + x7 += x5*0.382683432f; + x5 -= x7*0.198912367f; + x0 = xt - x6; xt += x6; + x[1] = (xt + x7)*0.50979561f; + x[2] = (x4 + x3)*0.54119611f; + x[3] = (x0 - x5)*0.60134488f; + x[5] = (x0 + x5)*0.89997619f; + x[6] = (x4 - x3)*1.30656302f; + x[7] = (xt - x7)*2.56291556f; + + } + for (i = 0; i < 7; i++, y += 4*18) + { + y[0*18] = t[0][i]; + y[1*18] = t[2][i] + t[3][i] + t[3][i + 1]; + y[2*18] = t[1][i] + t[1][i + 1]; + y[3*18] = t[2][i + 1] + t[3][i] + t[3][i + 1]; + } + y[0*18] = t[0][7]; + y[1*18] = t[2][7] + t[3][7]; + y[2*18] = t[1][7]; + y[3*18] = t[3][7]; + } +#endif +} + +#ifndef DR_MP3_FLOAT_OUTPUT +typedef drmp3_int16 drmp3d_sample_t; + +static drmp3_int16 drmp3d_scale_pcm(float sample) +{ + drmp3_int16 s; + if (sample >= 
32766.5) return (drmp3_int16) 32767; + if (sample <= -32767.5) return (drmp3_int16)-32768; + s = (drmp3_int16)(sample + .5f); + s -= (s < 0); /* away from zero, to be compliant */ + return (drmp3_int16)s; +} +#else +typedef float drmp3d_sample_t; + +static float drmp3d_scale_pcm(float sample) +{ + return sample*(1.f/32768.f); +} +#endif + +static void drmp3d_synth_pair(drmp3d_sample_t *pcm, int nch, const float *z) +{ + float a; + a = (z[14*64] - z[ 0]) * 29; + a += (z[ 1*64] + z[13*64]) * 213; + a += (z[12*64] - z[ 2*64]) * 459; + a += (z[ 3*64] + z[11*64]) * 2037; + a += (z[10*64] - z[ 4*64]) * 5153; + a += (z[ 5*64] + z[ 9*64]) * 6574; + a += (z[ 8*64] - z[ 6*64]) * 37489; + a += z[ 7*64] * 75038; + pcm[0] = drmp3d_scale_pcm(a); + + z += 2; + a = z[14*64] * 104; + a += z[12*64] * 1567; + a += z[10*64] * 9727; + a += z[ 8*64] * 64019; + a += z[ 6*64] * -9975; + a += z[ 4*64] * -45; + a += z[ 2*64] * 146; + a += z[ 0*64] * -5; + pcm[16*nch] = drmp3d_scale_pcm(a); +} + +static void drmp3d_synth(float *xl, drmp3d_sample_t *dstl, int nch, float *lins) +{ + int i; + float *xr = xl + 576*(nch - 1); + drmp3d_sample_t *dstr = dstl + (nch - 1); + + static const float g_win[] = { + -1,26,-31,208,218,401,-519,2063,2000,4788,-5517,7134,5959,35640,-39336,74992, + -1,24,-35,202,222,347,-581,2080,1952,4425,-5879,7640,5288,33791,-41176,74856, + -1,21,-38,196,225,294,-645,2087,1893,4063,-6237,8092,4561,31947,-43006,74630, + -1,19,-41,190,227,244,-711,2085,1822,3705,-6589,8492,3776,30112,-44821,74313, + -1,17,-45,183,228,197,-779,2075,1739,3351,-6935,8840,2935,28289,-46617,73908, + -1,16,-49,176,228,153,-848,2057,1644,3004,-7271,9139,2037,26482,-48390,73415, + -2,14,-53,169,227,111,-919,2032,1535,2663,-7597,9389,1082,24694,-50137,72835, + -2,13,-58,161,224,72,-991,2001,1414,2330,-7910,9592,70,22929,-51853,72169, + -2,11,-63,154,221,36,-1064,1962,1280,2006,-8209,9750,-998,21189,-53534,71420, + -2,10,-68,147,215,2,-1137,1919,1131,1692,-8491,9863,-2122,19478,-55178,70590, + 
-3,9,-73,139,208,-29,-1210,1870,970,1388,-8755,9935,-3300,17799,-56778,69679, + -3,8,-79,132,200,-57,-1283,1817,794,1095,-8998,9966,-4533,16155,-58333,68692, + -4,7,-85,125,189,-83,-1356,1759,605,814,-9219,9959,-5818,14548,-59838,67629, + -4,7,-91,117,177,-106,-1428,1698,402,545,-9416,9916,-7154,12980,-61289,66494, + -5,6,-97,111,163,-127,-1498,1634,185,288,-9585,9838,-8540,11455,-62684,65290 + }; + float *zlin = lins + 15*64; + const float *w = g_win; + + zlin[4*15] = xl[18*16]; + zlin[4*15 + 1] = xr[18*16]; + zlin[4*15 + 2] = xl[0]; + zlin[4*15 + 3] = xr[0]; + + zlin[4*31] = xl[1 + 18*16]; + zlin[4*31 + 1] = xr[1 + 18*16]; + zlin[4*31 + 2] = xl[1]; + zlin[4*31 + 3] = xr[1]; + + drmp3d_synth_pair(dstr, nch, lins + 4*15 + 1); + drmp3d_synth_pair(dstr + 32*nch, nch, lins + 4*15 + 64 + 1); + drmp3d_synth_pair(dstl, nch, lins + 4*15); + drmp3d_synth_pair(dstl + 32*nch, nch, lins + 4*15 + 64); + +#if DRMP3_HAVE_SIMD + if (drmp3_have_simd()) for (i = 14; i >= 0; i--) + { +#define DRMP3_VLOAD(k) drmp3_f4 w0 = DRMP3_VSET(*w++); drmp3_f4 w1 = DRMP3_VSET(*w++); drmp3_f4 vz = DRMP3_VLD(&zlin[4*i - 64*k]); drmp3_f4 vy = DRMP3_VLD(&zlin[4*i - 64*(15 - k)]); +#define DRMP3_V0(k) { DRMP3_VLOAD(k) b = DRMP3_VADD(DRMP3_VMUL(vz, w1), DRMP3_VMUL(vy, w0)) ; a = DRMP3_VSUB(DRMP3_VMUL(vz, w0), DRMP3_VMUL(vy, w1)); } +#define DRMP3_V1(k) { DRMP3_VLOAD(k) b = DRMP3_VADD(b, DRMP3_VADD(DRMP3_VMUL(vz, w1), DRMP3_VMUL(vy, w0))); a = DRMP3_VADD(a, DRMP3_VSUB(DRMP3_VMUL(vz, w0), DRMP3_VMUL(vy, w1))); } +#define DRMP3_V2(k) { DRMP3_VLOAD(k) b = DRMP3_VADD(b, DRMP3_VADD(DRMP3_VMUL(vz, w1), DRMP3_VMUL(vy, w0))); a = DRMP3_VADD(a, DRMP3_VSUB(DRMP3_VMUL(vy, w1), DRMP3_VMUL(vz, w0))); } + drmp3_f4 a, b; + zlin[4*i] = xl[18*(31 - i)]; + zlin[4*i + 1] = xr[18*(31 - i)]; + zlin[4*i + 2] = xl[1 + 18*(31 - i)]; + zlin[4*i + 3] = xr[1 + 18*(31 - i)]; + zlin[4*i + 64] = xl[1 + 18*(1 + i)]; + zlin[4*i + 64 + 1] = xr[1 + 18*(1 + i)]; + zlin[4*i - 64 + 2] = xl[18*(1 + i)]; + zlin[4*i - 64 + 3] = xr[18*(1 + 
i)]; + + DRMP3_V0(0) DRMP3_V2(1) DRMP3_V1(2) DRMP3_V2(3) DRMP3_V1(4) DRMP3_V2(5) DRMP3_V1(6) DRMP3_V2(7) + + { +#ifndef DR_MP3_FLOAT_OUTPUT +#if DRMP3_HAVE_SSE + static const drmp3_f4 g_max = { 32767.0f, 32767.0f, 32767.0f, 32767.0f }; + static const drmp3_f4 g_min = { -32768.0f, -32768.0f, -32768.0f, -32768.0f }; + __m128i pcm8 = _mm_packs_epi32(_mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(a, g_max), g_min)), + _mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(b, g_max), g_min))); + dstr[(15 - i)*nch] = (drmp3_int16)_mm_extract_epi16(pcm8, 1); + dstr[(17 + i)*nch] = (drmp3_int16)_mm_extract_epi16(pcm8, 5); + dstl[(15 - i)*nch] = (drmp3_int16)_mm_extract_epi16(pcm8, 0); + dstl[(17 + i)*nch] = (drmp3_int16)_mm_extract_epi16(pcm8, 4); + dstr[(47 - i)*nch] = (drmp3_int16)_mm_extract_epi16(pcm8, 3); + dstr[(49 + i)*nch] = (drmp3_int16)_mm_extract_epi16(pcm8, 7); + dstl[(47 - i)*nch] = (drmp3_int16)_mm_extract_epi16(pcm8, 2); + dstl[(49 + i)*nch] = (drmp3_int16)_mm_extract_epi16(pcm8, 6); +#else + int16x4_t pcma, pcmb; + a = DRMP3_VADD(a, DRMP3_VSET(0.5f)); + b = DRMP3_VADD(b, DRMP3_VSET(0.5f)); + pcma = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(a), vreinterpretq_s32_u32(vcltq_f32(a, DRMP3_VSET(0))))); + pcmb = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(b), vreinterpretq_s32_u32(vcltq_f32(b, DRMP3_VSET(0))))); + vst1_lane_s16(dstr + (15 - i)*nch, pcma, 1); + vst1_lane_s16(dstr + (17 + i)*nch, pcmb, 1); + vst1_lane_s16(dstl + (15 - i)*nch, pcma, 0); + vst1_lane_s16(dstl + (17 + i)*nch, pcmb, 0); + vst1_lane_s16(dstr + (47 - i)*nch, pcma, 3); + vst1_lane_s16(dstr + (49 + i)*nch, pcmb, 3); + vst1_lane_s16(dstl + (47 - i)*nch, pcma, 2); + vst1_lane_s16(dstl + (49 + i)*nch, pcmb, 2); +#endif +#else + static const drmp3_f4 g_scale = { 1.0f/32768.0f, 1.0f/32768.0f, 1.0f/32768.0f, 1.0f/32768.0f }; + a = DRMP3_VMUL(a, g_scale); + b = DRMP3_VMUL(b, g_scale); +#if DRMP3_HAVE_SSE + _mm_store_ss(dstr + (15 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 1, 1, 1))); + _mm_store_ss(dstr + (17 + i)*nch, 
_mm_shuffle_ps(b, b, _MM_SHUFFLE(1, 1, 1, 1))); + _mm_store_ss(dstl + (15 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 0, 0, 0))); + _mm_store_ss(dstl + (17 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(0, 0, 0, 0))); + _mm_store_ss(dstr + (47 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 3, 3, 3))); + _mm_store_ss(dstr + (49 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 3, 3, 3))); + _mm_store_ss(dstl + (47 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 2, 2, 2))); + _mm_store_ss(dstl + (49 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(2, 2, 2, 2))); +#else + vst1q_lane_f32(dstr + (15 - i)*nch, a, 1); + vst1q_lane_f32(dstr + (17 + i)*nch, b, 1); + vst1q_lane_f32(dstl + (15 - i)*nch, a, 0); + vst1q_lane_f32(dstl + (17 + i)*nch, b, 0); + vst1q_lane_f32(dstr + (47 - i)*nch, a, 3); + vst1q_lane_f32(dstr + (49 + i)*nch, b, 3); + vst1q_lane_f32(dstl + (47 - i)*nch, a, 2); + vst1q_lane_f32(dstl + (49 + i)*nch, b, 2); +#endif +#endif /* DR_MP3_FLOAT_OUTPUT */ + } + } else +#endif +#ifdef DR_MP3_ONLY_SIMD + {} +#else + for (i = 14; i >= 0; i--) + { +#define DRMP3_LOAD(k) float w0 = *w++; float w1 = *w++; float *vz = &zlin[4*i - k*64]; float *vy = &zlin[4*i - (15 - k)*64]; +#define DRMP3_S0(k) { int j; DRMP3_LOAD(k); for (j = 0; j < 4; j++) b[j] = vz[j]*w1 + vy[j]*w0, a[j] = vz[j]*w0 - vy[j]*w1; } +#define DRMP3_S1(k) { int j; DRMP3_LOAD(k); for (j = 0; j < 4; j++) b[j] += vz[j]*w1 + vy[j]*w0, a[j] += vz[j]*w0 - vy[j]*w1; } +#define DRMP3_S2(k) { int j; DRMP3_LOAD(k); for (j = 0; j < 4; j++) b[j] += vz[j]*w1 + vy[j]*w0, a[j] += vy[j]*w1 - vz[j]*w0; } + float a[4], b[4]; + + zlin[4*i] = xl[18*(31 - i)]; + zlin[4*i + 1] = xr[18*(31 - i)]; + zlin[4*i + 2] = xl[1 + 18*(31 - i)]; + zlin[4*i + 3] = xr[1 + 18*(31 - i)]; + zlin[4*(i + 16)] = xl[1 + 18*(1 + i)]; + zlin[4*(i + 16) + 1] = xr[1 + 18*(1 + i)]; + zlin[4*(i - 16) + 2] = xl[18*(1 + i)]; + zlin[4*(i - 16) + 3] = xr[18*(1 + i)]; + + DRMP3_S0(0) DRMP3_S2(1) DRMP3_S1(2) DRMP3_S2(3) DRMP3_S1(4) DRMP3_S2(5) DRMP3_S1(6) DRMP3_S2(7) + + 
dstr[(15 - i)*nch] = drmp3d_scale_pcm(a[1]); + dstr[(17 + i)*nch] = drmp3d_scale_pcm(b[1]); + dstl[(15 - i)*nch] = drmp3d_scale_pcm(a[0]); + dstl[(17 + i)*nch] = drmp3d_scale_pcm(b[0]); + dstr[(47 - i)*nch] = drmp3d_scale_pcm(a[3]); + dstr[(49 + i)*nch] = drmp3d_scale_pcm(b[3]); + dstl[(47 - i)*nch] = drmp3d_scale_pcm(a[2]); + dstl[(49 + i)*nch] = drmp3d_scale_pcm(b[2]); + } +#endif +} + +static void drmp3d_synth_granule(float *qmf_state, float *grbuf, int nbands, int nch, drmp3d_sample_t *pcm, float *lins) +{ + int i; + for (i = 0; i < nch; i++) + { + drmp3d_DCT_II(grbuf + 576*i, nbands); + } + + memcpy(lins, qmf_state, sizeof(float)*15*64); + + for (i = 0; i < nbands; i += 2) + { + drmp3d_synth(grbuf + i, pcm + 32*nch*i, nch, lins + i*64); + } +#ifndef DR_MP3_NONSTANDARD_BUT_LOGICAL + if (nch == 1) + { + for (i = 0; i < 15*64; i += 2) + { + qmf_state[i] = lins[nbands*64 + i]; + } + } else +#endif + { + memcpy(qmf_state, lins + nbands*64, sizeof(float)*15*64); + } +} + +static int drmp3d_match_frame(const drmp3_uint8 *hdr, int mp3_bytes, int frame_bytes) +{ + int i, nmatch; + for (i = 0, nmatch = 0; nmatch < DRMP3_MAX_FRAME_SYNC_MATCHES; nmatch++) + { + i += drmp3_hdr_frame_bytes(hdr + i, frame_bytes) + drmp3_hdr_padding(hdr + i); + if (i + DRMP3_HDR_SIZE > mp3_bytes) + return nmatch > 0; + if (!drmp3_hdr_compare(hdr, hdr + i)) + return 0; + } + return 1; +} + +static int drmp3d_find_frame(const drmp3_uint8 *mp3, int mp3_bytes, int *free_format_bytes, int *ptr_frame_bytes) +{ + int i, k; + for (i = 0; i < mp3_bytes - DRMP3_HDR_SIZE; i++, mp3++) + { + if (drmp3_hdr_valid(mp3)) + { + int frame_bytes = drmp3_hdr_frame_bytes(mp3, *free_format_bytes); + int frame_and_padding = frame_bytes + drmp3_hdr_padding(mp3); + + for (k = DRMP3_HDR_SIZE; !frame_bytes && k < DRMP3_MAX_FREE_FORMAT_FRAME_SIZE && i + 2*k < mp3_bytes - DRMP3_HDR_SIZE; k++) + { + if (drmp3_hdr_compare(mp3, mp3 + k)) + { + int fb = k - drmp3_hdr_padding(mp3); + int nextfb = fb + drmp3_hdr_padding(mp3 + 
k); + if (i + k + nextfb + DRMP3_HDR_SIZE > mp3_bytes || !drmp3_hdr_compare(mp3, mp3 + k + nextfb)) + continue; + frame_and_padding = k; + frame_bytes = fb; + *free_format_bytes = fb; + } + } + + if ((frame_bytes && i + frame_and_padding <= mp3_bytes && + drmp3d_match_frame(mp3, mp3_bytes - i, frame_bytes)) || + (!i && frame_and_padding == mp3_bytes)) + { + *ptr_frame_bytes = frame_and_padding; + return i; + } + *free_format_bytes = 0; + } + } + *ptr_frame_bytes = 0; + return i; +} + +void drmp3dec_init(drmp3dec *dec) +{ + dec->header[0] = 0; +} + +int drmp3dec_decode_frame(drmp3dec *dec, const unsigned char *mp3, int mp3_bytes, void *pcm, drmp3dec_frame_info *info) +{ + int i = 0, igr, frame_size = 0, success = 1; + const drmp3_uint8 *hdr; + drmp3_bs bs_frame[1]; + drmp3dec_scratch scratch; + + if (mp3_bytes > 4 && dec->header[0] == 0xff && drmp3_hdr_compare(dec->header, mp3)) + { + frame_size = drmp3_hdr_frame_bytes(mp3, dec->free_format_bytes) + drmp3_hdr_padding(mp3); + if (frame_size != mp3_bytes && (frame_size + DRMP3_HDR_SIZE > mp3_bytes || !drmp3_hdr_compare(mp3, mp3 + frame_size))) + { + frame_size = 0; + } + } + if (!frame_size) + { + memset(dec, 0, sizeof(drmp3dec)); + i = drmp3d_find_frame(mp3, mp3_bytes, &dec->free_format_bytes, &frame_size); + if (!frame_size || i + frame_size > mp3_bytes) + { + info->frame_bytes = i; + return 0; + } + } + + hdr = mp3 + i; + memcpy(dec->header, hdr, DRMP3_HDR_SIZE); + info->frame_bytes = i + frame_size; + info->channels = DRMP3_HDR_IS_MONO(hdr) ? 
1 : 2; + info->hz = drmp3_hdr_sample_rate_hz(hdr); + info->layer = 4 - DRMP3_HDR_GET_LAYER(hdr); + info->bitrate_kbps = drmp3_hdr_bitrate_kbps(hdr); + + drmp3_bs_init(bs_frame, hdr + DRMP3_HDR_SIZE, frame_size - DRMP3_HDR_SIZE); + if (DRMP3_HDR_IS_CRC(hdr)) + { + drmp3_bs_get_bits(bs_frame, 16); + } + + if (info->layer == 3) + { + int main_data_begin = drmp3_L3_read_side_info(bs_frame, scratch.gr_info, hdr); + if (main_data_begin < 0 || bs_frame->pos > bs_frame->limit) + { + drmp3dec_init(dec); + return 0; + } + success = drmp3_L3_restore_reservoir(dec, bs_frame, &scratch, main_data_begin); + if (success && pcm != NULL) + { + for (igr = 0; igr < (DRMP3_HDR_TEST_MPEG1(hdr) ? 2 : 1); igr++, pcm = DRMP3_OFFSET_PTR(pcm, sizeof(drmp3d_sample_t)*576*info->channels)) + { + memset(scratch.grbuf[0], 0, 576*2*sizeof(float)); + drmp3_L3_decode(dec, &scratch, scratch.gr_info + igr*info->channels, info->channels); + drmp3d_synth_granule(dec->qmf_state, scratch.grbuf[0], 18, info->channels, (drmp3d_sample_t*)pcm, scratch.syn[0]); + } + } + drmp3_L3_save_reservoir(dec, &scratch); + } else + { +#ifdef DR_MP3_ONLY_MP3 + return 0; +#else + drmp3_L12_scale_info sci[1]; + + if (pcm == NULL) { + return drmp3_hdr_frame_samples(hdr); + } + + drmp3_L12_read_scale_info(hdr, bs_frame, sci); + + memset(scratch.grbuf[0], 0, 576*2*sizeof(float)); + for (i = 0, igr = 0; igr < 3; igr++) + { + if (12 == (i += drmp3_L12_dequantize_granule(scratch.grbuf[0] + i, bs_frame, sci, info->layer | 1))) + { + i = 0; + drmp3_L12_apply_scf_384(sci, sci->scf + igr, scratch.grbuf[0]); + drmp3d_synth_granule(dec->qmf_state, scratch.grbuf[0], 12, info->channels, (drmp3d_sample_t*)pcm, scratch.syn[0]); + memset(scratch.grbuf[0], 0, 576*2*sizeof(float)); + pcm = DRMP3_OFFSET_PTR(pcm, sizeof(drmp3d_sample_t)*384*info->channels); + } + if (bs_frame->pos > bs_frame->limit) + { + drmp3dec_init(dec); + return 0; + } + } +#endif + } + + return success*drmp3_hdr_frame_samples(dec->header); +} + +void 
drmp3dec_f32_to_s16(const float *in, drmp3_int16 *out, int num_samples) +{ /* Converts num_samples floats from in to 16-bit PCM in out: every sample is multiplied by 32768 and saturated to [-32768, 32767]. A non-positive num_samples does nothing. */ + if(num_samples > 0) + { + int i = 0; /* shared by the vector loop and the scalar tail loop below */ +#if DRMP3_HAVE_SIMD + int aligned_count = num_samples & ~7; /* num_samples rounded down to a multiple of 8 */ + for(; i < aligned_count; i+=8) + { + drmp3_f4 scale = DRMP3_VSET(32768.0f); + drmp3_f4 a = DRMP3_VMUL(DRMP3_VLD(&in[i ]), scale); + drmp3_f4 b = DRMP3_VMUL(DRMP3_VLD(&in[i+4]), scale); +#if DRMP3_HAVE_SSE + drmp3_f4 s16max = DRMP3_VSET( 32767.0f); + drmp3_f4 s16min = DRMP3_VSET(-32768.0f); /* clamp both vectors to the int16 range, convert to int32 and pack the 8 results into one register */ + __m128i pcm8 = _mm_packs_epi32(_mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(a, s16max), s16min)), + _mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(b, s16max), s16min))); + out[i ] = (drmp3_int16)_mm_extract_epi16(pcm8, 0); + out[i+1] = (drmp3_int16)_mm_extract_epi16(pcm8, 1); + out[i+2] = (drmp3_int16)_mm_extract_epi16(pcm8, 2); + out[i+3] = (drmp3_int16)_mm_extract_epi16(pcm8, 3); + out[i+4] = (drmp3_int16)_mm_extract_epi16(pcm8, 4); + out[i+5] = (drmp3_int16)_mm_extract_epi16(pcm8, 5); + out[i+6] = (drmp3_int16)_mm_extract_epi16(pcm8, 6); + out[i+7] = (drmp3_int16)_mm_extract_epi16(pcm8, 7); +#else + int16x4_t pcma, pcmb; /* non-SSE branch (NEON intrinsics): add 0.5, truncate to int32, then add the a<0 compare mask (all ones, i.e. -1) so negative lanes move away from zero; vqmovn narrows with saturation */ + a = DRMP3_VADD(a, DRMP3_VSET(0.5f)); + b = DRMP3_VADD(b, DRMP3_VSET(0.5f)); + pcma = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(a), vreinterpretq_s32_u32(vcltq_f32(a, DRMP3_VSET(0))))); + pcmb = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(b), vreinterpretq_s32_u32(vcltq_f32(b, DRMP3_VSET(0))))); + vst1_lane_s16(out+i , pcma, 0); + vst1_lane_s16(out+i+1, pcma, 1); + vst1_lane_s16(out+i+2, pcma, 2); + vst1_lane_s16(out+i+3, pcma, 3); + vst1_lane_s16(out+i+4, pcmb, 0); + vst1_lane_s16(out+i+5, pcmb, 1); + vst1_lane_s16(out+i+6, pcmb, 2); + vst1_lane_s16(out+i+7, pcmb, 3); +#endif + } +#endif + /* Scalar path: handles every sample when SIMD is disabled, otherwise only the up-to-7 samples left after the vector loop. */ for(; i < num_samples; i++) + { + float sample = in[i] * 32768.0f; + if (sample >= 32766.5) + out[i] = (drmp3_int16) 32767; + else if (sample <= -32767.5) + out[i] = (drmp3_int16)-32768; + else + { + short s = (drmp3_int16)(sample + .5f); + s -= (s < 0); /* away from zero, to be compliant */ + out[i] = s; + } + } + } +} + + + 
+/************************************************************************************************************************************************************ + + Main Public API + + ************************************************************************************************************************************************************/ + +#if defined(SIZE_MAX) + #define DRMP3_SIZE_MAX SIZE_MAX +#else + #if defined(_WIN64) || defined(_LP64) || defined(__LP64__) + #define DRMP3_SIZE_MAX ((drmp3_uint64)0xFFFFFFFFFFFFFFFF) + #else + #define DRMP3_SIZE_MAX 0xFFFFFFFF + #endif +#endif + +/* Options. */ +#ifndef DRMP3_SEEK_LEADING_MP3_FRAMES +#define DRMP3_SEEK_LEADING_MP3_FRAMES 2 +#endif + + +/* Standard library stuff. */ +#ifndef DRMP3_ASSERT +#include <assert.h> +#define DRMP3_ASSERT(expression) assert(expression) +#endif +#ifndef DRMP3_COPY_MEMORY +#define DRMP3_COPY_MEMORY(dst, src, sz) memcpy((dst), (src), (sz)) +#endif +#ifndef DRMP3_ZERO_MEMORY +#define DRMP3_ZERO_MEMORY(p, sz) memset((p), 0, (sz)) +#endif +#define DRMP3_ZERO_OBJECT(p) DRMP3_ZERO_MEMORY((p), sizeof(*(p))) +#ifndef DRMP3_MALLOC +#define DRMP3_MALLOC(sz) malloc((sz)) +#endif +#ifndef DRMP3_REALLOC +#define DRMP3_REALLOC(p, sz) realloc((p), (sz)) +#endif +#ifndef DRMP3_FREE +#define DRMP3_FREE(p) free((p)) +#endif + +#define drmp3_countof(x) (sizeof(x) / sizeof(x[0])) +#define drmp3_max(x, y) (((x) > (y)) ? (x) : (y)) +#define drmp3_min(x, y) (((x) < (y)) ? (x) : (y)) + +#define DRMP3_DATA_CHUNK_SIZE 16384 /* The size in bytes of each chunk of data to read from the MP3 stream. minimp3 recommends 16K. 
*/ + +/* Linear interpolation between x (a == 0) and y (a == 1). */ static DRMP3_INLINE float drmp3_mix_f32(float x, float y, float a) +{ + return x*(1-a) + y*a; +} + +/* Writes, for each of `channels` samples, the drmp3_mix_f32() blend of pInA[i] and pInB[i] into pOut[i]. */ static void drmp3_blend_f32(float* pOut, float* pInA, float* pInB, float factor, drmp3_uint32 channels) +{ + drmp3_uint32 i; + for (i = 0; i < channels; ++i) { + pOut[i] = drmp3_mix_f32(pInA[i], pInB[i], factor); + } +} + + +/* Default allocation callbacks: forward to DRMP3_MALLOC / DRMP3_REALLOC / DRMP3_FREE and ignore pUserData. */ static void* drmp3__malloc_default(size_t sz, void* pUserData) +{ + (void)pUserData; + return DRMP3_MALLOC(sz); +} + +static void* drmp3__realloc_default(void* p, size_t sz, void* pUserData) +{ + (void)pUserData; + return DRMP3_REALLOC(p, sz); +} + +static void drmp3__free_default(void* p, void* pUserData) +{ + (void)pUserData; + DRMP3_FREE(p); +} + + +#if 0 /* Unused, but leaving here in case I need to add it again later. */ +static void* drmp3__malloc_from_callbacks(size_t sz, const drmp3_allocation_callbacks* pAllocationCallbacks) +{ + if (pAllocationCallbacks == NULL) { + return NULL; + } + + if (pAllocationCallbacks->onMalloc != NULL) { + return pAllocationCallbacks->onMalloc(sz, pAllocationCallbacks->pUserData); + } + + /* Try using realloc(). */ + if (pAllocationCallbacks->onRealloc != NULL) { + return pAllocationCallbacks->onRealloc(NULL, sz, pAllocationCallbacks->pUserData); + } + + return NULL; +} +#endif + +/* Resizes the allocation p from szOld to szNew bytes through the supplied callbacks. onRealloc is used when present; otherwise the resize is emulated with onMalloc + copy + onFree (both must be set). Returns the new pointer, or NULL when pAllocationCallbacks is NULL, no usable callbacks exist, or the allocation fails; in the emulated path p is left untouched on failure. p may be NULL (with szOld == 0) for a first allocation. */ static void* drmp3__realloc_from_callbacks(void* p, size_t szNew, size_t szOld, const drmp3_allocation_callbacks* pAllocationCallbacks) +{ + if (pAllocationCallbacks == NULL) { + return NULL; + } + + if (pAllocationCallbacks->onRealloc != NULL) { + return pAllocationCallbacks->onRealloc(p, szNew, pAllocationCallbacks->pUserData); + } + + /* Try emulating realloc() in terms of malloc()/free(). 
*/ + if (pAllocationCallbacks->onMalloc != NULL && pAllocationCallbacks->onFree != NULL) { + void* p2; + + p2 = pAllocationCallbacks->onMalloc(szNew, pAllocationCallbacks->pUserData); + if (p2 == NULL) { + return NULL; + } + + /* p is NULL on the very first allocation: passing NULL to memcpy() is undefined even for a zero size, and a user-supplied onFree need not accept NULL. Copy no more than the new block can hold. */ + if (p != NULL) { + DRMP3_COPY_MEMORY(p2, p, drmp3_min(szOld, szNew)); + pAllocationCallbacks->onFree(p, pAllocationCallbacks->pUserData); + } + + return p2; + } + + return NULL; +} + +/* Releases p through onFree. A NULL p, NULL callbacks or a missing onFree makes this a no-op. */ static void drmp3__free_from_callbacks(void* p, const drmp3_allocation_callbacks* pAllocationCallbacks) +{ + if (p == NULL || pAllocationCallbacks == NULL) { + return; + } + + if (pAllocationCallbacks->onFree != NULL) { + pAllocationCallbacks->onFree(p, pAllocationCallbacks->pUserData); + } +} + + +/* Returns a copy of *pAllocationCallbacks, or the default malloc/realloc/free set (with a NULL pUserData) when it is NULL. */ drmp3_allocation_callbacks drmp3_copy_allocation_callbacks_or_defaults(const drmp3_allocation_callbacks* pAllocationCallbacks) +{ + if (pAllocationCallbacks != NULL) { + /* Copy. */ + return *pAllocationCallbacks; + } else { + /* Defaults. */ + drmp3_allocation_callbacks allocationCallbacks; + allocationCallbacks.pUserData = NULL; + allocationCallbacks.onMalloc = drmp3__malloc_default; + allocationCallbacks.onRealloc = drmp3__realloc_default; + allocationCallbacks.onFree = drmp3__free_default; + return allocationCallbacks; + } +} + + +/* Binds pCache to pSRC and marks the cache as empty. */ void drmp3_src_cache_init(drmp3_src* pSRC, drmp3_src_cache* pCache) +{ + DRMP3_ASSERT(pSRC != NULL); + DRMP3_ASSERT(pCache != NULL); + + pCache->pSRC = pSRC; + pCache->cachedFrameCount = 0; + pCache->iNextFrame = 0; +} + +drmp3_uint64 drmp3_src_cache_read_frames(drmp3_src_cache* pCache, drmp3_uint64 frameCount, float* pFramesOut) +{ + drmp3_uint32 channels; + drmp3_uint64 totalFramesRead = 0; + + DRMP3_ASSERT(pCache != NULL); + DRMP3_ASSERT(pCache->pSRC != NULL); + DRMP3_ASSERT(pCache->pSRC->onRead != NULL); + DRMP3_ASSERT(frameCount > 0); + DRMP3_ASSERT(pFramesOut != NULL); + + channels = pCache->pSRC->config.channels; + + while (frameCount > 0) { + /* If there's anything in memory go ahead and copy that over first. 
*/ + drmp3_uint32 framesToReadFromClient; + drmp3_uint64 framesRemainingInMemory = pCache->cachedFrameCount - pCache->iNextFrame; + drmp3_uint64 framesToReadFromMemory = frameCount; + if (framesToReadFromMemory > framesRemainingInMemory) { + framesToReadFromMemory = framesRemainingInMemory; + } + + DRMP3_COPY_MEMORY(pFramesOut, pCache->pCachedFrames + pCache->iNextFrame*channels, (drmp3_uint32)(framesToReadFromMemory * channels * sizeof(float))); + pCache->iNextFrame += (drmp3_uint32)framesToReadFromMemory; + + totalFramesRead += framesToReadFromMemory; + frameCount -= framesToReadFromMemory; + if (frameCount == 0) { + break; + } + + + /* At this point there are still more frames to read from the client, so we'll need to reload the cache with fresh data. */ + DRMP3_ASSERT(frameCount > 0); + pFramesOut += framesToReadFromMemory * channels; + + pCache->iNextFrame = 0; + pCache->cachedFrameCount = 0; + + framesToReadFromClient = drmp3_countof(pCache->pCachedFrames) / pCache->pSRC->config.channels; + if (framesToReadFromClient > pCache->pSRC->config.cacheSizeInFrames) { + framesToReadFromClient = pCache->pSRC->config.cacheSizeInFrames; + } + + pCache->cachedFrameCount = (drmp3_uint32)pCache->pSRC->onRead(pCache->pSRC, framesToReadFromClient, pCache->pCachedFrames, pCache->pSRC->pUserData); + + + /* Get out of this loop if nothing was able to be retrieved. 
*/ + if (pCache->cachedFrameCount == 0) { + break; + } + } + + return totalFramesRead; +} + + +drmp3_uint64 drmp3_src_read_frames_passthrough(drmp3_src* pSRC, drmp3_uint64 frameCount, void* pFramesOut, drmp3_bool32 flush); +drmp3_uint64 drmp3_src_read_frames_linear(drmp3_src* pSRC, drmp3_uint64 frameCount, void* pFramesOut, drmp3_bool32 flush); + +drmp3_bool32 drmp3_src_init(const drmp3_src_config* pConfig, drmp3_src_read_proc onRead, void* pUserData, drmp3_src* pSRC) +{ + if (pSRC == NULL) { + return DRMP3_FALSE; + } + + DRMP3_ZERO_OBJECT(pSRC); + + if (pConfig == NULL || onRead == NULL) { + return DRMP3_FALSE; + } + + if (pConfig->channels == 0 || pConfig->channels > 2) { + return DRMP3_FALSE; + } + + pSRC->config = *pConfig; + pSRC->onRead = onRead; + pSRC->pUserData = pUserData; + + if (pSRC->config.cacheSizeInFrames > DRMP3_SRC_CACHE_SIZE_IN_FRAMES || pSRC->config.cacheSizeInFrames == 0) { + pSRC->config.cacheSizeInFrames = DRMP3_SRC_CACHE_SIZE_IN_FRAMES; + } + + drmp3_src_cache_init(pSRC, &pSRC->cache); + return DRMP3_TRUE; +} + +drmp3_bool32 drmp3_src_set_input_sample_rate(drmp3_src* pSRC, drmp3_uint32 sampleRateIn) +{ + if (pSRC == NULL) { + return DRMP3_FALSE; + } + + /* Must have a sample rate of > 0. */ + if (sampleRateIn == 0) { + return DRMP3_FALSE; + } + + pSRC->config.sampleRateIn = sampleRateIn; + return DRMP3_TRUE; +} + +drmp3_bool32 drmp3_src_set_output_sample_rate(drmp3_src* pSRC, drmp3_uint32 sampleRateOut) +{ + if (pSRC == NULL) { + return DRMP3_FALSE; + } + + /* Must have a sample rate of > 0. 
*/ + if (sampleRateOut == 0) { + return DRMP3_FALSE; + } + + pSRC->config.sampleRateOut = sampleRateOut; + return DRMP3_TRUE; +} + +drmp3_uint64 drmp3_src_read_frames_ex(drmp3_src* pSRC, drmp3_uint64 frameCount, void* pFramesOut, drmp3_bool32 flush) +{ + drmp3_src_algorithm algorithm; + + if (pSRC == NULL || frameCount == 0 || pFramesOut == NULL) { + return 0; + } + + algorithm = pSRC->config.algorithm; + + /* Always use passthrough if the sample rates are the same. */ + if (pSRC->config.sampleRateIn == pSRC->config.sampleRateOut) { + algorithm = drmp3_src_algorithm_none; + } + + /* Could just use a function pointer instead of a switch for this... */ + switch (algorithm) + { + case drmp3_src_algorithm_none: return drmp3_src_read_frames_passthrough(pSRC, frameCount, pFramesOut, flush); + case drmp3_src_algorithm_linear: return drmp3_src_read_frames_linear(pSRC, frameCount, pFramesOut, flush); + default: return 0; + } +} + +drmp3_uint64 drmp3_src_read_frames(drmp3_src* pSRC, drmp3_uint64 frameCount, void* pFramesOut) +{ + return drmp3_src_read_frames_ex(pSRC, frameCount, pFramesOut, DRMP3_FALSE); +} + +drmp3_uint64 drmp3_src_read_frames_passthrough(drmp3_src* pSRC, drmp3_uint64 frameCount, void* pFramesOut, drmp3_bool32 flush) +{ + DRMP3_ASSERT(pSRC != NULL); + DRMP3_ASSERT(frameCount > 0); + DRMP3_ASSERT(pFramesOut != NULL); + + (void)flush; /* Passthrough need not care about flushing. */ + return pSRC->onRead(pSRC, frameCount, pFramesOut, pSRC->pUserData); +} + +drmp3_uint64 drmp3_src_read_frames_linear(drmp3_src* pSRC, drmp3_uint64 frameCount, void* pFramesOut, drmp3_bool32 flush) +{ + double factor; + drmp3_uint64 totalFramesRead; + + DRMP3_ASSERT(pSRC != NULL); + DRMP3_ASSERT(frameCount > 0); + DRMP3_ASSERT(pFramesOut != NULL); + + /* For linear SRC, the bin is only 2 frames: 1 prior, 1 future. */ + + /* Load the bin if necessary. 
*/ + if (!pSRC->algo.linear.isPrevFramesLoaded) { + drmp3_uint64 framesRead = drmp3_src_cache_read_frames(&pSRC->cache, 1, pSRC->bin); + if (framesRead == 0) { + return 0; + } + pSRC->algo.linear.isPrevFramesLoaded = DRMP3_TRUE; + } + if (!pSRC->algo.linear.isNextFramesLoaded) { + drmp3_uint64 framesRead = drmp3_src_cache_read_frames(&pSRC->cache, 1, pSRC->bin + pSRC->config.channels); + if (framesRead == 0) { + return 0; + } + pSRC->algo.linear.isNextFramesLoaded = DRMP3_TRUE; + } + + factor = (double)pSRC->config.sampleRateIn / pSRC->config.sampleRateOut; + + totalFramesRead = 0; + while (frameCount > 0) { + drmp3_uint32 i; + drmp3_uint32 framesToReadFromClient; + + /* The bin is where the previous and next frames are located. */ + float* pPrevFrame = pSRC->bin; + float* pNextFrame = pSRC->bin + pSRC->config.channels; + + drmp3_blend_f32((float*)pFramesOut, pPrevFrame, pNextFrame, (float)pSRC->algo.linear.alpha, pSRC->config.channels); + + pSRC->algo.linear.alpha += factor; + + /* The new alpha value is how we determine whether or not we need to read fresh frames. 
*/ + framesToReadFromClient = (drmp3_uint32)pSRC->algo.linear.alpha; + pSRC->algo.linear.alpha = pSRC->algo.linear.alpha - framesToReadFromClient; + + for (i = 0; i < framesToReadFromClient; ++i) { + drmp3_uint64 framesRead; + drmp3_uint32 j; + + for (j = 0; j < pSRC->config.channels; ++j) { + pPrevFrame[j] = pNextFrame[j]; + } + + framesRead = drmp3_src_cache_read_frames(&pSRC->cache, 1, pNextFrame); + if (framesRead == 0) { + drmp3_uint32 k; + for (k = 0; k < pSRC->config.channels; ++k) { + pNextFrame[k] = 0; + } + + if (pSRC->algo.linear.isNextFramesLoaded) { + pSRC->algo.linear.isNextFramesLoaded = DRMP3_FALSE; + } else { + if (flush) { + pSRC->algo.linear.isPrevFramesLoaded = DRMP3_FALSE; + } + } + + break; + } + } + + pFramesOut = (drmp3_uint8*)pFramesOut + (1 * pSRC->config.channels * sizeof(float)); + frameCount -= 1; + totalFramesRead += 1; + + /* If there's no frames available we need to get out of this loop. */ + if (!pSRC->algo.linear.isNextFramesLoaded && (!flush || !pSRC->algo.linear.isPrevFramesLoaded)) { + break; + } + } + + return totalFramesRead; +} + + +static size_t drmp3__on_read(drmp3* pMP3, void* pBufferOut, size_t bytesToRead) +{ + size_t bytesRead = pMP3->onRead(pMP3->pUserData, pBufferOut, bytesToRead); + pMP3->streamCursor += bytesRead; + return bytesRead; +} + +static drmp3_bool32 drmp3__on_seek(drmp3* pMP3, int offset, drmp3_seek_origin origin) +{ + DRMP3_ASSERT(offset >= 0); + + if (!pMP3->onSeek(pMP3->pUserData, offset, origin)) { + return DRMP3_FALSE; + } + + if (origin == drmp3_seek_origin_start) { + pMP3->streamCursor = (drmp3_uint64)offset; + } else { + pMP3->streamCursor += offset; + } + + return DRMP3_TRUE; +} + +static drmp3_bool32 drmp3__on_seek_64(drmp3* pMP3, drmp3_uint64 offset, drmp3_seek_origin origin) +{ + if (offset <= 0x7FFFFFFF) { + return drmp3__on_seek(pMP3, (int)offset, origin); + } + + + /* Getting here "offset" is too large for a 32-bit integer. We just keep seeking forward until we hit the offset. 
*/ + if (!drmp3__on_seek(pMP3, 0x7FFFFFFF, drmp3_seek_origin_start)) { + return DRMP3_FALSE; + } + + offset -= 0x7FFFFFFF; + while (offset > 0) { + if (offset <= 0x7FFFFFFF) { + if (!drmp3__on_seek(pMP3, (int)offset, drmp3_seek_origin_current)) { + return DRMP3_FALSE; + } + offset = 0; + } else { + if (!drmp3__on_seek(pMP3, 0x7FFFFFFF, drmp3_seek_origin_current)) { + return DRMP3_FALSE; + } + offset -= 0x7FFFFFFF; + } + } + + return DRMP3_TRUE; +} + +static drmp3_uint32 drmp3_decode_next_frame_ex(drmp3* pMP3, drmp3d_sample_t* pPCMFrames, drmp3_bool32 discard); +static drmp3_uint32 drmp3_decode_next_frame(drmp3* pMP3); + +static drmp3_uint64 drmp3_read_src(drmp3_src* pSRC, drmp3_uint64 frameCount, void* pFramesOut, void* pUserData) +{ + drmp3* pMP3 = (drmp3*)pUserData; + float* pFramesOutF = (float*)pFramesOut; + drmp3_uint64 totalFramesRead = 0; + + DRMP3_ASSERT(pMP3 != NULL); + DRMP3_ASSERT(pMP3->onRead != NULL); + + while (frameCount > 0) { + /* Read from the in-memory buffer first. */ + while (pMP3->pcmFramesRemainingInMP3Frame > 0 && frameCount > 0) { + drmp3d_sample_t* frames = (drmp3d_sample_t*)pMP3->pcmFrames; +#ifndef DR_MP3_FLOAT_OUTPUT + if (pMP3->mp3FrameChannels == 1) { + if (pMP3->channels == 1) { + /* Mono -> Mono. */ + pFramesOutF[0] = frames[pMP3->pcmFramesConsumedInMP3Frame] / 32768.0f; + } else { + /* Mono -> Stereo. 
*/ + pFramesOutF[0] = frames[pMP3->pcmFramesConsumedInMP3Frame] / 32768.0f; + pFramesOutF[1] = frames[pMP3->pcmFramesConsumedInMP3Frame] / 32768.0f; + } + } else { + if (pMP3->channels == 1) { + /* Stereo -> Mono */ + float sample = 0; + sample += frames[(pMP3->pcmFramesConsumedInMP3Frame*pMP3->mp3FrameChannels)+0] / 32768.0f; + sample += frames[(pMP3->pcmFramesConsumedInMP3Frame*pMP3->mp3FrameChannels)+1] / 32768.0f; + pFramesOutF[0] = sample * 0.5f; + } else { + /* Stereo -> Stereo */ + pFramesOutF[0] = frames[(pMP3->pcmFramesConsumedInMP3Frame*pMP3->mp3FrameChannels)+0] / 32768.0f; + pFramesOutF[1] = frames[(pMP3->pcmFramesConsumedInMP3Frame*pMP3->mp3FrameChannels)+1] / 32768.0f; + } + } +#else + if (pMP3->mp3FrameChannels == 1) { + if (pMP3->channels == 1) { + /* Mono -> Mono. */ + pFramesOutF[0] = frames[pMP3->pcmFramesConsumedInMP3Frame]; + } else { + /* Mono -> Stereo. */ + pFramesOutF[0] = frames[pMP3->pcmFramesConsumedInMP3Frame]; + pFramesOutF[1] = frames[pMP3->pcmFramesConsumedInMP3Frame]; + } + } else { + if (pMP3->channels == 1) { + /* Stereo -> Mono */ + float sample = 0; + sample += frames[(pMP3->pcmFramesConsumedInMP3Frame*pMP3->mp3FrameChannels)+0]; + sample += frames[(pMP3->pcmFramesConsumedInMP3Frame*pMP3->mp3FrameChannels)+1]; + pFramesOutF[0] = sample * 0.5f; + } else { + /* Stereo -> Stereo */ + pFramesOutF[0] = frames[(pMP3->pcmFramesConsumedInMP3Frame*pMP3->mp3FrameChannels)+0]; + pFramesOutF[1] = frames[(pMP3->pcmFramesConsumedInMP3Frame*pMP3->mp3FrameChannels)+1]; + } + } +#endif + + pMP3->pcmFramesConsumedInMP3Frame += 1; + pMP3->pcmFramesRemainingInMP3Frame -= 1; + totalFramesRead += 1; + frameCount -= 1; + pFramesOutF += pSRC->config.channels; + } + + if (frameCount == 0) { + break; + } + + DRMP3_ASSERT(pMP3->pcmFramesRemainingInMP3Frame == 0); + + /* + At this point we have exhausted our in-memory buffer so we need to re-fill. 
Note that the sample rate may have changed + at this point which means we'll also need to update our sample rate conversion pipeline. + */ + if (drmp3_decode_next_frame(pMP3) == 0) { + break; + } + } + + return totalFramesRead; +} + +static drmp3_bool32 drmp3_init_src(drmp3* pMP3) +{ + drmp3_src_config srcConfig; + DRMP3_ZERO_OBJECT(&srcConfig); + srcConfig.sampleRateIn = DR_MP3_DEFAULT_SAMPLE_RATE; + srcConfig.sampleRateOut = pMP3->sampleRate; + srcConfig.channels = pMP3->channels; + srcConfig.algorithm = drmp3_src_algorithm_linear; + if (!drmp3_src_init(&srcConfig, drmp3_read_src, pMP3, &pMP3->src)) { + drmp3_uninit(pMP3); + return DRMP3_FALSE; + } + + return DRMP3_TRUE; +} + +static drmp3_uint32 drmp3_decode_next_frame_ex(drmp3* pMP3, drmp3d_sample_t* pPCMFrames, drmp3_bool32 discard) +{ + drmp3_uint32 pcmFramesRead = 0; + + DRMP3_ASSERT(pMP3 != NULL); + DRMP3_ASSERT(pMP3->onRead != NULL); + + if (pMP3->atEnd) { + return 0; + } + + do { + drmp3dec_frame_info info; + size_t leftoverDataSize; + + /* minimp3 recommends doing data submission in 16K chunks. If we don't have at least 16K bytes available, get more. */ + if (pMP3->dataSize < DRMP3_DATA_CHUNK_SIZE) { + size_t bytesRead; + + if (pMP3->dataCapacity < DRMP3_DATA_CHUNK_SIZE) { + drmp3_uint8* pNewData; + size_t newDataCap; + + newDataCap = DRMP3_DATA_CHUNK_SIZE; + + pNewData = (drmp3_uint8*)drmp3__realloc_from_callbacks(pMP3->pData, newDataCap, pMP3->dataCapacity, &pMP3->allocationCallbacks); + if (pNewData == NULL) { + return 0; /* Out of memory. */ + } + + pMP3->pData = pNewData; + pMP3->dataCapacity = newDataCap; + } + + bytesRead = drmp3__on_read(pMP3, pMP3->pData + pMP3->dataSize, (pMP3->dataCapacity - pMP3->dataSize)); + if (bytesRead == 0) { + if (pMP3->dataSize == 0) { + pMP3->atEnd = DRMP3_TRUE; + return 0; /* No data. */ + } + } + + pMP3->dataSize += bytesRead; + } + + if (pMP3->dataSize > INT_MAX) { + pMP3->atEnd = DRMP3_TRUE; + return 0; /* File too big. 
*/ + } + + pcmFramesRead = drmp3dec_decode_frame(&pMP3->decoder, pMP3->pData, (int)pMP3->dataSize, pPCMFrames, &info); /* <-- Safe size_t -> int conversion thanks to the check above. */ + + /* Consume the data. */ + leftoverDataSize = (pMP3->dataSize - (size_t)info.frame_bytes); + if (info.frame_bytes > 0) { + memmove(pMP3->pData, pMP3->pData + info.frame_bytes, leftoverDataSize); + pMP3->dataSize = leftoverDataSize; + } + + /* + pcmFramesRead will be equal to 0 if decoding failed. If it is zero and info.frame_bytes > 0 then we have successfully + decoded the frame. A special case is if we are wanting to discard the frame, in which case we return successfully. + */ + if (pcmFramesRead > 0 || (info.frame_bytes > 0 && discard)) { + pcmFramesRead = drmp3_hdr_frame_samples(pMP3->decoder.header); + pMP3->pcmFramesConsumedInMP3Frame = 0; + pMP3->pcmFramesRemainingInMP3Frame = pcmFramesRead; + pMP3->mp3FrameChannels = info.channels; + pMP3->mp3FrameSampleRate = info.hz; + + /* We need to initialize the resampler if we don't yet have the channel count or sample rate. */ + if (pMP3->channels == 0 || pMP3->sampleRate == 0) { + if (pMP3->channels == 0) { + pMP3->channels = info.channels; + } + if (pMP3->sampleRate == 0) { + pMP3->sampleRate = info.hz; + } + drmp3_init_src(pMP3); + } + + drmp3_src_set_input_sample_rate(&pMP3->src, pMP3->mp3FrameSampleRate); + break; + } else if (info.frame_bytes == 0) { + size_t bytesRead; + + /* Need more data. minimp3 recommends doing data submission in 16K chunks. */ + if (pMP3->dataCapacity == pMP3->dataSize) { + /* No room. Expand. */ + drmp3_uint8* pNewData; + size_t newDataCap; + + newDataCap = pMP3->dataCapacity + DRMP3_DATA_CHUNK_SIZE; + + pNewData = (drmp3_uint8*)drmp3__realloc_from_callbacks(pMP3->pData, newDataCap, pMP3->dataCapacity, &pMP3->allocationCallbacks); + if (pNewData == NULL) { + return 0; /* Out of memory. */ + } + + pMP3->pData = pNewData; + pMP3->dataCapacity = newDataCap; + } + + /* Fill in a chunk. 
*/ + bytesRead = drmp3__on_read(pMP3, pMP3->pData + pMP3->dataSize, (pMP3->dataCapacity - pMP3->dataSize)); + if (bytesRead == 0) { + pMP3->atEnd = DRMP3_TRUE; + return 0; /* Error reading more data. */ + } + + pMP3->dataSize += bytesRead; + } + } while (DRMP3_TRUE); + + return pcmFramesRead; +} + +static drmp3_uint32 drmp3_decode_next_frame(drmp3* pMP3) +{ + DRMP3_ASSERT(pMP3 != NULL); + return drmp3_decode_next_frame_ex(pMP3, (drmp3d_sample_t*)pMP3->pcmFrames, DRMP3_FALSE); +} + +#if 0 +static drmp3_uint32 drmp3_seek_next_frame(drmp3* pMP3) +{ + drmp3_uint32 pcmFrameCount; + + DRMP3_ASSERT(pMP3 != NULL); + + pcmFrameCount = drmp3_decode_next_frame_ex(pMP3, NULL); + if (pcmFrameCount == 0) { + return 0; + } + + /* We have essentially just skipped past the frame, so just set the remaining samples to 0. */ + pMP3->currentPCMFrame += pcmFrameCount; + pMP3->pcmFramesConsumedInMP3Frame = pcmFrameCount; + pMP3->pcmFramesRemainingInMP3Frame = 0; + + return pcmFrameCount; +} +#endif + +drmp3_bool32 drmp3_init_internal(drmp3* pMP3, drmp3_read_proc onRead, drmp3_seek_proc onSeek, void* pUserData, const drmp3_config* pConfig, const drmp3_allocation_callbacks* pAllocationCallbacks) +{ + drmp3_config config; + + DRMP3_ASSERT(pMP3 != NULL); + DRMP3_ASSERT(onRead != NULL); + + /* This function assumes the output object has already been reset to 0. Do not do that here, otherwise things will break. */ + drmp3dec_init(&pMP3->decoder); + + /* The config can be null in which case we use defaults. */ + if (pConfig != NULL) { + config = *pConfig; + } else { + DRMP3_ZERO_OBJECT(&config); + } + + pMP3->channels = config.outputChannels; + + /* Cannot have more than 2 channels. 
*/ + if (pMP3->channels > 2) { + pMP3->channels = 2; + } + + pMP3->sampleRate = config.outputSampleRate; + + pMP3->onRead = onRead; + pMP3->onSeek = onSeek; + pMP3->pUserData = pUserData; + pMP3->allocationCallbacks = drmp3_copy_allocation_callbacks_or_defaults(pAllocationCallbacks); + + if (pMP3->allocationCallbacks.onFree == NULL || (pMP3->allocationCallbacks.onMalloc == NULL && pMP3->allocationCallbacks.onRealloc == NULL)) { + return DRMP3_FALSE; /* Invalid allocation callbacks. */ + } + + /* + We need a sample rate converter for converting the sample rate from the MP3 frames to the requested output sample rate. Note that if + we don't yet know the channel count or sample rate we defer this until the first frame is read. + */ + if (pMP3->channels != 0 && pMP3->sampleRate != 0) { + drmp3_init_src(pMP3); + } + + /* Decode the first frame to confirm that it is indeed a valid MP3 stream. */ + if (!drmp3_decode_next_frame(pMP3)) { + drmp3_uninit(pMP3); + return DRMP3_FALSE; /* Not a valid MP3 stream. 
*/ + } + + return DRMP3_TRUE; +} + +drmp3_bool32 drmp3_init(drmp3* pMP3, drmp3_read_proc onRead, drmp3_seek_proc onSeek, void* pUserData, const drmp3_config* pConfig, const drmp3_allocation_callbacks* pAllocationCallbacks) +{ + if (pMP3 == NULL || onRead == NULL) { + return DRMP3_FALSE; + } + + DRMP3_ZERO_OBJECT(pMP3); + return drmp3_init_internal(pMP3, onRead, onSeek, pUserData, pConfig, pAllocationCallbacks); +} + + +static size_t drmp3__on_read_memory(void* pUserData, void* pBufferOut, size_t bytesToRead) +{ + drmp3* pMP3 = (drmp3*)pUserData; + size_t bytesRemaining; + + DRMP3_ASSERT(pMP3 != NULL); + DRMP3_ASSERT(pMP3->memory.dataSize >= pMP3->memory.currentReadPos); + + bytesRemaining = pMP3->memory.dataSize - pMP3->memory.currentReadPos; + if (bytesToRead > bytesRemaining) { + bytesToRead = bytesRemaining; + } + + if (bytesToRead > 0) { + DRMP3_COPY_MEMORY(pBufferOut, pMP3->memory.pData + pMP3->memory.currentReadPos, bytesToRead); + pMP3->memory.currentReadPos += bytesToRead; + } + + return bytesToRead; +} + +static drmp3_bool32 drmp3__on_seek_memory(void* pUserData, int byteOffset, drmp3_seek_origin origin) +{ + drmp3* pMP3 = (drmp3*)pUserData; + + DRMP3_ASSERT(pMP3 != NULL); + + if (origin == drmp3_seek_origin_current) { + if (byteOffset > 0) { + if (pMP3->memory.currentReadPos + byteOffset > pMP3->memory.dataSize) { + byteOffset = (int)(pMP3->memory.dataSize - pMP3->memory.currentReadPos); /* Trying to seek too far forward. */ + } + } else { + if (pMP3->memory.currentReadPos < (size_t)-byteOffset) { + byteOffset = -(int)pMP3->memory.currentReadPos; /* Trying to seek too far backwards. */ + } + } + + /* This will never underflow thanks to the clamps above. */ + pMP3->memory.currentReadPos += byteOffset; + } else { + if ((drmp3_uint32)byteOffset <= pMP3->memory.dataSize) { + pMP3->memory.currentReadPos = byteOffset; + } else { + pMP3->memory.currentReadPos = pMP3->memory.dataSize; /* Trying to seek too far forward. 
*/ + } + } + + return DRMP3_TRUE; +} + +drmp3_bool32 drmp3_init_memory(drmp3* pMP3, const void* pData, size_t dataSize, const drmp3_config* pConfig, const drmp3_allocation_callbacks* pAllocationCallbacks) +{ + if (pMP3 == NULL) { + return DRMP3_FALSE; + } + + DRMP3_ZERO_OBJECT(pMP3); + + if (pData == NULL || dataSize == 0) { + return DRMP3_FALSE; + } + + pMP3->memory.pData = (const drmp3_uint8*)pData; + pMP3->memory.dataSize = dataSize; + pMP3->memory.currentReadPos = 0; + + return drmp3_init_internal(pMP3, drmp3__on_read_memory, drmp3__on_seek_memory, pMP3, pConfig, pAllocationCallbacks); +} + + +#ifndef DR_MP3_NO_STDIO +#include <stdio.h> + +static size_t drmp3__on_read_stdio(void* pUserData, void* pBufferOut, size_t bytesToRead) +{ + return fread(pBufferOut, 1, bytesToRead, (FILE*)pUserData); +} + +static drmp3_bool32 drmp3__on_seek_stdio(void* pUserData, int offset, drmp3_seek_origin origin) +{ + return fseek((FILE*)pUserData, offset, (origin == drmp3_seek_origin_current) ? SEEK_CUR : SEEK_SET) == 0; +} + +/* +Opens filePath in binary read mode and initializes pMP3 to decode from it through the stdio callbacks above. The FILE +is stored as the decoder's user data and is closed by drmp3_uninit(). Returns DRMP3_FALSE when an argument is NULL, +the file cannot be opened, or drmp3_init() fails. +*/ +drmp3_bool32 drmp3_init_file(drmp3* pMP3, const char* filePath, const drmp3_config* pConfig, const drmp3_allocation_callbacks* pAllocationCallbacks) +{ + FILE* pFile; + + /* Validate before opening: drmp3_init() rejects a NULL pMP3 without closing the file, which would leak the handle. */ + if (pMP3 == NULL || filePath == NULL) { + return DRMP3_FALSE; + } + +#if defined(_MSC_VER) && _MSC_VER >= 1400 + if (fopen_s(&pFile, filePath, "rb") != 0) { + return DRMP3_FALSE; + } +#else + pFile = fopen(filePath, "rb"); + if (pFile == NULL) { + return DRMP3_FALSE; + } +#endif + + /* + NOTE(review): the file is deliberately not closed here on failure. When the first frame fails to decode the init path + already calls drmp3_uninit(), which closes it, so closing again would be a double fclose(). Other failure paths (for + example invalid allocation callbacks) still leave the file open - confirm and fix inside the init path. + */ + return drmp3_init(pMP3, drmp3__on_read_stdio, drmp3__on_seek_stdio, (void*)pFile, pConfig, pAllocationCallbacks); +} +#endif + +void drmp3_uninit(drmp3* pMP3) +{ + if (pMP3 == NULL) { + return; + } + +#ifndef DR_MP3_NO_STDIO + if (pMP3->onRead == drmp3__on_read_stdio) { + fclose((FILE*)pMP3->pUserData); + } +#endif + + drmp3__free_from_callbacks(pMP3->pData, &pMP3->allocationCallbacks); +} + +drmp3_uint64 drmp3_read_pcm_frames_f32(drmp3* pMP3, drmp3_uint64 framesToRead, float* pBufferOut) +{ + drmp3_uint64 totalFramesRead = 0; + + if (pMP3 == NULL || pMP3->onRead == NULL) { + return 
0; + } + + if (pBufferOut == NULL) { + float temp[4096]; + while (framesToRead > 0) { + drmp3_uint64 framesJustRead; + drmp3_uint64 framesToReadRightNow = sizeof(temp)/sizeof(temp[0]) / pMP3->channels; + if (framesToReadRightNow > framesToRead) { + framesToReadRightNow = framesToRead; + } + + framesJustRead = drmp3_read_pcm_frames_f32(pMP3, framesToReadRightNow, temp); + if (framesJustRead == 0) { + break; + } + + framesToRead -= framesJustRead; + totalFramesRead += framesJustRead; + } + } else { + totalFramesRead = drmp3_src_read_frames_ex(&pMP3->src, framesToRead, pBufferOut, DRMP3_TRUE); + pMP3->currentPCMFrame += totalFramesRead; + } + + return totalFramesRead; +} + +drmp3_uint64 drmp3_read_pcm_frames_s16(drmp3* pMP3, drmp3_uint64 framesToRead, drmp3_int16* pBufferOut) +{ + float tempF32[4096]; + drmp3_uint64 pcmFramesJustRead; + drmp3_uint64 totalPCMFramesRead = 0; + + if (pMP3 == NULL || pMP3->onRead == NULL) { + return 0; + } + + /* Naive implementation: read into a temp f32 buffer, then convert. */ + for (;;) { + drmp3_uint64 pcmFramesToReadThisIteration = (framesToRead - totalPCMFramesRead); + if (pcmFramesToReadThisIteration > drmp3_countof(tempF32)/pMP3->channels) { + pcmFramesToReadThisIteration = drmp3_countof(tempF32)/pMP3->channels; + } + + pcmFramesJustRead = drmp3_read_pcm_frames_f32(pMP3, pcmFramesToReadThisIteration, tempF32); + if (pcmFramesJustRead == 0) { + break; + } + + drmp3dec_f32_to_s16(tempF32, pBufferOut, (int)(pcmFramesJustRead * pMP3->channels)); /* <-- Safe cast since pcmFramesJustRead will be clamped based on the size of tempF32 which is always small. 
*/ + pBufferOut += pcmFramesJustRead * pMP3->channels; + + totalPCMFramesRead += pcmFramesJustRead; + + if (pcmFramesJustRead < pcmFramesToReadThisIteration) { + break; + } + } + + return totalPCMFramesRead; +} + +void drmp3_reset(drmp3* pMP3) +{ + DRMP3_ASSERT(pMP3 != NULL); + + pMP3->pcmFramesConsumedInMP3Frame = 0; + pMP3->pcmFramesRemainingInMP3Frame = 0; + pMP3->currentPCMFrame = 0; + pMP3->dataSize = 0; + pMP3->atEnd = DRMP3_FALSE; + pMP3->src.bin[0] = 0; + pMP3->src.bin[1] = 0; + pMP3->src.bin[2] = 0; + pMP3->src.bin[3] = 0; + pMP3->src.cache.cachedFrameCount = 0; + pMP3->src.cache.iNextFrame = 0; + pMP3->src.algo.linear.alpha = 0; + pMP3->src.algo.linear.isNextFramesLoaded = 0; + pMP3->src.algo.linear.isPrevFramesLoaded = 0; + drmp3dec_init(&pMP3->decoder); +} + +drmp3_bool32 drmp3_seek_to_start_of_stream(drmp3* pMP3) +{ + DRMP3_ASSERT(pMP3 != NULL); + DRMP3_ASSERT(pMP3->onSeek != NULL); + + /* Seek to the start of the stream to begin with. */ + if (!drmp3__on_seek(pMP3, 0, drmp3_seek_origin_start)) { + return DRMP3_FALSE; + } + + /* Clear any cached data. */ + drmp3_reset(pMP3); + return DRMP3_TRUE; +} + +float drmp3_get_cached_pcm_frame_count_from_src(drmp3* pMP3) +{ + return (pMP3->src.cache.cachedFrameCount - pMP3->src.cache.iNextFrame) + (float)pMP3->src.algo.linear.alpha; +} + +float drmp3_get_pcm_frames_remaining_in_mp3_frame(drmp3* pMP3) +{ + float factor = (float)pMP3->src.config.sampleRateOut / (float)pMP3->src.config.sampleRateIn; + float frameCountPreSRC = drmp3_get_cached_pcm_frame_count_from_src(pMP3) + pMP3->pcmFramesRemainingInMP3Frame; + return frameCountPreSRC * factor; +} + +/* +NOTE ON SEEKING +=============== +The seeking code below is a complete mess and is broken for cases when the sample rate changes. The problem +is with the resampling and the crappy resampler used by dr_mp3. What needs to happen is the following: + +1) The resampler needs to be replaced. 
+2) The resampler has state which needs to be updated whenever an MP3 frame is decoded outside of + drmp3_read_pcm_frames_f32(). The resampler needs an API to "flush" some imaginary input so that its + state is updated accordingly. +*/ +drmp3_bool32 drmp3_seek_forward_by_pcm_frames__brute_force(drmp3* pMP3, drmp3_uint64 frameOffset) +{ + drmp3_uint64 framesRead; + +#if 0 + /* + MP3 is a bit annoying when it comes to seeking because of the bit reservoir. It basically means that an MP3 frame can possibly + depend on some of the data of prior frames. This means it's not as simple as seeking to the first byte of the MP3 frame that + contains the sample because that MP3 frame will need the data from the previous MP3 frame (which we just seeked past!). To + resolve this we seek past a number of MP3 frames up to a point, and then read-and-discard the remainder. + */ + drmp3_uint64 maxFramesToReadAndDiscard = (drmp3_uint64)(DRMP3_MAX_PCM_FRAMES_PER_MP3_FRAME * 3 * ((float)pMP3->src.config.sampleRateOut / (float)pMP3->src.config.sampleRateIn)); + + /* Now get rid of leading whole frames. */ + while (frameOffset > maxFramesToReadAndDiscard) { + float pcmFramesRemainingInCurrentMP3FrameF = drmp3_get_pcm_frames_remaining_in_mp3_frame(pMP3); + drmp3_uint32 pcmFramesRemainingInCurrentMP3Frame = (drmp3_uint32)pcmFramesRemainingInCurrentMP3FrameF; + if (frameOffset > pcmFramesRemainingInCurrentMP3Frame) { + frameOffset -= pcmFramesRemainingInCurrentMP3Frame; + pMP3->currentPCMFrame += pcmFramesRemainingInCurrentMP3Frame; + pMP3->pcmFramesConsumedInMP3Frame += pMP3->pcmFramesRemainingInMP3Frame; + pMP3->pcmFramesRemainingInMP3Frame = 0; + } else { + break; + } + + drmp3_uint32 pcmFrameCount = drmp3_decode_next_frame_ex(pMP3, pMP3->pcmFrames, DRMP3_FALSE); + if (pcmFrameCount == 0) { + break; + } + } + + /* The last step is to read-and-discard any remaining PCM frames to make it sample-exact. 
*/ + framesRead = drmp3_read_pcm_frames_f32(pMP3, frameOffset, NULL); + if (framesRead != frameOffset) { + return DRMP3_FALSE; + } +#else + /* Just using a dumb read-and-discard for now pending updates to the resampler. */ + framesRead = drmp3_read_pcm_frames_f32(pMP3, frameOffset, NULL); + if (framesRead != frameOffset) { + return DRMP3_FALSE; + } +#endif + + return DRMP3_TRUE; +} + +drmp3_bool32 drmp3_seek_to_pcm_frame__brute_force(drmp3* pMP3, drmp3_uint64 frameIndex) +{ + DRMP3_ASSERT(pMP3 != NULL); + + if (frameIndex == pMP3->currentPCMFrame) { + return DRMP3_TRUE; + } + + /* + If we're moving forward we just read from where we're at. Otherwise we need to move back to the start of + the stream and read from the beginning. + */ + if (frameIndex < pMP3->currentPCMFrame) { + /* Moving backward. Move to the start of the stream and then move forward. */ + if (!drmp3_seek_to_start_of_stream(pMP3)) { + return DRMP3_FALSE; + } + } + + DRMP3_ASSERT(frameIndex >= pMP3->currentPCMFrame); + return drmp3_seek_forward_by_pcm_frames__brute_force(pMP3, (frameIndex - pMP3->currentPCMFrame)); +} + +drmp3_bool32 drmp3_find_closest_seek_point(drmp3* pMP3, drmp3_uint64 frameIndex, drmp3_uint32* pSeekPointIndex) +{ + drmp3_uint32 iSeekPoint; + + DRMP3_ASSERT(pSeekPointIndex != NULL); + + *pSeekPointIndex = 0; + + if (frameIndex < pMP3->pSeekPoints[0].pcmFrameIndex) { + return DRMP3_FALSE; + } + + /* Linear search for simplicity to begin with while I'm getting this thing working. Once it's all working change this to a binary search. The loop keeps the index of the last seek point whose pcmFrameIndex does not exceed frameIndex. */ + for (iSeekPoint = 0; iSeekPoint < pMP3->seekPointCount; ++iSeekPoint) { + if (pMP3->pSeekPoints[iSeekPoint].pcmFrameIndex > frameIndex) { + break; /* Found it. 
*/ + } + + *pSeekPointIndex = iSeekPoint; + } + + return DRMP3_TRUE; +} + +drmp3_bool32 drmp3_seek_to_pcm_frame__seek_table(drmp3* pMP3, drmp3_uint64 frameIndex) +{ + drmp3_seek_point seekPoint; + drmp3_uint32 priorSeekPointIndex; + drmp3_uint16 iMP3Frame; + drmp3_uint64 leftoverFrames; + + DRMP3_ASSERT(pMP3 != NULL); + DRMP3_ASSERT(pMP3->pSeekPoints != NULL); + DRMP3_ASSERT(pMP3->seekPointCount > 0); + + /* If there is no prior seekpoint it means the target PCM frame comes before the first seek point. Just assume a seekpoint at the start of the file in this case. */ + if (drmp3_find_closest_seek_point(pMP3, frameIndex, &priorSeekPointIndex)) { + seekPoint = pMP3->pSeekPoints[priorSeekPointIndex]; + } else { + seekPoint.seekPosInBytes = 0; + seekPoint.pcmFrameIndex = 0; + seekPoint.mp3FramesToDiscard = 0; + seekPoint.pcmFramesToDiscard = 0; + } + + /* First thing to do is seek to the first byte of the relevant MP3 frame. */ + if (!drmp3__on_seek_64(pMP3, seekPoint.seekPosInBytes, drmp3_seek_origin_start)) { + return DRMP3_FALSE; /* Failed to seek. */ + } + + /* Clear any cached data. */ + drmp3_reset(pMP3); + + /* Whole MP3 frames need to be discarded first. */ + for (iMP3Frame = 0; iMP3Frame < seekPoint.mp3FramesToDiscard; ++iMP3Frame) { + drmp3_uint32 pcmFramesReadPreSRC; + drmp3d_sample_t* pPCMFrames; + + /* Pass in non-null for the last frame because we want to ensure the sample rate converter is preloaded correctly. */ + pPCMFrames = NULL; + if (iMP3Frame == seekPoint.mp3FramesToDiscard-1) { + pPCMFrames = (drmp3d_sample_t*)pMP3->pcmFrames; + } + + /* We first need to decode the next frame, and then we need to flush the resampler. */ + pcmFramesReadPreSRC = drmp3_decode_next_frame_ex(pMP3, pPCMFrames, DRMP3_TRUE); + if (pcmFramesReadPreSRC == 0) { + return DRMP3_FALSE; + } + } + + /* We seeked to an MP3 frame in the raw stream so we need to make sure the current PCM frame is set correctly. 
*/ + pMP3->currentPCMFrame = seekPoint.pcmFrameIndex - seekPoint.pcmFramesToDiscard; + + /* + Update resampler. This is wrong. Need to instead update it on a per MP3 frame basis. Also broken for cases when + the sample rate is being reduced in my testing. Should work fine when the input and output sample rate is the same + or a clean multiple. + */ + pMP3->src.algo.linear.alpha = (drmp3_int64)pMP3->currentPCMFrame * ((double)pMP3->src.config.sampleRateIn / pMP3->src.config.sampleRateOut); /* <-- Cast to int64 is required for VC6. */ + pMP3->src.algo.linear.alpha = pMP3->src.algo.linear.alpha - (drmp3_uint32)(pMP3->src.algo.linear.alpha); + if (pMP3->src.algo.linear.alpha > 0) { + pMP3->src.algo.linear.isPrevFramesLoaded = 1; + } + + /* + Now at this point we can follow the same process as the brute force technique where we just skip over unnecessary MP3 frames and then + read-and-discard at least 2 whole MP3 frames. + */ + leftoverFrames = frameIndex - pMP3->currentPCMFrame; + return drmp3_seek_forward_by_pcm_frames__brute_force(pMP3, leftoverFrames); +} + +drmp3_bool32 drmp3_seek_to_pcm_frame(drmp3* pMP3, drmp3_uint64 frameIndex) +{ + if (pMP3 == NULL || pMP3->onSeek == NULL) { + return DRMP3_FALSE; + } + + if (frameIndex == 0) { + return drmp3_seek_to_start_of_stream(pMP3); + } + + /* Use the seek table if we have one. 
*/ + if (pMP3->pSeekPoints != NULL && pMP3->seekPointCount > 0) { + return drmp3_seek_to_pcm_frame__seek_table(pMP3, frameIndex); + } else { + return drmp3_seek_to_pcm_frame__brute_force(pMP3, frameIndex); + } +} + +drmp3_bool32 drmp3_get_mp3_and_pcm_frame_count(drmp3* pMP3, drmp3_uint64* pMP3FrameCount, drmp3_uint64* pPCMFrameCount) +{ + drmp3_uint64 currentPCMFrame; + drmp3_uint64 totalPCMFrameCount; + drmp3_uint64 totalMP3FrameCount; + float totalPCMFrameCountFractionalPart; + + if (pMP3 == NULL) { + return DRMP3_FALSE; + } + + /* + The way this works is we move back to the start of the stream, iterate over each MP3 frame and calculate the frame count based + on our output sample rate, and then seek back to the PCM frame we were sitting on before calling this function. + */ + + /* The stream must support seeking for this to work. */ + if (pMP3->onSeek == NULL) { + return DRMP3_FALSE; + } + + /* We'll need to seek back to where we were, so grab the PCM frame we're currently sitting on so we can restore later. */ + currentPCMFrame = pMP3->currentPCMFrame; + + if (!drmp3_seek_to_start_of_stream(pMP3)) { + return DRMP3_FALSE; + } + + totalPCMFrameCount = 0; + totalMP3FrameCount = 0; + + totalPCMFrameCountFractionalPart = 0; /* <-- With resampling there will be a fractional part to each MP3 frame that we need to accumulate. 
*/ + for (;;) { + drmp3_uint32 pcmFramesInCurrentMP3FrameIn; + float srcRatio; + float pcmFramesInCurrentMP3FrameOutF; + drmp3_uint32 pcmFramesInCurrentMP3FrameOut; + + pcmFramesInCurrentMP3FrameIn = drmp3_decode_next_frame_ex(pMP3, NULL, DRMP3_FALSE); + if (pcmFramesInCurrentMP3FrameIn == 0) { + break; + } + + srcRatio = (float)pMP3->mp3FrameSampleRate / (float)pMP3->sampleRate; + DRMP3_ASSERT(srcRatio > 0); + + pcmFramesInCurrentMP3FrameOutF = totalPCMFrameCountFractionalPart + (pcmFramesInCurrentMP3FrameIn / srcRatio); + pcmFramesInCurrentMP3FrameOut = (drmp3_uint32)pcmFramesInCurrentMP3FrameOutF; + totalPCMFrameCountFractionalPart = pcmFramesInCurrentMP3FrameOutF - pcmFramesInCurrentMP3FrameOut; + totalPCMFrameCount += pcmFramesInCurrentMP3FrameOut; + totalMP3FrameCount += 1; + } + + /* Finally, we need to seek back to where we were. */ + if (!drmp3_seek_to_start_of_stream(pMP3)) { + return DRMP3_FALSE; + } + + if (!drmp3_seek_to_pcm_frame(pMP3, currentPCMFrame)) { + return DRMP3_FALSE; + } + + if (pMP3FrameCount != NULL) { + *pMP3FrameCount = totalMP3FrameCount; + } + if (pPCMFrameCount != NULL) { + *pPCMFrameCount = totalPCMFrameCount; + } + + return DRMP3_TRUE; +} + +drmp3_uint64 drmp3_get_pcm_frame_count(drmp3* pMP3) +{ + drmp3_uint64 totalPCMFrameCount; + if (!drmp3_get_mp3_and_pcm_frame_count(pMP3, NULL, &totalPCMFrameCount)) { + return 0; + } + + return totalPCMFrameCount; +} + +drmp3_uint64 drmp3_get_mp3_frame_count(drmp3* pMP3) +{ + drmp3_uint64 totalMP3FrameCount; + if (!drmp3_get_mp3_and_pcm_frame_count(pMP3, &totalMP3FrameCount, NULL)) { + return 0; + } + + return totalMP3FrameCount; +} + +void drmp3__accumulate_running_pcm_frame_count(drmp3* pMP3, drmp3_uint32 pcmFrameCountIn, drmp3_uint64* pRunningPCMFrameCount, float* pRunningPCMFrameCountFractionalPart) +{ + float srcRatio; + float pcmFrameCountOutF; + drmp3_uint32 pcmFrameCountOut; + + srcRatio = (float)pMP3->mp3FrameSampleRate / (float)pMP3->sampleRate; + DRMP3_ASSERT(srcRatio > 0); + + 
pcmFrameCountOutF = *pRunningPCMFrameCountFractionalPart + (pcmFrameCountIn / srcRatio); + pcmFrameCountOut = (drmp3_uint32)pcmFrameCountOutF; + *pRunningPCMFrameCountFractionalPart = pcmFrameCountOutF - pcmFrameCountOut; + *pRunningPCMFrameCount += pcmFrameCountOut; +} + +typedef struct +{ + drmp3_uint64 bytePos; + drmp3_uint64 pcmFrameIndex; /* <-- After sample rate conversion. */ +} drmp3__seeking_mp3_frame_info; + +drmp3_bool32 drmp3_calculate_seek_points(drmp3* pMP3, drmp3_uint32* pSeekPointCount, drmp3_seek_point* pSeekPoints) +{ + drmp3_uint32 seekPointCount; + drmp3_uint64 currentPCMFrame; + drmp3_uint64 totalMP3FrameCount; + drmp3_uint64 totalPCMFrameCount; + + if (pMP3 == NULL || pSeekPointCount == NULL || pSeekPoints == NULL) { + return DRMP3_FALSE; /* Invalid args. */ + } + + seekPointCount = *pSeekPointCount; + if (seekPointCount == 0) { + return DRMP3_FALSE; /* The client has requested no seek points. Consider this to be invalid arguments since the client has probably not intended this. */ + } + + /* We'll need to seek back to the current sample after calculating the seekpoints so we need to go ahead and grab the current location at the top. */ + currentPCMFrame = pMP3->currentPCMFrame; + + /* We never do more than the total number of MP3 frames and we limit it to 32-bits. */ + if (!drmp3_get_mp3_and_pcm_frame_count(pMP3, &totalMP3FrameCount, &totalPCMFrameCount)) { + return DRMP3_FALSE; + } + + /* If there's less than DRMP3_SEEK_LEADING_MP3_FRAMES+1 frames we just report 1 seek point which will be the very start of the stream. 
*/ + if (totalMP3FrameCount < DRMP3_SEEK_LEADING_MP3_FRAMES+1) { + seekPointCount = 1; + pSeekPoints[0].seekPosInBytes = 0; + pSeekPoints[0].pcmFrameIndex = 0; + pSeekPoints[0].mp3FramesToDiscard = 0; + pSeekPoints[0].pcmFramesToDiscard = 0; + } else { + drmp3_uint64 pcmFramesBetweenSeekPoints; + drmp3__seeking_mp3_frame_info mp3FrameInfo[DRMP3_SEEK_LEADING_MP3_FRAMES+1]; + drmp3_uint64 runningPCMFrameCount = 0; + float runningPCMFrameCountFractionalPart = 0; + drmp3_uint64 nextTargetPCMFrame; + drmp3_uint32 iMP3Frame; + drmp3_uint32 iSeekPoint; + + if (seekPointCount > totalMP3FrameCount-1) { + seekPointCount = (drmp3_uint32)totalMP3FrameCount-1; + } + + pcmFramesBetweenSeekPoints = totalPCMFrameCount / (seekPointCount+1); + + /* + Here is where we actually calculate the seek points. We need to start by moving to the start of the stream. We then enumerate over each + MP3 frame. + */ + if (!drmp3_seek_to_start_of_stream(pMP3)) { + return DRMP3_FALSE; + } + + /* + We need to cache the byte positions of the previous MP3 frames. As a new MP3 frame is iterated, we cycle the byte positions in this + array. The value in the first item in this array is the byte position that will be reported in the next seek point. + */ + + /* We need to initialize the array of MP3 byte positions for the leading MP3 frames. */ + for (iMP3Frame = 0; iMP3Frame < DRMP3_SEEK_LEADING_MP3_FRAMES+1; ++iMP3Frame) { + drmp3_uint32 pcmFramesInCurrentMP3FrameIn; + + /* The byte position of the next frame will be the stream's cursor position, minus whatever is sitting in the buffer. */ + DRMP3_ASSERT(pMP3->streamCursor >= pMP3->dataSize); + mp3FrameInfo[iMP3Frame].bytePos = pMP3->streamCursor - pMP3->dataSize; + mp3FrameInfo[iMP3Frame].pcmFrameIndex = runningPCMFrameCount; + + /* We need to get information about this frame so we can know how many samples it contained. 
*/ + pcmFramesInCurrentMP3FrameIn = drmp3_decode_next_frame_ex(pMP3, NULL, DRMP3_FALSE); + if (pcmFramesInCurrentMP3FrameIn == 0) { + return DRMP3_FALSE; /* This should never happen. */ + } + + drmp3__accumulate_running_pcm_frame_count(pMP3, pcmFramesInCurrentMP3FrameIn, &runningPCMFrameCount, &runningPCMFrameCountFractionalPart); + } + + /* + At this point we will have extracted the byte positions of the leading MP3 frames. We can now start iterating over each seek point and + calculate them. + */ + nextTargetPCMFrame = 0; + for (iSeekPoint = 0; iSeekPoint < seekPointCount; ++iSeekPoint) { + nextTargetPCMFrame += pcmFramesBetweenSeekPoints; + + for (;;) { + if (nextTargetPCMFrame < runningPCMFrameCount) { + /* The next seek point is in the current MP3 frame. */ + pSeekPoints[iSeekPoint].seekPosInBytes = mp3FrameInfo[0].bytePos; + pSeekPoints[iSeekPoint].pcmFrameIndex = nextTargetPCMFrame; + pSeekPoints[iSeekPoint].mp3FramesToDiscard = DRMP3_SEEK_LEADING_MP3_FRAMES; + pSeekPoints[iSeekPoint].pcmFramesToDiscard = (drmp3_uint16)(nextTargetPCMFrame - mp3FrameInfo[DRMP3_SEEK_LEADING_MP3_FRAMES-1].pcmFrameIndex); + break; + } else { + size_t i; + drmp3_uint32 pcmFramesInCurrentMP3FrameIn; + + /* + The next seek point is not in the current MP3 frame, so continue on to the next one. The first thing to do is cycle the cached + MP3 frame info. + */ + for (i = 0; i < drmp3_countof(mp3FrameInfo)-1; ++i) { + mp3FrameInfo[i] = mp3FrameInfo[i+1]; + } + + /* Cache previous MP3 frame info. */ + mp3FrameInfo[drmp3_countof(mp3FrameInfo)-1].bytePos = pMP3->streamCursor - pMP3->dataSize; + mp3FrameInfo[drmp3_countof(mp3FrameInfo)-1].pcmFrameIndex = runningPCMFrameCount; + + /* + Go to the next MP3 frame. This shouldn't ever fail, but just in case it does we just set the seek point and break. If it happens, it + should only ever do it for the last seek point. 
+ */ + pcmFramesInCurrentMP3FrameIn = drmp3_decode_next_frame_ex(pMP3, NULL, DRMP3_TRUE); + if (pcmFramesInCurrentMP3FrameIn == 0) { + pSeekPoints[iSeekPoint].seekPosInBytes = mp3FrameInfo[0].bytePos; + pSeekPoints[iSeekPoint].pcmFrameIndex = nextTargetPCMFrame; + pSeekPoints[iSeekPoint].mp3FramesToDiscard = DRMP3_SEEK_LEADING_MP3_FRAMES; + pSeekPoints[iSeekPoint].pcmFramesToDiscard = (drmp3_uint16)(nextTargetPCMFrame - mp3FrameInfo[DRMP3_SEEK_LEADING_MP3_FRAMES-1].pcmFrameIndex); + break; + } + + drmp3__accumulate_running_pcm_frame_count(pMP3, pcmFramesInCurrentMP3FrameIn, &runningPCMFrameCount, &runningPCMFrameCountFractionalPart); + } + } + } + + /* Finally, we need to seek back to where we were. */ + if (!drmp3_seek_to_start_of_stream(pMP3)) { + return DRMP3_FALSE; + } + if (!drmp3_seek_to_pcm_frame(pMP3, currentPCMFrame)) { + return DRMP3_FALSE; + } + } + + *pSeekPointCount = seekPointCount; + return DRMP3_TRUE; +} + +drmp3_bool32 drmp3_bind_seek_table(drmp3* pMP3, drmp3_uint32 seekPointCount, drmp3_seek_point* pSeekPoints) +{ + if (pMP3 == NULL) { + return DRMP3_FALSE; + } + + if (seekPointCount == 0 || pSeekPoints == NULL) { + /* Unbinding. */ + pMP3->seekPointCount = 0; + pMP3->pSeekPoints = NULL; + } else { + /* Binding. */ + pMP3->seekPointCount = seekPointCount; + pMP3->pSeekPoints = pSeekPoints; + } + + return DRMP3_TRUE; +} + + +float* drmp3__full_read_and_close_f32(drmp3* pMP3, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount) +{ + drmp3_uint64 totalFramesRead = 0; + drmp3_uint64 framesCapacity = 0; + float* pFrames = NULL; + float temp[4096]; + + DRMP3_ASSERT(pMP3 != NULL); + + for (;;) { + drmp3_uint64 framesToReadRightNow = drmp3_countof(temp) / pMP3->channels; + drmp3_uint64 framesJustRead = drmp3_read_pcm_frames_f32(pMP3, framesToReadRightNow, temp); + if (framesJustRead == 0) { + break; + } + + /* Reallocate the output buffer if there's not enough room. 
*/ + if (framesCapacity < totalFramesRead + framesJustRead) { + drmp3_uint64 oldFramesBufferSize; + drmp3_uint64 newFramesBufferSize; + drmp3_uint64 newFramesCap; + float* pNewFrames; + + /* Grow geometrically, but never to less than what this pass needs. */ + newFramesCap = framesCapacity * 2; + if (newFramesCap < totalFramesRead + framesJustRead) { + newFramesCap = totalFramesRead + framesJustRead; + } + + oldFramesBufferSize = framesCapacity * pMP3->channels * sizeof(float); + newFramesBufferSize = newFramesCap * pMP3->channels * sizeof(float); + if (newFramesBufferSize > DRMP3_SIZE_MAX) { + break; + } + + pNewFrames = (float*)drmp3__realloc_from_callbacks(pFrames, (size_t)newFramesBufferSize, (size_t)oldFramesBufferSize, &pMP3->allocationCallbacks); + if (pNewFrames == NULL) { + /* Out of memory. Release what was decoded so far and report failure (NULL, 0 frames) instead of returning the freed pointer. */ + drmp3__free_from_callbacks(pFrames, &pMP3->allocationCallbacks); + pFrames = NULL; + totalFramesRead = 0; + break; + } + + pFrames = pNewFrames; + framesCapacity = newFramesCap; + } + + DRMP3_COPY_MEMORY(pFrames + totalFramesRead*pMP3->channels, temp, (size_t)(framesJustRead*pMP3->channels*sizeof(float))); + totalFramesRead += framesJustRead; + + /* If we read fewer frames than we asked for it means we've reached the end. 
*/ + if (framesJustRead != framesToReadRightNow) { + break; + } + } + + if (pConfig != NULL) { + pConfig->outputChannels = pMP3->channels; + pConfig->outputSampleRate = pMP3->sampleRate; + } + + drmp3_uninit(pMP3); + + if (pTotalFrameCount) { + *pTotalFrameCount = totalFramesRead; + } + + return pFrames; +} + +/* +Reads every remaining PCM frame from pMP3 as signed 16-bit samples into a buffer obtained from the decoder's allocation +callbacks, then uninitializes the decoder. When pConfig is not NULL it receives the decoder's channel count and sample +rate. When pTotalFrameCount is not NULL it receives the number of frames in the returned buffer. Returns NULL when no +frame was read or when growing the buffer failed. +*/ +drmp3_int16* drmp3__full_read_and_close_s16(drmp3* pMP3, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount) +{ + drmp3_uint64 totalFramesRead = 0; + drmp3_uint64 framesCapacity = 0; + drmp3_int16* pFrames = NULL; + drmp3_int16 temp[4096]; + + DRMP3_ASSERT(pMP3 != NULL); + + for (;;) { + drmp3_uint64 framesToReadRightNow = drmp3_countof(temp) / pMP3->channels; + drmp3_uint64 framesJustRead = drmp3_read_pcm_frames_s16(pMP3, framesToReadRightNow, temp); + if (framesJustRead == 0) { + break; + } + + /* Reallocate the output buffer if there's not enough room. */ + if (framesCapacity < totalFramesRead + framesJustRead) { + drmp3_uint64 newFramesBufferSize; + drmp3_uint64 oldFramesBufferSize; + drmp3_uint64 newFramesCap; + drmp3_int16* pNewFrames; + + /* Grow geometrically, but never to less than what this pass needs. */ + newFramesCap = framesCapacity * 2; + if (newFramesCap < totalFramesRead + framesJustRead) { + newFramesCap = totalFramesRead + framesJustRead; + } + + oldFramesBufferSize = framesCapacity * pMP3->channels * sizeof(drmp3_int16); + newFramesBufferSize = newFramesCap * pMP3->channels * sizeof(drmp3_int16); + if (newFramesBufferSize > DRMP3_SIZE_MAX) { + break; + } + + pNewFrames = (drmp3_int16*)drmp3__realloc_from_callbacks(pFrames, (size_t)newFramesBufferSize, (size_t)oldFramesBufferSize, &pMP3->allocationCallbacks); + if (pNewFrames == NULL) { + /* Out of memory. Release what was decoded so far and report failure (NULL, 0 frames) instead of returning the freed pointer. */ + drmp3__free_from_callbacks(pFrames, &pMP3->allocationCallbacks); + pFrames = NULL; + totalFramesRead = 0; + break; + } + + pFrames = pNewFrames; + /* Record the new capacity. Without this it stays at 0, so the buffer is reallocated on every pass and the old size handed to the allocator is always 0. */ + framesCapacity = newFramesCap; + } + + DRMP3_COPY_MEMORY(pFrames + totalFramesRead*pMP3->channels, temp, (size_t)(framesJustRead*pMP3->channels*sizeof(drmp3_int16))); + totalFramesRead += framesJustRead; + + /* If we read fewer frames than we asked for it means we've 
reached the end. */ + if (framesJustRead != framesToReadRightNow) { + break; + } + } + + if (pConfig != NULL) { + pConfig->outputChannels = pMP3->channels; + pConfig->outputSampleRate = pMP3->sampleRate; + } + + drmp3_uninit(pMP3); + + if (pTotalFrameCount) { + *pTotalFrameCount = totalFramesRead; + } + + return pFrames; +} + + +float* drmp3_open_and_read_pcm_frames_f32(drmp3_read_proc onRead, drmp3_seek_proc onSeek, void* pUserData, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks) +{ + drmp3 mp3; + if (!drmp3_init(&mp3, onRead, onSeek, pUserData, pConfig, pAllocationCallbacks)) { + return NULL; + } + + return drmp3__full_read_and_close_f32(&mp3, pConfig, pTotalFrameCount); +} + +drmp3_int16* drmp3_open_and_read_pcm_frames_s16(drmp3_read_proc onRead, drmp3_seek_proc onSeek, void* pUserData, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks) +{ + drmp3 mp3; + if (!drmp3_init(&mp3, onRead, onSeek, pUserData, pConfig, pAllocationCallbacks)) { + return NULL; + } + + return drmp3__full_read_and_close_s16(&mp3, pConfig, pTotalFrameCount); +} + + +float* drmp3_open_memory_and_read_pcm_frames_f32(const void* pData, size_t dataSize, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks) +{ + drmp3 mp3; + if (!drmp3_init_memory(&mp3, pData, dataSize, pConfig, pAllocationCallbacks)) { + return NULL; + } + + return drmp3__full_read_and_close_f32(&mp3, pConfig, pTotalFrameCount); +} + +drmp3_int16* drmp3_open_memory_and_read_pcm_frames_s16(const void* pData, size_t dataSize, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks) +{ + drmp3 mp3; + if (!drmp3_init_memory(&mp3, pData, dataSize, pConfig, pAllocationCallbacks)) { + return NULL; + } + + return drmp3__full_read_and_close_s16(&mp3, pConfig, pTotalFrameCount); +} + + +#ifndef 
DR_MP3_NO_STDIO +float* drmp3_open_file_and_read_pcm_frames_f32(const char* filePath, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks) +{ + drmp3 mp3; + if (!drmp3_init_file(&mp3, filePath, pConfig, pAllocationCallbacks)) { + return NULL; + } + + return drmp3__full_read_and_close_f32(&mp3, pConfig, pTotalFrameCount); +} + +drmp3_int16* drmp3_open_file_and_read_pcm_frames_s16(const char* filePath, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks) +{ + drmp3 mp3; + if (!drmp3_init_file(&mp3, filePath, pConfig, pAllocationCallbacks)) { + return NULL; + } + + return drmp3__full_read_and_close_s16(&mp3, pConfig, pTotalFrameCount); +} +#endif + +void drmp3_free(void* p, const drmp3_allocation_callbacks* pAllocationCallbacks) +{ + if (pAllocationCallbacks != NULL) { + drmp3__free_from_callbacks(p, pAllocationCallbacks); + } else { + drmp3__free_default(p, NULL); + } +} + +#endif /*DR_MP3_IMPLEMENTATION*/ + +/* +DIFFERENCES BETWEEN minimp3 AND dr_mp3 +====================================== +- First, keep in mind that minimp3 (https://github.com/lieff/minimp3) is where all the real work was done. All of the + code relating to the actual decoding remains mostly unmodified, apart from some namespacing changes. +- dr_mp3 adds a pulling style API which allows you to deliver raw data via callbacks. So, rather than pushing data + to the decoder, the decoder _pulls_ data from your callbacks. +- In addition to callbacks, a decoder can be initialized from a block of memory and a file. +- The dr_mp3 pull API reads PCM frames rather than whole MP3 frames. +- dr_mp3 adds convenience APIs for opening and decoding entire files in one go. +- dr_mp3 is fully namespaced, including the implementation section, which is more suitable when compiling projects + as a single translation unit (aka unity builds). 
At the time of writing this, a unity build is not possible when + using minimp3 in conjunction with stb_vorbis. dr_mp3 addresses this. +*/ + +/* +REVISION HISTORY +================ +v0.5.3 - 2019-11-14 + - Fix typos in documentation. + +v0.5.2 - 2019-11-02 + - Bring up to date with minimp3. + +v0.5.1 - 2019-10-08 + - Fix a warning with GCC. + +v0.5.0 - 2019-10-07 + - API CHANGE: Add support for user defined memory allocation routines. This system allows the program to specify their own memory allocation + routines with a user data pointer for client-specific contextual data. This adds an extra parameter to the end of the following APIs: + - drmp3_init() + - drmp3_init_file() + - drmp3_init_memory() + - drmp3_open_and_read_pcm_frames_f32() + - drmp3_open_and_read_pcm_frames_s16() + - drmp3_open_memory_and_read_pcm_frames_f32() + - drmp3_open_memory_and_read_pcm_frames_s16() + - drmp3_open_file_and_read_pcm_frames_f32() + - drmp3_open_file_and_read_pcm_frames_s16() + - API CHANGE: Renamed the following APIs: + - drmp3_open_and_read_f32() -> drmp3_open_and_read_pcm_frames_f32() + - drmp3_open_and_read_s16() -> drmp3_open_and_read_pcm_frames_s16() + - drmp3_open_memory_and_read_f32() -> drmp3_open_memory_and_read_pcm_frames_f32() + - drmp3_open_memory_and_read_s16() -> drmp3_open_memory_and_read_pcm_frames_s16() + - drmp3_open_file_and_read_f32() -> drmp3_open_file_and_read_pcm_frames_f32() + - drmp3_open_file_and_read_s16() -> drmp3_open_file_and_read_pcm_frames_s16() + +v0.4.7 - 2019-07-28 + - Fix a compiler error. + +v0.4.6 - 2019-06-14 + - Fix a compiler error. + +v0.4.5 - 2019-06-06 + - Bring up to date with minimp3. + +v0.4.4 - 2019-05-06 + - Fixes to the VC6 build. + +v0.4.3 - 2019-05-05 + - Use the channel count and/or sample rate of the first MP3 frame instead of DR_MP3_DEFAULT_CHANNELS and + DR_MP3_DEFAULT_SAMPLE_RATE when they are set to 0. To use the old behaviour, just set the relevant property to + DR_MP3_DEFAULT_CHANNELS or DR_MP3_DEFAULT_SAMPLE_RATE. 
+ - Add s16 reading APIs + - drmp3_read_pcm_frames_s16 + - drmp3_open_memory_and_read_pcm_frames_s16 + - drmp3_open_and_read_pcm_frames_s16 + - drmp3_open_file_and_read_pcm_frames_s16 + - Add drmp3_get_mp3_and_pcm_frame_count() to the public header section. + - Add support for C89. + - Change license to choice of public domain or MIT-0. + +v0.4.2 - 2019-02-21 + - Fix a warning. + +v0.4.1 - 2018-12-30 + - Fix a warning. + +v0.4.0 - 2018-12-16 + - API CHANGE: Rename some APIs: + - drmp3_read_f32 -> to drmp3_read_pcm_frames_f32 + - drmp3_seek_to_frame -> drmp3_seek_to_pcm_frame + - drmp3_open_and_decode_f32 -> drmp3_open_and_read_pcm_frames_f32 + - drmp3_open_and_decode_memory_f32 -> drmp3_open_memory_and_read_pcm_frames_f32 + - drmp3_open_and_decode_file_f32 -> drmp3_open_file_and_read_pcm_frames_f32 + - Add drmp3_get_pcm_frame_count(). + - Add drmp3_get_mp3_frame_count(). + - Improve seeking performance. + +v0.3.2 - 2018-09-11 + - Fix a couple of memory leaks. + - Bring up to date with minimp3. + +v0.3.1 - 2018-08-25 + - Fix C++ build. + +v0.3.0 - 2018-08-25 + - Bring up to date with minimp3. This has a minor API change: the "pcm" parameter of drmp3dec_decode_frame() has + been changed from short* to void* because it can now output both s16 and f32 samples, depending on whether or + not the DR_MP3_FLOAT_OUTPUT option is set. + +v0.2.11 - 2018-08-08 + - Fix a bug where the last part of a file is not read. + +v0.2.10 - 2018-08-07 + - Improve 64-bit detection. + +v0.2.9 - 2018-08-05 + - Fix C++ build on older versions of GCC. + - Bring up to date with minimp3. + +v0.2.8 - 2018-08-02 + - Fix compilation errors with older versions of GCC. + +v0.2.7 - 2018-07-13 + - Bring up to date with minimp3. + +v0.2.6 - 2018-07-12 + - Bring up to date with minimp3. + +v0.2.5 - 2018-06-22 + - Bring up to date with minimp3. + +v0.2.4 - 2018-05-12 + - Bring up to date with minimp3. + +v0.2.3 - 2018-04-29 + - Fix TCC build. 
+ +v0.2.2 - 2018-04-28 + - Fix bug when opening a decoder from memory. + +v0.2.1 - 2018-04-27 + - Efficiency improvements when the decoder reaches the end of the stream. + +v0.2 - 2018-04-21 + - Bring up to date with minimp3. + - Start using major.minor.revision versioning. + +v0.1d - 2018-03-30 + - Bring up to date with minimp3. + +v0.1c - 2018-03-11 + - Fix C++ build error. + +v0.1b - 2018-03-07 + - Bring up to date with minimp3. + +v0.1a - 2018-02-28 + - Fix compilation error on GCC/Clang. + - Fix some warnings. + +v0.1 - 2018-02-xx + - Initial versioned release. +*/ + +/* +This software is available as a choice of the following licenses. Choose +whichever you prefer. + +=============================================================================== +ALTERNATIVE 1 - Public Domain (www.unlicense.org) +=============================================================================== +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. + +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +For more information, please refer to <http://unlicense.org/> + +=============================================================================== +ALTERNATIVE 2 - MIT No Attribution +=============================================================================== +Copyright 2018 David Reid + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +/* + https://github.com/lieff/minimp3 + To the extent possible under law, the author(s) have dedicated all copyright and related and neighboring rights to this software to the public domain worldwide. + This software is distributed without any warranty. + See <http://creativecommons.org/publicdomain/zero/1.0/>. +*/ diff --git a/include/kfr/io/dr/dr_wav.h b/include/kfr/io/dr/dr_wav.h @@ -1,117 +1,293 @@ -// clang-format off - -// WAV audio loader and writer. Public domain. See "unlicense" statement at the end of this file. 
-// dr_wav - v0.8.5 - 2018-09-11 -// -// David Reid - [email protected] - -// USAGE -// -// This is a single-file library. To use it, do something like the following in one .c file. -// #define DR_WAV_IMPLEMENTATION -// #include "dr_wav.h" -// -// You can then #include this file in other parts of the program as you would with any other header file. Do something -// like the following to read audio data: -// -// drwav wav; -// if (!drwav_init_file(&wav, "my_song.wav")) { -// // Error opening WAV file. -// } -// -// drwav_int32* pDecodedInterleavedSamples = malloc(wav.totalSampleCount * sizeof(drwav_int32)); -// size_t numberOfSamplesActuallyDecoded = drwav_read_s32(&wav, wav.totalSampleCount, pDecodedInterleavedSamples); -// -// ... -// -// drwav_uninit(&wav); -// -// You can also use drwav_open() to allocate and initialize the loader for you: -// -// drwav* pWav = drwav_open_file("my_song.wav"); -// if (pWav == NULL) { -// // Error opening WAV file. -// } -// -// ... -// -// drwav_close(pWav); -// -// If you just want to quickly open and read the audio data in a single operation you can do something like this: -// -// unsigned int channels; -// unsigned int sampleRate; -// drwav_uint64 totalSampleCount; -// float* pSampleData = drwav_open_and_read_file_s32("my_song.wav", &channels, &sampleRate, &totalSampleCount); -// if (pSampleData == NULL) { -// // Error opening and reading WAV file. -// } -// -// ... 
-// -// drwav_free(pSampleData); -// -// The examples above use versions of the API that convert the audio data to a consistent format (32-bit signed PCM, in -// this case), but you can still output the audio data in its internal format (see notes below for supported formats): -// -// size_t samplesRead = drwav_read(&wav, wav.totalSampleCount, pDecodedInterleavedSamples); -// -// You can also read the raw bytes of audio data, which could be useful if dr_wav does not have native support for -// a particular data format: -// -// size_t bytesRead = drwav_read_raw(&wav, bytesToRead, pRawDataBuffer); -// -// -// dr_wav has seamless support the Sony Wave64 format. The decoder will automatically detect it and it should Just Work -// without any manual intervention. -// -// -// dr_wav can also be used to output WAV files. This does not currently support compressed formats. To use this, look at -// drwav_open_write(), drwav_open_file_write(), etc. Use drwav_write() to write samples, or drwav_write_raw() to write -// raw data in the "data" chunk. -// -// drwav_data_format format; -// format.container = drwav_container_riff; // <-- drwav_container_riff = normal WAV files, drwav_container_w64 = Sony Wave64. -// format.format = DR_WAVE_FORMAT_PCM; // <-- Any of the DR_WAVE_FORMAT_* codes. -// format.channels = 2; -// format.sampleRate = 44100; -// format.bitsPerSample = 16; -// drwav* pWav = drwav_open_file_write("data/recording.wav", &format); -// -// ... -// -// drwav_uint64 samplesWritten = drwav_write(pWav, sampleCount, pSamples); -// -// -// -// OPTIONS -// #define these options before including this file. -// -// #define DR_WAV_NO_CONVERSION_API -// Disables conversion APIs such as drwav_read_f32() and drwav_s16_to_f32(). -// -// #define DR_WAV_NO_STDIO -// Disables drwav_open_file(), drwav_open_file_write(), etc. -// -// -// -// QUICK NOTES -// - Samples are always interleaved. -// - The default read function does not do any data conversion. 
Use drwav_read_f32() to read and convert audio data -// to IEEE 32-bit floating point samples, drwav_read_s32() to read samples as signed 32-bit PCM and drwav_read_s16() -// to read samples as signed 16-bit PCM. Tested and supported internal formats include the following: -// - Unsigned 8-bit PCM -// - Signed 12-bit PCM -// - Signed 16-bit PCM -// - Signed 24-bit PCM -// - Signed 32-bit PCM -// - IEEE 32-bit floating point -// - IEEE 64-bit floating point -// - A-law and u-law -// - Microsoft ADPCM -// - IMA ADPCM (DVI, format code 0x11) -// - dr_wav will try to read the WAV file as best it can, even if it's not strictly conformant to the WAV format. +/* +WAV audio loader and writer. Choice of public domain or MIT-0. See license statements at the end of this file. +dr_wav - v0.11.1 - 2019-10-07 + +David Reid - [email protected] +*/ + +/* +RELEASE NOTES - v0.11.0 +======================= +Version 0.11.0 has breaking API changes. + +Improved Client-Defined Memory Allocation +----------------------------------------- +The main change with this release is the addition of a more flexible way of implementing custom memory allocation routines. The +existing system of DRWAV_MALLOC, DRWAV_REALLOC and DRWAV_FREE are still in place and will be used by default when no custom +allocation callbacks are specified. + +To use the new system, you pass in a pointer to a drwav_allocation_callbacks object to drwav_init() and family, like this: + + void* my_malloc(size_t sz, void* pUserData) + { + return malloc(sz); + } + void* my_realloc(void* p, size_t sz, void* pUserData) + { + return realloc(p, sz); + } + void my_free(void* p, void* pUserData) + { + free(p); + } + + ... 
+ + drwav_allocation_callbacks allocationCallbacks; + allocationCallbacks.pUserData = &myData; + allocationCallbacks.onMalloc = my_malloc; + allocationCallbacks.onRealloc = my_realloc; + allocationCallbacks.onFree = my_free; + drwav_init_file(&wav, "my_file.wav", &allocationCallbacks); + +The advantage of this new system is that it allows you to specify user data which will be passed in to the allocation routines. + +Passing in null for the allocation callbacks object will cause dr_wav to use defaults which is the same as DRWAV_MALLOC, +DRWAV_REALLOC and DRWAV_FREE and the equivalent of how it worked in previous versions. + +Every API that opens a drwav object now takes this extra parameter. These include the following: + + drwav_init() + drwav_init_ex() + drwav_init_file() + drwav_init_file_ex() + drwav_init_file_w() + drwav_init_file_w_ex() + drwav_init_memory() + drwav_init_memory_ex() + drwav_init_write() + drwav_init_write_sequential() + drwav_init_write_sequential_pcm_frames() + drwav_init_file_write() + drwav_init_file_write_sequential() + drwav_init_file_write_sequential_pcm_frames() + drwav_init_file_write_w() + drwav_init_file_write_sequential_w() + drwav_init_file_write_sequential_pcm_frames_w() + drwav_init_memory_write() + drwav_init_memory_write_sequential() + drwav_init_memory_write_sequential_pcm_frames() + drwav_open_and_read_pcm_frames_s16() + drwav_open_and_read_pcm_frames_f32() + drwav_open_and_read_pcm_frames_s32() + drwav_open_file_and_read_pcm_frames_s16() + drwav_open_file_and_read_pcm_frames_f32() + drwav_open_file_and_read_pcm_frames_s32() + drwav_open_file_and_read_pcm_frames_s16_w() + drwav_open_file_and_read_pcm_frames_f32_w() + drwav_open_file_and_read_pcm_frames_s32_w() + drwav_open_memory_and_read_pcm_frames_s16() + drwav_open_memory_and_read_pcm_frames_f32() + drwav_open_memory_and_read_pcm_frames_s32() + +Endian Improvements +------------------- +Previously, the following APIs returned little-endian audio data. 
These now return native-endian data. This improves compatibility +on big-endian architectures. + + drwav_read_pcm_frames() + drwav_read_pcm_frames_s16() + drwav_read_pcm_frames_s32() + drwav_read_pcm_frames_f32() + drwav_open_and_read_pcm_frames_s16() + drwav_open_and_read_pcm_frames_s32() + drwav_open_and_read_pcm_frames_f32() + drwav_open_file_and_read_pcm_frames_s16() + drwav_open_file_and_read_pcm_frames_s32() + drwav_open_file_and_read_pcm_frames_f32() + drwav_open_file_and_read_pcm_frames_s16_w() + drwav_open_file_and_read_pcm_frames_s32_w() + drwav_open_file_and_read_pcm_frames_f32_w() + drwav_open_memory_and_read_pcm_frames_s16() + drwav_open_memory_and_read_pcm_frames_s32() + drwav_open_memory_and_read_pcm_frames_f32() + +APIs have been added to give you explicit control over whether or not audio data is read or written in big- or little-endian byte +order: + + drwav_read_pcm_frames_le() + drwav_read_pcm_frames_be() + drwav_read_pcm_frames_s16le() + drwav_read_pcm_frames_s16be() + drwav_read_pcm_frames_f32le() + drwav_read_pcm_frames_f32be() + drwav_read_pcm_frames_s32le() + drwav_read_pcm_frames_s32be() + drwav_write_pcm_frames_le() + drwav_write_pcm_frames_be() + +Removed APIs +------------ +The following APIs were deprecated in version 0.10.0 and have now been removed: + + drwav_open() + drwav_open_ex() + drwav_open_write() + drwav_open_write_sequential() + drwav_open_file() + drwav_open_file_ex() + drwav_open_file_write() + drwav_open_file_write_sequential() + drwav_open_memory() + drwav_open_memory_ex() + drwav_open_memory_write() + drwav_open_memory_write_sequential() + drwav_close() + + + +RELEASE NOTES - v0.10.0 +======================= +Version 0.10.0 has breaking API changes. There are no significant bug fixes in this release, so if you are affected you do +not need to upgrade. 
+ +Removed APIs +------------ +The following APIs were deprecated in version 0.9.0 and have been completely removed in version 0.10.0: + + drwav_read() + drwav_read_s16() + drwav_read_f32() + drwav_read_s32() + drwav_seek_to_sample() + drwav_write() + drwav_open_and_read_s16() + drwav_open_and_read_f32() + drwav_open_and_read_s32() + drwav_open_file_and_read_s16() + drwav_open_file_and_read_f32() + drwav_open_file_and_read_s32() + drwav_open_memory_and_read_s16() + drwav_open_memory_and_read_f32() + drwav_open_memory_and_read_s32() + drwav::totalSampleCount + +See release notes for version 0.9.0 at the bottom of this file for replacement APIs. + +Deprecated APIs +--------------- +The following APIs have been deprecated. There is a confusing and completely arbitrary difference between drwav_init*() and +drwav_open*(), where drwav_init*() initializes a pre-allocated drwav object, whereas drwav_open*() will first allocate a +drwav object on the heap and then initialize it. drwav_open*() has been deprecated which means you must now use a pre- +allocated drwav object with drwav_init*(). If you need the previous functionality, you can just do a malloc() followed by +a call to one of the drwav_init*() APIs. + + drwav_open() + drwav_open_ex() + drwav_open_write() + drwav_open_write_sequential() + drwav_open_file() + drwav_open_file_ex() + drwav_open_file_write() + drwav_open_file_write_sequential() + drwav_open_memory() + drwav_open_memory_ex() + drwav_open_memory_write() + drwav_open_memory_write_sequential() + drwav_close() + +These APIs will be removed completely in a future version. The rationale for this change is to remove confusion between the +two different ways to initialize a drwav object. +*/ + +/* +USAGE +===== +This is a single-file library. To use it, do something like the following in one .c file. + #define DR_WAV_IMPLEMENTATION + #include "dr_wav.h" + +You can then #include this file in other parts of the program as you would with any other header file.
Do something +like the following to read audio data: + + drwav wav; + if (!drwav_init_file(&wav, "my_song.wav")) { + // Error opening WAV file. + } + + drwav_int32* pDecodedInterleavedPCMFrames = malloc(wav.totalPCMFrameCount * wav.channels * sizeof(drwav_int32)); + size_t numberOfSamplesActuallyDecoded = drwav_read_pcm_frames_s32(&wav, wav.totalPCMFrameCount, pDecodedInterleavedPCMFrames); + + ... + + drwav_uninit(&wav); + +If you just want to quickly open and read the audio data in a single operation you can do something like this: + + unsigned int channels; + unsigned int sampleRate; + drwav_uint64 totalPCMFrameCount; + float* pSampleData = drwav_open_file_and_read_pcm_frames_f32("my_song.wav", &channels, &sampleRate, &totalPCMFrameCount); + if (pSampleData == NULL) { + // Error opening and reading WAV file. + } + + ... + + drwav_free(pSampleData); + +The examples above use versions of the API that convert the audio data to a consistent format (32-bit signed PCM, in +this case), but you can still output the audio data in its internal format (see notes below for supported formats): + + size_t framesRead = drwav_read_pcm_frames(&wav, wav.totalPCMFrameCount, pDecodedInterleavedPCMFrames); + +You can also read the raw bytes of audio data, which could be useful if dr_wav does not have native support for +a particular data format: + size_t bytesRead = drwav_read_raw(&wav, bytesToRead, pRawDataBuffer); + + +dr_wav can also be used to output WAV files. This does not currently support compressed formats. To use this, look at +drwav_init_write(), drwav_init_file_write(), etc. Use drwav_write_pcm_frames() to write samples, or drwav_write_raw() +to write raw data in the "data" chunk. + + drwav_data_format format; + format.container = drwav_container_riff; // <-- drwav_container_riff = normal WAV files, drwav_container_w64 = Sony Wave64. + format.format = DR_WAVE_FORMAT_PCM; // <-- Any of the DR_WAVE_FORMAT_* codes. 
+ format.channels = 2; + format.sampleRate = 44100; + format.bitsPerSample = 16; + drwav_init_file_write(&wav, "data/recording.wav", &format); + + ... + + drwav_uint64 framesWritten = drwav_write_pcm_frames(pWav, frameCount, pSamples); + + +dr_wav has seamless support for the Sony Wave64 format. The decoder will automatically detect it and it should Just Work +without any manual intervention. + + +OPTIONS +======= +#define these options before including this file. + +#define DR_WAV_NO_CONVERSION_API + Disables conversion APIs such as drwav_read_pcm_frames_f32() and drwav_s16_to_f32(). + +#define DR_WAV_NO_STDIO + Disables APIs that initialize a decoder from a file such as drwav_init_file(), drwav_init_file_write(), etc. + + + +QUICK NOTES +=========== +- Samples are always interleaved. +- The default read function does not do any data conversion. Use drwav_read_pcm_frames_f32(), drwav_read_pcm_frames_s32() + and drwav_read_pcm_frames_s16() to read and convert audio data to 32-bit floating point, signed 32-bit integer and + signed 16-bit integer samples respectively. Tested and supported internal formats include the following: + - Unsigned 8-bit PCM + - Signed 12-bit PCM + - Signed 16-bit PCM + - Signed 24-bit PCM + - Signed 32-bit PCM + - IEEE 32-bit floating point + - IEEE 64-bit floating point + - A-law and u-law + - Microsoft ADPCM + - IMA ADPCM (DVI, format code 0x11) +- dr_wav will try to read the WAV file as best it can, even if it's not strictly conformant to the WAV format. +*/ #ifndef dr_wav_h #define dr_wav_h @@ -147,7 +323,15 @@ typedef drwav_uint32 drwav_bool32; extern "C" { #endif -// Common data formats. +typedef drwav_int32 drwav_result; +#define DRWAV_SUCCESS 0 +#define DRWAV_ERROR -1 +#define DRWAV_INVALID_ARGS -2 +#define DRWAV_INVALID_OPERATION -3 +#define DRWAV_INVALID_FILE -100 +#define DRWAV_EOF -101 + +/* Common data formats.
*/ #define DR_WAVE_FORMAT_PCM 0x1 #define DR_WAVE_FORMAT_ADPCM 0x2 #define DR_WAVE_FORMAT_IEEE_FLOAT 0x3 @@ -156,6 +340,14 @@ extern "C" { #define DR_WAVE_FORMAT_DVI_ADPCM 0x11 #define DR_WAVE_FORMAT_EXTENSIBLE 0xFFFE +/* Constants. */ +#ifndef DRWAV_MAX_SMPL_LOOPS +#define DRWAV_MAX_SMPL_LOOPS 1 +#endif + +/* Flags to pass into drwav_init_ex(), etc. */ +#define DRWAV_SEQUENTIAL 0x00000001 + typedef enum { drwav_seek_origin_start, @@ -168,42 +360,92 @@ typedef enum drwav_container_w64 } drwav_container; -// Callback for when data is read. Return value is the number of bytes actually read. -// -// pUserData [in] The user data that was passed to drwav_init(), drwav_open() and family. -// pBufferOut [out] The output buffer. -// bytesToRead [in] The number of bytes to read. -// -// Returns the number of bytes actually read. -// -// A return value of less than bytesToRead indicates the end of the stream. Do _not_ return from this callback until -// either the entire bytesToRead is filled or you have reached the end of the stream. +typedef struct +{ + union + { + drwav_uint8 fourcc[4]; + drwav_uint8 guid[16]; + } id; + + /* The size in bytes of the chunk. */ + drwav_uint64 sizeInBytes; + + /* + RIFF = 2 byte alignment. + W64 = 8 byte alignment. + */ + unsigned int paddingSize; +} drwav_chunk_header; + +/* +Callback for when data is read. Return value is the number of bytes actually read. + +pUserData [in] The user data that was passed to drwav_init() and family. +pBufferOut [out] The output buffer. +bytesToRead [in] The number of bytes to read. + +Returns the number of bytes actually read. + +A return value of less than bytesToRead indicates the end of the stream. Do _not_ return from this callback until +either the entire bytesToRead is filled or you have reached the end of the stream. +*/ typedef size_t (* drwav_read_proc)(void* pUserData, void* pBufferOut, size_t bytesToRead); -// Callback for when data is written. 
Returns value is the number of bytes actually written. -// -// pUserData [in] The user data that was passed to drwav_init_write(), drwav_open_write() and family. -// pData [out] A pointer to the data to write. -// bytesToWrite [in] The number of bytes to write. -// -// Returns the number of bytes actually written. -// -// If the return value differs from bytesToWrite, it indicates an error. +/* +Callback for when data is written. Return value is the number of bytes actually written. + +pUserData [in] The user data that was passed to drwav_init_write() and family. +pData [in] A pointer to the data to write. +bytesToWrite [in] The number of bytes to write. + +Returns the number of bytes actually written. + +If the return value differs from bytesToWrite, it indicates an error. +*/ typedef size_t (* drwav_write_proc)(void* pUserData, const void* pData, size_t bytesToWrite); -// Callback for when data needs to be seeked. -// -// pUserData [in] The user data that was passed to drwav_init(), drwav_open() and family. -// offset [in] The number of bytes to move, relative to the origin. Will never be negative. -// origin [in] The origin of the seek - the current position or the start of the stream. -// -// Returns whether or not the seek was successful. -// -// Whether or not it is relative to the beginning or current position is determined by the "origin" parameter which -// will be either drwav_seek_origin_start or drwav_seek_origin_current. +/* +Callback for when data needs to be seeked. + +pUserData [in] The user data that was passed to drwav_init() and family. +offset [in] The number of bytes to move, relative to the origin. Will never be negative. +origin [in] The origin of the seek - the current position or the start of the stream. + +Returns whether or not the seek was successful. + +Whether or not it is relative to the beginning or current position is determined by the "origin" parameter which +will be either drwav_seek_origin_start or drwav_seek_origin_current.
+*/ typedef drwav_bool32 (* drwav_seek_proc)(void* pUserData, int offset, drwav_seek_origin origin); -// Structure for internal use. Only used for loaders opened with drwav_open_memory(). +/* +Callback for when drwav_init_ex() finds a chunk. + +pChunkUserData [in] The user data that was passed to the pChunkUserData parameter of drwav_init_ex() and family. +onRead [in] A pointer to the function to call when reading. +onSeek [in] A pointer to the function to call when seeking. +pReadSeekUserData [in] The user data that was passed to the pReadSeekUserData parameter of drwav_init_ex() and family. +pChunkHeader [in] A pointer to an object containing basic header information about the chunk. Use this to identify the chunk. + +Returns the number of bytes read + seeked. + +To read data from the chunk, call onRead(), passing in pReadSeekUserData as the first parameter. Do the same +for seeking with onSeek(). The return value must be the total number of bytes you have read _plus_ seeked. + +You must not attempt to read beyond the boundary of the chunk. +*/ +typedef drwav_uint64 (* drwav_chunk_proc)(void* pChunkUserData, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pReadSeekUserData, const drwav_chunk_header* pChunkHeader); + +typedef struct +{ + void* pUserData; + void* (* onMalloc)(size_t sz, void* pUserData); + void* (* onRealloc)(void* p, size_t sz, void* pUserData); + void (* onFree)(void* p, void* pUserData); +} drwav_allocation_callbacks; + +/* Structure for internal use. Only used for loaders opened with drwav_init_memory(). */ typedef struct { const drwav_uint8* data; @@ -211,7 +453,7 @@ typedef struct size_t currentReadPos; } drwav__memory_stream; -// Structure for internal use. Only used for writers opened with drwav_open_memory_write(). +/* Structure for internal use. Only used for writers opened with drwav_init_memory_write(). */ typedef struct { void** ppData; @@ -223,8 +465,8 @@ typedef struct typedef struct { - drwav_container container; // RIFF, W64. 
- drwav_uint32 format; // DR_WAVE_FORMAT_* + drwav_container container; /* RIFF, W64. */ + drwav_uint32 format; /* DR_WAVE_FORMAT_* */ drwav_uint32 channels; drwav_uint32 sampleRate; drwav_uint32 bitsPerSample; @@ -232,473 +474,516 @@ typedef struct typedef struct { - // The format tag exactly as specified in the wave file's "fmt" chunk. This can be used by applications - // that require support for data formats not natively supported by dr_wav. + /* + The format tag exactly as specified in the wave file's "fmt" chunk. This can be used by applications + that require support for data formats not natively supported by dr_wav. + */ drwav_uint16 formatTag; - // The number of channels making up the audio data. When this is set to 1 it is mono, 2 is stereo, etc. + /* The number of channels making up the audio data. When this is set to 1 it is mono, 2 is stereo, etc. */ drwav_uint16 channels; - // The sample rate. Usually set to something like 44100. + /* The sample rate. Usually set to something like 44100. */ drwav_uint32 sampleRate; - // Average bytes per second. You probably don't need this, but it's left here for informational purposes. + /* Average bytes per second. You probably don't need this, but it's left here for informational purposes. */ drwav_uint32 avgBytesPerSec; - // Block align. This is equal to the number of channels * bytes per sample. + /* Block align. This is equal to the number of channels * bytes per sample. */ drwav_uint16 blockAlign; - // Bits per sample. + /* Bits per sample. */ drwav_uint16 bitsPerSample; - // The size of the extended data. Only used internally for validation, but left here for informational purposes. + /* The size of the extended data. Only used internally for validation, but left here for informational purposes. */ drwav_uint16 extendedSize; - // The number of valid bits per sample. When <formatTag> is equal to WAVE_FORMAT_EXTENSIBLE, <bitsPerSample> - // is always rounded up to the nearest multiple of 8. 
This variable contains information about exactly how - // many bits a valid per sample. Mainly used for informational purposes. + /* + The number of valid bits per sample. When <formatTag> is equal to WAVE_FORMAT_EXTENSIBLE, <bitsPerSample> + is always rounded up to the nearest multiple of 8. This variable contains information about exactly how + many bits a valid per sample. Mainly used for informational purposes. + */ drwav_uint16 validBitsPerSample; - // The channel mask. Not used at the moment. + /* The channel mask. Not used at the moment. */ drwav_uint32 channelMask; - // The sub-format, exactly as specified by the wave file. + /* The sub-format, exactly as specified by the wave file. */ drwav_uint8 subFormat[16]; } drwav_fmt; typedef struct { - // A pointer to the function to call when more data is needed. + drwav_uint32 cuePointId; + drwav_uint32 type; + drwav_uint32 start; + drwav_uint32 end; + drwav_uint32 fraction; + drwav_uint32 playCount; +} drwav_smpl_loop; + + typedef struct +{ + drwav_uint32 manufacturer; + drwav_uint32 product; + drwav_uint32 samplePeriod; + drwav_uint32 midiUnityNotes; + drwav_uint32 midiPitchFraction; + drwav_uint32 smpteFormat; + drwav_uint32 smpteOffset; + drwav_uint32 numSampleLoops; + drwav_uint32 samplerData; + drwav_smpl_loop loops[DRWAV_MAX_SMPL_LOOPS]; +} drwav_smpl; + +typedef struct +{ + /* A pointer to the function to call when more data is needed. */ drwav_read_proc onRead; - // A pointer to the function to call when data needs to be written. Only used when the drwav object is opened in write mode. + /* A pointer to the function to call when data needs to be written. Only used when the drwav object is opened in write mode. */ drwav_write_proc onWrite; - // A pointer to the function to call when the wav file needs to be seeked. + /* A pointer to the function to call when the wav file needs to be seeked. */ drwav_seek_proc onSeek; - // The user data to pass to callbacks. + /* The user data to pass to callbacks. 
*/ void* pUserData; + /* Allocation callbacks. */ + drwav_allocation_callbacks allocationCallbacks; + - // Whether or not the WAV file is formatted as a standard RIFF file or W64. + /* Whether or not the WAV file is formatted as a standard RIFF file or W64. */ drwav_container container; - // Structure containing format information exactly as specified by the wav file. + /* Structure containing format information exactly as specified by the wav file. */ drwav_fmt fmt; - // The sample rate. Will be set to something like 44100. + /* The sample rate. Will be set to something like 44100. */ drwav_uint32 sampleRate; - // The number of channels. This will be set to 1 for monaural streams, 2 for stereo, etc. + /* The number of channels. This will be set to 1 for monaural streams, 2 for stereo, etc. */ drwav_uint16 channels; - // The bits per sample. Will be set to something like 16, 24, etc. + /* The bits per sample. Will be set to something like 16, 24, etc. */ drwav_uint16 bitsPerSample; - // The number of bytes per sample. - drwav_uint16 bytesPerSample; - - // Equal to fmt.formatTag, or the value specified by fmt.subFormat if fmt.formatTag is equal to 65534 (WAVE_FORMAT_EXTENSIBLE). + /* Equal to fmt.formatTag, or the value specified by fmt.subFormat if fmt.formatTag is equal to 65534 (WAVE_FORMAT_EXTENSIBLE). */ drwav_uint16 translatedFormatTag; - // The total number of samples making up the audio data. Use <totalSampleCount> * <bytesPerSample> to calculate - // the required size of a buffer to hold the entire audio data. - drwav_uint64 totalSampleCount; + /* The total number of PCM frames making up the audio data. */ + drwav_uint64 totalPCMFrameCount; - // The size in bytes of the data chunk. + /* The size in bytes of the data chunk. */ drwav_uint64 dataChunkDataSize; - - // The position in the stream of the first byte of the data chunk. This is used for seeking. + + /* The position in the stream of the first byte of the data chunk. This is used for seeking. 
*/ drwav_uint64 dataChunkDataPos; - // The number of bytes remaining in the data chunk. + /* The number of bytes remaining in the data chunk. */ drwav_uint64 bytesRemaining; - // Only used in sequential write mode. Keeps track of the desired size of the "data" chunk at the point of initialization time. Always - // set to 0 for non-sequential writes and when the drwav object is opened in read mode. Used for validation. + /* + Only used in sequential write mode. Keeps track of the desired size of the "data" chunk at the point of initialization time. Always + set to 0 for non-sequential writes and when the drwav object is opened in read mode. Used for validation. + */ drwav_uint64 dataChunkDataSizeTargetWrite; - // Keeps track of whether or not the wav writer was initialized in sequential mode. + /* Keeps track of whether or not the wav writer was initialized in sequential mode. */ drwav_bool32 isSequentialWrite; - // A hack to avoid a DRWAV_MALLOC() when opening a decoder with drwav_open_memory(). + /* smpl chunk. */ + drwav_smpl smpl; + + + /* A hack to avoid a DRWAV_MALLOC() when opening a decoder with drwav_init_memory(). */ drwav__memory_stream memoryStream; drwav__memory_stream_write memoryStreamWrite; - // Generic data for compressed formats. This data is shared across all block-compressed formats. + /* Generic data for compressed formats. This data is shared across all block-compressed formats. */ struct { - drwav_uint64 iCurrentSample; // The index of the next sample that will be read by drwav_read_*(). This is used with "totalSampleCount" to ensure we don't read excess samples at the end of the last block. + drwav_uint64 iCurrentPCMFrame; /* The index of the next PCM frame that will be read by drwav_read_*(). This is used with "totalPCMFrameCount" to ensure we don't read excess samples at the end of the last block. */ } compressed; - - // Microsoft ADPCM specific data. + + /* Microsoft ADPCM specific data. 
*/ struct { drwav_uint32 bytesRemainingInBlock; drwav_uint16 predictor[2]; drwav_int32 delta[2]; - drwav_int32 cachedSamples[4]; // Samples are stored in this cache during decoding. - drwav_uint32 cachedSampleCount; - drwav_int32 prevSamples[2][2]; // The previous 2 samples for each channel (2 channels at most). + drwav_int32 cachedFrames[4]; /* Samples are stored in this cache during decoding. */ + drwav_uint32 cachedFrameCount; + drwav_int32 prevFrames[2][2]; /* The previous 2 samples for each channel (2 channels at most). */ } msadpcm; - // IMA ADPCM specific data. + /* IMA ADPCM specific data. */ struct { drwav_uint32 bytesRemainingInBlock; drwav_int32 predictor[2]; drwav_int32 stepIndex[2]; - drwav_int32 cachedSamples[16]; // Samples are stored in this cache during decoding. - drwav_uint32 cachedSampleCount; + drwav_int32 cachedFrames[16]; /* Samples are stored in this cache during decoding. */ + drwav_uint32 cachedFrameCount; } ima; } drwav; -// Initializes a pre-allocated drwav object. -// -// onRead [in] The function to call when data needs to be read from the client. -// onSeek [in] The function to call when the read position of the client data needs to move. -// pUserData [in, optional] A pointer to application defined data that will be passed to onRead and onSeek. -// -// Returns true if successful; false otherwise. -// -// Close the loader with drwav_uninit(). -// -// This is the lowest level function for initializing a WAV file. You can also use drwav_init_file() and drwav_init_memory() -// to open the stream from a file or from a block of memory respectively. -// -// If you want dr_wav to manage the memory allocation for you, consider using drwav_open() instead. This will allocate -// a drwav object on the heap and return a pointer to it. 
-// -// See also: drwav_init_file(), drwav_init_memory(), drwav_uninit() -drwav_bool32 drwav_init(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData); - -// Initializes a pre-allocated drwav object for writing. -// -// onWrite [in] The function to call when data needs to be written. -// onSeek [in] The function to call when the write position needs to move. -// pUserData [in, optional] A pointer to application defined data that will be passed to onWrite and onSeek. -// -// Returns true if successful; false otherwise. -// -// Close the writer with drwav_uninit(). -// -// This is the lowest level function for initializing a WAV file. You can also use drwav_init_file() and drwav_init_memory() -// to open the stream from a file or from a block of memory respectively. -// -// If the total sample count is known, you can use drwav_init_write_sequential(). This avoids the need for dr_wav to perform -// a post-processing step for storing the total sample count and the size of the data chunk which requires a backwards seek. -// -// If you want dr_wav to manage the memory allocation for you, consider using drwav_open() instead. This will allocate -// a drwav object on the heap and return a pointer to it. -// -// See also: drwav_init_file_write(), drwav_init_memory_write(), drwav_uninit() -drwav_bool32 drwav_init_write(drwav* pWav, const drwav_data_format* pFormat, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData); -drwav_bool32 drwav_init_write_sequential(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_write_proc onWrite, void* pUserData); - -// Uninitializes the given drwav object. -// -// Use this only for objects initialized with drwav_init(). -void drwav_uninit(drwav* pWav); - - -// Opens a wav file using the given callbacks. -// -// onRead [in] The function to call when data needs to be read from the client. 
-// onSeek [in] The function to call when the read position of the client data needs to move. -// pUserData [in, optional] A pointer to application defined data that will be passed to onRead and onSeek. -// -// Returns null on error. -// -// Close the loader with drwav_close(). -// -// You can also use drwav_open_file() and drwav_open_memory() to open the stream from a file or from a block of -// memory respectively. -// -// This is different from drwav_init() in that it will allocate the drwav object for you via DRWAV_MALLOC() before -// initializing it. -// -// See also: drwav_open_file(), drwav_open_memory(), drwav_close() -drwav* drwav_open(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData); - -// Opens a wav file for writing using the given callbacks. -// -// onWrite [in] The function to call when data needs to be written. -// onSeek [in] The function to call when the write position needs to move. -// pUserData [in, optional] A pointer to application defined data that will be passed to onWrite and onSeek. -// -// Returns null on error. -// -// Close the loader with drwav_close(). -// -// You can also use drwav_open_file_write() and drwav_open_memory_write() to open the stream from a file or from a block -// of memory respectively. -// -// This is different from drwav_init_write() in that it will allocate the drwav object for you via DRWAV_MALLOC() before -// initializing it. -// -// See also: drwav_open_file_write(), drwav_open_memory_write(), drwav_close() -drwav* drwav_open_write(const drwav_data_format* pFormat, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData); -drwav* drwav_open_write_sequential(const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_write_proc onWrite, void* pUserData); - -// Uninitializes and deletes the the given drwav object. -// -// Use this only for objects created with drwav_open(). -void drwav_close(drwav* pWav); - - -// Reads raw audio data. 
-// -// This is the lowest level function for reading audio data. It simply reads the given number of -// bytes of the raw internal sample data. -// -// Consider using drwav_read_s16(), drwav_read_s32() or drwav_read_f32() for reading sample data in -// a consistent format. -// -// Returns the number of bytes actually read. +/* +Initializes a pre-allocated drwav object for reading. + +pWav [out] A pointer to the drwav object being initialized. +onRead [in] The function to call when data needs to be read from the client. +onSeek [in] The function to call when the read position of the client data needs to move. +onChunk [in, optional] The function to call when a chunk is enumerated at initialized time. +pUserData, pReadSeekUserData [in, optional] A pointer to application defined data that will be passed to onRead and onSeek. +pChunkUserData [in, optional] A pointer to application defined data that will be passed to onChunk. +flags [in, optional] A set of flags for controlling how things are loaded. + +Returns true if successful; false otherwise. + +Close the loader with drwav_uninit(). + +This is the lowest level function for initializing a WAV file. You can also use drwav_init_file() and drwav_init_memory() +to open the stream from a file or from a block of memory respectively. + +Possible values for flags: + DRWAV_SEQUENTIAL: Never perform a backwards seek while loading. This disables the chunk callback and will cause this function + to return as soon as the data chunk is found. Any chunks after the data chunk will be ignored. + +drwav_init() is equivalent to "drwav_init_ex(pWav, onRead, onSeek, NULL, pUserData, NULL, 0);". + +The onChunk callback is not called for the WAVE or FMT chunks. The contents of the FMT chunk can be read from pWav->fmt +after the function returns. 
+ +See also: drwav_init_file(), drwav_init_memory(), drwav_uninit() +*/ +drwav_bool32 drwav_init(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks); +drwav_bool32 drwav_init_ex(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, drwav_chunk_proc onChunk, void* pReadSeekUserData, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks); + +/* +Initializes a pre-allocated drwav object for writing. + +onWrite [in] The function to call when data needs to be written. +onSeek [in] The function to call when the write position needs to move. +pUserData [in, optional] A pointer to application defined data that will be passed to onWrite and onSeek. + +Returns true if successful; false otherwise. + +Close the writer with drwav_uninit(). + +This is the lowest level function for initializing a WAV file. You can also use drwav_init_file_write() and drwav_init_memory_write() +to open the stream from a file or from a block of memory respectively. + +If the total sample count is known, you can use drwav_init_write_sequential(). This avoids the need for dr_wav to perform +a post-processing step for storing the total sample count and the size of the data chunk which requires a backwards seek. 
+ +See also: drwav_init_file_write(), drwav_init_memory_write(), drwav_uninit() +*/ +drwav_bool32 drwav_init_write(drwav* pWav, const drwav_data_format* pFormat, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks); +drwav_bool32 drwav_init_write_sequential(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_write_proc onWrite, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks); +drwav_bool32 drwav_init_write_sequential_pcm_frames(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, drwav_write_proc onWrite, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks); + +/* +Utility function to determine the target size of the entire data to be written (including all headers and chunks). + +Returns the target size in bytes. + +Useful if the application needs to know the size to allocate. + +Only writing to the RIFF chunk and one data chunk is currently supported. + +See also: drwav_init_write(), drwav_init_file_write(), drwav_init_memory_write() +*/ +drwav_uint64 drwav_target_write_size_bytes(drwav_data_format const *format, drwav_uint64 totalSampleCount); + +/* +Uninitializes the given drwav object. + +Use this only for objects initialized with drwav_init*() functions (drwav_init(), drwav_init_ex(), drwav_init_write(), drwav_init_write_sequential()). +*/ +drwav_result drwav_uninit(drwav* pWav); + + +/* +Reads raw audio data. + +This is the lowest level function for reading audio data. It simply reads the given number of +bytes of the raw internal sample data. + +Consider using drwav_read_pcm_frames_s16(), drwav_read_pcm_frames_s32() or drwav_read_pcm_frames_f32() for +reading sample data in a consistent format. + +Returns the number of bytes actually read. +*/ size_t drwav_read_raw(drwav* pWav, size_t bytesToRead, void* pBufferOut); -// Reads a chunk of audio data in the native internal format. 
-// -// This is typically the most efficient way to retrieve audio data, but it does not do any format -// conversions which means you'll need to convert the data manually if required. -// -// If the return value is less than <samplesToRead> it means the end of the file has been reached or -// you have requested more samples than can possibly fit in the output buffer. -// -// This function will only work when sample data is of a fixed size and uncompressed. If you are -// using a compressed format consider using drwav_read_raw() or drwav_read_s16/s32/f32/etc(). -drwav_uint64 drwav_read(drwav* pWav, drwav_uint64 samplesToRead, void* pBufferOut); - -// Seeks to the given sample. -// -// Returns true if successful; false otherwise. -drwav_bool32 drwav_seek_to_sample(drwav* pWav, drwav_uint64 sample); - - -// Writes raw audio data. -// -// Returns the number of bytes actually written. If this differs from bytesToWrite, it indicates an error. +/* +Reads up to the specified number of PCM frames from the WAV file. + +The output data will be in the file's internal format, converted to native-endian byte order. Use +drwav_read_pcm_frames_s16/f32/s32() to read data in a specific format. + +If the return value is less than <framesToRead> it means the end of the file has been reached or +you have requested more PCM frames than can possibly fit in the output buffer. + +This function will only work when sample data is of a fixed size and uncompressed. If you are +using a compressed format consider using drwav_read_raw() or drwav_read_pcm_frames_s16/s32/f32(). +*/ +drwav_uint64 drwav_read_pcm_frames(drwav* pWav, drwav_uint64 framesToRead, void* pBufferOut); +drwav_uint64 drwav_read_pcm_frames_le(drwav* pWav, drwav_uint64 framesToRead, void* pBufferOut); +drwav_uint64 drwav_read_pcm_frames_be(drwav* pWav, drwav_uint64 framesToRead, void* pBufferOut); + +/* +Seeks to the given PCM frame. + +Returns true if successful; false otherwise. 
+*/ +drwav_bool32 drwav_seek_to_pcm_frame(drwav* pWav, drwav_uint64 targetFrameIndex); + + +/* +Writes raw audio data. + +Returns the number of bytes actually written. If this differs from bytesToWrite, it indicates an error. +*/ size_t drwav_write_raw(drwav* pWav, size_t bytesToWrite, const void* pData); -// Writes audio data based on sample counts. -// -// Returns the number of samples written. -drwav_uint64 drwav_write(drwav* pWav, drwav_uint64 samplesToWrite, const void* pData); +/* +Writes PCM frames. + +Returns the number of PCM frames written. +Input samples need to be in native-endian byte order. On big-endian architectures the input data will be converted to +little-endian. Use drwav_write_raw() to write raw audio data without performing any conversion. +*/ +drwav_uint64 drwav_write_pcm_frames(drwav* pWav, drwav_uint64 framesToWrite, const void* pData); +drwav_uint64 drwav_write_pcm_frames_le(drwav* pWav, drwav_uint64 framesToWrite, const void* pData); +drwav_uint64 drwav_write_pcm_frames_be(drwav* pWav, drwav_uint64 framesToWrite, const void* pData); -//// Conversion Utilities //// +/* Conversion Utilities */ #ifndef DR_WAV_NO_CONVERSION_API -// Reads a chunk of audio data and converts it to signed 16-bit PCM samples. -// -// Returns the number of samples actually read. -// -// If the return value is less than <samplesToRead> it means the end of the file has been reached. -drwav_uint64 drwav_read_s16(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut); +/* +Reads a chunk of audio data and converts it to signed 16-bit PCM samples. + +Returns the number of PCM frames actually read. + +If the return value is less than <framesToRead> it means the end of the file has been reached. 
+*/ +drwav_uint64 drwav_read_pcm_frames_s16(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut); +drwav_uint64 drwav_read_pcm_frames_s16le(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut); +drwav_uint64 drwav_read_pcm_frames_s16be(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut); -// Low-level function for converting unsigned 8-bit PCM samples to signed 16-bit PCM samples. +/* Low-level function for converting unsigned 8-bit PCM samples to signed 16-bit PCM samples. */ void drwav_u8_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount); -// Low-level function for converting signed 24-bit PCM samples to signed 16-bit PCM samples. +/* Low-level function for converting signed 24-bit PCM samples to signed 16-bit PCM samples. */ void drwav_s24_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount); -// Low-level function for converting signed 32-bit PCM samples to signed 16-bit PCM samples. +/* Low-level function for converting signed 32-bit PCM samples to signed 16-bit PCM samples. */ void drwav_s32_to_s16(drwav_int16* pOut, const drwav_int32* pIn, size_t sampleCount); -// Low-level function for converting IEEE 32-bit floating point samples to signed 16-bit PCM samples. +/* Low-level function for converting IEEE 32-bit floating point samples to signed 16-bit PCM samples. */ void drwav_f32_to_s16(drwav_int16* pOut, const float* pIn, size_t sampleCount); -// Low-level function for converting IEEE 64-bit floating point samples to signed 16-bit PCM samples. +/* Low-level function for converting IEEE 64-bit floating point samples to signed 16-bit PCM samples. */ void drwav_f64_to_s16(drwav_int16* pOut, const double* pIn, size_t sampleCount); -// Low-level function for converting A-law samples to signed 16-bit PCM samples. +/* Low-level function for converting A-law samples to signed 16-bit PCM samples. 
*/ void drwav_alaw_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount); -// Low-level function for converting u-law samples to signed 16-bit PCM samples. +/* Low-level function for converting u-law samples to signed 16-bit PCM samples. */ void drwav_mulaw_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount); -// Reads a chunk of audio data and converts it to IEEE 32-bit floating point samples. -// -// Returns the number of samples actually read. -// -// If the return value is less than <samplesToRead> it means the end of the file has been reached. -drwav_uint64 drwav_read_f32(drwav* pWav, drwav_uint64 samplesToRead, float* pBufferOut); +/* +Reads a chunk of audio data and converts it to IEEE 32-bit floating point samples. + +Returns the number of PCM frames actually read. + +If the return value is less than <framesToRead> it means the end of the file has been reached. +*/ +drwav_uint64 drwav_read_pcm_frames_f32(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut); +drwav_uint64 drwav_read_pcm_frames_f32le(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut); +drwav_uint64 drwav_read_pcm_frames_f32be(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut); -// Low-level function for converting unsigned 8-bit PCM samples to IEEE 32-bit floating point samples. +/* Low-level function for converting unsigned 8-bit PCM samples to IEEE 32-bit floating point samples. */ void drwav_u8_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount); -// Low-level function for converting signed 16-bit PCM samples to IEEE 32-bit floating point samples. +/* Low-level function for converting signed 16-bit PCM samples to IEEE 32-bit floating point samples. */ void drwav_s16_to_f32(float* pOut, const drwav_int16* pIn, size_t sampleCount); -// Low-level function for converting signed 24-bit PCM samples to IEEE 32-bit floating point samples. 
+/* Low-level function for converting signed 24-bit PCM samples to IEEE 32-bit floating point samples. */ void drwav_s24_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount); -// Low-level function for converting signed 32-bit PCM samples to IEEE 32-bit floating point samples. +/* Low-level function for converting signed 32-bit PCM samples to IEEE 32-bit floating point samples. */ void drwav_s32_to_f32(float* pOut, const drwav_int32* pIn, size_t sampleCount); -// Low-level function for converting IEEE 64-bit floating point samples to IEEE 32-bit floating point samples. +/* Low-level function for converting IEEE 64-bit floating point samples to IEEE 32-bit floating point samples. */ void drwav_f64_to_f32(float* pOut, const double* pIn, size_t sampleCount); -// Low-level function for converting A-law samples to IEEE 32-bit floating point samples. +/* Low-level function for converting A-law samples to IEEE 32-bit floating point samples. */ void drwav_alaw_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount); -// Low-level function for converting u-law samples to IEEE 32-bit floating point samples. +/* Low-level function for converting u-law samples to IEEE 32-bit floating point samples. */ void drwav_mulaw_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount); -// Reads a chunk of audio data and converts it to signed 32-bit PCM samples. -// -// Returns the number of samples actually read. -// -// If the return value is less than <samplesToRead> it means the end of the file has been reached. -drwav_uint64 drwav_read_s32(drwav* pWav, drwav_uint64 samplesToRead, drwav_int32* pBufferOut); +/* +Reads a chunk of audio data and converts it to signed 32-bit PCM samples. + +Returns the number of PCM frames actually read. + +If the return value is less than <framesToRead> it means the end of the file has been reached. 
+*/ +drwav_uint64 drwav_read_pcm_frames_s32(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut); +drwav_uint64 drwav_read_pcm_frames_s32le(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut); +drwav_uint64 drwav_read_pcm_frames_s32be(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut); -// Low-level function for converting unsigned 8-bit PCM samples to signed 32-bit PCM samples. +/* Low-level function for converting unsigned 8-bit PCM samples to signed 32-bit PCM samples. */ void drwav_u8_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount); -// Low-level function for converting signed 16-bit PCM samples to signed 32-bit PCM samples. +/* Low-level function for converting signed 16-bit PCM samples to signed 32-bit PCM samples. */ void drwav_s16_to_s32(drwav_int32* pOut, const drwav_int16* pIn, size_t sampleCount); -// Low-level function for converting signed 24-bit PCM samples to signed 32-bit PCM samples. +/* Low-level function for converting signed 24-bit PCM samples to signed 32-bit PCM samples. */ void drwav_s24_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount); -// Low-level function for converting IEEE 32-bit floating point samples to signed 32-bit PCM samples. +/* Low-level function for converting IEEE 32-bit floating point samples to signed 32-bit PCM samples. */ void drwav_f32_to_s32(drwav_int32* pOut, const float* pIn, size_t sampleCount); -// Low-level function for converting IEEE 64-bit floating point samples to signed 32-bit PCM samples. +/* Low-level function for converting IEEE 64-bit floating point samples to signed 32-bit PCM samples. */ void drwav_f64_to_s32(drwav_int32* pOut, const double* pIn, size_t sampleCount); -// Low-level function for converting A-law samples to signed 32-bit PCM samples. +/* Low-level function for converting A-law samples to signed 32-bit PCM samples. 
*/ void drwav_alaw_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount); -// Low-level function for converting u-law samples to signed 32-bit PCM samples. +/* Low-level function for converting u-law samples to signed 32-bit PCM samples. */ void drwav_mulaw_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount); -#endif //DR_WAV_NO_CONVERSION_API +#endif /* DR_WAV_NO_CONVERSION_API */ -//// High-Level Convenience Helpers //// +/* High-Level Convenience Helpers */ #ifndef DR_WAV_NO_STDIO +/* +Helper for initializing a wave file for reading using stdio. + +This holds the internal FILE object until drwav_uninit() is called. Keep this in mind if you're caching drwav +objects because the operating system may restrict the number of file handles an application can have open at +any given time. +*/ +drwav_bool32 drwav_init_file(drwav* pWav, const char* filename, const drwav_allocation_callbacks* pAllocationCallbacks); +drwav_bool32 drwav_init_file_ex(drwav* pWav, const char* filename, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks); +drwav_bool32 drwav_init_file_w(drwav* pWav, const wchar_t* filename, const drwav_allocation_callbacks* pAllocationCallbacks); +drwav_bool32 drwav_init_file_ex_w(drwav* pWav, const wchar_t* filename, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks); + +/* +Helper for initializing a wave file for writing using stdio. + +This holds the internal FILE object until drwav_uninit() is called. Keep this in mind if you're caching drwav +objects because the operating system may restrict the number of file handles an application can have open at +any given time. 
+*/ +drwav_bool32 drwav_init_file_write(drwav* pWav, const char* filename, const drwav_data_format* pFormat, const drwav_allocation_callbacks* pAllocationCallbacks); +drwav_bool32 drwav_init_file_write_sequential(drwav* pWav, const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, const drwav_allocation_callbacks* pAllocationCallbacks); +drwav_bool32 drwav_init_file_write_sequential_pcm_frames(drwav* pWav, const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, const drwav_allocation_callbacks* pAllocationCallbacks); +drwav_bool32 drwav_init_file_write_w(drwav* pWav, const wchar_t* filename, const drwav_data_format* pFormat, const drwav_allocation_callbacks* pAllocationCallbacks); +drwav_bool32 drwav_init_file_write_sequential_w(drwav* pWav, const wchar_t* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, const drwav_allocation_callbacks* pAllocationCallbacks); +drwav_bool32 drwav_init_file_write_sequential_pcm_frames_w(drwav* pWav, const wchar_t* filename, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, const drwav_allocation_callbacks* pAllocationCallbacks); +#endif /* DR_WAV_NO_STDIO */ + +/* +Helper for initializing a loader from a pre-allocated memory buffer. + +This does not create a copy of the data. It is up to the application to ensure the buffer remains valid for +the lifetime of the drwav object. + +The buffer should contain the contents of the entire wave file, not just the sample data. +*/ +drwav_bool32 drwav_init_memory(drwav* pWav, const void* data, size_t dataSize, const drwav_allocation_callbacks* pAllocationCallbacks); +drwav_bool32 drwav_init_memory_ex(drwav* pWav, const void* data, size_t dataSize, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks); + +/* +Helper for initializing a writer which outputs data to a memory buffer. 
+ +dr_wav will manage the memory allocations, however it is up to the caller to free the data with drwav_free(). -// Helper for initializing a wave file using stdio. -// -// This holds the internal FILE object until drwav_uninit() is called. Keep this in mind if you're caching drwav -// objects because the operating system may restrict the number of file handles an application can have open at -// any given time. -drwav_bool32 drwav_init_file(drwav* pWav, const char* filename); - -// Helper for initializing a wave file for writing using stdio. -// -// This holds the internal FILE object until drwav_uninit() is called. Keep this in mind if you're caching drwav -// objects because the operating system may restrict the number of file handles an application can have open at -// any given time. -drwav_bool32 drwav_init_file_write(drwav* pWav, const char* filename, const drwav_data_format* pFormat); -drwav_bool32 drwav_init_file_write_sequential(drwav* pWav, const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount); - -// Helper for opening a wave file using stdio. -// -// This holds the internal FILE object until drwav_close() is called. Keep this in mind if you're caching drwav -// objects because the operating system may restrict the number of file handles an application can have open at -// any given time. -drwav* drwav_open_file(const char* filename); - -// Helper for opening a wave file for writing using stdio. -// -// This holds the internal FILE object until drwav_close() is called. Keep this in mind if you're caching drwav -// objects because the operating system may restrict the number of file handles an application can have open at -// any given time. 
-drwav* drwav_open_file_write(const char* filename, const drwav_data_format* pFormat); -drwav* drwav_open_file_write_sequential(const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount); - -#endif //DR_WAV_NO_STDIO - -// Helper for initializing a loader from a pre-allocated memory buffer. -// -// This does not create a copy of the data. It is up to the application to ensure the buffer remains valid for -// the lifetime of the drwav object. -// -// The buffer should contain the contents of the entire wave file, not just the sample data. -drwav_bool32 drwav_init_memory(drwav* pWav, const void* data, size_t dataSize); - -// Helper for initializing a writer which outputs data to a memory buffer. -// -// dr_wav will manage the memory allocations, however it is up to the caller to free the data with drwav_free(). -// -// The buffer will remain allocated even after drwav_uninit() is called. Indeed, the buffer should not be -// considered valid until after drwav_uninit() has been called anyway. -drwav_bool32 drwav_init_memory_write(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat); -drwav_bool32 drwav_init_memory_write_sequential(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount); - -// Helper for opening a loader from a pre-allocated memory buffer. -// -// This does not create a copy of the data. It is up to the application to ensure the buffer remains valid for -// the lifetime of the drwav object. -// -// The buffer should contain the contents of the entire wave file, not just the sample data. -drwav* drwav_open_memory(const void* data, size_t dataSize); - -// Helper for opening a writer which outputs data to a memory buffer. -// -// dr_wav will manage the memory allocations, however it is up to the caller to free the data with drwav_free(). -// -// The buffer will remain allocated even after drwav_close() is called. 
Indeed, the buffer should not be -// considered valid until after drwav_close() has been called anyway. -drwav* drwav_open_memory_write(void** ppData, size_t* pDataSize, const drwav_data_format* pFormat); -drwav* drwav_open_memory_write_sequential(void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount); +The buffer will remain allocated even after drwav_uninit() is called. Indeed, the buffer should not be +considered valid until after drwav_uninit() has been called anyway. +*/ +drwav_bool32 drwav_init_memory_write(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, const drwav_allocation_callbacks* pAllocationCallbacks); +drwav_bool32 drwav_init_memory_write_sequential(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, const drwav_allocation_callbacks* pAllocationCallbacks); +drwav_bool32 drwav_init_memory_write_sequential_pcm_frames(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, const drwav_allocation_callbacks* pAllocationCallbacks); #ifndef DR_WAV_NO_CONVERSION_API -// Opens and reads a wav file in a single operation. -drwav_int16* drwav_open_and_read_s16(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount); -float* drwav_open_and_read_f32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount); -drwav_int32* drwav_open_and_read_s32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount); +/* +Opens and reads an entire wav file in a single operation. + +The return value is a heap-allocated buffer containing the audio data. Use drwav_free() to free the buffer. 
+*/ +drwav_int16* drwav_open_and_read_pcm_frames_s16(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks); +float* drwav_open_and_read_pcm_frames_f32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks); +drwav_int32* drwav_open_and_read_pcm_frames_s32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks); #ifndef DR_WAV_NO_STDIO -// Opens and decodes a wav file in a single operation. -drwav_int16* drwav_open_and_read_file_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount); -float* drwav_open_and_read_file_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount); -drwav_int32* drwav_open_and_read_file_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount); +/* +Opens and decodes an entire wav file in a single operation. + +The return value is a heap-allocated buffer containing the audio data. Use drwav_free() to free the buffer. 
+*/ +drwav_int16* drwav_open_file_and_read_pcm_frames_s16(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks); +float* drwav_open_file_and_read_pcm_frames_f32(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks); +drwav_int32* drwav_open_file_and_read_pcm_frames_s32(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks); +drwav_int16* drwav_open_file_and_read_pcm_frames_s16_w(const wchar_t* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks); +float* drwav_open_file_and_read_pcm_frames_f32_w(const wchar_t* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks); +drwav_int32* drwav_open_file_and_read_pcm_frames_s32_w(const wchar_t* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks); #endif +/* +Opens and decodes an entire wav file from a block of memory in a single operation. -// Opens and decodes a wav file from a block of memory in a single operation. 
-drwav_int16* drwav_open_and_read_memory_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount); -float* drwav_open_and_read_memory_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount); -drwav_int32* drwav_open_and_read_memory_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount); +The return value is a heap-allocated buffer containing the audio data. Use drwav_free() to free the buffer. +*/ +drwav_int16* drwav_open_memory_and_read_pcm_frames_s16(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks); +float* drwav_open_memory_and_read_pcm_frames_f32(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks); +drwav_int32* drwav_open_memory_and_read_pcm_frames_s32(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks); #endif -// Frees data that was allocated internally by dr_wav. -void drwav_free(void* pDataReturnedByOpenAndRead); +/* Frees data that was allocated internally by dr_wav. 
*/ +void drwav_free(void* p, const drwav_allocation_callbacks* pAllocationCallbacks); #ifdef __cplusplus } #endif -#endif // dr_wav_h +#endif /* dr_wav_h */ + +/************************************************************************************************************************************************************ + ************************************************************************************************************************************************************ -///////////////////////////////////////////////////// -// -// IMPLEMENTATION -// -///////////////////////////////////////////////////// + IMPLEMENTATION + ************************************************************************************************************************************************************ + ************************************************************************************************************************************************************/ #ifdef DR_WAV_IMPLEMENTATION #include <stdlib.h> -#include <string.h> // For memcpy(), memset() -#include <limits.h> // For INT_MAX +#include <string.h> /* For memcpy(), memset() */ +#include <limits.h> /* For INT_MAX */ #ifndef DR_WAV_NO_STDIO #include <stdio.h> +#include <wchar.h> #endif -// Standard library stuff. +/* Standard library stuff. */ #ifndef DRWAV_ASSERT #include <assert.h> #define DRWAV_ASSERT(expression) assert(expression) @@ -725,21 +1010,34 @@ void drwav_free(void* pDataReturnedByOpenAndRead); #define drwav_max(a, b) (((a) > (b)) ? (a) : (b)) #define drwav_clamp(x, lo, hi) (drwav_max((lo), drwav_min((hi), (x)))) -#define drwav_assert DRWAV_ASSERT -#define drwav_copy_memory DRWAV_COPY_MEMORY -#define drwav_zero_memory DRWAV_ZERO_MEMORY +#define DRWAV_MAX_SIMD_VECTOR_SIZE 64 /* 64 for AVX-512 in the future. */ - -#define DRWAV_MAX_SIMD_VECTOR_SIZE 64 // 64 for AVX-512 in the future. +/* CPU architecture. 
*/ +#if defined(__x86_64__) || defined(_M_X64) + #define DRWAV_X64 +#elif defined(__i386) || defined(_M_IX86) + #define DRWAV_X86 +#elif defined(__arm__) || defined(_M_ARM) + #define DRWAV_ARM +#endif #ifdef _MSC_VER -#define DRWAV_INLINE __forceinline -#else -#ifdef __GNUC__ -#define DRWAV_INLINE inline __attribute__((always_inline)) + #define DRWAV_INLINE __forceinline +#elif defined(__GNUC__) + /* + I've had a bug report where GCC is emitting warnings about functions possibly not being inlineable. This warning happens when + the __attribute__((always_inline)) attribute is defined without an "inline" statement. I think therefore there must be some + case where "__inline__" is not always defined, thus the compiler emitting these warnings. When using -std=c89 or -ansi on the + command line, we cannot use the "inline" keyword and instead need to use "__inline__". In an attempt to work around this issue + I am using "__inline__" only when we're compiling in strict ANSI mode. + */ + #if defined(__STRICT_ANSI__) + #define DRWAV_INLINE __inline__ __attribute__((always_inline)) + #else + #define DRWAV_INLINE inline __attribute__((always_inline)) + #endif #else -#define DRWAV_INLINE inline -#endif + #define DRWAV_INLINE #endif #if defined(SIZE_MAX) @@ -752,12 +1050,39 @@ void drwav_free(void* pDataReturnedByOpenAndRead); #endif #endif -static const drwav_uint8 drwavGUID_W64_RIFF[16] = {0x72,0x69,0x66,0x66, 0x2E,0x91, 0xCF,0x11, 0xA5,0xD6, 0x28,0xDB,0x04,0xC1,0x00,0x00}; // 66666972-912E-11CF-A5D6-28DB04C10000 -static const drwav_uint8 drwavGUID_W64_WAVE[16] = {0x77,0x61,0x76,0x65, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A}; // 65766177-ACF3-11D3-8CD1-00C04F8EDB8A -static const drwav_uint8 drwavGUID_W64_JUNK[16] = {0x6A,0x75,0x6E,0x6B, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A}; // 6B6E756A-ACF3-11D3-8CD1-00C04F8EDB8A -static const drwav_uint8 drwavGUID_W64_FMT [16] = {0x66,0x6D,0x74,0x20, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 
0x00,0xC0,0x4F,0x8E,0xDB,0x8A}; // 20746D66-ACF3-11D3-8CD1-00C04F8EDB8A -static const drwav_uint8 drwavGUID_W64_FACT[16] = {0x66,0x61,0x63,0x74, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A}; // 74636166-ACF3-11D3-8CD1-00C04F8EDB8A -static const drwav_uint8 drwavGUID_W64_DATA[16] = {0x64,0x61,0x74,0x61, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A}; // 61746164-ACF3-11D3-8CD1-00C04F8EDB8A +#if defined(_MSC_VER) && _MSC_VER >= 1300 + #define DRWAV_HAS_BYTESWAP16_INTRINSIC + #define DRWAV_HAS_BYTESWAP32_INTRINSIC + #define DRWAV_HAS_BYTESWAP64_INTRINSIC +#elif defined(__clang__) + #if defined(__has_builtin) + #if __has_builtin(__builtin_bswap16) + #define DRWAV_HAS_BYTESWAP16_INTRINSIC + #endif + #if __has_builtin(__builtin_bswap32) + #define DRWAV_HAS_BYTESWAP32_INTRINSIC + #endif + #if __has_builtin(__builtin_bswap64) + #define DRWAV_HAS_BYTESWAP64_INTRINSIC + #endif + #endif +#elif defined(__GNUC__) + #if ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) + #define DRWAV_HAS_BYTESWAP32_INTRINSIC + #define DRWAV_HAS_BYTESWAP64_INTRINSIC + #endif + #if ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) + #define DRWAV_HAS_BYTESWAP16_INTRINSIC + #endif +#endif + +static const drwav_uint8 drwavGUID_W64_RIFF[16] = {0x72,0x69,0x66,0x66, 0x2E,0x91, 0xCF,0x11, 0xA5,0xD6, 0x28,0xDB,0x04,0xC1,0x00,0x00}; /* 66666972-912E-11CF-A5D6-28DB04C10000 */ +static const drwav_uint8 drwavGUID_W64_WAVE[16] = {0x77,0x61,0x76,0x65, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A}; /* 65766177-ACF3-11D3-8CD1-00C04F8EDB8A */ +static const drwav_uint8 drwavGUID_W64_JUNK[16] = {0x6A,0x75,0x6E,0x6B, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A}; /* 6B6E756A-ACF3-11D3-8CD1-00C04F8EDB8A */ +static const drwav_uint8 drwavGUID_W64_FMT [16] = {0x66,0x6D,0x74,0x20, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A}; /* 20746D66-ACF3-11D3-8CD1-00C04F8EDB8A */ +static const drwav_uint8 
drwavGUID_W64_FACT[16] = {0x66,0x61,0x63,0x74, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A}; /* 74636166-ACF3-11D3-8CD1-00C04F8EDB8A */ +static const drwav_uint8 drwavGUID_W64_DATA[16] = {0x64,0x61,0x74,0x61, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A}; /* 61746164-ACF3-11D3-8CD1-00C04F8EDB8A */ +static const drwav_uint8 drwavGUID_W64_SMPL[16] = {0x73,0x6D,0x70,0x6C, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A}; /* 6C706D73-ACF3-11D3-8CD1-00C04F8EDB8A */ static DRWAV_INLINE drwav_bool32 drwav__guid_equal(const drwav_uint8 a[16], const drwav_uint8 b[16]) { @@ -784,8 +1109,14 @@ static DRWAV_INLINE drwav_bool32 drwav__fourcc_equal(const unsigned char* a, con static DRWAV_INLINE int drwav__is_little_endian() { +#if defined(DRWAV_X86) || defined(DRWAV_X64) + return DRWAV_TRUE; +#elif defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && __BYTE_ORDER == __LITTLE_ENDIAN + return DRWAV_TRUE; +#else int n = 1; return (*(char*)&n) == 1; +#endif } static DRWAV_INLINE unsigned short drwav__bytes_to_u16(const unsigned char* data) @@ -812,73 +1143,401 @@ static DRWAV_INLINE drwav_uint64 drwav__bytes_to_u64(const unsigned char* data) static DRWAV_INLINE void drwav__bytes_to_guid(const unsigned char* data, drwav_uint8* guid) { - for (int i = 0; i < 16; ++i) { + int i; + for (i = 0; i < 16; ++i) { guid[i] = data[i]; } } -static DRWAV_INLINE drwav_bool32 drwav__is_compressed_format_tag(drwav_uint16 formatTag) +static DRWAV_INLINE drwav_uint16 drwav__bswap16(drwav_uint16 n) { - return - formatTag == DR_WAVE_FORMAT_ADPCM || - formatTag == DR_WAVE_FORMAT_DVI_ADPCM; +#ifdef DRWAV_HAS_BYTESWAP16_INTRINSIC + #if defined(_MSC_VER) + return _byteswap_ushort(n); + #elif defined(__GNUC__) || defined(__clang__) + return __builtin_bswap16(n); + #else + #error "This compiler does not support the byte swap intrinsic." 
+ #endif +#else + return ((n & 0xFF00) >> 8) | + ((n & 0x00FF) << 8); +#endif +} + +static DRWAV_INLINE drwav_uint32 drwav__bswap32(drwav_uint32 n) +{ +#ifdef DRWAV_HAS_BYTESWAP32_INTRINSIC + #if defined(_MSC_VER) + return _byteswap_ulong(n); + #elif defined(__GNUC__) || defined(__clang__) + #if defined(DRWAV_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 6) && !defined(DRWAV_64BIT) /* <-- 64-bit inline assembly has not been tested, so disabling for now. */ + /* Inline assembly optimized implementation for ARM. In my testing, GCC does not generate optimized code with __builtin_bswap32(). */ + drwav_uint32 r; + __asm__ __volatile__ ( + #if defined(DRWAV_64BIT) + "rev %w[out], %w[in]" : [out]"=r"(r) : [in]"r"(n) /* <-- This is untested. If someone in the community could test this, that would be appreciated! */ + #else + "rev %[out], %[in]" : [out]"=r"(r) : [in]"r"(n) + #endif + ); + return r; + #else + return __builtin_bswap32(n); + #endif + #else + #error "This compiler does not support the byte swap intrinsic." + #endif +#else + return ((n & 0xFF000000) >> 24) | + ((n & 0x00FF0000) >> 8) | + ((n & 0x0000FF00) << 8) | + ((n & 0x000000FF) << 24); +#endif } +static DRWAV_INLINE drwav_uint64 drwav__bswap64(drwav_uint64 n) +{ +#ifdef DRWAV_HAS_BYTESWAP64_INTRINSIC + #if defined(_MSC_VER) + return _byteswap_uint64(n); + #elif defined(__GNUC__) || defined(__clang__) + return __builtin_bswap64(n); + #else + #error "This compiler does not support the byte swap intrinsic." 
+ #endif +#else + return ((n & (drwav_uint64)0xFF00000000000000) >> 56) | + ((n & (drwav_uint64)0x00FF000000000000) >> 40) | + ((n & (drwav_uint64)0x0000FF0000000000) >> 24) | + ((n & (drwav_uint64)0x000000FF00000000) >> 8) | + ((n & (drwav_uint64)0x00000000FF000000) << 8) | + ((n & (drwav_uint64)0x0000000000FF0000) << 24) | + ((n & (drwav_uint64)0x000000000000FF00) << 40) | + ((n & (drwav_uint64)0x00000000000000FF) << 56); +#endif +} -drwav_uint64 drwav_read_s16__msadpcm(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut); -drwav_uint64 drwav_read_s16__ima(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut); -drwav_bool32 drwav_init_write__internal(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData); -drwav* drwav_open_write__internal(const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData); -typedef struct +static DRWAV_INLINE drwav_int16 drwav__bswap_s16(drwav_int16 n) { - union - { - drwav_uint8 fourcc[4]; - drwav_uint8 guid[16]; - } id; + return (drwav_int16)drwav__bswap16((drwav_uint16)n); +} - // The size in bytes of the chunk. - drwav_uint64 sizeInBytes; +static DRWAV_INLINE void drwav__bswap_samples_s16(drwav_int16* pSamples, drwav_uint64 sampleCount) +{ + drwav_uint64 iSample; + for (iSample = 0; iSample < sampleCount; iSample += 1) { + pSamples[iSample] = drwav__bswap_s16(pSamples[iSample]); + } +} - // RIFF = 2 byte alignment. - // W64 = 8 byte alignment. 
- unsigned int paddingSize; -} drwav__chunk_header; +static DRWAV_INLINE void drwav__bswap_s24(drwav_uint8* p) +{ + drwav_uint8 t; + t = p[0]; + p[0] = p[2]; + p[2] = t; +} -static drwav_bool32 drwav__read_chunk_header(drwav_read_proc onRead, void* pUserData, drwav_container container, drwav_uint64* pRunningBytesReadOut, drwav__chunk_header* pHeaderOut) +static DRWAV_INLINE void drwav__bswap_samples_s24(drwav_uint8* pSamples, drwav_uint64 sampleCount) { - if (container == drwav_container_riff) { - if (onRead(pUserData, pHeaderOut->id.fourcc, 4) != 4) { - return DRWAV_FALSE; - } + drwav_uint64 iSample; + for (iSample = 0; iSample < sampleCount; iSample += 1) { + drwav_uint8* pSample = pSamples + (iSample*3); + drwav__bswap_s24(pSample); + } +} - unsigned char sizeInBytes[4]; - if (onRead(pUserData, sizeInBytes, 4) != 4) { - return DRWAV_FALSE; - } - pHeaderOut->sizeInBytes = drwav__bytes_to_u32(sizeInBytes); - pHeaderOut->paddingSize = (unsigned int)(pHeaderOut->sizeInBytes % 2); - *pRunningBytesReadOut += 8; - } else { - if (onRead(pUserData, pHeaderOut->id.guid, 16) != 16) { - return DRWAV_FALSE; - } +static DRWAV_INLINE drwav_int32 drwav__bswap_s32(drwav_int32 n) +{ + return (drwav_int32)drwav__bswap32((drwav_uint32)n); +} + +static DRWAV_INLINE void drwav__bswap_samples_s32(drwav_int32* pSamples, drwav_uint64 sampleCount) +{ + drwav_uint64 iSample; + for (iSample = 0; iSample < sampleCount; iSample += 1) { + pSamples[iSample] = drwav__bswap_s32(pSamples[iSample]); + } +} + + +static DRWAV_INLINE float drwav__bswap_f32(float n) +{ + union { + drwav_uint32 i; + float f; + } x; + x.f = n; + x.i = drwav__bswap32(x.i); + + return x.f; +} + +static DRWAV_INLINE void drwav__bswap_samples_f32(float* pSamples, drwav_uint64 sampleCount) +{ + drwav_uint64 iSample; + for (iSample = 0; iSample < sampleCount; iSample += 1) { + pSamples[iSample] = drwav__bswap_f32(pSamples[iSample]); + } +} + + +static DRWAV_INLINE double drwav__bswap_f64(double n) +{ + union { + drwav_uint64 
i; + double f; + } x; + x.f = n; + x.i = drwav__bswap64(x.i); + + return x.f; +} + +static DRWAV_INLINE void drwav__bswap_samples_f64(double* pSamples, drwav_uint64 sampleCount) +{ + drwav_uint64 iSample; + for (iSample = 0; iSample < sampleCount; iSample += 1) { + pSamples[iSample] = drwav__bswap_f64(pSamples[iSample]); + } +} + + +static DRWAV_INLINE void drwav__bswap_samples_pcm(void* pSamples, drwav_uint64 sampleCount, drwav_uint32 bytesPerSample) +{ + /* Assumes integer PCM. Floating point PCM is done in drwav__bswap_samples_ieee(). */ + switch (bytesPerSample) + { + case 2: /* s16, s12 (loosely packed) */ + { + drwav__bswap_samples_s16((drwav_int16*)pSamples, sampleCount); + } break; + case 3: /* s24 */ + { + drwav__bswap_samples_s24((drwav_uint8*)pSamples, sampleCount); + } break; + case 4: /* s32 */ + { + drwav__bswap_samples_s32((drwav_int32*)pSamples, sampleCount); + } break; + default: + { + /* Unsupported format. */ + DRWAV_ASSERT(DRWAV_FALSE); + } break; + } +} + +static DRWAV_INLINE void drwav__bswap_samples_ieee(void* pSamples, drwav_uint64 sampleCount, drwav_uint32 bytesPerSample) +{ + switch (bytesPerSample) + { + #if 0 /* Contributions welcome for f16 support. */ + case 2: /* f16 */ + { + drwav__bswap_samples_f16((drwav_float16*)pSamples, sampleCount); + } break; + #endif + case 4: /* f32 */ + { + drwav__bswap_samples_f32((float*)pSamples, sampleCount); + } break; + case 8: /* f64 */ + { + drwav__bswap_samples_f64((double*)pSamples, sampleCount); + } break; + default: + { + /* Unsupported format. 
*/ + DRWAV_ASSERT(DRWAV_FALSE); + } break; + } +} + +static DRWAV_INLINE void drwav__bswap_samples(void* pSamples, drwav_uint64 sampleCount, drwav_uint32 bytesPerSample, drwav_uint16 format) +{ + switch (format) + { + case DR_WAVE_FORMAT_PCM: + { + drwav__bswap_samples_pcm(pSamples, sampleCount, bytesPerSample); + } break; + + case DR_WAVE_FORMAT_IEEE_FLOAT: + { + drwav__bswap_samples_ieee(pSamples, sampleCount, bytesPerSample); + } break; + + case DR_WAVE_FORMAT_ALAW: + case DR_WAVE_FORMAT_MULAW: + { + drwav__bswap_samples_s16((drwav_int16*)pSamples, sampleCount); + } break; + + case DR_WAVE_FORMAT_ADPCM: + case DR_WAVE_FORMAT_DVI_ADPCM: + default: + { + /* Unsupported format. */ + DRWAV_ASSERT(DRWAV_FALSE); + } break; + } +} + + +static void* drwav__malloc_default(size_t sz, void* pUserData) +{ + (void)pUserData; + return DRWAV_MALLOC(sz); +} + +static void* drwav__realloc_default(void* p, size_t sz, void* pUserData) +{ + (void)pUserData; + return DRWAV_REALLOC(p, sz); +} + +static void drwav__free_default(void* p, void* pUserData) +{ + (void)pUserData; + DRWAV_FREE(p); +} + + +static void* drwav__malloc_from_callbacks(size_t sz, const drwav_allocation_callbacks* pAllocationCallbacks) +{ + if (pAllocationCallbacks == NULL) { + return NULL; + } + + if (pAllocationCallbacks->onMalloc != NULL) { + return pAllocationCallbacks->onMalloc(sz, pAllocationCallbacks->pUserData); + } + + /* Try using realloc(). */ + if (pAllocationCallbacks->onRealloc != NULL) { + return pAllocationCallbacks->onRealloc(NULL, sz, pAllocationCallbacks->pUserData); + } + + return NULL; +} + +static void* drwav__realloc_from_callbacks(void* p, size_t szNew, size_t szOld, const drwav_allocation_callbacks* pAllocationCallbacks) +{ + if (pAllocationCallbacks == NULL) { + return NULL; + } + if (pAllocationCallbacks->onRealloc != NULL) { + return pAllocationCallbacks->onRealloc(p, szNew, pAllocationCallbacks->pUserData); + } + + /* Try emulating realloc() in terms of malloc()/free(). 
*/ + if (pAllocationCallbacks->onMalloc != NULL && pAllocationCallbacks->onFree != NULL) { + void* p2; + + p2 = pAllocationCallbacks->onMalloc(szNew, pAllocationCallbacks->pUserData); + if (p2 == NULL) { + return NULL; + } + + DRWAV_COPY_MEMORY(p2, p, szOld); + pAllocationCallbacks->onFree(p, pAllocationCallbacks->pUserData); + + return p2; + } + + return NULL; +} + +static void drwav__free_from_callbacks(void* p, const drwav_allocation_callbacks* pAllocationCallbacks) +{ + if (p == NULL || pAllocationCallbacks == NULL) { + return; + } + + if (pAllocationCallbacks->onFree != NULL) { + pAllocationCallbacks->onFree(p, pAllocationCallbacks->pUserData); + } +} + + +drwav_allocation_callbacks drwav_copy_allocation_callbacks_or_defaults(const drwav_allocation_callbacks* pAllocationCallbacks) +{ + if (pAllocationCallbacks != NULL) { + /* Copy. */ + return *pAllocationCallbacks; + } else { + /* Defaults. */ + drwav_allocation_callbacks allocationCallbacks; + allocationCallbacks.pUserData = NULL; + allocationCallbacks.onMalloc = drwav__malloc_default; + allocationCallbacks.onRealloc = drwav__realloc_default; + allocationCallbacks.onFree = drwav__free_default; + return allocationCallbacks; + } +} + + +static DRWAV_INLINE drwav_bool32 drwav__is_compressed_format_tag(drwav_uint16 formatTag) +{ + return + formatTag == DR_WAVE_FORMAT_ADPCM || + formatTag == DR_WAVE_FORMAT_DVI_ADPCM; +} + +static unsigned int drwav__chunk_padding_size_riff(drwav_uint64 chunkSize) +{ + return (unsigned int)(chunkSize % 2); +} + +static unsigned int drwav__chunk_padding_size_w64(drwav_uint64 chunkSize) +{ + return (unsigned int)(chunkSize % 8); +} + +drwav_uint64 drwav_read_pcm_frames_s16__msadpcm(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut); +drwav_uint64 drwav_read_pcm_frames_s16__ima(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut); +drwav_bool32 drwav_init_write__internal(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount); + 
+static drwav_result drwav__read_chunk_header(drwav_read_proc onRead, void* pUserData, drwav_container container, drwav_uint64* pRunningBytesReadOut, drwav_chunk_header* pHeaderOut) +{ + if (container == drwav_container_riff) { + unsigned char sizeInBytes[4]; + + if (onRead(pUserData, pHeaderOut->id.fourcc, 4) != 4) { + return DRWAV_EOF; + } + + if (onRead(pUserData, sizeInBytes, 4) != 4) { + return DRWAV_INVALID_FILE; + } + + pHeaderOut->sizeInBytes = drwav__bytes_to_u32(sizeInBytes); + pHeaderOut->paddingSize = drwav__chunk_padding_size_riff(pHeaderOut->sizeInBytes); + *pRunningBytesReadOut += 8; + } else { unsigned char sizeInBytes[8]; + + if (onRead(pUserData, pHeaderOut->id.guid, 16) != 16) { + return DRWAV_EOF; + } + if (onRead(pUserData, sizeInBytes, 8) != 8) { - return DRWAV_FALSE; + return DRWAV_INVALID_FILE; } - pHeaderOut->sizeInBytes = drwav__bytes_to_u64(sizeInBytes) - 24; // <-- Subtract 24 because w64 includes the size of the header. - pHeaderOut->paddingSize = (unsigned int)(pHeaderOut->sizeInBytes % 8); + pHeaderOut->sizeInBytes = drwav__bytes_to_u64(sizeInBytes) - 24; /* <-- Subtract 24 because w64 includes the size of the header. */ + pHeaderOut->paddingSize = drwav__chunk_padding_size_w64(pHeaderOut->sizeInBytes); *pRunningBytesReadOut += 24; } - return DRWAV_TRUE; + return DRWAV_SUCCESS; } static drwav_bool32 drwav__seek_forward(drwav_seek_proc onSeek, drwav_uint64 offset, void* pUserData) @@ -901,30 +1560,59 @@ static drwav_bool32 drwav__seek_forward(drwav_seek_proc onSeek, drwav_uint64 off return DRWAV_TRUE; } +static drwav_bool32 drwav__seek_from_start(drwav_seek_proc onSeek, drwav_uint64 offset, void* pUserData) +{ + if (offset <= 0x7FFFFFFF) { + return onSeek(pUserData, (int)offset, drwav_seek_origin_start); + } + + /* Larger than 32-bit seek. 
*/ + if (!onSeek(pUserData, 0x7FFFFFFF, drwav_seek_origin_start)) { + return DRWAV_FALSE; + } + offset -= 0x7FFFFFFF; + + for (;;) { + if (offset <= 0x7FFFFFFF) { + return onSeek(pUserData, (int)offset, drwav_seek_origin_current); + } + + if (!onSeek(pUserData, 0x7FFFFFFF, drwav_seek_origin_current)) { + return DRWAV_FALSE; + } + offset -= 0x7FFFFFFF; + } + + /* Should never get here. */ + /*return DRWAV_TRUE; */ +} + static drwav_bool32 drwav__read_fmt(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, drwav_container container, drwav_uint64* pRunningBytesReadOut, drwav_fmt* fmtOut) { - drwav__chunk_header header; - if (!drwav__read_chunk_header(onRead, pUserData, container, pRunningBytesReadOut, &header)) { + drwav_chunk_header header; + unsigned char fmt[16]; + + if (drwav__read_chunk_header(onRead, pUserData, container, pRunningBytesReadOut, &header) != DRWAV_SUCCESS) { return DRWAV_FALSE; } - // Skip non-fmt chunks. + /* Skip non-fmt chunks. */ while ((container == drwav_container_riff && !drwav__fourcc_equal(header.id.fourcc, "fmt ")) || (container == drwav_container_w64 && !drwav__guid_equal(header.id.guid, drwavGUID_W64_FMT))) { if (!drwav__seek_forward(onSeek, header.sizeInBytes + header.paddingSize, pUserData)) { return DRWAV_FALSE; } *pRunningBytesReadOut += header.sizeInBytes + header.paddingSize; - // Try the next header. - if (!drwav__read_chunk_header(onRead, pUserData, container, pRunningBytesReadOut, &header)) { + /* Try the next header. */ + if (drwav__read_chunk_header(onRead, pUserData, container, pRunningBytesReadOut, &header) != DRWAV_SUCCESS) { return DRWAV_FALSE; } } - // Validation. + /* Validation. 
*/ if (container == drwav_container_riff) { if (!drwav__fourcc_equal(header.id.fourcc, "fmt ")) { return DRWAV_FALSE; @@ -936,7 +1624,6 @@ static drwav_bool32 drwav__read_fmt(drwav_read_proc onRead, drwav_seek_proc onSe } - unsigned char fmt[16]; if (onRead(pUserData, fmt, sizeof(fmt)) != sizeof(fmt)) { return DRWAV_FALSE; } @@ -956,16 +1643,18 @@ static drwav_bool32 drwav__read_fmt(drwav_read_proc onRead, drwav_seek_proc onSe if (header.sizeInBytes > 16) { unsigned char fmt_cbSize[2]; + int bytesReadSoFar = 0; + if (onRead(pUserData, fmt_cbSize, sizeof(fmt_cbSize)) != sizeof(fmt_cbSize)) { - return DRWAV_FALSE; // Expecting more data. + return DRWAV_FALSE; /* Expecting more data. */ } *pRunningBytesReadOut += sizeof(fmt_cbSize); - int bytesReadSoFar = 18; + bytesReadSoFar = 18; fmtOut->extendedSize = drwav__bytes_to_u16(fmt_cbSize); if (fmtOut->extendedSize > 0) { - // Simple validation. + /* Simple validation. */ if (fmtOut->formatTag == DR_WAVE_FORMAT_EXTENSIBLE) { if (fmtOut->extendedSize != 22) { return DRWAV_FALSE; @@ -975,7 +1664,7 @@ static drwav_bool32 drwav__read_fmt(drwav_read_proc onRead, drwav_seek_proc onSe if (fmtOut->formatTag == DR_WAVE_FORMAT_EXTENSIBLE) { unsigned char fmtext[22]; if (onRead(pUserData, fmtext, fmtOut->extendedSize) != fmtOut->extendedSize) { - return DRWAV_FALSE; // Expecting more data. + return DRWAV_FALSE; /* Expecting more data. */ } fmtOut->validBitsPerSample = drwav__bytes_to_u16(fmtext + 0); @@ -991,7 +1680,7 @@ static drwav_bool32 drwav__read_fmt(drwav_read_proc onRead, drwav_seek_proc onSe bytesReadSoFar += fmtOut->extendedSize; } - // Seek past any leftover bytes. For w64 the leftover will be defined based on the chunk size. + /* Seek past any leftover bytes. For w64 the leftover will be defined based on the chunk size. 
*/ if (!onSeek(pUserData, (int)(header.sizeInBytes - bytesReadSoFar), drwav_seek_origin_current)) { return DRWAV_FALSE; } @@ -1009,867 +1698,1151 @@ static drwav_bool32 drwav__read_fmt(drwav_read_proc onRead, drwav_seek_proc onSe } -#ifndef DR_WAV_NO_STDIO -FILE* drwav_fopen(const char* filePath, const char* openMode) +size_t drwav__on_read(drwav_read_proc onRead, void* pUserData, void* pBufferOut, size_t bytesToRead, drwav_uint64* pCursor) { - FILE* pFile; -#if defined(_MSC_VER) && _MSC_VER >= 1400 - if (fopen_s(&pFile, filePath, openMode) != 0) { - return DRWAV_FALSE; - } -#else - pFile = fopen(filePath, openMode); - if (pFile == NULL) { - return DRWAV_FALSE; - } -#endif + size_t bytesRead; - return pFile; -} - -static size_t drwav__on_read_stdio(void* pUserData, void* pBufferOut, size_t bytesToRead) -{ - return fread(pBufferOut, 1, bytesToRead, (FILE*)pUserData); -} + DRWAV_ASSERT(onRead != NULL); + DRWAV_ASSERT(pCursor != NULL); -static size_t drwav__on_write_stdio(void* pUserData, const void* pData, size_t bytesToWrite) -{ - return fwrite(pData, 1, bytesToWrite, (FILE*)pUserData); + bytesRead = onRead(pUserData, pBufferOut, bytesToRead); + *pCursor += bytesRead; + return bytesRead; } -static drwav_bool32 drwav__on_seek_stdio(void* pUserData, int offset, drwav_seek_origin origin) +drwav_bool32 drwav__on_seek(drwav_seek_proc onSeek, void* pUserData, int offset, drwav_seek_origin origin, drwav_uint64* pCursor) { - return fseek((FILE*)pUserData, offset, (origin == drwav_seek_origin_current) ? 
SEEK_CUR : SEEK_SET) == 0; -} + DRWAV_ASSERT(onSeek != NULL); + DRWAV_ASSERT(pCursor != NULL); -drwav_bool32 drwav_init_file(drwav* pWav, const char* filename) -{ - FILE* pFile = drwav_fopen(filename, "rb"); - if (pFile == NULL) { + if (!onSeek(pUserData, offset, origin)) { return DRWAV_FALSE; } - return drwav_init(pWav, drwav__on_read_stdio, drwav__on_seek_stdio, (void*)pFile); -} - - -drwav_bool32 drwav_init_file_write__internal(drwav* pWav, const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential) -{ - FILE* pFile = drwav_fopen(filename, "wb"); - if (pFile == NULL) { - return DRWAV_FALSE; + if (origin == drwav_seek_origin_start) { + *pCursor = offset; + } else { + *pCursor += offset; } - return drwav_init_write__internal(pWav, pFormat, totalSampleCount, isSequential, drwav__on_write_stdio, drwav__on_seek_stdio, (void*)pFile); + return DRWAV_TRUE; } -drwav_bool32 drwav_init_file_write(drwav* pWav, const char* filename, const drwav_data_format* pFormat) -{ - return drwav_init_file_write__internal(pWav, filename, pFormat, 0, DRWAV_FALSE); -} -drwav_bool32 drwav_init_file_write_sequential(drwav* pWav, const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount) -{ - return drwav_init_file_write__internal(pWav, filename, pFormat, totalSampleCount, DRWAV_TRUE); -} -drwav* drwav_open_file(const char* filename) +static drwav_uint32 drwav_get_bytes_per_pcm_frame(drwav* pWav) { - FILE* pFile = drwav_fopen(filename, "rb"); - if (pFile == NULL) { - return DRWAV_FALSE; - } - - drwav* pWav = drwav_open(drwav__on_read_stdio, drwav__on_seek_stdio, (void*)pFile); - if (pWav == NULL) { - fclose(pFile); - return NULL; + /* + The bytes per frame is a bit ambiguous. It can be either be based on the bits per sample, or the block align. The way I'm doing it here + is that if the bits per sample is a multiple of 8, use floor(bitsPerSample*channels/8), otherwise fall back to the block align. 
+ */ + if ((pWav->bitsPerSample & 0x7) == 0) { + /* Bits per sample is a multiple of 8. */ + return (pWav->bitsPerSample * pWav->fmt.channels) >> 3; + } else { + return pWav->fmt.blockAlign; } - - return pWav; } -drwav* drwav_open_file_write__internal(const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential) +drwav_bool32 drwav_preinit(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pReadSeekUserData, const drwav_allocation_callbacks* pAllocationCallbacks) { - FILE* pFile = drwav_fopen(filename, "wb"); - if (pFile == NULL) { + if (pWav == NULL || onRead == NULL || onSeek == NULL) { return DRWAV_FALSE; } - drwav* pWav = drwav_open_write__internal(pFormat, totalSampleCount, isSequential, drwav__on_write_stdio, drwav__on_seek_stdio, (void*)pFile); - if (pWav == NULL) { - fclose(pFile); - return NULL; - } + DRWAV_ZERO_MEMORY(pWav, sizeof(*pWav)); + pWav->onRead = onRead; + pWav->onSeek = onSeek; + pWav->pUserData = pReadSeekUserData; + pWav->allocationCallbacks = drwav_copy_allocation_callbacks_or_defaults(pAllocationCallbacks); - return pWav; -} + if (pWav->allocationCallbacks.onFree == NULL || (pWav->allocationCallbacks.onMalloc == NULL && pWav->allocationCallbacks.onRealloc == NULL)) { + return DRWAV_FALSE; /* Invalid allocation callbacks. */ + } -drwav* drwav_open_file_write(const char* filename, const drwav_data_format* pFormat) -{ - return drwav_open_file_write__internal(filename, pFormat, 0, DRWAV_FALSE); + return DRWAV_TRUE; } -drwav* drwav_open_file_write_sequential(const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount) +drwav_bool32 drwav_init__internal(drwav* pWav, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags) { - return drwav_open_file_write__internal(filename, pFormat, totalSampleCount, DRWAV_TRUE); -} -#endif //DR_WAV_NO_STDIO + /* This function assumes drwav_preinit() has been called beforehand. 
*/ + drwav_uint64 cursor; /* <-- Keeps track of the byte position so we can seek to specific locations. */ + drwav_bool32 sequential; + unsigned char riff[4]; + drwav_fmt fmt; + unsigned short translatedFormatTag; + drwav_uint64 sampleCountFromFactChunk; + drwav_bool32 foundDataChunk; + drwav_uint64 dataChunkSize; + drwav_uint64 chunkSize; -static size_t drwav__on_read_memory(void* pUserData, void* pBufferOut, size_t bytesToRead) -{ - drwav__memory_stream* memory = (drwav__memory_stream*)pUserData; - drwav_assert(memory != NULL); - drwav_assert(memory->dataSize >= memory->currentReadPos); + cursor = 0; + sequential = (flags & DRWAV_SEQUENTIAL) != 0; - size_t bytesRemaining = memory->dataSize - memory->currentReadPos; - if (bytesToRead > bytesRemaining) { - bytesToRead = bytesRemaining; + /* The first 4 bytes should be the RIFF identifier. */ + if (drwav__on_read(pWav->onRead, pWav->pUserData, riff, sizeof(riff), &cursor) != sizeof(riff)) { + return DRWAV_FALSE; } - if (bytesToRead > 0) { - DRWAV_COPY_MEMORY(pBufferOut, memory->data + memory->currentReadPos, bytesToRead); - memory->currentReadPos += bytesToRead; - } + /* + The first 4 bytes can be used to identify the container. For RIFF files it will start with "RIFF" and for + w64 it will start with "riff". + */ + if (drwav__fourcc_equal(riff, "RIFF")) { + pWav->container = drwav_container_riff; + } else if (drwav__fourcc_equal(riff, "riff")) { + int i; + drwav_uint8 riff2[12]; - return bytesToRead; -} + pWav->container = drwav_container_w64; -static drwav_bool32 drwav__on_seek_memory(void* pUserData, int offset, drwav_seek_origin origin) -{ - drwav__memory_stream* memory = (drwav__memory_stream*)pUserData; - drwav_assert(memory != NULL); + /* Check the rest of the GUID for validity. 
*/ + if (drwav__on_read(pWav->onRead, pWav->pUserData, riff2, sizeof(riff2), &cursor) != sizeof(riff2)) { + return DRWAV_FALSE; + } - if (origin == drwav_seek_origin_current) { - if (offset > 0) { - if (memory->currentReadPos + offset > memory->dataSize) { - return DRWAV_FALSE; // Trying to seek too far forward. - } - } else { - if (memory->currentReadPos < (size_t)-offset) { - return DRWAV_FALSE; // Trying to seek too far backwards. + for (i = 0; i < 12; ++i) { + if (riff2[i] != drwavGUID_W64_RIFF[i+4]) { + return DRWAV_FALSE; } } - - // This will never underflow thanks to the clamps above. - memory->currentReadPos += offset; } else { - if ((drwav_uint32)offset <= memory->dataSize) { - memory->currentReadPos = offset; - } else { - return DRWAV_FALSE; // Trying to seek too far forward. - } + return DRWAV_FALSE; /* Unknown or unsupported container. */ } - return DRWAV_TRUE; -} - -static size_t drwav__on_write_memory(void* pUserData, const void* pDataIn, size_t bytesToWrite) -{ - drwav__memory_stream_write* memory = (drwav__memory_stream_write*)pUserData; - drwav_assert(memory != NULL); - drwav_assert(memory->dataCapacity >= memory->currentWritePos); - size_t bytesRemaining = memory->dataCapacity - memory->currentWritePos; - if (bytesRemaining < bytesToWrite) { - // Need to reallocate. - size_t newDataCapacity = (memory->dataCapacity == 0) ? 256 : memory->dataCapacity * 2; + if (pWav->container == drwav_container_riff) { + unsigned char chunkSizeBytes[4]; + unsigned char wave[4]; - // If doubling wasn't enough, just make it the minimum required size to write the data. 
- if ((newDataCapacity - memory->currentWritePos) < bytesToWrite) { - newDataCapacity = memory->currentWritePos + bytesToWrite; + /* RIFF/WAVE */ + if (drwav__on_read(pWav->onRead, pWav->pUserData, chunkSizeBytes, sizeof(chunkSizeBytes), &cursor) != sizeof(chunkSizeBytes)) { + return DRWAV_FALSE; } - void* pNewData = DRWAV_REALLOC(*memory->ppData, newDataCapacity); - if (pNewData == NULL) { - return 0; + if (drwav__bytes_to_u32(chunkSizeBytes) < 36) { + return DRWAV_FALSE; /* Chunk size should always be at least 36 bytes. */ } - *memory->ppData = pNewData; - memory->dataCapacity = newDataCapacity; - } + if (drwav__on_read(pWav->onRead, pWav->pUserData, wave, sizeof(wave), &cursor) != sizeof(wave)) { + return DRWAV_FALSE; + } - drwav_uint8* pDataOut = (drwav_uint8*)(*memory->ppData); - DRWAV_COPY_MEMORY(pDataOut + memory->currentWritePos, pDataIn, bytesToWrite); + if (!drwav__fourcc_equal(wave, "WAVE")) { + return DRWAV_FALSE; /* Expecting "WAVE". */ + } + } else { + unsigned char chunkSizeBytes[8]; + drwav_uint8 wave[16]; - memory->currentWritePos += bytesToWrite; - if (memory->dataSize < memory->currentWritePos) { - memory->dataSize = memory->currentWritePos; - } + /* W64 */ + if (drwav__on_read(pWav->onRead, pWav->pUserData, chunkSizeBytes, sizeof(chunkSizeBytes), &cursor) != sizeof(chunkSizeBytes)) { + return DRWAV_FALSE; + } - *memory->pDataSize = memory->dataSize; + if (drwav__bytes_to_u64(chunkSizeBytes) < 80) { + return DRWAV_FALSE; + } - return bytesToWrite; -} + if (drwav__on_read(pWav->onRead, pWav->pUserData, wave, sizeof(wave), &cursor) != sizeof(wave)) { + return DRWAV_FALSE; + } -static drwav_bool32 drwav__on_seek_memory_write(void* pUserData, int offset, drwav_seek_origin origin) -{ - drwav__memory_stream_write* memory = (drwav__memory_stream_write*)pUserData; - drwav_assert(memory != NULL); + if (!drwav__guid_equal(wave, drwavGUID_W64_WAVE)) { + return DRWAV_FALSE; + } + } - if (origin == drwav_seek_origin_current) { - if (offset > 0) { - if 
(memory->currentWritePos + offset > memory->dataSize) { - offset = (int)(memory->dataSize - memory->currentWritePos); // Trying to seek too far forward. + + /* The next bytes should be the "fmt " chunk. */ + if (!drwav__read_fmt(pWav->onRead, pWav->onSeek, pWav->pUserData, pWav->container, &cursor, &fmt)) { + return DRWAV_FALSE; /* Failed to read the "fmt " chunk. */ + } + + /* Basic validation. */ + if (fmt.sampleRate == 0 || fmt.channels == 0 || fmt.bitsPerSample == 0 || fmt.blockAlign == 0) { + return DRWAV_FALSE; /* Invalid channel count. Probably an invalid WAV file. */ + } + + + /* Translate the internal format. */ + translatedFormatTag = fmt.formatTag; + if (translatedFormatTag == DR_WAVE_FORMAT_EXTENSIBLE) { + translatedFormatTag = drwav__bytes_to_u16(fmt.subFormat + 0); + } + + + + sampleCountFromFactChunk = 0; + + /* + We need to enumerate over each chunk for two reasons: + 1) The "data" chunk may not be the next one + 2) We may want to report each chunk back to the client + + In order to correctly report each chunk back to the client we will need to keep looping until the end of the file. + */ + foundDataChunk = DRWAV_FALSE; + dataChunkSize = 0; + + /* The next chunk we care about is the "data" chunk. This is not necessarily the next chunk so we'll need to loop. */ + for (;;) + { + drwav_chunk_header header; + drwav_result result = drwav__read_chunk_header(pWav->onRead, pWav->pUserData, pWav->container, &cursor, &header); + if (result != DRWAV_SUCCESS) { + if (!foundDataChunk) { + return DRWAV_FALSE; + } else { + break; /* Probably at the end of the file. Get out of the loop. */ + } + } + + /* Tell the client about this chunk. */ + if (!sequential && onChunk != NULL) { + drwav_uint64 callbackBytesRead = onChunk(pChunkUserData, pWav->onRead, pWav->onSeek, pWav->pUserData, &header); + + /* + dr_wav may need to read the contents of the chunk, so we now need to seek back to the position before + we called the callback. 
+ */ + if (callbackBytesRead > 0) { + if (!drwav__seek_from_start(pWav->onSeek, cursor, pWav->pUserData)) { + return DRWAV_FALSE; + } + } + } + + + if (!foundDataChunk) { + pWav->dataChunkDataPos = cursor; + } + + chunkSize = header.sizeInBytes; + if (pWav->container == drwav_container_riff) { + if (drwav__fourcc_equal(header.id.fourcc, "data")) { + foundDataChunk = DRWAV_TRUE; + dataChunkSize = chunkSize; } } else { - if (memory->currentWritePos < (size_t)-offset) { - offset = -(int)memory->currentWritePos; // Trying to seek too far backwards. + if (drwav__guid_equal(header.id.guid, drwavGUID_W64_DATA)) { + foundDataChunk = DRWAV_TRUE; + dataChunkSize = chunkSize; } } - // This will never underflow thanks to the clamps above. - memory->currentWritePos += offset; - } else { - if ((drwav_uint32)offset <= memory->dataSize) { - memory->currentWritePos = offset; + /* + If at this point we have found the data chunk and we're running in sequential mode, we need to break out of this loop. The reason for + this is that we would otherwise require a backwards seek which sequential mode forbids. + */ + if (foundDataChunk && sequential) { + break; + } + + /* Optional. Get the total sample count from the FACT chunk. This is useful for compressed formats. */ + if (pWav->container == drwav_container_riff) { + if (drwav__fourcc_equal(header.id.fourcc, "fact")) { + drwav_uint32 sampleCount; + if (drwav__on_read(pWav->onRead, pWav->pUserData, &sampleCount, 4, &cursor) != 4) { + return DRWAV_FALSE; + } + chunkSize -= 4; + + if (!foundDataChunk) { + pWav->dataChunkDataPos = cursor; + } + + /* + The sample count in the "fact" chunk is either unreliable, or I'm not understanding it properly. For now I am only enabling this + for Microsoft ADPCM formats. 
+ */ + if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) { + sampleCountFromFactChunk = sampleCount; + } else { + sampleCountFromFactChunk = 0; + } + } + } else { + if (drwav__guid_equal(header.id.guid, drwavGUID_W64_FACT)) { + if (drwav__on_read(pWav->onRead, pWav->pUserData, &sampleCountFromFactChunk, 8, &cursor) != 8) { + return DRWAV_FALSE; + } + chunkSize -= 8; + + if (!foundDataChunk) { + pWav->dataChunkDataPos = cursor; + } + } + } + + /* "smpl" chunk. */ + if (pWav->container == drwav_container_riff) { + if (drwav__fourcc_equal(header.id.fourcc, "smpl")) { + unsigned char smplHeaderData[36]; /* 36 = size of the smpl header section, not including the loop data. */ + if (chunkSize >= sizeof(smplHeaderData)) { + drwav_uint64 bytesJustRead = drwav__on_read(pWav->onRead, pWav->pUserData, smplHeaderData, sizeof(smplHeaderData), &cursor); + chunkSize -= bytesJustRead; + + if (bytesJustRead == sizeof(smplHeaderData)) { + drwav_uint32 iLoop; + + pWav->smpl.manufacturer = drwav__bytes_to_u32(smplHeaderData+0); + pWav->smpl.product = drwav__bytes_to_u32(smplHeaderData+4); + pWav->smpl.samplePeriod = drwav__bytes_to_u32(smplHeaderData+8); + pWav->smpl.midiUnityNotes = drwav__bytes_to_u32(smplHeaderData+12); + pWav->smpl.midiPitchFraction = drwav__bytes_to_u32(smplHeaderData+16); + pWav->smpl.smpteFormat = drwav__bytes_to_u32(smplHeaderData+20); + pWav->smpl.smpteOffset = drwav__bytes_to_u32(smplHeaderData+24); + pWav->smpl.numSampleLoops = drwav__bytes_to_u32(smplHeaderData+28); + pWav->smpl.samplerData = drwav__bytes_to_u32(smplHeaderData+32); + + for (iLoop = 0; iLoop < pWav->smpl.numSampleLoops && iLoop < drwav_countof(pWav->smpl.loops); ++iLoop) { + unsigned char smplLoopData[24]; /* 24 = size of a loop section in the smpl chunk. 
*/ + bytesJustRead = drwav__on_read(pWav->onRead, pWav->pUserData, smplLoopData, sizeof(smplLoopData), &cursor); + chunkSize -= bytesJustRead; + + if (bytesJustRead == sizeof(smplLoopData)) { + pWav->smpl.loops[iLoop].cuePointId = drwav__bytes_to_u32(smplLoopData+0); + pWav->smpl.loops[iLoop].type = drwav__bytes_to_u32(smplLoopData+4); + pWav->smpl.loops[iLoop].start = drwav__bytes_to_u32(smplLoopData+8); + pWav->smpl.loops[iLoop].end = drwav__bytes_to_u32(smplLoopData+12); + pWav->smpl.loops[iLoop].fraction = drwav__bytes_to_u32(smplLoopData+16); + pWav->smpl.loops[iLoop].playCount = drwav__bytes_to_u32(smplLoopData+20); + } else { + break; /* Break from the smpl loop for loop. */ + } + } + } + } else { + /* Looks like invalid data. Ignore the chunk. */ + } + } } else { - memory->currentWritePos = memory->dataSize; // Trying to seek too far forward. + if (drwav__guid_equal(header.id.guid, drwavGUID_W64_SMPL)) { + /* + This path will be hit when a W64 WAV file contains a smpl chunk. I don't have a sample file to test this path, so a contribution + is welcome to add support for this. + */ + } } - } - return DRWAV_TRUE; -} + /* Make sure we seek past the padding. */ + chunkSize += header.paddingSize; + if (!drwav__seek_forward(pWav->onSeek, chunkSize, pWav->pUserData)) { + break; + } + cursor += chunkSize; -drwav_bool32 drwav_init_memory(drwav* pWav, const void* data, size_t dataSize) -{ - if (data == NULL || dataSize == 0) { - return DRWAV_FALSE; + if (!foundDataChunk) { + pWav->dataChunkDataPos = cursor; + } } - drwav__memory_stream memoryStream; - drwav_zero_memory(&memoryStream, sizeof(memoryStream)); - memoryStream.data = (const unsigned char*)data; - memoryStream.dataSize = dataSize; - memoryStream.currentReadPos = 0; - - if (!drwav_init(pWav, drwav__on_read_memory, drwav__on_seek_memory, (void*)&memoryStream)) { + /* If we haven't found a data chunk, return an error. 
*/ + if (!foundDataChunk) { return DRWAV_FALSE; } - pWav->memoryStream = memoryStream; - pWav->pUserData = &pWav->memoryStream; - return DRWAV_TRUE; -} + /* We may have moved passed the data chunk. If so we need to move back. If running in sequential mode we can assume we are already sitting on the data chunk. */ + if (!sequential) { + if (!drwav__seek_from_start(pWav->onSeek, pWav->dataChunkDataPos, pWav->pUserData)) { + return DRWAV_FALSE; + } + cursor = pWav->dataChunkDataPos; + } + + /* At this point we should be sitting on the first byte of the raw audio data. */ -drwav_bool32 drwav_init_memory_write__internal(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential) -{ - if (ppData == NULL) { - return DRWAV_FALSE; + pWav->fmt = fmt; + pWav->sampleRate = fmt.sampleRate; + pWav->channels = fmt.channels; + pWav->bitsPerSample = fmt.bitsPerSample; + pWav->bytesRemaining = dataChunkSize; + pWav->translatedFormatTag = translatedFormatTag; + pWav->dataChunkDataSize = dataChunkSize; + + if (sampleCountFromFactChunk != 0) { + pWav->totalPCMFrameCount = sampleCountFromFactChunk; + } else { + pWav->totalPCMFrameCount = dataChunkSize / drwav_get_bytes_per_pcm_frame(pWav); + + if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) { + drwav_uint64 totalBlockHeaderSizeInBytes; + drwav_uint64 blockCount = dataChunkSize / fmt.blockAlign; + + /* Make sure any trailing partial block is accounted for. */ + if ((blockCount * fmt.blockAlign) < dataChunkSize) { + blockCount += 1; + } + + /* We decode two samples per byte. There will be blockCount headers in the data chunk. This is enough to know how to calculate the total PCM frame count. 
*/ + totalBlockHeaderSizeInBytes = blockCount * (6*fmt.channels); + pWav->totalPCMFrameCount = ((dataChunkSize - totalBlockHeaderSizeInBytes) * 2) / fmt.channels; + } + if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) { + drwav_uint64 totalBlockHeaderSizeInBytes; + drwav_uint64 blockCount = dataChunkSize / fmt.blockAlign; + + /* Make sure any trailing partial block is accounted for. */ + if ((blockCount * fmt.blockAlign) < dataChunkSize) { + blockCount += 1; + } + + /* We decode two samples per byte. There will be blockCount headers in the data chunk. This is enough to know how to calculate the total PCM frame count. */ + totalBlockHeaderSizeInBytes = blockCount * (4*fmt.channels); + pWav->totalPCMFrameCount = ((dataChunkSize - totalBlockHeaderSizeInBytes) * 2) / fmt.channels; + + /* The header includes a decoded sample for each channel which acts as the initial predictor sample. */ + pWav->totalPCMFrameCount += blockCount; + } } - *ppData = NULL; // Important because we're using realloc()! - *pDataSize = 0; + /* Some formats only support a certain number of channels. 
*/ + if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM || pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) { + if (pWav->channels > 2) { + return DRWAV_FALSE; + } + } - drwav__memory_stream_write memoryStreamWrite; - drwav_zero_memory(&memoryStreamWrite, sizeof(memoryStreamWrite)); - memoryStreamWrite.ppData = ppData; - memoryStreamWrite.pDataSize = pDataSize; - memoryStreamWrite.dataSize = 0; - memoryStreamWrite.dataCapacity = 0; - memoryStreamWrite.currentWritePos = 0; - - if (!drwav_init_write__internal(pWav, pFormat, totalSampleCount, isSequential, drwav__on_write_memory, drwav__on_seek_memory_write, (void*)&memoryStreamWrite)) { - return DRWAV_FALSE; +#ifdef DR_WAV_LIBSNDFILE_COMPAT + /* + I use libsndfile as a benchmark for testing, however in the version I'm using (from the Windows installer on the libsndfile website), + it appears the total sample count libsndfile uses for MS-ADPCM is incorrect. It would seem they are computing the total sample count + from the number of blocks, however this results in the inclusion of extra silent samples at the end of the last block. The correct + way to know the total sample count is to inspect the "fact" chunk, which should always be present for compressed formats, and should + always include the sample count. This little block of code below is only used to emulate the libsndfile logic so I can properly run my + correctness tests against libsndfile, and is disabled by default. + */ + if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) { + drwav_uint64 blockCount = dataChunkSize / fmt.blockAlign; + pWav->totalPCMFrameCount = (((blockCount * (fmt.blockAlign - (6*pWav->channels))) * 2)) / fmt.channels; /* x2 because two samples per byte. 
*/ } + if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) { + drwav_uint64 blockCount = dataChunkSize / fmt.blockAlign; + pWav->totalPCMFrameCount = (((blockCount * (fmt.blockAlign - (4*pWav->channels))) * 2) + (blockCount * pWav->channels)) / fmt.channels; + } +#endif - pWav->memoryStreamWrite = memoryStreamWrite; - pWav->pUserData = &pWav->memoryStreamWrite; return DRWAV_TRUE; } -drwav_bool32 drwav_init_memory_write(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat) +drwav_bool32 drwav_init(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks) { - return drwav_init_memory_write__internal(pWav, ppData, pDataSize, pFormat, 0, DRWAV_FALSE); + return drwav_init_ex(pWav, onRead, onSeek, NULL, pUserData, NULL, 0, pAllocationCallbacks); } -drwav_bool32 drwav_init_memory_write_sequential(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount) +drwav_bool32 drwav_init_ex(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, drwav_chunk_proc onChunk, void* pReadSeekUserData, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks) { - return drwav_init_memory_write__internal(pWav, ppData, pDataSize, pFormat, totalSampleCount, DRWAV_TRUE); + if (!drwav_preinit(pWav, onRead, onSeek, pReadSeekUserData, pAllocationCallbacks)) { + return DRWAV_FALSE; + } + + return drwav_init__internal(pWav, onChunk, pChunkUserData, flags); } -drwav* drwav_open_memory(const void* data, size_t dataSize) +static drwav_uint32 drwav__riff_chunk_size_riff(drwav_uint64 dataChunkSize) { - if (data == NULL || dataSize == 0) { - return NULL; - } + drwav_uint32 dataSubchunkPaddingSize = drwav__chunk_padding_size_riff(dataChunkSize); - drwav__memory_stream memoryStream; - drwav_zero_memory(&memoryStream, sizeof(memoryStream)); - memoryStream.data = (const unsigned char*)data; - 
memoryStream.dataSize = dataSize; - memoryStream.currentReadPos = 0; - - drwav* pWav = drwav_open(drwav__on_read_memory, drwav__on_seek_memory, (void*)&memoryStream); - if (pWav == NULL) { - return NULL; + if (dataChunkSize <= (0xFFFFFFFFUL - 36 - dataSubchunkPaddingSize)) { + return 36 + (drwav_uint32)(dataChunkSize + dataSubchunkPaddingSize); + } else { + return 0xFFFFFFFF; } - - pWav->memoryStream = memoryStream; - pWav->pUserData = &pWav->memoryStream; - return pWav; } - -drwav* drwav_open_memory_write__internal(void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential) +static drwav_uint32 drwav__data_chunk_size_riff(drwav_uint64 dataChunkSize) { - if (ppData == NULL) { - return NULL; - } - - *ppData = NULL; // Important because we're using realloc()! - *pDataSize = 0; - - drwav__memory_stream_write memoryStreamWrite; - drwav_zero_memory(&memoryStreamWrite, sizeof(memoryStreamWrite)); - memoryStreamWrite.ppData = ppData; - memoryStreamWrite.pDataSize = pDataSize; - memoryStreamWrite.dataSize = 0; - memoryStreamWrite.dataCapacity = 0; - memoryStreamWrite.currentWritePos = 0; - - drwav* pWav = drwav_open_write__internal(pFormat, totalSampleCount, isSequential, drwav__on_write_memory, drwav__on_seek_memory_write, (void*)&memoryStreamWrite); - if (pWav == NULL) { - return NULL; + if (dataChunkSize <= 0xFFFFFFFFUL) { + return (drwav_uint32)dataChunkSize; + } else { + return 0xFFFFFFFFUL; } - - pWav->memoryStreamWrite = memoryStreamWrite; - pWav->pUserData = &pWav->memoryStreamWrite; - return pWav; } -drwav* drwav_open_memory_write(void** ppData, size_t* pDataSize, const drwav_data_format* pFormat) +static drwav_uint64 drwav__riff_chunk_size_w64(drwav_uint64 dataChunkSize) { - return drwav_open_memory_write__internal(ppData, pDataSize, pFormat, 0, DRWAV_FALSE); + drwav_uint64 dataSubchunkPaddingSize = drwav__chunk_padding_size_w64(dataChunkSize); + + return 80 + 24 + dataChunkSize + 
dataSubchunkPaddingSize; /* +24 because W64 includes the size of the GUID and size fields. */ } -drwav* drwav_open_memory_write_sequential(void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount) +static drwav_uint64 drwav__data_chunk_size_w64(drwav_uint64 dataChunkSize) { - return drwav_open_memory_write__internal(ppData, pDataSize, pFormat, totalSampleCount, DRWAV_TRUE); + return 24 + dataChunkSize; /* +24 because W64 includes the size of the GUID and size fields. */ } -drwav_bool32 drwav_init(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData) +drwav_bool32 drwav_preinit_write(drwav* pWav, const drwav_data_format* pFormat, drwav_bool32 isSequential, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks) { - if (onRead == NULL || onSeek == NULL) { + if (pWav == NULL || onWrite == NULL) { return DRWAV_FALSE; } - drwav_zero_memory(pWav, sizeof(*pWav)); + if (!isSequential && onSeek == NULL) { + return DRWAV_FALSE; /* <-- onSeek is required when in non-sequential mode. */ + } + + /* Not currently supporting compressed formats. Will need to add support for the "fact" chunk before we enable this. */ + if (pFormat->format == DR_WAVE_FORMAT_EXTENSIBLE) { + return DRWAV_FALSE; + } + if (pFormat->format == DR_WAVE_FORMAT_ADPCM || pFormat->format == DR_WAVE_FORMAT_DVI_ADPCM) { + return DRWAV_FALSE; + } + DRWAV_ZERO_MEMORY(pWav, sizeof(*pWav)); + pWav->onWrite = onWrite; + pWav->onSeek = onSeek; + pWav->pUserData = pUserData; + pWav->allocationCallbacks = drwav_copy_allocation_callbacks_or_defaults(pAllocationCallbacks); - // The first 4 bytes should be the RIFF identifier. - unsigned char riff[4]; - if (onRead(pUserData, riff, sizeof(riff)) != sizeof(riff)) { - return DRWAV_FALSE; // Failed to read data. 
+ if (pWav->allocationCallbacks.onFree == NULL || (pWav->allocationCallbacks.onMalloc == NULL && pWav->allocationCallbacks.onRealloc == NULL)) { + return DRWAV_FALSE; /* Invalid allocation callbacks. */ } - // The first 4 bytes can be used to identify the container. For RIFF files it will start with "RIFF" and for - // w64 it will start with "riff". - if (drwav__fourcc_equal(riff, "RIFF")) { - pWav->container = drwav_container_riff; - } else if (drwav__fourcc_equal(riff, "riff")) { - pWav->container = drwav_container_w64; + pWav->fmt.formatTag = (drwav_uint16)pFormat->format; + pWav->fmt.channels = (drwav_uint16)pFormat->channels; + pWav->fmt.sampleRate = pFormat->sampleRate; + pWav->fmt.avgBytesPerSec = (drwav_uint32)((pFormat->bitsPerSample * pFormat->sampleRate * pFormat->channels) / 8); + pWav->fmt.blockAlign = (drwav_uint16)((pFormat->channels * pFormat->bitsPerSample) / 8); + pWav->fmt.bitsPerSample = (drwav_uint16)pFormat->bitsPerSample; + pWav->fmt.extendedSize = 0; + pWav->isSequentialWrite = isSequential; - // Check the rest of the GUID for validity. - drwav_uint8 riff2[12]; - if (onRead(pUserData, riff2, sizeof(riff2)) != sizeof(riff2)) { - return DRWAV_FALSE; - } + return DRWAV_TRUE; +} - for (int i = 0; i < 12; ++i) { - if (riff2[i] != drwavGUID_W64_RIFF[i+4]) { - return DRWAV_FALSE; +drwav_bool32 drwav_init_write__internal(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount) +{ + /* The function assumes drwav_preinit_write() was called beforehand. */ + + size_t runningPos = 0; + drwav_uint64 initialDataChunkSize = 0; + drwav_uint64 chunkSizeFMT; + + /* + The initial values for the "RIFF" and "data" chunks depends on whether or not we are initializing in sequential mode or not. In + sequential mode we set this to its final values straight away since they can be calculated from the total sample count. In non- + sequential mode we initialize it all to zero and fill it out in drwav_uninit() using a backwards seek. 
+ */ + if (pWav->isSequentialWrite) { + initialDataChunkSize = (totalSampleCount * pWav->fmt.bitsPerSample) / 8; + + /* + The RIFF container has a limit on the number of samples. drwav is not allowing this. There's no practical limits for Wave64 + so for the sake of simplicity I'm not doing any validation for that. + */ + if (pFormat->container == drwav_container_riff) { + if (initialDataChunkSize > (0xFFFFFFFFUL - 36)) { + return DRWAV_FALSE; /* Not enough room to store every sample. */ } } - } else { - return DRWAV_FALSE; // Unknown or unsupported container. } + pWav->dataChunkDataSizeTargetWrite = initialDataChunkSize; - if (pWav->container == drwav_container_riff) { - // RIFF/WAVE - unsigned char chunkSizeBytes[4]; - if (onRead(pUserData, chunkSizeBytes, sizeof(chunkSizeBytes)) != sizeof(chunkSizeBytes)) { - return DRWAV_FALSE; - } - unsigned int chunkSize = drwav__bytes_to_u32(chunkSizeBytes); - if (chunkSize < 36) { - return DRWAV_FALSE; // Chunk size should always be at least 36 bytes. - } + /* "RIFF" chunk. */ + if (pFormat->container == drwav_container_riff) { + drwav_uint32 chunkSizeRIFF = 36 + (drwav_uint32)initialDataChunkSize; /* +36 = "RIFF"+[RIFF Chunk Size]+"WAVE" + [sizeof "fmt " chunk] */ + runningPos += pWav->onWrite(pWav->pUserData, "RIFF", 4); + runningPos += pWav->onWrite(pWav->pUserData, &chunkSizeRIFF, 4); + runningPos += pWav->onWrite(pWav->pUserData, "WAVE", 4); + } else { + drwav_uint64 chunkSizeRIFF = 80 + 24 + initialDataChunkSize; /* +24 because W64 includes the size of the GUID and size fields. */ + runningPos += pWav->onWrite(pWav->pUserData, drwavGUID_W64_RIFF, 16); + runningPos += pWav->onWrite(pWav->pUserData, &chunkSizeRIFF, 8); + runningPos += pWav->onWrite(pWav->pUserData, drwavGUID_W64_WAVE, 16); + } - unsigned char wave[4]; - if (onRead(pUserData, wave, sizeof(wave)) != sizeof(wave)) { - return DRWAV_FALSE; - } + /* "fmt " chunk. 
*/ + if (pFormat->container == drwav_container_riff) { + chunkSizeFMT = 16; + runningPos += pWav->onWrite(pWav->pUserData, "fmt ", 4); + runningPos += pWav->onWrite(pWav->pUserData, &chunkSizeFMT, 4); + } else { + chunkSizeFMT = 40; + runningPos += pWav->onWrite(pWav->pUserData, drwavGUID_W64_FMT, 16); + runningPos += pWav->onWrite(pWav->pUserData, &chunkSizeFMT, 8); + } - if (!drwav__fourcc_equal(wave, "WAVE")) { - return DRWAV_FALSE; // Expecting "WAVE". - } + runningPos += pWav->onWrite(pWav->pUserData, &pWav->fmt.formatTag, 2); + runningPos += pWav->onWrite(pWav->pUserData, &pWav->fmt.channels, 2); + runningPos += pWav->onWrite(pWav->pUserData, &pWav->fmt.sampleRate, 4); + runningPos += pWav->onWrite(pWav->pUserData, &pWav->fmt.avgBytesPerSec, 4); + runningPos += pWav->onWrite(pWav->pUserData, &pWav->fmt.blockAlign, 2); + runningPos += pWav->onWrite(pWav->pUserData, &pWav->fmt.bitsPerSample, 2); + + pWav->dataChunkDataPos = runningPos; - pWav->dataChunkDataPos = 4 + sizeof(chunkSizeBytes) + sizeof(wave); + /* "data" chunk. */ + if (pFormat->container == drwav_container_riff) { + drwav_uint32 chunkSizeDATA = (drwav_uint32)initialDataChunkSize; + runningPos += pWav->onWrite(pWav->pUserData, "data", 4); + runningPos += pWav->onWrite(pWav->pUserData, &chunkSizeDATA, 4); } else { - // W64 - unsigned char chunkSize[8]; - if (onRead(pUserData, chunkSize, sizeof(chunkSize)) != sizeof(chunkSize)) { - return DRWAV_FALSE; - } + drwav_uint64 chunkSizeDATA = 24 + initialDataChunkSize; /* +24 because W64 includes the size of the GUID and size fields. */ + runningPos += pWav->onWrite(pWav->pUserData, drwavGUID_W64_DATA, 16); + runningPos += pWav->onWrite(pWav->pUserData, &chunkSizeDATA, 8); + } - if (drwav__bytes_to_u64(chunkSize) < 80) { - return DRWAV_FALSE; - } - drwav_uint8 wave[16]; - if (onRead(pUserData, wave, sizeof(wave)) != sizeof(wave)) { + /* Simple validation. 
*/ + if (pFormat->container == drwav_container_riff) { + if (runningPos != 20 + chunkSizeFMT + 8) { return DRWAV_FALSE; } - - if (!drwav__guid_equal(wave, drwavGUID_W64_WAVE)) { + } else { + if (runningPos != 40 + chunkSizeFMT + 24) { return DRWAV_FALSE; } + } + + + /* Set some properties for the client's convenience. */ + pWav->container = pFormat->container; + pWav->channels = (drwav_uint16)pFormat->channels; + pWav->sampleRate = pFormat->sampleRate; + pWav->bitsPerSample = (drwav_uint16)pFormat->bitsPerSample; + pWav->translatedFormatTag = (drwav_uint16)pFormat->format; + + return DRWAV_TRUE; +} + - pWav->dataChunkDataPos = 16 + sizeof(chunkSize) + sizeof(wave); +drwav_bool32 drwav_init_write(drwav* pWav, const drwav_data_format* pFormat, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks) +{ + if (!drwav_preinit_write(pWav, pFormat, DRWAV_FALSE, onWrite, onSeek, pUserData, pAllocationCallbacks)) { + return DRWAV_FALSE; } + return drwav_init_write__internal(pWav, pFormat, 0); /* DRWAV_FALSE = Not Sequential */ +} - // The next bytes should be the "fmt " chunk. - drwav_fmt fmt; - if (!drwav__read_fmt(onRead, onSeek, pUserData, pWav->container, &pWav->dataChunkDataPos, &fmt)) { - return DRWAV_FALSE; // Failed to read the "fmt " chunk. +drwav_bool32 drwav_init_write_sequential(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_write_proc onWrite, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks) +{ + if (!drwav_preinit_write(pWav, pFormat, DRWAV_TRUE, onWrite, NULL, pUserData, pAllocationCallbacks)) { + return DRWAV_FALSE; } - // Basic validation. - if (fmt.sampleRate == 0 || fmt.channels == 0 || fmt.bitsPerSample == 0 || fmt.blockAlign == 0) { - return DRWAV_FALSE; // Invalid channel count. Probably an invalid WAV file. 
+ return drwav_init_write__internal(pWav, pFormat, totalSampleCount); /* DRWAV_TRUE = Sequential */ +} + +drwav_bool32 drwav_init_write_sequential_pcm_frames(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, drwav_write_proc onWrite, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks) +{ + if (pFormat == NULL) { + return DRWAV_FALSE; } + return drwav_init_write_sequential(pWav, pFormat, totalPCMFrameCount*pFormat->channels, onWrite, pUserData, pAllocationCallbacks); +} + +drwav_uint64 drwav_target_write_size_bytes(drwav_data_format const *format, drwav_uint64 totalSampleCount) +{ + drwav_uint64 targetDataSizeBytes = (totalSampleCount * format->channels * format->bitsPerSample/8); + drwav_uint64 riffChunkSizeBytes; + drwav_uint64 fileSizeBytes; - // Translate the internal format. - unsigned short translatedFormatTag = fmt.formatTag; - if (translatedFormatTag == DR_WAVE_FORMAT_EXTENSIBLE) { - translatedFormatTag = drwav__bytes_to_u16(fmt.subFormat + 0); + if (format->container == drwav_container_riff) { + riffChunkSizeBytes = drwav__riff_chunk_size_riff(targetDataSizeBytes); + fileSizeBytes = (8 + riffChunkSizeBytes); /* +8 because WAV doesn't include the size of the ChunkID and ChunkSize fields. */ + } else { + riffChunkSizeBytes = drwav__riff_chunk_size_w64(targetDataSizeBytes); + fileSizeBytes = riffChunkSizeBytes; } + return fileSizeBytes; +} - drwav_uint64 sampleCountFromFactChunk = 0; - // The next chunk we care about is the "data" chunk. This is not necessarily the next chunk so we'll need to loop. 
- drwav_uint64 dataSize; - for (;;) +#ifndef DR_WAV_NO_STDIO +FILE* drwav_fopen(const char* filePath, const char* openMode) +{ + FILE* pFile; +#if defined(_MSC_VER) && _MSC_VER >= 1400 + if (fopen_s(&pFile, filePath, openMode) != 0) { + return NULL; + } +#else + pFile = fopen(filePath, openMode); + if (pFile == NULL) { + return NULL; + } +#endif + + return pFile; +} + +FILE* drwav_wfopen(const wchar_t* pFilePath, const wchar_t* pOpenMode, const drwav_allocation_callbacks* pAllocationCallbacks) +{ + FILE* pFile; + +#if defined(_WIN32) + (void)pAllocationCallbacks; + #if defined(_MSC_VER) && _MSC_VER >= 1400 + if (_wfopen_s(&pFile, pFilePath, pOpenMode) != 0) { + return NULL; + } + #else + pFile = _wfopen(pFilePath, pOpenMode); + if (pFile == NULL) { + return NULL; + } + #endif +#else + /* + Use fopen() on anything other than Windows. Requires a conversion. This is annoying because fopen() is locale specific. The only real way I can + think of to do this is with wcsrtombs(). Note that wcstombs() is apparently not thread-safe because it uses a static global mbstate_t object for + maintaining state. I've checked this with -std=c89 and it works, but if somebody get's a compiler error I'll look into improving compatibility. + */ { - drwav__chunk_header header; - if (!drwav__read_chunk_header(onRead, pUserData, pWav->container, &pWav->dataChunkDataPos, &header)) { - return DRWAV_FALSE; + mbstate_t mbs; + size_t lenMB; + const wchar_t* pFilePathTemp = pFilePath; + char* pFilePathMB = NULL; + const wchar_t* pOpenModeMBTemp = pOpenMode; + char pOpenModeMB[16]; + drwav_allocation_callbacks allocationCallbacks; + + allocationCallbacks = drwav_copy_allocation_callbacks_or_defaults(pAllocationCallbacks); + + /* Get the length first. 
*/ + DRWAV_ZERO_MEMORY(&mbs, sizeof(mbs)); + lenMB = wcsrtombs(NULL, &pFilePathTemp, 0, &mbs); + if (lenMB == (size_t)-1) { + return NULL; } - dataSize = header.sizeInBytes; - if (pWav->container == drwav_container_riff) { - if (drwav__fourcc_equal(header.id.fourcc, "data")) { - break; - } - } else { - if (drwav__guid_equal(header.id.guid, drwavGUID_W64_DATA)) { - break; - } + pFilePathMB = (char*)drwav__malloc_from_callbacks(lenMB + 1, &allocationCallbacks); + if (pFilePathMB == NULL) { + return NULL; } - // Optional. Get the total sample count from the FACT chunk. This is useful for compressed formats. - if (pWav->container == drwav_container_riff) { - if (drwav__fourcc_equal(header.id.fourcc, "fact")) { - drwav_uint32 sampleCount; - if (onRead(pUserData, &sampleCount, 4) != 4) { - return DRWAV_FALSE; - } - pWav->dataChunkDataPos += 4; - dataSize -= 4; + pFilePathTemp = pFilePath; + DRWAV_ZERO_MEMORY(&mbs, sizeof(mbs)); + wcsrtombs(pFilePathMB, &pFilePathTemp, lenMB + 1, &mbs); - // The sample count in the "fact" chunk is either unreliable, or I'm not understanding it properly. For now I am only enabling this - // for Microsoft ADPCM formats. - if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) { - sampleCountFromFactChunk = sampleCount; - } else { - sampleCountFromFactChunk = 0; - } - } - } else { - if (drwav__guid_equal(header.id.guid, drwavGUID_W64_FACT)) { - if (onRead(pUserData, &sampleCountFromFactChunk, 8) != 8) { - return DRWAV_FALSE; - } - pWav->dataChunkDataPos += 8; - dataSize -= 8; - } - } + DRWAV_ZERO_MEMORY(&mbs, sizeof(mbs)); + wcsrtombs(pOpenModeMB, &pOpenModeMBTemp, sizeof(pOpenModeMB), &mbs); - // If we get here it means we didn't find the "data" chunk. Seek past it. + pFile = fopen(pFilePathMB, pOpenModeMB); - // Make sure we seek past the padding. 
- dataSize += header.paddingSize; - drwav__seek_forward(onSeek, dataSize, pUserData); - pWav->dataChunkDataPos += dataSize; + drwav__free_from_callbacks(pFilePathMB, &allocationCallbacks); } +#endif - // At this point we should be sitting on the first byte of the raw audio data. + return pFile; +} - pWav->onRead = onRead; - pWav->onSeek = onSeek; - pWav->pUserData = pUserData; - pWav->fmt = fmt; - pWav->sampleRate = fmt.sampleRate; - pWav->channels = fmt.channels; - pWav->bitsPerSample = fmt.bitsPerSample; - pWav->bytesPerSample = fmt.blockAlign / fmt.channels; - pWav->bytesRemaining = dataSize; - pWav->translatedFormatTag = translatedFormatTag; - pWav->dataChunkDataSize = dataSize; - // The bytes per sample should never be 0 at this point. This would indicate an invalid WAV file. - if (pWav->bytesPerSample == 0) { +static size_t drwav__on_read_stdio(void* pUserData, void* pBufferOut, size_t bytesToRead) +{ + return fread(pBufferOut, 1, bytesToRead, (FILE*)pUserData); +} + +static size_t drwav__on_write_stdio(void* pUserData, const void* pData, size_t bytesToWrite) +{ + return fwrite(pData, 1, bytesToWrite, (FILE*)pUserData); +} + +static drwav_bool32 drwav__on_seek_stdio(void* pUserData, int offset, drwav_seek_origin origin) +{ + return fseek((FILE*)pUserData, offset, (origin == drwav_seek_origin_current) ? 
SEEK_CUR : SEEK_SET) == 0; +} + +drwav_bool32 drwav_init_file(drwav* pWav, const char* filename, const drwav_allocation_callbacks* pAllocationCallbacks) +{ + return drwav_init_file_ex(pWav, filename, NULL, NULL, 0, pAllocationCallbacks); +} + + +drwav_bool32 drwav_init_file__internal_FILE(drwav* pWav, FILE* pFile, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks) +{ + if (!drwav_preinit(pWav, drwav__on_read_stdio, drwav__on_seek_stdio, (void*)pFile, pAllocationCallbacks)) { + fclose(pFile); return DRWAV_FALSE; } - if (sampleCountFromFactChunk != 0) { - pWav->totalSampleCount = sampleCountFromFactChunk * fmt.channels; - } else { - pWav->totalSampleCount = dataSize / pWav->bytesPerSample; + return drwav_init__internal(pWav, onChunk, pChunkUserData, flags); +} - if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) { - drwav_uint64 blockCount = dataSize / fmt.blockAlign; - pWav->totalSampleCount = (blockCount * (fmt.blockAlign - (6*pWav->channels))) * 2; // x2 because two samples per byte. - } - if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) { - drwav_uint64 blockCount = dataSize / fmt.blockAlign; - pWav->totalSampleCount = ((blockCount * (fmt.blockAlign - (4*pWav->channels))) * 2) + (blockCount * pWav->channels); - } +drwav_bool32 drwav_init_file_ex(drwav* pWav, const char* filename, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks) +{ + FILE* pFile = drwav_fopen(filename, "rb"); + if (pFile == NULL) { + return DRWAV_FALSE; + } + + /* This takes ownership of the FILE* object. 
*/ + return drwav_init_file__internal_FILE(pWav, pFile, onChunk, pChunkUserData, flags, pAllocationCallbacks); +} + +drwav_bool32 drwav_init_file_w(drwav* pWav, const wchar_t* filename, const drwav_allocation_callbacks* pAllocationCallbacks) +{ + return drwav_init_file_ex_w(pWav, filename, NULL, NULL, 0, pAllocationCallbacks); +} + +drwav_bool32 drwav_init_file_ex_w(drwav* pWav, const wchar_t* filename, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks) +{ + FILE* pFile = drwav_wfopen(filename, L"rb", pAllocationCallbacks); + if (pFile == NULL) { + return DRWAV_FALSE; + } + + /* This takes ownership of the FILE* object. */ + return drwav_init_file__internal_FILE(pWav, pFile, onChunk, pChunkUserData, flags, pAllocationCallbacks); +} + + +drwav_bool32 drwav_init_file_write__internal_FILE(drwav* pWav, FILE* pFile, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential, const drwav_allocation_callbacks* pAllocationCallbacks) +{ + if (!drwav_preinit_write(pWav, pFormat, isSequential, drwav__on_write_stdio, drwav__on_seek_stdio, (void*)pFile, pAllocationCallbacks)) { + fclose(pFile); + return DRWAV_FALSE; } - // The way we calculate the bytes per sample does not make sense for compressed formats so we just set it to 0. - if (drwav__is_compressed_format_tag(pWav->translatedFormatTag)) { - pWav->bytesPerSample = 0; - } + return drwav_init_write__internal(pWav, pFormat, totalSampleCount); +} - // Some formats only support a certain number of channels. 
- if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM || pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) { - if (pWav->channels > 2) { - return DRWAV_FALSE; - } +drwav_bool32 drwav_init_file_write__internal(drwav* pWav, const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential, const drwav_allocation_callbacks* pAllocationCallbacks) +{ + FILE* pFile = drwav_fopen(filename, "wb"); + if (pFile == NULL) { + return DRWAV_FALSE; } -#ifdef DR_WAV_LIBSNDFILE_COMPAT - // I use libsndfile as a benchmark for testing, however in the version I'm using (from the Windows installer on the libsndfile website), - // it appears the total sample count libsndfile uses for MS-ADPCM is incorrect. It would seem they are computing the total sample count - // from the number of blocks, however this results in the inclusion of extra silent samples at the end of the last block. The correct - // way to know the total sample count is to inspect the "fact" chunk, which should always be present for compressed formats, and should - // always include the sample count. This little block of code below is only used to emulate the libsndfile logic so I can properly run my - // correctness tests against libsndfile, and is disabled by default. - if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) { - drwav_uint64 blockCount = dataSize / fmt.blockAlign; - pWav->totalSampleCount = (blockCount * (fmt.blockAlign - (6*pWav->channels))) * 2; // x2 because two samples per byte. - } - if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) { - drwav_uint64 blockCount = dataSize / fmt.blockAlign; - pWav->totalSampleCount = ((blockCount * (fmt.blockAlign - (4*pWav->channels))) * 2) + (blockCount * pWav->channels); + /* This takes ownership of the FILE* object. 
*/ + return drwav_init_file_write__internal_FILE(pWav, pFile, pFormat, totalSampleCount, isSequential, pAllocationCallbacks); +} + +drwav_bool32 drwav_init_file_write_w__internal(drwav* pWav, const wchar_t* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential, const drwav_allocation_callbacks* pAllocationCallbacks) +{ + FILE* pFile = drwav_wfopen(filename, L"wb", pAllocationCallbacks); + if (pFile == NULL) { + return DRWAV_FALSE; } -#endif - return DRWAV_TRUE; + /* This takes ownership of the FILE* object. */ + return drwav_init_file_write__internal_FILE(pWav, pFile, pFormat, totalSampleCount, isSequential, pAllocationCallbacks); } +drwav_bool32 drwav_init_file_write(drwav* pWav, const char* filename, const drwav_data_format* pFormat, const drwav_allocation_callbacks* pAllocationCallbacks) +{ + return drwav_init_file_write__internal(pWav, filename, pFormat, 0, DRWAV_FALSE, pAllocationCallbacks); +} -drwav_uint32 drwav_riff_chunk_size_riff(drwav_uint64 dataChunkSize) +drwav_bool32 drwav_init_file_write_sequential(drwav* pWav, const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, const drwav_allocation_callbacks* pAllocationCallbacks) { - if (dataChunkSize <= (0xFFFFFFFF - 36)) { - return 36 + (drwav_uint32)dataChunkSize; - } else { - return 0xFFFFFFFF; - } + return drwav_init_file_write__internal(pWav, filename, pFormat, totalSampleCount, DRWAV_TRUE, pAllocationCallbacks); } -drwav_uint32 drwav_data_chunk_size_riff(drwav_uint64 dataChunkSize) +drwav_bool32 drwav_init_file_write_sequential_pcm_frames(drwav* pWav, const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, const drwav_allocation_callbacks* pAllocationCallbacks) { - if (dataChunkSize <= 0xFFFFFFFF) { - return (drwav_uint32)dataChunkSize; - } else { - return 0xFFFFFFFF; + if (pFormat == NULL) { + return DRWAV_FALSE; } + + return drwav_init_file_write_sequential(pWav, filename, pFormat, 
totalPCMFrameCount*pFormat->channels, pAllocationCallbacks); } -drwav_uint64 drwav_riff_chunk_size_w64(drwav_uint64 dataChunkSize) +drwav_bool32 drwav_init_file_write_w(drwav* pWav, const wchar_t* filename, const drwav_data_format* pFormat, const drwav_allocation_callbacks* pAllocationCallbacks) { - return 80 + 24 + dataChunkSize; // +24 because W64 includes the size of the GUID and size fields. + return drwav_init_file_write_w__internal(pWav, filename, pFormat, 0, DRWAV_FALSE, pAllocationCallbacks); } -drwav_uint64 drwav_data_chunk_size_w64(drwav_uint64 dataChunkSize) +drwav_bool32 drwav_init_file_write_sequential_w(drwav* pWav, const wchar_t* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, const drwav_allocation_callbacks* pAllocationCallbacks) { - return 24 + dataChunkSize; // +24 because W64 includes the size of the GUID and size fields. + return drwav_init_file_write_w__internal(pWav, filename, pFormat, totalSampleCount, DRWAV_TRUE, pAllocationCallbacks); } - -drwav_bool32 drwav_init_write__internal(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData) +drwav_bool32 drwav_init_file_write_sequential_pcm_frames_w(drwav* pWav, const wchar_t* filename, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, const drwav_allocation_callbacks* pAllocationCallbacks) { - if (pWav == NULL) { + if (pFormat == NULL) { return DRWAV_FALSE; } - if (onWrite == NULL) { - return DRWAV_FALSE; - } + return drwav_init_file_write_sequential_w(pWav, filename, pFormat, totalPCMFrameCount*pFormat->channels, pAllocationCallbacks); +} +#endif /* DR_WAV_NO_STDIO */ - if (!isSequential && onSeek == NULL) { - return DRWAV_FALSE; // <-- onSeek is required when in non-sequential mode. 
- } +static size_t drwav__on_read_memory(void* pUserData, void* pBufferOut, size_t bytesToRead) +{ + drwav* pWav = (drwav*)pUserData; + size_t bytesRemaining; - // Not currently supporting compressed formats. Will need to add support for the "fact" chunk before we enable this. - if (pFormat->format == DR_WAVE_FORMAT_EXTENSIBLE) { - return DRWAV_FALSE; + DRWAV_ASSERT(pWav != NULL); + DRWAV_ASSERT(pWav->memoryStream.dataSize >= pWav->memoryStream.currentReadPos); + + bytesRemaining = pWav->memoryStream.dataSize - pWav->memoryStream.currentReadPos; + if (bytesToRead > bytesRemaining) { + bytesToRead = bytesRemaining; } - if (pFormat->format == DR_WAVE_FORMAT_ADPCM || pFormat->format == DR_WAVE_FORMAT_DVI_ADPCM) { - return DRWAV_FALSE; + + if (bytesToRead > 0) { + DRWAV_COPY_MEMORY(pBufferOut, pWav->memoryStream.data + pWav->memoryStream.currentReadPos, bytesToRead); + pWav->memoryStream.currentReadPos += bytesToRead; } + return bytesToRead; +} - drwav_zero_memory(pWav, sizeof(*pWav)); - pWav->onWrite = onWrite; - pWav->onSeek = onSeek; - pWav->pUserData = pUserData; - pWav->fmt.formatTag = (drwav_uint16)pFormat->format; - pWav->fmt.channels = (drwav_uint16)pFormat->channels; - pWav->fmt.sampleRate = pFormat->sampleRate; - pWav->fmt.avgBytesPerSec = (drwav_uint32)((pFormat->bitsPerSample * pFormat->sampleRate * pFormat->channels) / 8); - pWav->fmt.blockAlign = (drwav_uint16)((pFormat->channels * pFormat->bitsPerSample) / 8); - pWav->fmt.bitsPerSample = (drwav_uint16)pFormat->bitsPerSample; - pWav->fmt.extendedSize = 0; - pWav->isSequentialWrite = isSequential; +static drwav_bool32 drwav__on_seek_memory(void* pUserData, int offset, drwav_seek_origin origin) +{ + drwav* pWav = (drwav*)pUserData; + DRWAV_ASSERT(pWav != NULL); + if (origin == drwav_seek_origin_current) { + if (offset > 0) { + if (pWav->memoryStream.currentReadPos + offset > pWav->memoryStream.dataSize) { + return DRWAV_FALSE; /* Trying to seek too far forward. 
*/ + } + } else { + if (pWav->memoryStream.currentReadPos < (size_t)-offset) { + return DRWAV_FALSE; /* Trying to seek too far backwards. */ + } + } - size_t runningPos = 0; + /* This will never underflow thanks to the clamps above. */ + pWav->memoryStream.currentReadPos += offset; + } else { + if ((drwav_uint32)offset <= pWav->memoryStream.dataSize) { + pWav->memoryStream.currentReadPos = offset; + } else { + return DRWAV_FALSE; /* Trying to seek too far forward. */ + } + } + + return DRWAV_TRUE; +} - // The initial values for the "RIFF" and "data" chunks depends on whether or not we are initializing in sequential mode or not. In - // sequential mode we set this to its final values straight away since they can be calculated from the total sample count. In non- - // sequential mode we initialize it all to zero and fill it out in drwav_uninit() using a backwards seek. - drwav_uint64 initialDataChunkSize = 0; - if (isSequential) { - initialDataChunkSize = (totalSampleCount * pWav->fmt.bitsPerSample) / 8; +static size_t drwav__on_write_memory(void* pUserData, const void* pDataIn, size_t bytesToWrite) +{ + drwav* pWav = (drwav*)pUserData; + size_t bytesRemaining; - // The RIFF container has a limit on the number of samples. drwav is not allowing this. There's no practical limits for Wave64 - // so for the sake of simplicity I'm not doing any validation for that. - if (pFormat->container == drwav_container_riff) { - if (initialDataChunkSize > (0xFFFFFFFF - 36)) { - return DRWAV_FALSE; // Not enough room to store every sample. - } + DRWAV_ASSERT(pWav != NULL); + DRWAV_ASSERT(pWav->memoryStreamWrite.dataCapacity >= pWav->memoryStreamWrite.currentWritePos); + + bytesRemaining = pWav->memoryStreamWrite.dataCapacity - pWav->memoryStreamWrite.currentWritePos; + if (bytesRemaining < bytesToWrite) { + /* Need to reallocate. */ + void* pNewData; + size_t newDataCapacity = (pWav->memoryStreamWrite.dataCapacity == 0) ? 
256 : pWav->memoryStreamWrite.dataCapacity * 2; + + /* If doubling wasn't enough, just make it the minimum required size to write the data. */ + if ((newDataCapacity - pWav->memoryStreamWrite.currentWritePos) < bytesToWrite) { + newDataCapacity = pWav->memoryStreamWrite.currentWritePos + bytesToWrite; } - } - pWav->dataChunkDataSizeTargetWrite = initialDataChunkSize; + pNewData = drwav__realloc_from_callbacks(*pWav->memoryStreamWrite.ppData, newDataCapacity, pWav->memoryStreamWrite.dataCapacity, &pWav->allocationCallbacks); + if (pNewData == NULL) { + return 0; + } + + *pWav->memoryStreamWrite.ppData = pNewData; + pWav->memoryStreamWrite.dataCapacity = newDataCapacity; + } + DRWAV_COPY_MEMORY(((drwav_uint8*)(*pWav->memoryStreamWrite.ppData)) + pWav->memoryStreamWrite.currentWritePos, pDataIn, bytesToWrite); - // "RIFF" chunk. - if (pFormat->container == drwav_container_riff) { - drwav_uint32 chunkSizeRIFF = 36 + (drwav_uint32)initialDataChunkSize; // +36 = "RIFF"+[RIFF Chunk Size]+"WAVE" + [sizeof "fmt " chunk] - runningPos += pWav->onWrite(pUserData, "RIFF", 4); - runningPos += pWav->onWrite(pUserData, &chunkSizeRIFF, 4); - runningPos += pWav->onWrite(pUserData, "WAVE", 4); - } else { - drwav_uint64 chunkSizeRIFF = 80 + 24 + initialDataChunkSize; // +24 because W64 includes the size of the GUID and size fields. - runningPos += pWav->onWrite(pUserData, drwavGUID_W64_RIFF, 16); - runningPos += pWav->onWrite(pUserData, &chunkSizeRIFF, 8); - runningPos += pWav->onWrite(pUserData, drwavGUID_W64_WAVE, 16); + pWav->memoryStreamWrite.currentWritePos += bytesToWrite; + if (pWav->memoryStreamWrite.dataSize < pWav->memoryStreamWrite.currentWritePos) { + pWav->memoryStreamWrite.dataSize = pWav->memoryStreamWrite.currentWritePos; } - // "fmt " chunk. 
- drwav_uint64 chunkSizeFMT; - if (pFormat->container == drwav_container_riff) { - chunkSizeFMT = 16; - runningPos += pWav->onWrite(pUserData, "fmt ", 4); - runningPos += pWav->onWrite(pUserData, &chunkSizeFMT, 4); + *pWav->memoryStreamWrite.pDataSize = pWav->memoryStreamWrite.dataSize; + + return bytesToWrite; +} + +static drwav_bool32 drwav__on_seek_memory_write(void* pUserData, int offset, drwav_seek_origin origin) +{ + drwav* pWav = (drwav*)pUserData; + DRWAV_ASSERT(pWav != NULL); + + if (origin == drwav_seek_origin_current) { + if (offset > 0) { + if (pWav->memoryStreamWrite.currentWritePos + offset > pWav->memoryStreamWrite.dataSize) { + offset = (int)(pWav->memoryStreamWrite.dataSize - pWav->memoryStreamWrite.currentWritePos); /* Trying to seek too far forward. */ + } + } else { + if (pWav->memoryStreamWrite.currentWritePos < (size_t)-offset) { + offset = -(int)pWav->memoryStreamWrite.currentWritePos; /* Trying to seek too far backwards. */ + } + } + + /* This will never underflow thanks to the clamps above. */ + pWav->memoryStreamWrite.currentWritePos += offset; } else { - chunkSizeFMT = 40; - runningPos += pWav->onWrite(pUserData, drwavGUID_W64_FMT, 16); - runningPos += pWav->onWrite(pUserData, &chunkSizeFMT, 8); + if ((drwav_uint32)offset <= pWav->memoryStreamWrite.dataSize) { + pWav->memoryStreamWrite.currentWritePos = offset; + } else { + pWav->memoryStreamWrite.currentWritePos = pWav->memoryStreamWrite.dataSize; /* Trying to seek too far forward. 
*/ + } } + + return DRWAV_TRUE; +} - runningPos += pWav->onWrite(pUserData, &pWav->fmt.formatTag, 2); - runningPos += pWav->onWrite(pUserData, &pWav->fmt.channels, 2); - runningPos += pWav->onWrite(pUserData, &pWav->fmt.sampleRate, 4); - runningPos += pWav->onWrite(pUserData, &pWav->fmt.avgBytesPerSec, 4); - runningPos += pWav->onWrite(pUserData, &pWav->fmt.blockAlign, 2); - runningPos += pWav->onWrite(pUserData, &pWav->fmt.bitsPerSample, 2); +drwav_bool32 drwav_init_memory(drwav* pWav, const void* data, size_t dataSize, const drwav_allocation_callbacks* pAllocationCallbacks) +{ + return drwav_init_memory_ex(pWav, data, dataSize, NULL, NULL, 0, pAllocationCallbacks); +} - pWav->dataChunkDataPos = runningPos; +drwav_bool32 drwav_init_memory_ex(drwav* pWav, const void* data, size_t dataSize, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks) +{ + if (data == NULL || dataSize == 0) { + return DRWAV_FALSE; + } - // "data" chunk. - if (pFormat->container == drwav_container_riff) { - drwav_uint32 chunkSizeDATA = (drwav_uint32)initialDataChunkSize; - runningPos += pWav->onWrite(pUserData, "data", 4); - runningPos += pWav->onWrite(pUserData, &chunkSizeDATA, 4); - } else { - drwav_uint64 chunkSizeDATA = 24 + initialDataChunkSize; // +24 because W64 includes the size of the GUID and size fields. - runningPos += pWav->onWrite(pUserData, drwavGUID_W64_DATA, 16); - runningPos += pWav->onWrite(pUserData, &chunkSizeDATA, 8); + if (!drwav_preinit(pWav, drwav__on_read_memory, drwav__on_seek_memory, pWav, pAllocationCallbacks)) { + return DRWAV_FALSE; } + pWav->memoryStream.data = (const unsigned char*)data; + pWav->memoryStream.dataSize = dataSize; + pWav->memoryStream.currentReadPos = 0; - // Simple validation. 
- if (pFormat->container == drwav_container_riff) { - if (runningPos != 20 + chunkSizeFMT + 8) { - return DRWAV_FALSE; - } - } else { - if (runningPos != 40 + chunkSizeFMT + 24) { - return DRWAV_FALSE; - } + return drwav_init__internal(pWav, onChunk, pChunkUserData, flags); +} + + +drwav_bool32 drwav_init_memory_write__internal(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential, const drwav_allocation_callbacks* pAllocationCallbacks) +{ + if (ppData == NULL || pDataSize == NULL) { + return DRWAV_FALSE; } + *ppData = NULL; /* Important because we're using realloc()! */ + *pDataSize = 0; + if (!drwav_preinit_write(pWav, pFormat, isSequential, drwav__on_write_memory, drwav__on_seek_memory_write, pWav, pAllocationCallbacks)) { + return DRWAV_FALSE; + } - // Set some properties for the client's convenience. - pWav->container = pFormat->container; - pWav->channels = (drwav_uint16)pFormat->channels; - pWav->sampleRate = pFormat->sampleRate; - pWav->bitsPerSample = (drwav_uint16)pFormat->bitsPerSample; - pWav->bytesPerSample = (drwav_uint16)(pFormat->bitsPerSample >> 3); - pWav->translatedFormatTag = (drwav_uint16)pFormat->format; + pWav->memoryStreamWrite.ppData = ppData; + pWav->memoryStreamWrite.pDataSize = pDataSize; + pWav->memoryStreamWrite.dataSize = 0; + pWav->memoryStreamWrite.dataCapacity = 0; + pWav->memoryStreamWrite.currentWritePos = 0; - return DRWAV_TRUE; + return drwav_init_write__internal(pWav, pFormat, totalSampleCount); } +drwav_bool32 drwav_init_memory_write(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, const drwav_allocation_callbacks* pAllocationCallbacks) +{ + return drwav_init_memory_write__internal(pWav, ppData, pDataSize, pFormat, 0, DRWAV_FALSE, pAllocationCallbacks); +} -drwav_bool32 drwav_init_write(drwav* pWav, const drwav_data_format* pFormat, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData) +drwav_bool32 
drwav_init_memory_write_sequential(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, const drwav_allocation_callbacks* pAllocationCallbacks) { - return drwav_init_write__internal(pWav, pFormat, 0, DRWAV_FALSE, onWrite, onSeek, pUserData); // DRWAV_FALSE = Not Sequential + return drwav_init_memory_write__internal(pWav, ppData, pDataSize, pFormat, totalSampleCount, DRWAV_TRUE, pAllocationCallbacks); } -drwav_bool32 drwav_init_write_sequential(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_write_proc onWrite, void* pUserData) +drwav_bool32 drwav_init_memory_write_sequential_pcm_frames(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, const drwav_allocation_callbacks* pAllocationCallbacks) { - return drwav_init_write__internal(pWav, pFormat, totalSampleCount, DRWAV_TRUE, onWrite, NULL, pUserData); // DRWAV_TRUE = Sequential + if (pFormat == NULL) { + return DRWAV_FALSE; + } + + return drwav_init_memory_write_sequential(pWav, ppData, pDataSize, pFormat, totalPCMFrameCount*pFormat->channels, pAllocationCallbacks); } -void drwav_uninit(drwav* pWav) + + +drwav_result drwav_uninit(drwav* pWav) { + drwav_result result = DRWAV_SUCCESS; + if (pWav == NULL) { - return; + return DRWAV_INVALID_ARGS; } - // If the drwav object was opened in write mode we'll need to finalize a few things: - // - Make sure the "data" chunk is aligned to 16-bits for RIFF containers, or 64 bits for W64 containers. - // - Set the size of the "data" chunk. + /* + If the drwav object was opened in write mode we'll need to finalize a few things: + - Make sure the "data" chunk is aligned to 16-bits for RIFF containers, or 64 bits for W64 containers. + - Set the size of the "data" chunk. + */ if (pWav->onWrite != NULL) { - // Validation for sequential mode. 
- if (pWav->isSequentialWrite) { - drwav_assert(pWav->dataChunkDataSize == pWav->dataChunkDataSizeTargetWrite); - } - - // Padding. Do not adjust pWav->dataChunkDataSize - this should not include the padding. drwav_uint32 paddingSize = 0; + + /* Padding. Do not adjust pWav->dataChunkDataSize - this should not include the padding. */ if (pWav->container == drwav_container_riff) { - paddingSize = (drwav_uint32)(pWav->dataChunkDataSize % 2); + paddingSize = drwav__chunk_padding_size_riff(pWav->dataChunkDataSize); } else { - paddingSize = (drwav_uint32)(pWav->dataChunkDataSize % 8); + paddingSize = drwav__chunk_padding_size_w64(pWav->dataChunkDataSize); } - + if (paddingSize > 0) { drwav_uint64 paddingData = 0; pWav->onWrite(pWav->pUserData, &paddingData, paddingSize); } - - // Chunk sizes. When using sequential mode, these will have been filled in at initialization time. We only need - // to do this when using non-sequential mode. + /* + Chunk sizes. When using sequential mode, these will have been filled in at initialization time. We only need + to do this when using non-sequential mode. + */ if (pWav->onSeek && !pWav->isSequentialWrite) { if (pWav->container == drwav_container_riff) { - // The "RIFF" chunk size. + /* The "RIFF" chunk size. */ if (pWav->onSeek(pWav->pUserData, 4, drwav_seek_origin_start)) { - drwav_uint32 riffChunkSize = drwav_riff_chunk_size_riff(pWav->dataChunkDataSize); + drwav_uint32 riffChunkSize = drwav__riff_chunk_size_riff(pWav->dataChunkDataSize); pWav->onWrite(pWav->pUserData, &riffChunkSize, 4); } - // the "data" chunk size. + /* the "data" chunk size. */ if (pWav->onSeek(pWav->pUserData, (int)pWav->dataChunkDataPos + 4, drwav_seek_origin_start)) { - drwav_uint32 dataChunkSize = drwav_data_chunk_size_riff(pWav->dataChunkDataSize); + drwav_uint32 dataChunkSize = drwav__data_chunk_size_riff(pWav->dataChunkDataSize); pWav->onWrite(pWav->pUserData, &dataChunkSize, 4); } } else { - // The "RIFF" chunk size. + /* The "RIFF" chunk size. 
*/ if (pWav->onSeek(pWav->pUserData, 16, drwav_seek_origin_start)) { - drwav_uint64 riffChunkSize = drwav_riff_chunk_size_w64(pWav->dataChunkDataSize); + drwav_uint64 riffChunkSize = drwav__riff_chunk_size_w64(pWav->dataChunkDataSize); pWav->onWrite(pWav->pUserData, &riffChunkSize, 8); } - // The "data" chunk size. + /* The "data" chunk size. */ if (pWav->onSeek(pWav->pUserData, (int)pWav->dataChunkDataPos + 16, drwav_seek_origin_start)) { - drwav_uint64 dataChunkSize = drwav_data_chunk_size_w64(pWav->dataChunkDataSize); + drwav_uint64 dataChunkSize = drwav__data_chunk_size_w64(pWav->dataChunkDataSize); pWav->onWrite(pWav->pUserData, &dataChunkSize, 8); } } } + + /* Validation for sequential mode. */ + if (pWav->isSequentialWrite) { + if (pWav->dataChunkDataSize != pWav->dataChunkDataSizeTargetWrite) { + result = DRWAV_INVALID_FILE; + } + } } #ifndef DR_WAV_NO_STDIO - // If we opened the file with drwav_open_file() we will want to close the file handle. We can know whether or not drwav_open_file() - // was used by looking at the onRead and onSeek callbacks. + /* + If we opened the file with drwav_open_file() we will want to close the file handle. We can know whether or not drwav_open_file() + was used by looking at the onRead and onSeek callbacks. 
+ */ if (pWav->onRead == drwav__on_read_stdio || pWav->onWrite == drwav__on_write_stdio) { fclose((FILE*)pWav->pUserData); } #endif -} - - -drwav* drwav_open(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData) -{ - drwav* pWav = (drwav*)DRWAV_MALLOC(sizeof(*pWav)); - if (pWav == NULL) { - return NULL; - } - - if (!drwav_init(pWav, onRead, onSeek, pUserData)) { - DRWAV_FREE(pWav); - return NULL; - } - - return pWav; -} - - -drwav* drwav_open_write__internal(const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData) -{ - drwav* pWav = (drwav*)DRWAV_MALLOC(sizeof(*pWav)); - if (pWav == NULL) { - return NULL; - } - - if (!drwav_init_write__internal(pWav, pFormat, totalSampleCount, isSequential, onWrite, onSeek, pUserData)) { - DRWAV_FREE(pWav); - return NULL; - } - - return pWav; -} -drwav* drwav_open_write(const drwav_data_format* pFormat, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData) -{ - return drwav_open_write__internal(pFormat, 0, DRWAV_FALSE, onWrite, onSeek, pUserData); -} - -drwav* drwav_open_write_sequential(const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_write_proc onWrite, void* pUserData) -{ - return drwav_open_write__internal(pFormat, totalSampleCount, DRWAV_TRUE, onWrite, NULL, pUserData); + return result; } -void drwav_close(drwav* pWav) -{ - drwav_uninit(pWav); - DRWAV_FREE(pWav); -} size_t drwav_read_raw(drwav* pWav, size_t bytesToRead, void* pBufferOut) { + size_t bytesRead; + if (pWav == NULL || bytesToRead == 0 || pBufferOut == NULL) { return 0; } @@ -1878,36 +2851,63 @@ size_t drwav_read_raw(drwav* pWav, size_t bytesToRead, void* pBufferOut) bytesToRead = (size_t)pWav->bytesRemaining; } - size_t bytesRead = pWav->onRead(pWav->pUserData, pBufferOut, bytesToRead); + bytesRead = pWav->onRead(pWav->pUserData, pBufferOut, bytesToRead); pWav->bytesRemaining -= bytesRead; return bytesRead; } 
-drwav_uint64 drwav_read(drwav* pWav, drwav_uint64 samplesToRead, void* pBufferOut) + + +drwav_uint64 drwav_read_pcm_frames_le(drwav* pWav, drwav_uint64 framesToRead, void* pBufferOut) { - if (pWav == NULL || samplesToRead == 0 || pBufferOut == NULL) { + drwav_uint32 bytesPerFrame; + + if (pWav == NULL || framesToRead == 0 || pBufferOut == NULL) { return 0; } - // Cannot use this function for compressed formats. + /* Cannot use this function for compressed formats. */ if (drwav__is_compressed_format_tag(pWav->translatedFormatTag)) { return 0; } - // Don't try to read more samples than can potentially fit in the output buffer. - if (samplesToRead * pWav->bytesPerSample > DRWAV_SIZE_MAX) { - samplesToRead = DRWAV_SIZE_MAX / pWav->bytesPerSample; + bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav); + if (bytesPerFrame == 0) { + return 0; + } + + /* Don't try to read more samples than can potentially fit in the output buffer. */ + if (framesToRead * bytesPerFrame > DRWAV_SIZE_MAX) { + framesToRead = DRWAV_SIZE_MAX / bytesPerFrame; } - size_t bytesRead = drwav_read_raw(pWav, (size_t)(samplesToRead * pWav->bytesPerSample), pBufferOut); - return bytesRead / pWav->bytesPerSample; + return drwav_read_raw(pWav, (size_t)(framesToRead * bytesPerFrame), pBufferOut) / bytesPerFrame; +} + +drwav_uint64 drwav_read_pcm_frames_be(drwav* pWav, drwav_uint64 framesToRead, void* pBufferOut) +{ + drwav_uint64 framesRead = drwav_read_pcm_frames_le(pWav, framesToRead, pBufferOut); + drwav__bswap_samples(pBufferOut, framesRead*pWav->channels, drwav_get_bytes_per_pcm_frame(pWav)/pWav->channels, pWav->translatedFormatTag); + + return framesRead; +} + +drwav_uint64 drwav_read_pcm_frames(drwav* pWav, drwav_uint64 framesToRead, void* pBufferOut) +{ + if (drwav__is_little_endian()) { + return drwav_read_pcm_frames_le(pWav, framesToRead, pBufferOut); + } else { + return drwav_read_pcm_frames_be(pWav, framesToRead, pBufferOut); + } } -drwav_bool32 drwav_seek_to_first_sample(drwav* pWav) + + 
+drwav_bool32 drwav_seek_to_first_pcm_frame(drwav* pWav) { if (pWav->onWrite != NULL) { - return DRWAV_FALSE; // No seeking in write mode. + return DRWAV_FALSE; /* No seeking in write mode. */ } if (!pWav->onSeek(pWav->pUserData, (int)pWav->dataChunkDataPos, drwav_seek_origin_start)) { @@ -1915,89 +2915,96 @@ drwav_bool32 drwav_seek_to_first_sample(drwav* pWav) } if (drwav__is_compressed_format_tag(pWav->translatedFormatTag)) { - pWav->compressed.iCurrentSample = 0; + pWav->compressed.iCurrentPCMFrame = 0; } - + pWav->bytesRemaining = pWav->dataChunkDataSize; return DRWAV_TRUE; } -drwav_bool32 drwav_seek_to_sample(drwav* pWav, drwav_uint64 sample) +drwav_bool32 drwav_seek_to_pcm_frame(drwav* pWav, drwav_uint64 targetFrameIndex) { - // Seeking should be compatible with wave files > 2GB. + /* Seeking should be compatible with wave files > 2GB. */ if (pWav->onWrite != NULL) { - return DRWAV_FALSE; // No seeking in write mode. + return DRWAV_FALSE; /* No seeking in write mode. */ } if (pWav == NULL || pWav->onSeek == NULL) { return DRWAV_FALSE; } - // If there are no samples, just return DRWAV_TRUE without doing anything. - if (pWav->totalSampleCount == 0) { + /* If there are no samples, just return DRWAV_TRUE without doing anything. */ + if (pWav->totalPCMFrameCount == 0) { return DRWAV_TRUE; } - // Make sure the sample is clamped. - if (sample >= pWav->totalSampleCount) { - sample = pWav->totalSampleCount - 1; + /* Make sure the sample is clamped. */ + if (targetFrameIndex >= pWav->totalPCMFrameCount) { + targetFrameIndex = pWav->totalPCMFrameCount - 1; } - - // For compressed formats we just use a slow generic seek. If we are seeking forward we just seek forward. If we are going backwards we need - // to seek back to the start. + /* + For compressed formats we just use a slow generic seek. If we are seeking forward we just seek forward. If we are going backwards we need + to seek back to the start. 
+ */ if (drwav__is_compressed_format_tag(pWav->translatedFormatTag)) { - // TODO: This can be optimized. - - // If we're seeking forward it's simple - just keep reading samples until we hit the sample we're requesting. If we're seeking backwards, - // we first need to seek back to the start and then just do the same thing as a forward seek. - if (sample < pWav->compressed.iCurrentSample) { - if (!drwav_seek_to_first_sample(pWav)) { + /* TODO: This can be optimized. */ + + /* + If we're seeking forward it's simple - just keep reading samples until we hit the sample we're requesting. If we're seeking backwards, + we first need to seek back to the start and then just do the same thing as a forward seek. + */ + if (targetFrameIndex < pWav->compressed.iCurrentPCMFrame) { + if (!drwav_seek_to_first_pcm_frame(pWav)) { return DRWAV_FALSE; } } - if (sample > pWav->compressed.iCurrentSample) { - drwav_uint64 offset = sample - pWav->compressed.iCurrentSample; + if (targetFrameIndex > pWav->compressed.iCurrentPCMFrame) { + drwav_uint64 offsetInFrames = targetFrameIndex - pWav->compressed.iCurrentPCMFrame; drwav_int16 devnull[2048]; - while (offset > 0) { - drwav_uint64 samplesToRead = offset; - if (samplesToRead > 2048) { - samplesToRead = 2048; + while (offsetInFrames > 0) { + drwav_uint64 framesRead = 0; + drwav_uint64 framesToRead = offsetInFrames; + if (framesToRead > drwav_countof(devnull)/pWav->channels) { + framesToRead = drwav_countof(devnull)/pWav->channels; } - drwav_uint64 samplesRead = 0; if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) { - samplesRead = drwav_read_s16__msadpcm(pWav, samplesToRead, devnull); + framesRead = drwav_read_pcm_frames_s16__msadpcm(pWav, framesToRead, devnull); } else if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) { - samplesRead = drwav_read_s16__ima(pWav, samplesToRead, devnull); + framesRead = drwav_read_pcm_frames_s16__ima(pWav, framesToRead, devnull); } else { - assert(DRWAV_FALSE); // If this assertion is triggered 
it means I've implemented a new compressed format but forgot to add a branch for it here. + assert(DRWAV_FALSE); /* If this assertion is triggered it means I've implemented a new compressed format but forgot to add a branch for it here. */ } - if (samplesRead != samplesToRead) { + if (framesRead != framesToRead) { return DRWAV_FALSE; } - offset -= samplesRead; + offsetInFrames -= framesRead; } } } else { - drwav_uint64 totalSizeInBytes = pWav->totalSampleCount * pWav->bytesPerSample; - drwav_assert(totalSizeInBytes >= pWav->bytesRemaining); + drwav_uint64 totalSizeInBytes; + drwav_uint64 currentBytePos; + drwav_uint64 targetBytePos; + drwav_uint64 offset; - drwav_uint64 currentBytePos = totalSizeInBytes - pWav->bytesRemaining; - drwav_uint64 targetBytePos = sample * pWav->bytesPerSample; + totalSizeInBytes = pWav->totalPCMFrameCount * drwav_get_bytes_per_pcm_frame(pWav); + DRWAV_ASSERT(totalSizeInBytes >= pWav->bytesRemaining); + + currentBytePos = totalSizeInBytes - pWav->bytesRemaining; + targetBytePos = targetFrameIndex * drwav_get_bytes_per_pcm_frame(pWav); - drwav_uint64 offset; if (currentBytePos < targetBytePos) { - // Offset forwards. + /* Offset forwards. */ offset = (targetBytePos - currentBytePos); } else { - // Offset backwards. - if (!drwav_seek_to_first_sample(pWav)) { + /* Offset backwards. 
*/ + if (!drwav_seek_to_first_pcm_frame(pWav)) { return DRWAV_FALSE; } offset = targetBytePos; @@ -2020,36 +3027,103 @@ drwav_bool32 drwav_seek_to_sample(drwav* pWav, drwav_uint64 sample) size_t drwav_write_raw(drwav* pWav, size_t bytesToWrite, const void* pData) { + size_t bytesWritten; + if (pWav == NULL || bytesToWrite == 0 || pData == NULL) { return 0; } - size_t bytesWritten = pWav->onWrite(pWav->pUserData, pData, bytesToWrite); + bytesWritten = pWav->onWrite(pWav->pUserData, pData, bytesToWrite); pWav->dataChunkDataSize += bytesWritten; return bytesWritten; } -drwav_uint64 drwav_write(drwav* pWav, drwav_uint64 samplesToWrite, const void* pData) + +drwav_uint64 drwav_write_pcm_frames_le(drwav* pWav, drwav_uint64 framesToWrite, const void* pData) { - if (pWav == NULL || samplesToWrite == 0 || pData == NULL) { + drwav_uint64 bytesToWrite; + drwav_uint64 bytesWritten; + const drwav_uint8* pRunningData; + + if (pWav == NULL || framesToWrite == 0 || pData == NULL) { return 0; } - drwav_uint64 bytesToWrite = ((samplesToWrite * pWav->bitsPerSample) / 8); + bytesToWrite = ((framesToWrite * pWav->channels * pWav->bitsPerSample) / 8); if (bytesToWrite > DRWAV_SIZE_MAX) { return 0; } - drwav_uint64 bytesWritten = 0; - const drwav_uint8* pRunningData = (const drwav_uint8*)pData; + bytesWritten = 0; + pRunningData = (const drwav_uint8*)pData; + while (bytesToWrite > 0) { + size_t bytesJustWritten; drwav_uint64 bytesToWriteThisIteration = bytesToWrite; if (bytesToWriteThisIteration > DRWAV_SIZE_MAX) { bytesToWriteThisIteration = DRWAV_SIZE_MAX; } - size_t bytesJustWritten = drwav_write_raw(pWav, (size_t)bytesToWriteThisIteration, pRunningData); + bytesJustWritten = drwav_write_raw(pWav, (size_t)bytesToWriteThisIteration, pRunningData); + if (bytesJustWritten == 0) { + break; + } + + bytesToWrite -= bytesJustWritten; + bytesWritten += bytesJustWritten; + pRunningData += bytesJustWritten; + } + + return (bytesWritten * 8) / pWav->bitsPerSample / pWav->channels; +} + 
+drwav_uint64 drwav_write_pcm_frames_be(drwav* pWav, drwav_uint64 framesToWrite, const void* pData) +{ + drwav_uint64 bytesToWrite; + drwav_uint64 bytesWritten; + drwav_uint32 bytesPerSample; + const drwav_uint8* pRunningData; + + if (pWav == NULL || framesToWrite == 0 || pData == NULL) { + return 0; + } + + bytesToWrite = ((framesToWrite * pWav->channels * pWav->bitsPerSample) / 8); + if (bytesToWrite > DRWAV_SIZE_MAX) { + return 0; + } + + bytesWritten = 0; + pRunningData = (const drwav_uint8*)pData; + + bytesPerSample = drwav_get_bytes_per_pcm_frame(pWav) / pWav->channels; + + while (bytesToWrite > 0) { + drwav_uint8 temp[4096]; + drwav_uint32 sampleCount; + size_t bytesJustWritten; + drwav_uint64 bytesToWriteThisIteration; + + bytesToWriteThisIteration = bytesToWrite; + if (bytesToWriteThisIteration > DRWAV_SIZE_MAX) { + bytesToWriteThisIteration = DRWAV_SIZE_MAX; + } + + /* + WAV files are always little-endian. We need to byte swap on big-endian architectures. Since our input buffer is read-only we need + to use an intermediary buffer for the conversion. 
+ */ + sampleCount = sizeof(temp)/bytesPerSample; + + if (bytesToWriteThisIteration > sampleCount*bytesPerSample) { + bytesToWriteThisIteration = sampleCount*bytesPerSample; + } + + DRWAV_COPY_MEMORY(temp, pRunningData, (size_t)bytesToWriteThisIteration); + drwav__bswap_samples(temp, sampleCount, bytesPerSample, pWav->translatedFormatTag); + + bytesJustWritten = drwav_write_raw(pWav, (size_t)bytesToWriteThisIteration, temp); if (bytesJustWritten == 0) { break; } @@ -2059,44 +3133,52 @@ drwav_uint64 drwav_write(drwav* pWav, drwav_uint64 samplesToWrite, const void* p pRunningData += bytesJustWritten; } - return (bytesWritten * 8) / pWav->bitsPerSample; + return (bytesWritten * 8) / pWav->bitsPerSample / pWav->channels; } +drwav_uint64 drwav_write_pcm_frames(drwav* pWav, drwav_uint64 framesToWrite, const void* pData) +{ + if (drwav__is_little_endian()) { + return drwav_write_pcm_frames_le(pWav, framesToWrite, pData); + } else { + return drwav_write_pcm_frames_be(pWav, framesToWrite, pData); + } +} -drwav_uint64 drwav_read_s16__msadpcm(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut) +drwav_uint64 drwav_read_pcm_frames_s16__msadpcm(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut) { - drwav_assert(pWav != NULL); - drwav_assert(samplesToRead > 0); - drwav_assert(pBufferOut != NULL); + drwav_uint64 totalFramesRead = 0; - // TODO: Lots of room for optimization here. + DRWAV_ASSERT(pWav != NULL); + DRWAV_ASSERT(framesToRead > 0); + DRWAV_ASSERT(pBufferOut != NULL); - drwav_uint64 totalSamplesRead = 0; + /* TODO: Lots of room for optimization here. */ - while (samplesToRead > 0 && pWav->compressed.iCurrentSample < pWav->totalSampleCount) { - // If there are no cached samples we need to load a new block. 
- if (pWav->msadpcm.cachedSampleCount == 0 && pWav->msadpcm.bytesRemainingInBlock == 0) { + while (framesToRead > 0 && pWav->compressed.iCurrentPCMFrame < pWav->totalPCMFrameCount) { + /* If there are no cached frames we need to load a new block. */ + if (pWav->msadpcm.cachedFrameCount == 0 && pWav->msadpcm.bytesRemainingInBlock == 0) { if (pWav->channels == 1) { - // Mono. + /* Mono. */ drwav_uint8 header[7]; if (pWav->onRead(pWav->pUserData, header, sizeof(header)) != sizeof(header)) { - return totalSamplesRead; + return totalFramesRead; } pWav->msadpcm.bytesRemainingInBlock = pWav->fmt.blockAlign - sizeof(header); - pWav->msadpcm.predictor[0] = header[0]; - pWav->msadpcm.delta[0] = drwav__bytes_to_s16(header + 1); - pWav->msadpcm.prevSamples[0][1] = (drwav_int32)drwav__bytes_to_s16(header + 3); - pWav->msadpcm.prevSamples[0][0] = (drwav_int32)drwav__bytes_to_s16(header + 5); - pWav->msadpcm.cachedSamples[2] = pWav->msadpcm.prevSamples[0][0]; - pWav->msadpcm.cachedSamples[3] = pWav->msadpcm.prevSamples[0][1]; - pWav->msadpcm.cachedSampleCount = 2; + pWav->msadpcm.predictor[0] = header[0]; + pWav->msadpcm.delta[0] = drwav__bytes_to_s16(header + 1); + pWav->msadpcm.prevFrames[0][1] = (drwav_int32)drwav__bytes_to_s16(header + 3); + pWav->msadpcm.prevFrames[0][0] = (drwav_int32)drwav__bytes_to_s16(header + 5); + pWav->msadpcm.cachedFrames[2] = pWav->msadpcm.prevFrames[0][0]; + pWav->msadpcm.cachedFrames[3] = pWav->msadpcm.prevFrames[0][1]; + pWav->msadpcm.cachedFrameCount = 2; } else { - // Stereo. + /* Stereo. 
*/ drwav_uint8 header[14]; if (pWav->onRead(pWav->pUserData, header, sizeof(header)) != sizeof(header)) { - return totalSamplesRead; + return totalFramesRead; } pWav->msadpcm.bytesRemainingInBlock = pWav->fmt.blockAlign - sizeof(header); @@ -2104,62 +3186,72 @@ drwav_uint64 drwav_read_s16__msadpcm(drwav* pWav, drwav_uint64 samplesToRead, dr pWav->msadpcm.predictor[1] = header[1]; pWav->msadpcm.delta[0] = drwav__bytes_to_s16(header + 2); pWav->msadpcm.delta[1] = drwav__bytes_to_s16(header + 4); - pWav->msadpcm.prevSamples[0][1] = (drwav_int32)drwav__bytes_to_s16(header + 6); - pWav->msadpcm.prevSamples[1][1] = (drwav_int32)drwav__bytes_to_s16(header + 8); - pWav->msadpcm.prevSamples[0][0] = (drwav_int32)drwav__bytes_to_s16(header + 10); - pWav->msadpcm.prevSamples[1][0] = (drwav_int32)drwav__bytes_to_s16(header + 12); - - pWav->msadpcm.cachedSamples[0] = pWav->msadpcm.prevSamples[0][0]; - pWav->msadpcm.cachedSamples[1] = pWav->msadpcm.prevSamples[1][0]; - pWav->msadpcm.cachedSamples[2] = pWav->msadpcm.prevSamples[0][1]; - pWav->msadpcm.cachedSamples[3] = pWav->msadpcm.prevSamples[1][1]; - pWav->msadpcm.cachedSampleCount = 4; + pWav->msadpcm.prevFrames[0][1] = (drwav_int32)drwav__bytes_to_s16(header + 6); + pWav->msadpcm.prevFrames[1][1] = (drwav_int32)drwav__bytes_to_s16(header + 8); + pWav->msadpcm.prevFrames[0][0] = (drwav_int32)drwav__bytes_to_s16(header + 10); + pWav->msadpcm.prevFrames[1][0] = (drwav_int32)drwav__bytes_to_s16(header + 12); + + pWav->msadpcm.cachedFrames[0] = pWav->msadpcm.prevFrames[0][0]; + pWav->msadpcm.cachedFrames[1] = pWav->msadpcm.prevFrames[1][0]; + pWav->msadpcm.cachedFrames[2] = pWav->msadpcm.prevFrames[0][1]; + pWav->msadpcm.cachedFrames[3] = pWav->msadpcm.prevFrames[1][1]; + pWav->msadpcm.cachedFrameCount = 2; } } - // Output anything that's cached. 
- while (samplesToRead > 0 && pWav->msadpcm.cachedSampleCount > 0 && pWav->compressed.iCurrentSample < pWav->totalSampleCount) { - pBufferOut[0] = (drwav_int16)pWav->msadpcm.cachedSamples[drwav_countof(pWav->msadpcm.cachedSamples) - pWav->msadpcm.cachedSampleCount]; - pWav->msadpcm.cachedSampleCount -= 1; + /* Output anything that's cached. */ + while (framesToRead > 0 && pWav->msadpcm.cachedFrameCount > 0 && pWav->compressed.iCurrentPCMFrame < pWav->totalPCMFrameCount) { + drwav_uint32 iSample = 0; + for (iSample = 0; iSample < pWav->channels; iSample += 1) { + pBufferOut[iSample] = (drwav_int16)pWav->msadpcm.cachedFrames[(drwav_countof(pWav->msadpcm.cachedFrames) - (pWav->msadpcm.cachedFrameCount*pWav->channels)) + iSample]; + } - pBufferOut += 1; - samplesToRead -= 1; - totalSamplesRead += 1; - pWav->compressed.iCurrentSample += 1; + pBufferOut += pWav->channels; + framesToRead -= 1; + totalFramesRead += 1; + pWav->compressed.iCurrentPCMFrame += 1; + pWav->msadpcm.cachedFrameCount -= 1; } - if (samplesToRead == 0) { - return totalSamplesRead; + if (framesToRead == 0) { + return totalFramesRead; } - // If there's nothing left in the cache, just go ahead and load more. If there's nothing left to load in the current block we just continue to the next - // loop iteration which will trigger the loading of a new block. - if (pWav->msadpcm.cachedSampleCount == 0) { + /* + If there's nothing left in the cache, just go ahead and load more. If there's nothing left to load in the current block we just continue to the next + loop iteration which will trigger the loading of a new block. 
+ */ + if (pWav->msadpcm.cachedFrameCount == 0) { if (pWav->msadpcm.bytesRemainingInBlock == 0) { continue; } else { + static drwav_int32 adaptationTable[] = { + 230, 230, 230, 230, 307, 409, 512, 614, + 768, 614, 512, 409, 307, 230, 230, 230 + }; + static drwav_int32 coeff1Table[] = { 256, 512, 0, 192, 240, 460, 392 }; + static drwav_int32 coeff2Table[] = { 0, -256, 0, 64, 0, -208, -232 }; + drwav_uint8 nibbles; + drwav_int32 nibble0; + drwav_int32 nibble1; + if (pWav->onRead(pWav->pUserData, &nibbles, 1) != 1) { - return totalSamplesRead; + return totalFramesRead; } pWav->msadpcm.bytesRemainingInBlock -= 1; - // TODO: Optimize away these if statements. - drwav_int32 nibble0 = ((nibbles & 0xF0) >> 4); if ((nibbles & 0x80)) { nibble0 |= 0xFFFFFFF0UL; } - drwav_int32 nibble1 = ((nibbles & 0x0F) >> 0); if ((nibbles & 0x08)) { nibble1 |= 0xFFFFFFF0UL; } - - static drwav_int32 adaptationTable[] = { - 230, 230, 230, 230, 307, 409, 512, 614, - 768, 614, 512, 409, 307, 230, 230, 230 - }; - static drwav_int32 coeff1Table[] = { 256, 512, 0, 192, 240, 460, 392 }; - static drwav_int32 coeff2Table[] = { 0, -256, 0, 64, 0, -208, -232 }; + /* TODO: Optimize away these if statements. */ + nibble0 = ((nibbles & 0xF0) >> 4); if ((nibbles & 0x80)) { nibble0 |= 0xFFFFFFF0UL; } + nibble1 = ((nibbles & 0x0F) >> 0); if ((nibbles & 0x08)) { nibble1 |= 0xFFFFFFF0UL; } if (pWav->channels == 1) { - // Mono. + /* Mono. 
*/ drwav_int32 newSample0; - newSample0 = ((pWav->msadpcm.prevSamples[0][1] * coeff1Table[pWav->msadpcm.predictor[0]]) + (pWav->msadpcm.prevSamples[0][0] * coeff2Table[pWav->msadpcm.predictor[0]])) >> 8; + drwav_int32 newSample1; + + newSample0 = ((pWav->msadpcm.prevFrames[0][1] * coeff1Table[pWav->msadpcm.predictor[0]]) + (pWav->msadpcm.prevFrames[0][0] * coeff2Table[pWav->msadpcm.predictor[0]])) >> 8; newSample0 += nibble0 * pWav->msadpcm.delta[0]; newSample0 = drwav_clamp(newSample0, -32768, 32767); @@ -2168,12 +3260,11 @@ drwav_uint64 drwav_read_s16__msadpcm(drwav* pWav, drwav_uint64 samplesToRead, dr pWav->msadpcm.delta[0] = 16; } - pWav->msadpcm.prevSamples[0][0] = pWav->msadpcm.prevSamples[0][1]; - pWav->msadpcm.prevSamples[0][1] = newSample0; + pWav->msadpcm.prevFrames[0][0] = pWav->msadpcm.prevFrames[0][1]; + pWav->msadpcm.prevFrames[0][1] = newSample0; - drwav_int32 newSample1; - newSample1 = ((pWav->msadpcm.prevSamples[0][1] * coeff1Table[pWav->msadpcm.predictor[0]]) + (pWav->msadpcm.prevSamples[0][0] * coeff2Table[pWav->msadpcm.predictor[0]])) >> 8; + newSample1 = ((pWav->msadpcm.prevFrames[0][1] * coeff1Table[pWav->msadpcm.predictor[0]]) + (pWav->msadpcm.prevFrames[0][0] * coeff2Table[pWav->msadpcm.predictor[0]])) >> 8; newSample1 += nibble1 * pWav->msadpcm.delta[0]; newSample1 = drwav_clamp(newSample1, -32768, 32767); @@ -2182,19 +3273,20 @@ drwav_uint64 drwav_read_s16__msadpcm(drwav* pWav, drwav_uint64 samplesToRead, dr pWav->msadpcm.delta[0] = 16; } - pWav->msadpcm.prevSamples[0][0] = pWav->msadpcm.prevSamples[0][1]; - pWav->msadpcm.prevSamples[0][1] = newSample1; + pWav->msadpcm.prevFrames[0][0] = pWav->msadpcm.prevFrames[0][1]; + pWav->msadpcm.prevFrames[0][1] = newSample1; - pWav->msadpcm.cachedSamples[2] = newSample0; - pWav->msadpcm.cachedSamples[3] = newSample1; - pWav->msadpcm.cachedSampleCount = 2; + pWav->msadpcm.cachedFrames[2] = newSample0; + pWav->msadpcm.cachedFrames[3] = newSample1; + pWav->msadpcm.cachedFrameCount = 2; } else { - // 
Stereo. - - // Left. + /* Stereo. */ drwav_int32 newSample0; - newSample0 = ((pWav->msadpcm.prevSamples[0][1] * coeff1Table[pWav->msadpcm.predictor[0]]) + (pWav->msadpcm.prevSamples[0][0] * coeff2Table[pWav->msadpcm.predictor[0]])) >> 8; + drwav_int32 newSample1; + + /* Left. */ + newSample0 = ((pWav->msadpcm.prevFrames[0][1] * coeff1Table[pWav->msadpcm.predictor[0]]) + (pWav->msadpcm.prevFrames[0][0] * coeff2Table[pWav->msadpcm.predictor[0]])) >> 8; newSample0 += nibble0 * pWav->msadpcm.delta[0]; newSample0 = drwav_clamp(newSample0, -32768, 32767); @@ -2203,13 +3295,12 @@ drwav_uint64 drwav_read_s16__msadpcm(drwav* pWav, drwav_uint64 samplesToRead, dr pWav->msadpcm.delta[0] = 16; } - pWav->msadpcm.prevSamples[0][0] = pWav->msadpcm.prevSamples[0][1]; - pWav->msadpcm.prevSamples[0][1] = newSample0; + pWav->msadpcm.prevFrames[0][0] = pWav->msadpcm.prevFrames[0][1]; + pWav->msadpcm.prevFrames[0][1] = newSample0; - // Right. - drwav_int32 newSample1; - newSample1 = ((pWav->msadpcm.prevSamples[1][1] * coeff1Table[pWav->msadpcm.predictor[1]]) + (pWav->msadpcm.prevSamples[1][0] * coeff2Table[pWav->msadpcm.predictor[1]])) >> 8; + /* Right. 
*/ + newSample1 = ((pWav->msadpcm.prevFrames[1][1] * coeff1Table[pWav->msadpcm.predictor[1]]) + (pWav->msadpcm.prevFrames[1][0] * coeff2Table[pWav->msadpcm.predictor[1]])) >> 8; newSample1 += nibble1 * pWav->msadpcm.delta[1]; newSample1 = drwav_clamp(newSample1, -32768, 32767); @@ -2218,50 +3309,51 @@ drwav_uint64 drwav_read_s16__msadpcm(drwav* pWav, drwav_uint64 samplesToRead, dr pWav->msadpcm.delta[1] = 16; } - pWav->msadpcm.prevSamples[1][0] = pWav->msadpcm.prevSamples[1][1]; - pWav->msadpcm.prevSamples[1][1] = newSample1; + pWav->msadpcm.prevFrames[1][0] = pWav->msadpcm.prevFrames[1][1]; + pWav->msadpcm.prevFrames[1][1] = newSample1; - pWav->msadpcm.cachedSamples[2] = newSample0; - pWav->msadpcm.cachedSamples[3] = newSample1; - pWav->msadpcm.cachedSampleCount = 2; + pWav->msadpcm.cachedFrames[2] = newSample0; + pWav->msadpcm.cachedFrames[3] = newSample1; + pWav->msadpcm.cachedFrameCount = 1; } } } } - return totalSamplesRead; + return totalFramesRead; } -drwav_uint64 drwav_read_s16__ima(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut) + +drwav_uint64 drwav_read_pcm_frames_s16__ima(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut) { - drwav_assert(pWav != NULL); - drwav_assert(samplesToRead > 0); - drwav_assert(pBufferOut != NULL); + drwav_uint64 totalFramesRead = 0; - // TODO: Lots of room for optimization here. + DRWAV_ASSERT(pWav != NULL); + DRWAV_ASSERT(framesToRead > 0); + DRWAV_ASSERT(pBufferOut != NULL); - drwav_uint64 totalSamplesRead = 0; + /* TODO: Lots of room for optimization here. */ - while (samplesToRead > 0 && pWav->compressed.iCurrentSample < pWav->totalSampleCount) { - // If there are no cached samples we need to load a new block. - if (pWav->ima.cachedSampleCount == 0 && pWav->ima.bytesRemainingInBlock == 0) { + while (framesToRead > 0 && pWav->compressed.iCurrentPCMFrame < pWav->totalPCMFrameCount) { + /* If there are no cached samples we need to load a new block. 
*/ + if (pWav->ima.cachedFrameCount == 0 && pWav->ima.bytesRemainingInBlock == 0) { if (pWav->channels == 1) { - // Mono. + /* Mono. */ drwav_uint8 header[4]; if (pWav->onRead(pWav->pUserData, header, sizeof(header)) != sizeof(header)) { - return totalSamplesRead; + return totalFramesRead; } pWav->ima.bytesRemainingInBlock = pWav->fmt.blockAlign - sizeof(header); pWav->ima.predictor[0] = drwav__bytes_to_s16(header + 0); pWav->ima.stepIndex[0] = header[2]; - pWav->ima.cachedSamples[drwav_countof(pWav->ima.cachedSamples) - 1] = pWav->ima.predictor[0]; - pWav->ima.cachedSampleCount = 1; + pWav->ima.cachedFrames[drwav_countof(pWav->ima.cachedFrames) - 1] = pWav->ima.predictor[0]; + pWav->ima.cachedFrameCount = 1; } else { - // Stereo. + /* Stereo. */ drwav_uint8 header[8]; if (pWav->onRead(pWav->pUserData, header, sizeof(header)) != sizeof(header)) { - return totalSamplesRead; + return totalFramesRead; } pWav->ima.bytesRemainingInBlock = pWav->fmt.blockAlign - sizeof(header); @@ -2270,30 +3362,35 @@ drwav_uint64 drwav_read_s16__ima(drwav* pWav, drwav_uint64 samplesToRead, drwav_ pWav->ima.predictor[1] = drwav__bytes_to_s16(header + 4); pWav->ima.stepIndex[1] = header[6]; - pWav->ima.cachedSamples[drwav_countof(pWav->ima.cachedSamples) - 2] = pWav->ima.predictor[0]; - pWav->ima.cachedSamples[drwav_countof(pWav->ima.cachedSamples) - 1] = pWav->ima.predictor[1]; - pWav->ima.cachedSampleCount = 2; + pWav->ima.cachedFrames[drwav_countof(pWav->ima.cachedFrames) - 2] = pWav->ima.predictor[0]; + pWav->ima.cachedFrames[drwav_countof(pWav->ima.cachedFrames) - 1] = pWav->ima.predictor[1]; + pWav->ima.cachedFrameCount = 1; } } - // Output anything that's cached. - while (samplesToRead > 0 && pWav->ima.cachedSampleCount > 0 && pWav->compressed.iCurrentSample < pWav->totalSampleCount) { - pBufferOut[0] = (drwav_int16)pWav->ima.cachedSamples[drwav_countof(pWav->ima.cachedSamples) - pWav->ima.cachedSampleCount]; - pWav->ima.cachedSampleCount -= 1; + /* Output anything that's cached. 
*/ + while (framesToRead > 0 && pWav->ima.cachedFrameCount > 0 && pWav->compressed.iCurrentPCMFrame < pWav->totalPCMFrameCount) { + drwav_uint32 iSample; + for (iSample = 0; iSample < pWav->channels; iSample += 1) { + pBufferOut[iSample] = (drwav_int16)pWav->ima.cachedFrames[(drwav_countof(pWav->ima.cachedFrames) - (pWav->ima.cachedFrameCount*pWav->channels)) + iSample]; + } - pBufferOut += 1; - samplesToRead -= 1; - totalSamplesRead += 1; - pWav->compressed.iCurrentSample += 1; + pBufferOut += pWav->channels; + framesToRead -= 1; + totalFramesRead += 1; + pWav->compressed.iCurrentPCMFrame += 1; + pWav->ima.cachedFrameCount -= 1; } - if (samplesToRead == 0) { - return totalSamplesRead; + if (framesToRead == 0) { + return totalFramesRead; } - // If there's nothing left in the cache, just go ahead and load more. If there's nothing left to load in the current block we just continue to the next - // loop iteration which will trigger the loading of a new block. - if (pWav->ima.cachedSampleCount == 0) { + /* + If there's nothing left in the cache, just go ahead and load more. If there's nothing left to load in the current block we just continue to the next + loop iteration which will trigger the loading of a new block. 
+ */ + if (pWav->ima.cachedFrameCount == 0) { if (pWav->ima.bytesRemainingInBlock == 0) { continue; } else { @@ -2303,28 +3400,33 @@ drwav_uint64 drwav_read_s16__ima(drwav* pWav, drwav_uint64 samplesToRead, drwav_ }; static drwav_int32 stepTable[89] = { - 7, 8, 9, 10, 11, 12, 13, 14, 16, 17, - 19, 21, 23, 25, 28, 31, 34, 37, 41, 45, - 50, 55, 60, 66, 73, 80, 88, 97, 107, 118, + 7, 8, 9, 10, 11, 12, 13, 14, 16, 17, + 19, 21, 23, 25, 28, 31, 34, 37, 41, 45, + 50, 55, 60, 66, 73, 80, 88, 97, 107, 118, 130, 143, 157, 173, 190, 209, 230, 253, 279, 307, 337, 371, 408, 449, 494, 544, 598, 658, 724, 796, - 876, 963, 1060, 1166, 1282, 1411, 1552, 1707, 1878, 2066, + 876, 963, 1060, 1166, 1282, 1411, 1552, 1707, 1878, 2066, 2272, 2499, 2749, 3024, 3327, 3660, 4026, 4428, 4871, 5358, - 5894, 6484, 7132, 7845, 8630, 9493, 10442, 11487, 12635, 13899, - 15289, 16818, 18500, 20350, 22385, 24623, 27086, 29794, 32767 + 5894, 6484, 7132, 7845, 8630, 9493, 10442, 11487, 12635, 13899, + 15289, 16818, 18500, 20350, 22385, 24623, 27086, 29794, 32767 }; - // From what I can tell with stereo streams, it looks like every 4 bytes (8 samples) is for one channel. So it goes 4 bytes for the - // left channel, 4 bytes for the right channel. - pWav->ima.cachedSampleCount = 8 * pWav->channels; - for (drwav_uint32 iChannel = 0; iChannel < pWav->channels; ++iChannel) { + drwav_uint32 iChannel; + + /* + From what I can tell with stereo streams, it looks like every 4 bytes (8 samples) is for one channel. So it goes 4 bytes for the + left channel, 4 bytes for the right channel. 
+ */ + pWav->ima.cachedFrameCount = 8; + for (iChannel = 0; iChannel < pWav->channels; ++iChannel) { + drwav_uint32 iByte; drwav_uint8 nibbles[4]; if (pWav->onRead(pWav->pUserData, &nibbles, 4) != 4) { - return totalSamplesRead; + return totalFramesRead; } pWav->ima.bytesRemainingInBlock -= 4; - for (drwav_uint32 iByte = 0; iByte < 4; ++iByte) { + for (iByte = 0; iByte < 4; ++iByte) { drwav_uint8 nibble0 = ((nibbles[iByte] & 0x0F) >> 0); drwav_uint8 nibble1 = ((nibbles[iByte] & 0xF0) >> 4); @@ -2340,7 +3442,7 @@ drwav_uint64 drwav_read_s16__ima(drwav* pWav, drwav_uint64 samplesToRead, drwav_ predictor = drwav_clamp(predictor + diff, -32768, 32767); pWav->ima.predictor[iChannel] = predictor; pWav->ima.stepIndex[iChannel] = drwav_clamp(pWav->ima.stepIndex[iChannel] + indexTable[nibble0], 0, (drwav_int32)drwav_countof(stepTable)-1); - pWav->ima.cachedSamples[(drwav_countof(pWav->ima.cachedSamples) - pWav->ima.cachedSampleCount) + (iByte*2+0)*pWav->channels + iChannel] = predictor; + pWav->ima.cachedFrames[(drwav_countof(pWav->ima.cachedFrames) - (pWav->ima.cachedFrameCount*pWav->channels)) + (iByte*2+0)*pWav->channels + iChannel] = predictor; step = stepTable[pWav->ima.stepIndex[iChannel]]; @@ -2355,53 +3457,53 @@ drwav_uint64 drwav_read_s16__ima(drwav* pWav, drwav_uint64 samplesToRead, drwav_ predictor = drwav_clamp(predictor + diff, -32768, 32767); pWav->ima.predictor[iChannel] = predictor; pWav->ima.stepIndex[iChannel] = drwav_clamp(pWav->ima.stepIndex[iChannel] + indexTable[nibble1], 0, (drwav_int32)drwav_countof(stepTable)-1); - pWav->ima.cachedSamples[(drwav_countof(pWav->ima.cachedSamples) - pWav->ima.cachedSampleCount) + (iByte*2+1)*pWav->channels + iChannel] = predictor; + pWav->ima.cachedFrames[(drwav_countof(pWav->ima.cachedFrames) - (pWav->ima.cachedFrameCount*pWav->channels)) + (iByte*2+1)*pWav->channels + iChannel] = predictor; } } } } } - return totalSamplesRead; + return totalFramesRead; } #ifndef DR_WAV_NO_CONVERSION_API static unsigned short 
g_drwavAlawTable[256] = { - 0xEA80, 0xEB80, 0xE880, 0xE980, 0xEE80, 0xEF80, 0xEC80, 0xED80, 0xE280, 0xE380, 0xE080, 0xE180, 0xE680, 0xE780, 0xE480, 0xE580, - 0xF540, 0xF5C0, 0xF440, 0xF4C0, 0xF740, 0xF7C0, 0xF640, 0xF6C0, 0xF140, 0xF1C0, 0xF040, 0xF0C0, 0xF340, 0xF3C0, 0xF240, 0xF2C0, - 0xAA00, 0xAE00, 0xA200, 0xA600, 0xBA00, 0xBE00, 0xB200, 0xB600, 0x8A00, 0x8E00, 0x8200, 0x8600, 0x9A00, 0x9E00, 0x9200, 0x9600, - 0xD500, 0xD700, 0xD100, 0xD300, 0xDD00, 0xDF00, 0xD900, 0xDB00, 0xC500, 0xC700, 0xC100, 0xC300, 0xCD00, 0xCF00, 0xC900, 0xCB00, - 0xFEA8, 0xFEB8, 0xFE88, 0xFE98, 0xFEE8, 0xFEF8, 0xFEC8, 0xFED8, 0xFE28, 0xFE38, 0xFE08, 0xFE18, 0xFE68, 0xFE78, 0xFE48, 0xFE58, - 0xFFA8, 0xFFB8, 0xFF88, 0xFF98, 0xFFE8, 0xFFF8, 0xFFC8, 0xFFD8, 0xFF28, 0xFF38, 0xFF08, 0xFF18, 0xFF68, 0xFF78, 0xFF48, 0xFF58, - 0xFAA0, 0xFAE0, 0xFA20, 0xFA60, 0xFBA0, 0xFBE0, 0xFB20, 0xFB60, 0xF8A0, 0xF8E0, 0xF820, 0xF860, 0xF9A0, 0xF9E0, 0xF920, 0xF960, - 0xFD50, 0xFD70, 0xFD10, 0xFD30, 0xFDD0, 0xFDF0, 0xFD90, 0xFDB0, 0xFC50, 0xFC70, 0xFC10, 0xFC30, 0xFCD0, 0xFCF0, 0xFC90, 0xFCB0, - 0x1580, 0x1480, 0x1780, 0x1680, 0x1180, 0x1080, 0x1380, 0x1280, 0x1D80, 0x1C80, 0x1F80, 0x1E80, 0x1980, 0x1880, 0x1B80, 0x1A80, - 0x0AC0, 0x0A40, 0x0BC0, 0x0B40, 0x08C0, 0x0840, 0x09C0, 0x0940, 0x0EC0, 0x0E40, 0x0FC0, 0x0F40, 0x0CC0, 0x0C40, 0x0DC0, 0x0D40, - 0x5600, 0x5200, 0x5E00, 0x5A00, 0x4600, 0x4200, 0x4E00, 0x4A00, 0x7600, 0x7200, 0x7E00, 0x7A00, 0x6600, 0x6200, 0x6E00, 0x6A00, - 0x2B00, 0x2900, 0x2F00, 0x2D00, 0x2300, 0x2100, 0x2700, 0x2500, 0x3B00, 0x3900, 0x3F00, 0x3D00, 0x3300, 0x3100, 0x3700, 0x3500, - 0x0158, 0x0148, 0x0178, 0x0168, 0x0118, 0x0108, 0x0138, 0x0128, 0x01D8, 0x01C8, 0x01F8, 0x01E8, 0x0198, 0x0188, 0x01B8, 0x01A8, - 0x0058, 0x0048, 0x0078, 0x0068, 0x0018, 0x0008, 0x0038, 0x0028, 0x00D8, 0x00C8, 0x00F8, 0x00E8, 0x0098, 0x0088, 0x00B8, 0x00A8, - 0x0560, 0x0520, 0x05E0, 0x05A0, 0x0460, 0x0420, 0x04E0, 0x04A0, 0x0760, 0x0720, 0x07E0, 0x07A0, 0x0660, 0x0620, 0x06E0, 0x06A0, + 0xEA80, 0xEB80, 
0xE880, 0xE980, 0xEE80, 0xEF80, 0xEC80, 0xED80, 0xE280, 0xE380, 0xE080, 0xE180, 0xE680, 0xE780, 0xE480, 0xE580, + 0xF540, 0xF5C0, 0xF440, 0xF4C0, 0xF740, 0xF7C0, 0xF640, 0xF6C0, 0xF140, 0xF1C0, 0xF040, 0xF0C0, 0xF340, 0xF3C0, 0xF240, 0xF2C0, + 0xAA00, 0xAE00, 0xA200, 0xA600, 0xBA00, 0xBE00, 0xB200, 0xB600, 0x8A00, 0x8E00, 0x8200, 0x8600, 0x9A00, 0x9E00, 0x9200, 0x9600, + 0xD500, 0xD700, 0xD100, 0xD300, 0xDD00, 0xDF00, 0xD900, 0xDB00, 0xC500, 0xC700, 0xC100, 0xC300, 0xCD00, 0xCF00, 0xC900, 0xCB00, + 0xFEA8, 0xFEB8, 0xFE88, 0xFE98, 0xFEE8, 0xFEF8, 0xFEC8, 0xFED8, 0xFE28, 0xFE38, 0xFE08, 0xFE18, 0xFE68, 0xFE78, 0xFE48, 0xFE58, + 0xFFA8, 0xFFB8, 0xFF88, 0xFF98, 0xFFE8, 0xFFF8, 0xFFC8, 0xFFD8, 0xFF28, 0xFF38, 0xFF08, 0xFF18, 0xFF68, 0xFF78, 0xFF48, 0xFF58, + 0xFAA0, 0xFAE0, 0xFA20, 0xFA60, 0xFBA0, 0xFBE0, 0xFB20, 0xFB60, 0xF8A0, 0xF8E0, 0xF820, 0xF860, 0xF9A0, 0xF9E0, 0xF920, 0xF960, + 0xFD50, 0xFD70, 0xFD10, 0xFD30, 0xFDD0, 0xFDF0, 0xFD90, 0xFDB0, 0xFC50, 0xFC70, 0xFC10, 0xFC30, 0xFCD0, 0xFCF0, 0xFC90, 0xFCB0, + 0x1580, 0x1480, 0x1780, 0x1680, 0x1180, 0x1080, 0x1380, 0x1280, 0x1D80, 0x1C80, 0x1F80, 0x1E80, 0x1980, 0x1880, 0x1B80, 0x1A80, + 0x0AC0, 0x0A40, 0x0BC0, 0x0B40, 0x08C0, 0x0840, 0x09C0, 0x0940, 0x0EC0, 0x0E40, 0x0FC0, 0x0F40, 0x0CC0, 0x0C40, 0x0DC0, 0x0D40, + 0x5600, 0x5200, 0x5E00, 0x5A00, 0x4600, 0x4200, 0x4E00, 0x4A00, 0x7600, 0x7200, 0x7E00, 0x7A00, 0x6600, 0x6200, 0x6E00, 0x6A00, + 0x2B00, 0x2900, 0x2F00, 0x2D00, 0x2300, 0x2100, 0x2700, 0x2500, 0x3B00, 0x3900, 0x3F00, 0x3D00, 0x3300, 0x3100, 0x3700, 0x3500, + 0x0158, 0x0148, 0x0178, 0x0168, 0x0118, 0x0108, 0x0138, 0x0128, 0x01D8, 0x01C8, 0x01F8, 0x01E8, 0x0198, 0x0188, 0x01B8, 0x01A8, + 0x0058, 0x0048, 0x0078, 0x0068, 0x0018, 0x0008, 0x0038, 0x0028, 0x00D8, 0x00C8, 0x00F8, 0x00E8, 0x0098, 0x0088, 0x00B8, 0x00A8, + 0x0560, 0x0520, 0x05E0, 0x05A0, 0x0460, 0x0420, 0x04E0, 0x04A0, 0x0760, 0x0720, 0x07E0, 0x07A0, 0x0660, 0x0620, 0x06E0, 0x06A0, 0x02B0, 0x0290, 0x02F0, 0x02D0, 0x0230, 0x0210, 0x0270, 0x0250, 
0x03B0, 0x0390, 0x03F0, 0x03D0, 0x0330, 0x0310, 0x0370, 0x0350 }; static unsigned short g_drwavMulawTable[256] = { - 0x8284, 0x8684, 0x8A84, 0x8E84, 0x9284, 0x9684, 0x9A84, 0x9E84, 0xA284, 0xA684, 0xAA84, 0xAE84, 0xB284, 0xB684, 0xBA84, 0xBE84, - 0xC184, 0xC384, 0xC584, 0xC784, 0xC984, 0xCB84, 0xCD84, 0xCF84, 0xD184, 0xD384, 0xD584, 0xD784, 0xD984, 0xDB84, 0xDD84, 0xDF84, - 0xE104, 0xE204, 0xE304, 0xE404, 0xE504, 0xE604, 0xE704, 0xE804, 0xE904, 0xEA04, 0xEB04, 0xEC04, 0xED04, 0xEE04, 0xEF04, 0xF004, - 0xF0C4, 0xF144, 0xF1C4, 0xF244, 0xF2C4, 0xF344, 0xF3C4, 0xF444, 0xF4C4, 0xF544, 0xF5C4, 0xF644, 0xF6C4, 0xF744, 0xF7C4, 0xF844, - 0xF8A4, 0xF8E4, 0xF924, 0xF964, 0xF9A4, 0xF9E4, 0xFA24, 0xFA64, 0xFAA4, 0xFAE4, 0xFB24, 0xFB64, 0xFBA4, 0xFBE4, 0xFC24, 0xFC64, - 0xFC94, 0xFCB4, 0xFCD4, 0xFCF4, 0xFD14, 0xFD34, 0xFD54, 0xFD74, 0xFD94, 0xFDB4, 0xFDD4, 0xFDF4, 0xFE14, 0xFE34, 0xFE54, 0xFE74, - 0xFE8C, 0xFE9C, 0xFEAC, 0xFEBC, 0xFECC, 0xFEDC, 0xFEEC, 0xFEFC, 0xFF0C, 0xFF1C, 0xFF2C, 0xFF3C, 0xFF4C, 0xFF5C, 0xFF6C, 0xFF7C, - 0xFF88, 0xFF90, 0xFF98, 0xFFA0, 0xFFA8, 0xFFB0, 0xFFB8, 0xFFC0, 0xFFC8, 0xFFD0, 0xFFD8, 0xFFE0, 0xFFE8, 0xFFF0, 0xFFF8, 0x0000, - 0x7D7C, 0x797C, 0x757C, 0x717C, 0x6D7C, 0x697C, 0x657C, 0x617C, 0x5D7C, 0x597C, 0x557C, 0x517C, 0x4D7C, 0x497C, 0x457C, 0x417C, - 0x3E7C, 0x3C7C, 0x3A7C, 0x387C, 0x367C, 0x347C, 0x327C, 0x307C, 0x2E7C, 0x2C7C, 0x2A7C, 0x287C, 0x267C, 0x247C, 0x227C, 0x207C, - 0x1EFC, 0x1DFC, 0x1CFC, 0x1BFC, 0x1AFC, 0x19FC, 0x18FC, 0x17FC, 0x16FC, 0x15FC, 0x14FC, 0x13FC, 0x12FC, 0x11FC, 0x10FC, 0x0FFC, - 0x0F3C, 0x0EBC, 0x0E3C, 0x0DBC, 0x0D3C, 0x0CBC, 0x0C3C, 0x0BBC, 0x0B3C, 0x0ABC, 0x0A3C, 0x09BC, 0x093C, 0x08BC, 0x083C, 0x07BC, - 0x075C, 0x071C, 0x06DC, 0x069C, 0x065C, 0x061C, 0x05DC, 0x059C, 0x055C, 0x051C, 0x04DC, 0x049C, 0x045C, 0x041C, 0x03DC, 0x039C, - 0x036C, 0x034C, 0x032C, 0x030C, 0x02EC, 0x02CC, 0x02AC, 0x028C, 0x026C, 0x024C, 0x022C, 0x020C, 0x01EC, 0x01CC, 0x01AC, 0x018C, - 0x0174, 0x0164, 0x0154, 0x0144, 0x0134, 0x0124, 0x0114, 
0x0104, 0x00F4, 0x00E4, 0x00D4, 0x00C4, 0x00B4, 0x00A4, 0x0094, 0x0084, + 0x8284, 0x8684, 0x8A84, 0x8E84, 0x9284, 0x9684, 0x9A84, 0x9E84, 0xA284, 0xA684, 0xAA84, 0xAE84, 0xB284, 0xB684, 0xBA84, 0xBE84, + 0xC184, 0xC384, 0xC584, 0xC784, 0xC984, 0xCB84, 0xCD84, 0xCF84, 0xD184, 0xD384, 0xD584, 0xD784, 0xD984, 0xDB84, 0xDD84, 0xDF84, + 0xE104, 0xE204, 0xE304, 0xE404, 0xE504, 0xE604, 0xE704, 0xE804, 0xE904, 0xEA04, 0xEB04, 0xEC04, 0xED04, 0xEE04, 0xEF04, 0xF004, + 0xF0C4, 0xF144, 0xF1C4, 0xF244, 0xF2C4, 0xF344, 0xF3C4, 0xF444, 0xF4C4, 0xF544, 0xF5C4, 0xF644, 0xF6C4, 0xF744, 0xF7C4, 0xF844, + 0xF8A4, 0xF8E4, 0xF924, 0xF964, 0xF9A4, 0xF9E4, 0xFA24, 0xFA64, 0xFAA4, 0xFAE4, 0xFB24, 0xFB64, 0xFBA4, 0xFBE4, 0xFC24, 0xFC64, + 0xFC94, 0xFCB4, 0xFCD4, 0xFCF4, 0xFD14, 0xFD34, 0xFD54, 0xFD74, 0xFD94, 0xFDB4, 0xFDD4, 0xFDF4, 0xFE14, 0xFE34, 0xFE54, 0xFE74, + 0xFE8C, 0xFE9C, 0xFEAC, 0xFEBC, 0xFECC, 0xFEDC, 0xFEEC, 0xFEFC, 0xFF0C, 0xFF1C, 0xFF2C, 0xFF3C, 0xFF4C, 0xFF5C, 0xFF6C, 0xFF7C, + 0xFF88, 0xFF90, 0xFF98, 0xFFA0, 0xFFA8, 0xFFB0, 0xFFB8, 0xFFC0, 0xFFC8, 0xFFD0, 0xFFD8, 0xFFE0, 0xFFE8, 0xFFF0, 0xFFF8, 0x0000, + 0x7D7C, 0x797C, 0x757C, 0x717C, 0x6D7C, 0x697C, 0x657C, 0x617C, 0x5D7C, 0x597C, 0x557C, 0x517C, 0x4D7C, 0x497C, 0x457C, 0x417C, + 0x3E7C, 0x3C7C, 0x3A7C, 0x387C, 0x367C, 0x347C, 0x327C, 0x307C, 0x2E7C, 0x2C7C, 0x2A7C, 0x287C, 0x267C, 0x247C, 0x227C, 0x207C, + 0x1EFC, 0x1DFC, 0x1CFC, 0x1BFC, 0x1AFC, 0x19FC, 0x18FC, 0x17FC, 0x16FC, 0x15FC, 0x14FC, 0x13FC, 0x12FC, 0x11FC, 0x10FC, 0x0FFC, + 0x0F3C, 0x0EBC, 0x0E3C, 0x0DBC, 0x0D3C, 0x0CBC, 0x0C3C, 0x0BBC, 0x0B3C, 0x0ABC, 0x0A3C, 0x09BC, 0x093C, 0x08BC, 0x083C, 0x07BC, + 0x075C, 0x071C, 0x06DC, 0x069C, 0x065C, 0x061C, 0x05DC, 0x059C, 0x055C, 0x051C, 0x04DC, 0x049C, 0x045C, 0x041C, 0x03DC, 0x039C, + 0x036C, 0x034C, 0x032C, 0x030C, 0x02EC, 0x02CC, 0x02AC, 0x028C, 0x026C, 0x024C, 0x022C, 0x020C, 0x01EC, 0x01CC, 0x01AC, 0x018C, + 0x0174, 0x0164, 0x0154, 0x0144, 0x0134, 0x0124, 0x0114, 0x0104, 0x00F4, 0x00E4, 0x00D4, 0x00C4, 0x00B4, 
0x00A4, 0x0094, 0x0084, 0x0078, 0x0070, 0x0068, 0x0060, 0x0058, 0x0050, 0x0048, 0x0040, 0x0038, 0x0030, 0x0028, 0x0020, 0x0018, 0x0010, 0x0008, 0x0000 }; @@ -2417,18 +3519,20 @@ static DRWAV_INLINE drwav_int16 drwav__mulaw_to_s16(drwav_uint8 sampleIn) -static void drwav__pcm_to_s16(drwav_int16* pOut, const unsigned char* pIn, size_t totalSampleCount, unsigned short bytesPerSample) +static void drwav__pcm_to_s16(drwav_int16* pOut, const unsigned char* pIn, size_t totalSampleCount, unsigned int bytesPerSample) { - // Special case for 8-bit sample data because it's treated as unsigned. + unsigned int i; + + /* Special case for 8-bit sample data because it's treated as unsigned. */ if (bytesPerSample == 1) { drwav_u8_to_s16(pOut, pIn, totalSampleCount); return; } - // Slightly more optimal implementation for common formats. + /* Slightly more optimal implementation for common formats. */ if (bytesPerSample == 2) { - for (unsigned int i = 0; i < totalSampleCount; ++i) { + for (i = 0; i < totalSampleCount; ++i) { *pOut++ = ((const drwav_int16*)pIn)[i]; } return; @@ -2443,15 +3547,15 @@ static void drwav__pcm_to_s16(drwav_int16* pOut, const unsigned char* pIn, size_ } - // Anything more than 64 bits per sample is not supported. + /* Anything more than 64 bits per sample is not supported. */ if (bytesPerSample > 8) { - drwav_zero_memory(pOut, totalSampleCount * sizeof(*pOut)); + DRWAV_ZERO_MEMORY(pOut, totalSampleCount * sizeof(*pOut)); return; } - // Generic, slow converter. - for (unsigned int i = 0; i < totalSampleCount; ++i) { + /* Generic, slow converter. 
*/ + for (i = 0; i < totalSampleCount; ++i) { drwav_uint64 sample = 0; unsigned int shift = (8 - bytesPerSample) * 8; @@ -2466,7 +3570,7 @@ static void drwav__pcm_to_s16(drwav_int16* pOut, const unsigned char* pIn, size_ } } -static void drwav__ieee_to_s16(drwav_int16* pOut, const unsigned char* pIn, size_t totalSampleCount, unsigned short bytesPerSample) +static void drwav__ieee_to_s16(drwav_int16* pOut, const unsigned char* pIn, size_t totalSampleCount, unsigned int bytesPerSample) { if (bytesPerSample == 4) { drwav_f32_to_s16(pOut, (const float*)pIn, totalSampleCount); @@ -2475,142 +3579,197 @@ static void drwav__ieee_to_s16(drwav_int16* pOut, const unsigned char* pIn, size drwav_f64_to_s16(pOut, (const double*)pIn, totalSampleCount); return; } else { - // Only supporting 32- and 64-bit float. Output silence in all other cases. Contributions welcome for 16-bit float. - drwav_zero_memory(pOut, totalSampleCount * sizeof(*pOut)); + /* Only supporting 32- and 64-bit float. Output silence in all other cases. Contributions welcome for 16-bit float. */ + DRWAV_ZERO_MEMORY(pOut, totalSampleCount * sizeof(*pOut)); return; } } -drwav_uint64 drwav_read_s16__pcm(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut) +drwav_uint64 drwav_read_pcm_frames_s16__pcm(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut) { - // Fast path. - if (pWav->bytesPerSample == 2) { - return drwav_read(pWav, samplesToRead, pBufferOut); + drwav_uint32 bytesPerFrame; + drwav_uint64 totalFramesRead; + unsigned char sampleData[4096]; + + /* Fast path. 
*/ + if (pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM && pWav->bitsPerSample == 16) { + return drwav_read_pcm_frames(pWav, framesToRead, pBufferOut); + } + + bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav); + if (bytesPerFrame == 0) { + return 0; } - drwav_uint64 totalSamplesRead = 0; - unsigned char sampleData[4096]; - while (samplesToRead > 0) { - drwav_uint64 samplesRead = drwav_read(pWav, drwav_min(samplesToRead, sizeof(sampleData)/pWav->bytesPerSample), sampleData); - if (samplesRead == 0) { + totalFramesRead = 0; + + while (framesToRead > 0) { + drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData); + if (framesRead == 0) { break; } - drwav__pcm_to_s16(pBufferOut, sampleData, (size_t)samplesRead, pWav->bytesPerSample); + drwav__pcm_to_s16(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels), bytesPerFrame/pWav->channels); - pBufferOut += samplesRead; - samplesToRead -= samplesRead; - totalSamplesRead += samplesRead; + pBufferOut += framesRead*pWav->channels; + framesToRead -= framesRead; + totalFramesRead += framesRead; } - return totalSamplesRead; + return totalFramesRead; } -drwav_uint64 drwav_read_s16__ieee(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut) +drwav_uint64 drwav_read_pcm_frames_s16__ieee(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut) { - drwav_uint64 totalSamplesRead = 0; + drwav_uint64 totalFramesRead; unsigned char sampleData[4096]; - while (samplesToRead > 0) { - drwav_uint64 samplesRead = drwav_read(pWav, drwav_min(samplesToRead, sizeof(sampleData)/pWav->bytesPerSample), sampleData); - if (samplesRead == 0) { + + drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav); + if (bytesPerFrame == 0) { + return 0; + } + + totalFramesRead = 0; + + while (framesToRead > 0) { + drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData); + if (framesRead == 
0) { break; } - drwav__ieee_to_s16(pBufferOut, sampleData, (size_t)samplesRead, pWav->bytesPerSample); + drwav__ieee_to_s16(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels), bytesPerFrame/pWav->channels); - pBufferOut += samplesRead; - samplesToRead -= samplesRead; - totalSamplesRead += samplesRead; + pBufferOut += framesRead*pWav->channels; + framesToRead -= framesRead; + totalFramesRead += framesRead; } - return totalSamplesRead; + return totalFramesRead; } -drwav_uint64 drwav_read_s16__alaw(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut) +drwav_uint64 drwav_read_pcm_frames_s16__alaw(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut) { - drwav_uint64 totalSamplesRead = 0; + drwav_uint64 totalFramesRead; unsigned char sampleData[4096]; - while (samplesToRead > 0) { - drwav_uint64 samplesRead = drwav_read(pWav, drwav_min(samplesToRead, sizeof(sampleData)/pWav->bytesPerSample), sampleData); - if (samplesRead == 0) { + + drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav); + if (bytesPerFrame == 0) { + return 0; + } + + totalFramesRead = 0; + + while (framesToRead > 0) { + drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData); + if (framesRead == 0) { break; } - drwav_alaw_to_s16(pBufferOut, sampleData, (size_t)samplesRead); + drwav_alaw_to_s16(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels)); - pBufferOut += samplesRead; - samplesToRead -= samplesRead; - totalSamplesRead += samplesRead; + pBufferOut += framesRead*pWav->channels; + framesToRead -= framesRead; + totalFramesRead += framesRead; } - return totalSamplesRead; + return totalFramesRead; } -drwav_uint64 drwav_read_s16__mulaw(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut) +drwav_uint64 drwav_read_pcm_frames_s16__mulaw(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut) { - drwav_uint64 totalSamplesRead = 0; + drwav_uint64 totalFramesRead; 
unsigned char sampleData[4096]; - while (samplesToRead > 0) { - drwav_uint64 samplesRead = drwav_read(pWav, drwav_min(samplesToRead, sizeof(sampleData)/pWav->bytesPerSample), sampleData); - if (samplesRead == 0) { + + drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav); + if (bytesPerFrame == 0) { + return 0; + } + + totalFramesRead = 0; + + while (framesToRead > 0) { + drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData); + if (framesRead == 0) { break; } - drwav_mulaw_to_s16(pBufferOut, sampleData, (size_t)samplesRead); + drwav_mulaw_to_s16(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels)); - pBufferOut += samplesRead; - samplesToRead -= samplesRead; - totalSamplesRead += samplesRead; + pBufferOut += framesRead*pWav->channels; + framesToRead -= framesRead; + totalFramesRead += framesRead; } - return totalSamplesRead; + return totalFramesRead; } -drwav_uint64 drwav_read_s16(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut) +drwav_uint64 drwav_read_pcm_frames_s16(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut) { - if (pWav == NULL || samplesToRead == 0 || pBufferOut == NULL) { + if (pWav == NULL || framesToRead == 0 || pBufferOut == NULL) { return 0; } - // Don't try to read more samples than can potentially fit in the output buffer. - if (samplesToRead * sizeof(drwav_int16) > DRWAV_SIZE_MAX) { - samplesToRead = DRWAV_SIZE_MAX / sizeof(drwav_int16); + /* Don't try to read more samples than can potentially fit in the output buffer. 
*/ + if (framesToRead * pWav->channels * sizeof(drwav_int16) > DRWAV_SIZE_MAX) { + framesToRead = DRWAV_SIZE_MAX / sizeof(drwav_int16) / pWav->channels; } if (pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM) { - return drwav_read_s16__pcm(pWav, samplesToRead, pBufferOut); - } - - if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) { - return drwav_read_s16__msadpcm(pWav, samplesToRead, pBufferOut); + return drwav_read_pcm_frames_s16__pcm(pWav, framesToRead, pBufferOut); } if (pWav->translatedFormatTag == DR_WAVE_FORMAT_IEEE_FLOAT) { - return drwav_read_s16__ieee(pWav, samplesToRead, pBufferOut); + return drwav_read_pcm_frames_s16__ieee(pWav, framesToRead, pBufferOut); } if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ALAW) { - return drwav_read_s16__alaw(pWav, samplesToRead, pBufferOut); + return drwav_read_pcm_frames_s16__alaw(pWav, framesToRead, pBufferOut); } if (pWav->translatedFormatTag == DR_WAVE_FORMAT_MULAW) { - return drwav_read_s16__mulaw(pWav, samplesToRead, pBufferOut); + return drwav_read_pcm_frames_s16__mulaw(pWav, framesToRead, pBufferOut); + } + + if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) { + return drwav_read_pcm_frames_s16__msadpcm(pWav, framesToRead, pBufferOut); } if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) { - return drwav_read_s16__ima(pWav, samplesToRead, pBufferOut); + return drwav_read_pcm_frames_s16__ima(pWav, framesToRead, pBufferOut); } return 0; } +drwav_uint64 drwav_read_pcm_frames_s16le(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut) +{ + drwav_uint64 framesRead = drwav_read_pcm_frames_s16(pWav, framesToRead, pBufferOut); + if (!drwav__is_little_endian()) { + drwav__bswap_samples_s16(pBufferOut, framesRead*pWav->channels); + } + + return framesRead; +} + +drwav_uint64 drwav_read_pcm_frames_s16be(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut) +{ + drwav_uint64 framesRead = drwav_read_pcm_frames_s16(pWav, framesToRead, pBufferOut); + if (drwav__is_little_endian()) { + 
drwav__bswap_samples_s16(pBufferOut, framesRead*pWav->channels); + } + + return framesRead; +} + + void drwav_u8_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount) { int r; - for (size_t i = 0; i < sampleCount; ++i) { + size_t i; + for (i = 0; i < sampleCount; ++i) { int x = pIn[i]; - r = x - 128; - r = r << 8; + r = x << 8; + r = r - 32768; pOut[i] = (short)r; } } @@ -2618,7 +3777,8 @@ void drwav_u8_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCou void drwav_s24_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount) { int r; - for (size_t i = 0; i < sampleCount; ++i) { + size_t i; + for (i = 0; i < sampleCount; ++i) { int x = ((int)(((unsigned int)(((const unsigned char*)pIn)[i*3+0]) << 8) | ((unsigned int)(((const unsigned char*)pIn)[i*3+1]) << 16) | ((unsigned int)(((const unsigned char*)pIn)[i*3+2])) << 24)) >> 8; r = x >> 8; pOut[i] = (short)r; @@ -2628,7 +3788,8 @@ void drwav_s24_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCo void drwav_s32_to_s16(drwav_int16* pOut, const drwav_int32* pIn, size_t sampleCount) { int r; - for (size_t i = 0; i < sampleCount; ++i) { + size_t i; + for (i = 0; i < sampleCount; ++i) { int x = pIn[i]; r = x >> 16; pOut[i] = (short)r; @@ -2638,7 +3799,8 @@ void drwav_s32_to_s16(drwav_int16* pOut, const drwav_int32* pIn, size_t sampleCo void drwav_f32_to_s16(drwav_int16* pOut, const float* pIn, size_t sampleCount) { int r; - for (size_t i = 0; i < sampleCount; ++i) { + size_t i; + for (i = 0; i < sampleCount; ++i) { float x = pIn[i]; float c; c = ((x < -1) ? -1 : ((x > 1) ? 1 : x)); @@ -2652,7 +3814,8 @@ void drwav_f32_to_s16(drwav_int16* pOut, const float* pIn, size_t sampleCount) void drwav_f64_to_s16(drwav_int16* pOut, const double* pIn, size_t sampleCount) { int r; - for (size_t i = 0; i < sampleCount; ++i) { + size_t i; + for (i = 0; i < sampleCount; ++i) { double x = pIn[i]; double c; c = ((x < -1) ? -1 : ((x > 1) ? 
1 : x)); @@ -2665,29 +3828,33 @@ void drwav_f64_to_s16(drwav_int16* pOut, const double* pIn, size_t sampleCount) void drwav_alaw_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount) { - for (size_t i = 0; i < sampleCount; ++i) { + size_t i; + for (i = 0; i < sampleCount; ++i) { pOut[i] = drwav__alaw_to_s16(pIn[i]); } } void drwav_mulaw_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount) { - for (size_t i = 0; i < sampleCount; ++i) { + size_t i; + for (i = 0; i < sampleCount; ++i) { pOut[i] = drwav__mulaw_to_s16(pIn[i]); } } -static void drwav__pcm_to_f32(float* pOut, const unsigned char* pIn, size_t sampleCount, unsigned short bytesPerSample) +static void drwav__pcm_to_f32(float* pOut, const unsigned char* pIn, size_t sampleCount, unsigned int bytesPerSample) { - // Special case for 8-bit sample data because it's treated as unsigned. + unsigned int i; + + /* Special case for 8-bit sample data because it's treated as unsigned. */ if (bytesPerSample == 1) { drwav_u8_to_f32(pOut, pIn, sampleCount); return; } - // Slightly more optimal implementation for common formats. + /* Slightly more optimal implementation for common formats. */ if (bytesPerSample == 2) { drwav_s16_to_f32(pOut, (const drwav_int16*)pIn, sampleCount); return; @@ -2702,15 +3869,15 @@ static void drwav__pcm_to_f32(float* pOut, const unsigned char* pIn, size_t samp } - // Anything more than 64 bits per sample is not supported. + /* Anything more than 64 bits per sample is not supported. */ if (bytesPerSample > 8) { - drwav_zero_memory(pOut, sampleCount * sizeof(*pOut)); + DRWAV_ZERO_MEMORY(pOut, sampleCount * sizeof(*pOut)); return; } - // Generic, slow converter. - for (unsigned int i = 0; i < sampleCount; ++i) { + /* Generic, slow converter. 
*/ + for (i = 0; i < sampleCount; ++i) { drwav_uint64 sample = 0; unsigned int shift = (8 - bytesPerSample) * 8; @@ -2725,10 +3892,11 @@ static void drwav__pcm_to_f32(float* pOut, const unsigned char* pIn, size_t samp } } -static void drwav__ieee_to_f32(float* pOut, const unsigned char* pIn, size_t sampleCount, unsigned short bytesPerSample) +static void drwav__ieee_to_f32(float* pOut, const unsigned char* pIn, size_t sampleCount, unsigned int bytesPerSample) { if (bytesPerSample == 4) { - for (unsigned int i = 0; i < sampleCount; ++i) { + unsigned int i; + for (i = 0; i < sampleCount; ++i) { *pOut++ = ((const float*)pIn)[i]; } return; @@ -2736,212 +3904,257 @@ static void drwav__ieee_to_f32(float* pOut, const unsigned char* pIn, size_t sam drwav_f64_to_f32(pOut, (const double*)pIn, sampleCount); return; } else { - // Only supporting 32- and 64-bit float. Output silence in all other cases. Contributions welcome for 16-bit float. - drwav_zero_memory(pOut, sampleCount * sizeof(*pOut)); + /* Only supporting 32- and 64-bit float. Output silence in all other cases. Contributions welcome for 16-bit float. 
*/ + DRWAV_ZERO_MEMORY(pOut, sampleCount * sizeof(*pOut)); return; } } -drwav_uint64 drwav_read_f32__pcm(drwav* pWav, drwav_uint64 samplesToRead, float* pBufferOut) +drwav_uint64 drwav_read_pcm_frames_f32__pcm(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut) { - if (pWav->bytesPerSample == 0) { + drwav_uint64 totalFramesRead; + unsigned char sampleData[4096]; + + drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav); + if (bytesPerFrame == 0) { return 0; } - drwav_uint64 totalSamplesRead = 0; - unsigned char sampleData[4096]; - while (samplesToRead > 0) { - drwav_uint64 samplesRead = drwav_read(pWav, drwav_min(samplesToRead, sizeof(sampleData)/pWav->bytesPerSample), sampleData); - if (samplesRead == 0) { + totalFramesRead = 0; + + while (framesToRead > 0) { + drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData); + if (framesRead == 0) { break; } - drwav__pcm_to_f32(pBufferOut, sampleData, (size_t)samplesRead, pWav->bytesPerSample); - pBufferOut += samplesRead; + drwav__pcm_to_f32(pBufferOut, sampleData, (size_t)framesRead*pWav->channels, bytesPerFrame/pWav->channels); - samplesToRead -= samplesRead; - totalSamplesRead += samplesRead; + pBufferOut += framesRead*pWav->channels; + framesToRead -= framesRead; + totalFramesRead += framesRead; } - return totalSamplesRead; + return totalFramesRead; } -drwav_uint64 drwav_read_f32__msadpcm(drwav* pWav, drwav_uint64 samplesToRead, float* pBufferOut) +drwav_uint64 drwav_read_pcm_frames_f32__msadpcm(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut) { - // We're just going to borrow the implementation from the drwav_read_s16() since ADPCM is a little bit more complicated than other formats and I don't - // want to duplicate that code. 
- drwav_uint64 totalSamplesRead = 0; + /* + We're just going to borrow the implementation from the drwav_read_s16() since ADPCM is a little bit more complicated than other formats and I don't + want to duplicate that code. + */ + drwav_uint64 totalFramesRead = 0; drwav_int16 samples16[2048]; - while (samplesToRead > 0) { - drwav_uint64 samplesRead = drwav_read_s16(pWav, drwav_min(samplesToRead, 2048), samples16); - if (samplesRead == 0) { + while (framesToRead > 0) { + drwav_uint64 framesRead = drwav_read_pcm_frames_s16(pWav, drwav_min(framesToRead, drwav_countof(samples16)/pWav->channels), samples16); + if (framesRead == 0) { break; } - drwav_s16_to_f32(pBufferOut, samples16, (size_t)samplesRead); // <-- Safe cast because we're clamping to 2048. + drwav_s16_to_f32(pBufferOut, samples16, (size_t)(framesRead*pWav->channels)); /* <-- Safe cast because we're clamping to 2048. */ - pBufferOut += samplesRead; - samplesToRead -= samplesRead; - totalSamplesRead += samplesRead; + pBufferOut += framesRead*pWav->channels; + framesToRead -= framesRead; + totalFramesRead += framesRead; } - return totalSamplesRead; + return totalFramesRead; } -drwav_uint64 drwav_read_f32__ima(drwav* pWav, drwav_uint64 samplesToRead, float* pBufferOut) +drwav_uint64 drwav_read_pcm_frames_f32__ima(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut) { - // We're just going to borrow the implementation from the drwav_read_s16() since IMA-ADPCM is a little bit more complicated than other formats and I don't - // want to duplicate that code. - drwav_uint64 totalSamplesRead = 0; + /* + We're just going to borrow the implementation from the drwav_read_s16() since IMA-ADPCM is a little bit more complicated than other formats and I don't + want to duplicate that code. 
+ */ + drwav_uint64 totalFramesRead = 0; drwav_int16 samples16[2048]; - while (samplesToRead > 0) { - drwav_uint64 samplesRead = drwav_read_s16(pWav, drwav_min(samplesToRead, 2048), samples16); - if (samplesRead == 0) { + while (framesToRead > 0) { + drwav_uint64 framesRead = drwav_read_pcm_frames_s16(pWav, drwav_min(framesToRead, drwav_countof(samples16)/pWav->channels), samples16); + if (framesRead == 0) { break; } - drwav_s16_to_f32(pBufferOut, samples16, (size_t)samplesRead); // <-- Safe cast because we're clamping to 2048. + drwav_s16_to_f32(pBufferOut, samples16, (size_t)(framesRead*pWav->channels)); /* <-- Safe cast because we're clamping to 2048. */ - pBufferOut += samplesRead; - samplesToRead -= samplesRead; - totalSamplesRead += samplesRead; + pBufferOut += framesRead*pWav->channels; + framesToRead -= framesRead; + totalFramesRead += framesRead; } - return totalSamplesRead; + return totalFramesRead; } -drwav_uint64 drwav_read_f32__ieee(drwav* pWav, drwav_uint64 samplesToRead, float* pBufferOut) +drwav_uint64 drwav_read_pcm_frames_f32__ieee(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut) { - // Fast path. - if (pWav->translatedFormatTag == DR_WAVE_FORMAT_IEEE_FLOAT && pWav->bytesPerSample == 4) { - return drwav_read(pWav, samplesToRead, pBufferOut); - } + drwav_uint64 totalFramesRead; + unsigned char sampleData[4096]; + drwav_uint32 bytesPerFrame; - if (pWav->bytesPerSample == 0) { + /* Fast path. 
*/ + if (pWav->translatedFormatTag == DR_WAVE_FORMAT_IEEE_FLOAT && pWav->bitsPerSample == 32) { + return drwav_read_pcm_frames(pWav, framesToRead, pBufferOut); + } + + bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav); + if (bytesPerFrame == 0) { return 0; } - drwav_uint64 totalSamplesRead = 0; - unsigned char sampleData[4096]; - while (samplesToRead > 0) { - drwav_uint64 samplesRead = drwav_read(pWav, drwav_min(samplesToRead, sizeof(sampleData)/pWav->bytesPerSample), sampleData); - if (samplesRead == 0) { + totalFramesRead = 0; + + while (framesToRead > 0) { + drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData); + if (framesRead == 0) { break; } - drwav__ieee_to_f32(pBufferOut, sampleData, (size_t)samplesRead, pWav->bytesPerSample); + drwav__ieee_to_f32(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels), bytesPerFrame/pWav->channels); - pBufferOut += samplesRead; - samplesToRead -= samplesRead; - totalSamplesRead += samplesRead; + pBufferOut += framesRead*pWav->channels; + framesToRead -= framesRead; + totalFramesRead += framesRead; } - return totalSamplesRead; + return totalFramesRead; } -drwav_uint64 drwav_read_f32__alaw(drwav* pWav, drwav_uint64 samplesToRead, float* pBufferOut) +drwav_uint64 drwav_read_pcm_frames_f32__alaw(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut) { - if (pWav->bytesPerSample == 0) { + drwav_uint64 totalFramesRead; + unsigned char sampleData[4096]; + drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav); + if (bytesPerFrame == 0) { return 0; } - drwav_uint64 totalSamplesRead = 0; - unsigned char sampleData[4096]; - while (samplesToRead > 0) { - drwav_uint64 samplesRead = drwav_read(pWav, drwav_min(samplesToRead, sizeof(sampleData)/pWav->bytesPerSample), sampleData); - if (samplesRead == 0) { + totalFramesRead = 0; + + while (bytesPerFrame > 0) { + drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, 
sizeof(sampleData)/bytesPerFrame), sampleData); + if (framesRead == 0) { break; } - drwav_alaw_to_f32(pBufferOut, sampleData, (size_t)samplesRead); + drwav_alaw_to_f32(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels)); - pBufferOut += samplesRead; - samplesToRead -= samplesRead; - totalSamplesRead += samplesRead; + pBufferOut += framesRead*pWav->channels; + framesToRead -= framesRead; + totalFramesRead += framesRead; } - return totalSamplesRead; + return totalFramesRead; } -drwav_uint64 drwav_read_f32__mulaw(drwav* pWav, drwav_uint64 samplesToRead, float* pBufferOut) +drwav_uint64 drwav_read_pcm_frames_f32__mulaw(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut) { - if (pWav->bytesPerSample == 0) { + drwav_uint64 totalFramesRead; + unsigned char sampleData[4096]; + + drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav); + if (bytesPerFrame == 0) { return 0; } - drwav_uint64 totalSamplesRead = 0; - unsigned char sampleData[4096]; - while (samplesToRead > 0) { - drwav_uint64 samplesRead = drwav_read(pWav, drwav_min(samplesToRead, sizeof(sampleData)/pWav->bytesPerSample), sampleData); - if (samplesRead == 0) { + totalFramesRead = 0; + + while (framesToRead > 0) { + drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData); + if (framesRead == 0) { break; } - drwav_mulaw_to_f32(pBufferOut, sampleData, (size_t)samplesRead); + drwav_mulaw_to_f32(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels)); - pBufferOut += samplesRead; - samplesToRead -= samplesRead; - totalSamplesRead += samplesRead; + pBufferOut += framesRead*pWav->channels; + framesToRead -= framesRead; + totalFramesRead += framesRead; } - return totalSamplesRead; + return totalFramesRead; } -drwav_uint64 drwav_read_f32(drwav* pWav, drwav_uint64 samplesToRead, float* pBufferOut) +drwav_uint64 drwav_read_pcm_frames_f32(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut) { - if (pWav == NULL || 
samplesToRead == 0 || pBufferOut == NULL) { + if (pWav == NULL || framesToRead == 0 || pBufferOut == NULL) { return 0; } - // Don't try to read more samples than can potentially fit in the output buffer. - if (samplesToRead * sizeof(float) > DRWAV_SIZE_MAX) { - samplesToRead = DRWAV_SIZE_MAX / sizeof(float); + /* Don't try to read more samples than can potentially fit in the output buffer. */ + if (framesToRead * pWav->channels * sizeof(float) > DRWAV_SIZE_MAX) { + framesToRead = DRWAV_SIZE_MAX / sizeof(float) / pWav->channels; } if (pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM) { - return drwav_read_f32__pcm(pWav, samplesToRead, pBufferOut); + return drwav_read_pcm_frames_f32__pcm(pWav, framesToRead, pBufferOut); } if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) { - return drwav_read_f32__msadpcm(pWav, samplesToRead, pBufferOut); + return drwav_read_pcm_frames_f32__msadpcm(pWav, framesToRead, pBufferOut); } if (pWav->translatedFormatTag == DR_WAVE_FORMAT_IEEE_FLOAT) { - return drwav_read_f32__ieee(pWav, samplesToRead, pBufferOut); + return drwav_read_pcm_frames_f32__ieee(pWav, framesToRead, pBufferOut); } if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ALAW) { - return drwav_read_f32__alaw(pWav, samplesToRead, pBufferOut); + return drwav_read_pcm_frames_f32__alaw(pWav, framesToRead, pBufferOut); } if (pWav->translatedFormatTag == DR_WAVE_FORMAT_MULAW) { - return drwav_read_f32__mulaw(pWav, samplesToRead, pBufferOut); + return drwav_read_pcm_frames_f32__mulaw(pWav, framesToRead, pBufferOut); } if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) { - return drwav_read_f32__ima(pWav, samplesToRead, pBufferOut); + return drwav_read_pcm_frames_f32__ima(pWav, framesToRead, pBufferOut); } return 0; } +drwav_uint64 drwav_read_pcm_frames_f32le(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut) +{ + drwav_uint64 framesRead = drwav_read_pcm_frames_f32(pWav, framesToRead, pBufferOut); + if (!drwav__is_little_endian()) { + 
drwav__bswap_samples_f32(pBufferOut, framesRead*pWav->channels); + } + + return framesRead; +} + +drwav_uint64 drwav_read_pcm_frames_f32be(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut) +{ + drwav_uint64 framesRead = drwav_read_pcm_frames_f32(pWav, framesToRead, pBufferOut); + if (drwav__is_little_endian()) { + drwav__bswap_samples_f32(pBufferOut, framesRead*pWav->channels); + } + + return framesRead; +} + + void drwav_u8_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount) { + size_t i; + if (pOut == NULL || pIn == NULL) { return; } #ifdef DR_WAV_LIBSNDFILE_COMPAT - // It appears libsndfile uses slightly different logic for the u8 -> f32 conversion to dr_wav, which in my opinion is incorrect. It appears - // libsndfile performs the conversion something like "f32 = (u8 / 256) * 2 - 1", however I think it should be "f32 = (u8 / 255) * 2 - 1" (note - // the divisor of 256 vs 255). I use libsndfile as a benchmark for testing, so I'm therefore leaving this block here just for my automated - // correctness testing. This is disabled by default. - for (size_t i = 0; i < sampleCount; ++i) { + /* + It appears libsndfile uses slightly different logic for the u8 -> f32 conversion to dr_wav, which in my opinion is incorrect. It appears + libsndfile performs the conversion something like "f32 = (u8 / 256) * 2 - 1", however I think it should be "f32 = (u8 / 255) * 2 - 1" (note + the divisor of 256 vs 255). I use libsndfile as a benchmark for testing, so I'm therefore leaving this block here just for my automated + correctness testing. This is disabled by default. 
+ */ + for (i = 0; i < sampleCount; ++i) { *pOut++ = (pIn[i] / 256.0f) * 2 - 1; } #else - for (size_t i = 0; i < sampleCount; ++i) { + for (i = 0; i < sampleCount; ++i) { *pOut++ = (pIn[i] / 255.0f) * 2 - 1; } #endif @@ -2949,22 +4162,26 @@ void drwav_u8_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount) void drwav_s16_to_f32(float* pOut, const drwav_int16* pIn, size_t sampleCount) { + size_t i; + if (pOut == NULL || pIn == NULL) { return; } - for (size_t i = 0; i < sampleCount; ++i) { + for (i = 0; i < sampleCount; ++i) { *pOut++ = pIn[i] / 32768.0f; } } void drwav_s24_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount) { + size_t i; + if (pOut == NULL || pIn == NULL) { return; } - for (size_t i = 0; i < sampleCount; ++i) { + for (i = 0; i < sampleCount; ++i) { unsigned int s0 = pIn[i*3 + 0]; unsigned int s1 = pIn[i*3 + 1]; unsigned int s2 = pIn[i*3 + 2]; @@ -2976,59 +4193,68 @@ void drwav_s24_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount) void drwav_s32_to_f32(float* pOut, const drwav_int32* pIn, size_t sampleCount) { + size_t i; if (pOut == NULL || pIn == NULL) { return; } - for (size_t i = 0; i < sampleCount; ++i) { + for (i = 0; i < sampleCount; ++i) { *pOut++ = (float)(pIn[i] / 2147483648.0); } } void drwav_f64_to_f32(float* pOut, const double* pIn, size_t sampleCount) { + size_t i; + if (pOut == NULL || pIn == NULL) { return; } - for (size_t i = 0; i < sampleCount; ++i) { + for (i = 0; i < sampleCount; ++i) { *pOut++ = (float)pIn[i]; } } void drwav_alaw_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount) { + size_t i; + if (pOut == NULL || pIn == NULL) { return; } - for (size_t i = 0; i < sampleCount; ++i) { + for (i = 0; i < sampleCount; ++i) { *pOut++ = drwav__alaw_to_s16(pIn[i]) / 32768.0f; } } void drwav_mulaw_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount) { + size_t i; + if (pOut == NULL || pIn == NULL) { return; } - for (size_t i = 0; i < sampleCount; ++i) { + for (i = 0; i < 
sampleCount; ++i) { *pOut++ = drwav__mulaw_to_s16(pIn[i]) / 32768.0f; } } -static void drwav__pcm_to_s32(drwav_int32* pOut, const unsigned char* pIn, size_t totalSampleCount, unsigned short bytesPerSample) +static void drwav__pcm_to_s32(drwav_int32* pOut, const unsigned char* pIn, size_t totalSampleCount, unsigned int bytesPerSample) { - // Special case for 8-bit sample data because it's treated as unsigned. + unsigned int i; + + /* Special case for 8-bit sample data because it's treated as unsigned. */ if (bytesPerSample == 1) { drwav_u8_to_s32(pOut, pIn, totalSampleCount); return; } - // Slightly more optimal implementation for common formats. + /* Slightly more optimal implementation for common formats. */ if (bytesPerSample == 2) { drwav_s16_to_s32(pOut, (const drwav_int16*)pIn, totalSampleCount); return; @@ -3038,22 +4264,22 @@ static void drwav__pcm_to_s32(drwav_int32* pOut, const unsigned char* pIn, size_ return; } if (bytesPerSample == 4) { - for (unsigned int i = 0; i < totalSampleCount; ++i) { + for (i = 0; i < totalSampleCount; ++i) { *pOut++ = ((const drwav_int32*)pIn)[i]; } return; } - // Anything more than 64 bits per sample is not supported. + /* Anything more than 64 bits per sample is not supported. */ if (bytesPerSample > 8) { - drwav_zero_memory(pOut, totalSampleCount * sizeof(*pOut)); + DRWAV_ZERO_MEMORY(pOut, totalSampleCount * sizeof(*pOut)); return; } - // Generic, slow converter. - for (unsigned int i = 0; i < totalSampleCount; ++i) { + /* Generic, slow converter. 
*/ + for (i = 0; i < totalSampleCount; ++i) { drwav_uint64 sample = 0; unsigned int shift = (8 - bytesPerSample) * 8; @@ -3068,7 +4294,7 @@ static void drwav__pcm_to_s32(drwav_int32* pOut, const unsigned char* pIn, size_ } } -static void drwav__ieee_to_s32(drwav_int32* pOut, const unsigned char* pIn, size_t totalSampleCount, unsigned short bytesPerSample) +static void drwav__ieee_to_s32(drwav_int32* pOut, const unsigned char* pIn, size_t totalSampleCount, unsigned int bytesPerSample) { if (bytesPerSample == 4) { drwav_f32_to_s32(pOut, (const float*)pIn, totalSampleCount); @@ -3077,226 +4303,274 @@ static void drwav__ieee_to_s32(drwav_int32* pOut, const unsigned char* pIn, size drwav_f64_to_s32(pOut, (const double*)pIn, totalSampleCount); return; } else { - // Only supporting 32- and 64-bit float. Output silence in all other cases. Contributions welcome for 16-bit float. - drwav_zero_memory(pOut, totalSampleCount * sizeof(*pOut)); + /* Only supporting 32- and 64-bit float. Output silence in all other cases. Contributions welcome for 16-bit float. */ + DRWAV_ZERO_MEMORY(pOut, totalSampleCount * sizeof(*pOut)); return; } } -drwav_uint64 drwav_read_s32__pcm(drwav* pWav, drwav_uint64 samplesToRead, drwav_int32* pBufferOut) +drwav_uint64 drwav_read_pcm_frames_s32__pcm(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut) { - // Fast path. - if (pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM && pWav->bytesPerSample == 4) { - return drwav_read(pWav, samplesToRead, pBufferOut); - } + drwav_uint64 totalFramesRead; + unsigned char sampleData[4096]; + drwav_uint32 bytesPerFrame; - if (pWav->bytesPerSample == 0) { + /* Fast path. 
*/ + if (pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM && pWav->bitsPerSample == 32) { + return drwav_read_pcm_frames(pWav, framesToRead, pBufferOut); + } + + bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav); + if (bytesPerFrame == 0) { return 0; } - drwav_uint64 totalSamplesRead = 0; - unsigned char sampleData[4096]; - while (samplesToRead > 0) { - drwav_uint64 samplesRead = drwav_read(pWav, drwav_min(samplesToRead, sizeof(sampleData)/pWav->bytesPerSample), sampleData); - if (samplesRead == 0) { + totalFramesRead = 0; + + while (framesToRead > 0) { + drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData); + if (framesRead == 0) { break; } - drwav__pcm_to_s32(pBufferOut, sampleData, (size_t)samplesRead, pWav->bytesPerSample); + drwav__pcm_to_s32(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels), bytesPerFrame/pWav->channels); - pBufferOut += samplesRead; - samplesToRead -= samplesRead; - totalSamplesRead += samplesRead; + pBufferOut += framesRead*pWav->channels; + framesToRead -= framesRead; + totalFramesRead += framesRead; } - return totalSamplesRead; + return totalFramesRead; } -drwav_uint64 drwav_read_s32__msadpcm(drwav* pWav, drwav_uint64 samplesToRead, drwav_int32* pBufferOut) +drwav_uint64 drwav_read_pcm_frames_s32__msadpcm(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut) { - // We're just going to borrow the implementation from the drwav_read_s16() since ADPCM is a little bit more complicated than other formats and I don't - // want to duplicate that code. - drwav_uint64 totalSamplesRead = 0; + /* + We're just going to borrow the implementation from the drwav_read_s16() since ADPCM is a little bit more complicated than other formats and I don't + want to duplicate that code. 
+ */ + drwav_uint64 totalFramesRead = 0; drwav_int16 samples16[2048]; - while (samplesToRead > 0) { - drwav_uint64 samplesRead = drwav_read_s16(pWav, drwav_min(samplesToRead, 2048), samples16); - if (samplesRead == 0) { + while (framesToRead > 0) { + drwav_uint64 framesRead = drwav_read_pcm_frames_s16(pWav, drwav_min(framesToRead, drwav_countof(samples16)/pWav->channels), samples16); + if (framesRead == 0) { break; } - drwav_s16_to_s32(pBufferOut, samples16, (size_t)samplesRead); // <-- Safe cast because we're clamping to 2048. + drwav_s16_to_s32(pBufferOut, samples16, (size_t)(framesRead*pWav->channels)); /* <-- Safe cast because we're clamping to 2048. */ - pBufferOut += samplesRead; - samplesToRead -= samplesRead; - totalSamplesRead += samplesRead; + pBufferOut += framesRead*pWav->channels; + framesToRead -= framesRead; + totalFramesRead += framesRead; } - return totalSamplesRead; + return totalFramesRead; } -drwav_uint64 drwav_read_s32__ima(drwav* pWav, drwav_uint64 samplesToRead, drwav_int32* pBufferOut) +drwav_uint64 drwav_read_pcm_frames_s32__ima(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut) { - // We're just going to borrow the implementation from the drwav_read_s16() since IMA-ADPCM is a little bit more complicated than other formats and I don't - // want to duplicate that code. - drwav_uint64 totalSamplesRead = 0; + /* + We're just going to borrow the implementation from the drwav_read_s16() since IMA-ADPCM is a little bit more complicated than other formats and I don't + want to duplicate that code. 
+ */ + drwav_uint64 totalFramesRead = 0; drwav_int16 samples16[2048]; - while (samplesToRead > 0) { - drwav_uint64 samplesRead = drwav_read_s16(pWav, drwav_min(samplesToRead, 2048), samples16); - if (samplesRead == 0) { + while (framesToRead > 0) { + drwav_uint64 framesRead = drwav_read_pcm_frames_s16(pWav, drwav_min(framesToRead, drwav_countof(samples16)/pWav->channels), samples16); + if (framesRead == 0) { break; } - drwav_s16_to_s32(pBufferOut, samples16, (size_t)samplesRead); // <-- Safe cast because we're clamping to 2048. + drwav_s16_to_s32(pBufferOut, samples16, (size_t)(framesRead*pWav->channels)); /* <-- Safe cast because we're clamping to 2048. */ - pBufferOut += samplesRead; - samplesToRead -= samplesRead; - totalSamplesRead += samplesRead; + pBufferOut += framesRead*pWav->channels; + framesToRead -= framesRead; + totalFramesRead += framesRead; } - return totalSamplesRead; + return totalFramesRead; } -drwav_uint64 drwav_read_s32__ieee(drwav* pWav, drwav_uint64 samplesToRead, drwav_int32* pBufferOut) +drwav_uint64 drwav_read_pcm_frames_s32__ieee(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut) { - if (pWav->bytesPerSample == 0) { + drwav_uint64 totalFramesRead; + unsigned char sampleData[4096]; + + drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav); + if (bytesPerFrame == 0) { return 0; } - drwav_uint64 totalSamplesRead = 0; - unsigned char sampleData[4096]; - while (samplesToRead > 0) { - drwav_uint64 samplesRead = drwav_read(pWav, drwav_min(samplesToRead, sizeof(sampleData)/pWav->bytesPerSample), sampleData); - if (samplesRead == 0) { + totalFramesRead = 0; + + while (framesToRead > 0) { + drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData); + if (framesRead == 0) { break; } - drwav__ieee_to_s32(pBufferOut, sampleData, (size_t)samplesRead, pWav->bytesPerSample); + drwav__ieee_to_s32(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels), 
bytesPerFrame/pWav->channels); - pBufferOut += samplesRead; - samplesToRead -= samplesRead; - totalSamplesRead += samplesRead; + pBufferOut += framesRead*pWav->channels; + framesToRead -= framesRead; + totalFramesRead += framesRead; } - return totalSamplesRead; + return totalFramesRead; } -drwav_uint64 drwav_read_s32__alaw(drwav* pWav, drwav_uint64 samplesToRead, drwav_int32* pBufferOut) +drwav_uint64 drwav_read_pcm_frames_s32__alaw(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut) { - if (pWav->bytesPerSample == 0) { + drwav_uint64 totalFramesRead; + unsigned char sampleData[4096]; + + drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav); + if (bytesPerFrame == 0) { return 0; } - drwav_uint64 totalSamplesRead = 0; - unsigned char sampleData[4096]; - while (samplesToRead > 0) { - drwav_uint64 samplesRead = drwav_read(pWav, drwav_min(samplesToRead, sizeof(sampleData)/pWav->bytesPerSample), sampleData); - if (samplesRead == 0) { + totalFramesRead = 0; + + while (framesToRead > 0) { + drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData); + if (framesRead == 0) { break; } - drwav_alaw_to_s32(pBufferOut, sampleData, (size_t)samplesRead); + drwav_alaw_to_s32(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels)); - pBufferOut += samplesRead; - samplesToRead -= samplesRead; - totalSamplesRead += samplesRead; + pBufferOut += framesRead*pWav->channels; + framesToRead -= framesRead; + totalFramesRead += framesRead; } - return totalSamplesRead; + return totalFramesRead; } -drwav_uint64 drwav_read_s32__mulaw(drwav* pWav, drwav_uint64 samplesToRead, drwav_int32* pBufferOut) +drwav_uint64 drwav_read_pcm_frames_s32__mulaw(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut) { - if (pWav->bytesPerSample == 0) { + drwav_uint64 totalFramesRead; + unsigned char sampleData[4096]; + + drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav); + if (bytesPerFrame 
== 0) { return 0; } - drwav_uint64 totalSamplesRead = 0; - unsigned char sampleData[4096]; - while (samplesToRead > 0) { - drwav_uint64 samplesRead = drwav_read(pWav, drwav_min(samplesToRead, sizeof(sampleData)/pWav->bytesPerSample), sampleData); - if (samplesRead == 0) { + totalFramesRead = 0; + + while (framesToRead > 0) { + drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData); + if (framesRead == 0) { break; } - drwav_mulaw_to_s32(pBufferOut, sampleData, (size_t)samplesRead); + drwav_mulaw_to_s32(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels)); - pBufferOut += samplesRead; - samplesToRead -= samplesRead; - totalSamplesRead += samplesRead; + pBufferOut += framesRead*pWav->channels; + framesToRead -= framesRead; + totalFramesRead += framesRead; } - return totalSamplesRead; + return totalFramesRead; } -drwav_uint64 drwav_read_s32(drwav* pWav, drwav_uint64 samplesToRead, drwav_int32* pBufferOut) +drwav_uint64 drwav_read_pcm_frames_s32(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut) { - if (pWav == NULL || samplesToRead == 0 || pBufferOut == NULL) { + if (pWav == NULL || framesToRead == 0 || pBufferOut == NULL) { return 0; } - // Don't try to read more samples than can potentially fit in the output buffer. - if (samplesToRead * sizeof(drwav_int32) > DRWAV_SIZE_MAX) { - samplesToRead = DRWAV_SIZE_MAX / sizeof(drwav_int32); + /* Don't try to read more samples than can potentially fit in the output buffer. 
*/ + if (framesToRead * pWav->channels * sizeof(drwav_int32) > DRWAV_SIZE_MAX) { + framesToRead = DRWAV_SIZE_MAX / sizeof(drwav_int32) / pWav->channels; } if (pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM) { - return drwav_read_s32__pcm(pWav, samplesToRead, pBufferOut); + return drwav_read_pcm_frames_s32__pcm(pWav, framesToRead, pBufferOut); } if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) { - return drwav_read_s32__msadpcm(pWav, samplesToRead, pBufferOut); + return drwav_read_pcm_frames_s32__msadpcm(pWav, framesToRead, pBufferOut); } if (pWav->translatedFormatTag == DR_WAVE_FORMAT_IEEE_FLOAT) { - return drwav_read_s32__ieee(pWav, samplesToRead, pBufferOut); + return drwav_read_pcm_frames_s32__ieee(pWav, framesToRead, pBufferOut); } if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ALAW) { - return drwav_read_s32__alaw(pWav, samplesToRead, pBufferOut); + return drwav_read_pcm_frames_s32__alaw(pWav, framesToRead, pBufferOut); } if (pWav->translatedFormatTag == DR_WAVE_FORMAT_MULAW) { - return drwav_read_s32__mulaw(pWav, samplesToRead, pBufferOut); + return drwav_read_pcm_frames_s32__mulaw(pWav, framesToRead, pBufferOut); } if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) { - return drwav_read_s32__ima(pWav, samplesToRead, pBufferOut); + return drwav_read_pcm_frames_s32__ima(pWav, framesToRead, pBufferOut); } return 0; } +drwav_uint64 drwav_read_pcm_frames_s32le(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut) +{ + drwav_uint64 framesRead = drwav_read_pcm_frames_s32(pWav, framesToRead, pBufferOut); + if (!drwav__is_little_endian()) { + drwav__bswap_samples_s32(pBufferOut, framesRead*pWav->channels); + } + + return framesRead; +} + +drwav_uint64 drwav_read_pcm_frames_s32be(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut) +{ + drwav_uint64 framesRead = drwav_read_pcm_frames_s32(pWav, framesToRead, pBufferOut); + if (drwav__is_little_endian()) { + drwav__bswap_samples_s32(pBufferOut, framesRead*pWav->channels); + } + + 
return framesRead; +} + + void drwav_u8_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount) { + size_t i; + if (pOut == NULL || pIn == NULL) { return; } - for (size_t i = 0; i < sampleCount; ++i) { + for (i = 0; i < sampleCount; ++i) { *pOut++ = ((int)pIn[i] - 128) << 24; } } void drwav_s16_to_s32(drwav_int32* pOut, const drwav_int16* pIn, size_t sampleCount) { + size_t i; + if (pOut == NULL || pIn == NULL) { return; } - for (size_t i = 0; i < sampleCount; ++i) { + for (i = 0; i < sampleCount; ++i) { *pOut++ = pIn[i] << 16; } } void drwav_s24_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount) { + size_t i; + if (pOut == NULL || pIn == NULL) { return; } - for (size_t i = 0; i < sampleCount; ++i) { + for (i = 0; i < sampleCount; ++i) { unsigned int s0 = pIn[i*3 + 0]; unsigned int s1 = pIn[i*3 + 1]; unsigned int s2 = pIn[i*3 + 2]; @@ -3308,423 +4582,753 @@ void drwav_s24_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCo void drwav_f32_to_s32(drwav_int32* pOut, const float* pIn, size_t sampleCount) { + size_t i; + if (pOut == NULL || pIn == NULL) { return; } - for (size_t i = 0; i < sampleCount; ++i) { + for (i = 0; i < sampleCount; ++i) { *pOut++ = (drwav_int32)(2147483648.0 * pIn[i]); } } void drwav_f64_to_s32(drwav_int32* pOut, const double* pIn, size_t sampleCount) { + size_t i; + if (pOut == NULL || pIn == NULL) { return; } - for (size_t i = 0; i < sampleCount; ++i) { + for (i = 0; i < sampleCount; ++i) { *pOut++ = (drwav_int32)(2147483648.0 * pIn[i]); } } void drwav_alaw_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount) { + size_t i; + if (pOut == NULL || pIn == NULL) { return; } - for (size_t i = 0; i < sampleCount; ++i) { + for (i = 0; i < sampleCount; ++i) { *pOut++ = ((drwav_int32)drwav__alaw_to_s16(pIn[i])) << 16; } } void drwav_mulaw_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount) { + size_t i; + if (pOut == NULL || pIn == NULL) { return; } - for (size_t i= 0; i < 
sampleCount; ++i) { + for (i= 0; i < sampleCount; ++i) { *pOut++ = ((drwav_int32)drwav__mulaw_to_s16(pIn[i])) << 16; } } -drwav_int16* drwav__read_and_close_s16(drwav* pWav, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount) +drwav_int16* drwav__read_pcm_frames_and_close_s16(drwav* pWav, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalFrameCount) { - drwav_assert(pWav != NULL); + drwav_uint64 sampleDataSize; + drwav_int16* pSampleData; + drwav_uint64 framesRead; - drwav_uint64 sampleDataSize = pWav->totalSampleCount * sizeof(drwav_int16); + DRWAV_ASSERT(pWav != NULL); + + sampleDataSize = pWav->totalPCMFrameCount * pWav->channels * sizeof(drwav_int16); if (sampleDataSize > DRWAV_SIZE_MAX) { drwav_uninit(pWav); - return NULL; // File's too big. + return NULL; /* File's too big. */ } - drwav_int16* pSampleData = (drwav_int16*)DRWAV_MALLOC((size_t)sampleDataSize); // <-- Safe cast due to the check above. + pSampleData = (drwav_int16*)drwav__malloc_from_callbacks((size_t)sampleDataSize, &pWav->allocationCallbacks); /* <-- Safe cast due to the check above. */ if (pSampleData == NULL) { drwav_uninit(pWav); - return NULL; // Failed to allocate memory. + return NULL; /* Failed to allocate memory. */ } - drwav_uint64 samplesRead = drwav_read_s16(pWav, (size_t)pWav->totalSampleCount, pSampleData); - if (samplesRead != pWav->totalSampleCount) { - DRWAV_FREE(pSampleData); + framesRead = drwav_read_pcm_frames_s16(pWav, (size_t)pWav->totalPCMFrameCount, pSampleData); + if (framesRead != pWav->totalPCMFrameCount) { + drwav__free_from_callbacks(pSampleData, &pWav->allocationCallbacks); drwav_uninit(pWav); - return NULL; // There was an error reading the samples. + return NULL; /* There was an error reading the samples. 
*/ } drwav_uninit(pWav); - if (sampleRate) *sampleRate = pWav->sampleRate; - if (channels) *channels = pWav->channels; - if (totalSampleCount) *totalSampleCount = pWav->totalSampleCount; + if (sampleRate) { + *sampleRate = pWav->sampleRate; + } + if (channels) { + *channels = pWav->channels; + } + if (totalFrameCount) { + *totalFrameCount = pWav->totalPCMFrameCount; + } + return pSampleData; } -float* drwav__read_and_close_f32(drwav* pWav, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount) +float* drwav__read_pcm_frames_and_close_f32(drwav* pWav, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalFrameCount) { - drwav_assert(pWav != NULL); + drwav_uint64 sampleDataSize; + float* pSampleData; + drwav_uint64 framesRead; + + DRWAV_ASSERT(pWav != NULL); - drwav_uint64 sampleDataSize = pWav->totalSampleCount * sizeof(float); + sampleDataSize = pWav->totalPCMFrameCount * pWav->channels * sizeof(float); if (sampleDataSize > DRWAV_SIZE_MAX) { drwav_uninit(pWav); - return NULL; // File's too big. + return NULL; /* File's too big. */ } - float* pSampleData = (float*)DRWAV_MALLOC((size_t)sampleDataSize); // <-- Safe cast due to the check above. + pSampleData = (float*)drwav__malloc_from_callbacks((size_t)sampleDataSize, &pWav->allocationCallbacks); /* <-- Safe cast due to the check above. */ if (pSampleData == NULL) { drwav_uninit(pWav); - return NULL; // Failed to allocate memory. + return NULL; /* Failed to allocate memory. */ } - drwav_uint64 samplesRead = drwav_read_f32(pWav, (size_t)pWav->totalSampleCount, pSampleData); - if (samplesRead != pWav->totalSampleCount) { - DRWAV_FREE(pSampleData); + framesRead = drwav_read_pcm_frames_f32(pWav, (size_t)pWav->totalPCMFrameCount, pSampleData); + if (framesRead != pWav->totalPCMFrameCount) { + drwav__free_from_callbacks(pSampleData, &pWav->allocationCallbacks); drwav_uninit(pWav); - return NULL; // There was an error reading the samples. 
+ return NULL; /* There was an error reading the samples. */ } drwav_uninit(pWav); - if (sampleRate) *sampleRate = pWav->sampleRate; - if (channels) *channels = pWav->channels; - if (totalSampleCount) *totalSampleCount = pWav->totalSampleCount; + if (sampleRate) { + *sampleRate = pWav->sampleRate; + } + if (channels) { + *channels = pWav->channels; + } + if (totalFrameCount) { + *totalFrameCount = pWav->totalPCMFrameCount; + } + return pSampleData; } -drwav_int32* drwav__read_and_close_s32(drwav* pWav, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount) +drwav_int32* drwav__read_pcm_frames_and_close_s32(drwav* pWav, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalFrameCount) { - drwav_assert(pWav != NULL); + drwav_uint64 sampleDataSize; + drwav_int32* pSampleData; + drwav_uint64 framesRead; - drwav_uint64 sampleDataSize = pWav->totalSampleCount * sizeof(drwav_int32); + DRWAV_ASSERT(pWav != NULL); + + sampleDataSize = pWav->totalPCMFrameCount * pWav->channels * sizeof(drwav_int32); if (sampleDataSize > DRWAV_SIZE_MAX) { drwav_uninit(pWav); - return NULL; // File's too big. + return NULL; /* File's too big. */ } - drwav_int32* pSampleData = (drwav_int32*)DRWAV_MALLOC((size_t)sampleDataSize); // <-- Safe cast due to the check above. + pSampleData = (drwav_int32*)drwav__malloc_from_callbacks((size_t)sampleDataSize, &pWav->allocationCallbacks); /* <-- Safe cast due to the check above. */ if (pSampleData == NULL) { drwav_uninit(pWav); - return NULL; // Failed to allocate memory. + return NULL; /* Failed to allocate memory. 
*/ } - drwav_uint64 samplesRead = drwav_read_s32(pWav, (size_t)pWav->totalSampleCount, pSampleData); - if (samplesRead != pWav->totalSampleCount) { - DRWAV_FREE(pSampleData); + framesRead = drwav_read_pcm_frames_s32(pWav, (size_t)pWav->totalPCMFrameCount, pSampleData); + if (framesRead != pWav->totalPCMFrameCount) { + drwav__free_from_callbacks(pSampleData, &pWav->allocationCallbacks); drwav_uninit(pWav); - return NULL; // There was an error reading the samples. + return NULL; /* There was an error reading the samples. */ } drwav_uninit(pWav); - if (sampleRate) *sampleRate = pWav->sampleRate; - if (channels) *channels = pWav->channels; - if (totalSampleCount) *totalSampleCount = pWav->totalSampleCount; + if (sampleRate) { + *sampleRate = pWav->sampleRate; + } + if (channels) { + *channels = pWav->channels; + } + if (totalFrameCount) { + *totalFrameCount = pWav->totalPCMFrameCount; + } + return pSampleData; } -drwav_int16* drwav_open_and_read_s16(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount) -{ - if (sampleRate) *sampleRate = 0; - if (channels) *channels = 0; - if (totalSampleCount) *totalSampleCount = 0; +drwav_int16* drwav_open_and_read_pcm_frames_s16(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks) +{ drwav wav; - if (!drwav_init(&wav, onRead, onSeek, pUserData)) { + + if (channelsOut) { + *channelsOut = 0; + } + if (sampleRateOut) { + *sampleRateOut = 0; + } + if (totalFrameCountOut) { + *totalFrameCountOut = 0; + } + + if (!drwav_init(&wav, onRead, onSeek, pUserData, pAllocationCallbacks)) { return NULL; } - return drwav__read_and_close_s16(&wav, channels, sampleRate, totalSampleCount); + return drwav__read_pcm_frames_and_close_s16(&wav, channelsOut, sampleRateOut, totalFrameCountOut); } -float* 
drwav_open_and_read_f32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount) +float* drwav_open_and_read_pcm_frames_f32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks) { - if (sampleRate) *sampleRate = 0; - if (channels) *channels = 0; - if (totalSampleCount) *totalSampleCount = 0; - drwav wav; - if (!drwav_init(&wav, onRead, onSeek, pUserData)) { + + if (channelsOut) { + *channelsOut = 0; + } + if (sampleRateOut) { + *sampleRateOut = 0; + } + if (totalFrameCountOut) { + *totalFrameCountOut = 0; + } + + if (!drwav_init(&wav, onRead, onSeek, pUserData, pAllocationCallbacks)) { return NULL; } - return drwav__read_and_close_f32(&wav, channels, sampleRate, totalSampleCount); + return drwav__read_pcm_frames_and_close_f32(&wav, channelsOut, sampleRateOut, totalFrameCountOut); } -drwav_int32* drwav_open_and_read_s32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount) +drwav_int32* drwav_open_and_read_pcm_frames_s32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks) { - if (sampleRate) *sampleRate = 0; - if (channels) *channels = 0; - if (totalSampleCount) *totalSampleCount = 0; - drwav wav; - if (!drwav_init(&wav, onRead, onSeek, pUserData)) { + + if (channelsOut) { + *channelsOut = 0; + } + if (sampleRateOut) { + *sampleRateOut = 0; + } + if (totalFrameCountOut) { + *totalFrameCountOut = 0; + } + + if (!drwav_init(&wav, onRead, onSeek, pUserData, pAllocationCallbacks)) { return NULL; } - return drwav__read_and_close_s32(&wav, channels, sampleRate, totalSampleCount); + 
return drwav__read_pcm_frames_and_close_s32(&wav, channelsOut, sampleRateOut, totalFrameCountOut); } #ifndef DR_WAV_NO_STDIO -drwav_int16* drwav_open_and_read_file_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount) +drwav_int16* drwav_open_file_and_read_pcm_frames_s16(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks) { - if (sampleRate) *sampleRate = 0; - if (channels) *channels = 0; - if (totalSampleCount) *totalSampleCount = 0; + drwav wav; + + if (channelsOut) { + *channelsOut = 0; + } + if (sampleRateOut) { + *sampleRateOut = 0; + } + if (totalFrameCountOut) { + *totalFrameCountOut = 0; + } + + if (!drwav_init_file(&wav, filename, pAllocationCallbacks)) { + return NULL; + } + return drwav__read_pcm_frames_and_close_s16(&wav, channelsOut, sampleRateOut, totalFrameCountOut); +} + +float* drwav_open_file_and_read_pcm_frames_f32(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks) +{ drwav wav; - if (!drwav_init_file(&wav, filename)) { + + if (channelsOut) { + *channelsOut = 0; + } + if (sampleRateOut) { + *sampleRateOut = 0; + } + if (totalFrameCountOut) { + *totalFrameCountOut = 0; + } + + if (!drwav_init_file(&wav, filename, pAllocationCallbacks)) { return NULL; } - return drwav__read_and_close_s16(&wav, channels, sampleRate, totalSampleCount); + return drwav__read_pcm_frames_and_close_f32(&wav, channelsOut, sampleRateOut, totalFrameCountOut); } -float* drwav_open_and_read_file_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount) +drwav_int32* drwav_open_file_and_read_pcm_frames_s32(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const 
drwav_allocation_callbacks* pAllocationCallbacks) { - if (sampleRate) *sampleRate = 0; - if (channels) *channels = 0; - if (totalSampleCount) *totalSampleCount = 0; + drwav wav; + + if (channelsOut) { + *channelsOut = 0; + } + if (sampleRateOut) { + *sampleRateOut = 0; + } + if (totalFrameCountOut) { + *totalFrameCountOut = 0; + } + + if (!drwav_init_file(&wav, filename, pAllocationCallbacks)) { + return NULL; + } + return drwav__read_pcm_frames_and_close_s32(&wav, channelsOut, sampleRateOut, totalFrameCountOut); +} + + +drwav_int16* drwav_open_file_and_read_pcm_frames_s16_w(const wchar_t* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks) +{ drwav wav; - if (!drwav_init_file(&wav, filename)) { + + if (sampleRateOut) { + *sampleRateOut = 0; + } + if (channelsOut) { + *channelsOut = 0; + } + if (totalFrameCountOut) { + *totalFrameCountOut = 0; + } + + if (!drwav_init_file_w(&wav, filename, pAllocationCallbacks)) { return NULL; } - return drwav__read_and_close_f32(&wav, channels, sampleRate, totalSampleCount); + return drwav__read_pcm_frames_and_close_s16(&wav, channelsOut, sampleRateOut, totalFrameCountOut); } -drwav_int32* drwav_open_and_read_file_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount) +float* drwav_open_file_and_read_pcm_frames_f32_w(const wchar_t* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks) { - if (sampleRate) *sampleRate = 0; - if (channels) *channels = 0; - if (totalSampleCount) *totalSampleCount = 0; + drwav wav; + + if (sampleRateOut) { + *sampleRateOut = 0; + } + if (channelsOut) { + *channelsOut = 0; + } + if (totalFrameCountOut) { + *totalFrameCountOut = 0; + } + + if (!drwav_init_file_w(&wav, filename, pAllocationCallbacks)) { + return NULL; + } + + return 
drwav__read_pcm_frames_and_close_f32(&wav, channelsOut, sampleRateOut, totalFrameCountOut); +} +drwav_int32* drwav_open_file_and_read_pcm_frames_s32_w(const wchar_t* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks) +{ drwav wav; - if (!drwav_init_file(&wav, filename)) { + + if (sampleRateOut) { + *sampleRateOut = 0; + } + if (channelsOut) { + *channelsOut = 0; + } + if (totalFrameCountOut) { + *totalFrameCountOut = 0; + } + + if (!drwav_init_file_w(&wav, filename, pAllocationCallbacks)) { return NULL; } - return drwav__read_and_close_s32(&wav, channels, sampleRate, totalSampleCount); + return drwav__read_pcm_frames_and_close_s32(&wav, channelsOut, sampleRateOut, totalFrameCountOut); } #endif -drwav_int16* drwav_open_and_read_memory_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount) +drwav_int16* drwav_open_memory_and_read_pcm_frames_s16(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks) { - if (sampleRate) *sampleRate = 0; - if (channels) *channels = 0; - if (totalSampleCount) *totalSampleCount = 0; - drwav wav; - if (!drwav_init_memory(&wav, data, dataSize)) { + + if (channelsOut) { + *channelsOut = 0; + } + if (sampleRateOut) { + *sampleRateOut = 0; + } + if (totalFrameCountOut) { + *totalFrameCountOut = 0; + } + + if (!drwav_init_memory(&wav, data, dataSize, pAllocationCallbacks)) { return NULL; } - return drwav__read_and_close_s16(&wav, channels, sampleRate, totalSampleCount); + return drwav__read_pcm_frames_and_close_s16(&wav, channelsOut, sampleRateOut, totalFrameCountOut); } -float* drwav_open_and_read_memory_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount) +float* 
drwav_open_memory_and_read_pcm_frames_f32(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks) { - if (sampleRate) *sampleRate = 0; - if (channels) *channels = 0; - if (totalSampleCount) *totalSampleCount = 0; - drwav wav; - if (!drwav_init_memory(&wav, data, dataSize)) { + + if (channelsOut) { + *channelsOut = 0; + } + if (sampleRateOut) { + *sampleRateOut = 0; + } + if (totalFrameCountOut) { + *totalFrameCountOut = 0; + } + + if (!drwav_init_memory(&wav, data, dataSize, pAllocationCallbacks)) { return NULL; } - return drwav__read_and_close_f32(&wav, channels, sampleRate, totalSampleCount); + return drwav__read_pcm_frames_and_close_f32(&wav, channelsOut, sampleRateOut, totalFrameCountOut); } -drwav_int32* drwav_open_and_read_memory_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount) +drwav_int32* drwav_open_memory_and_read_pcm_frames_s32(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks) { - if (sampleRate) *sampleRate = 0; - if (channels) *channels = 0; - if (totalSampleCount) *totalSampleCount = 0; - drwav wav; - if (!drwav_init_memory(&wav, data, dataSize)) { + + if (channelsOut) { + *channelsOut = 0; + } + if (sampleRateOut) { + *sampleRateOut = 0; + } + if (totalFrameCountOut) { + *totalFrameCountOut = 0; + } + + if (!drwav_init_memory(&wav, data, dataSize, pAllocationCallbacks)) { return NULL; } - return drwav__read_and_close_s32(&wav, channels, sampleRate, totalSampleCount); -} -#endif //DR_WAV_NO_CONVERSION_API - - -void drwav_free(void* pDataReturnedByOpenAndRead) -{ - DRWAV_FREE(pDataReturnedByOpenAndRead); -} - -#endif //DR_WAV_IMPLEMENTATION - - -// REVISION HISTORY -// -// v0.8.5 - 2018-09-11 -// - Const correctness. 
-// - Fix a potential stack overflow. -// -// v0.8.4 - 2018-08-07 -// - Improve 64-bit detection. -// -// v0.8.3 - 2018-08-05 -// - Fix C++ build on older versions of GCC. -// -// v0.8.2 - 2018-08-02 -// - Fix some big-endian bugs. -// -// v0.8.1 - 2018-06-29 -// - Add support for sequential writing APIs. -// - Disable seeking in write mode. -// - Fix bugs with Wave64. -// - Fix typos. -// -// v0.8 - 2018-04-27 -// - Bug fix. -// - Start using major.minor.revision versioning. -// -// v0.7f - 2018-02-05 -// - Restrict ADPCM formats to a maximum of 2 channels. -// -// v0.7e - 2018-02-02 -// - Fix a crash. -// -// v0.7d - 2018-02-01 -// - Fix a crash. -// -// v0.7c - 2018-02-01 -// - Set drwav.bytesPerSample to 0 for all compressed formats. -// - Fix a crash when reading 16-bit floating point WAV files. In this case dr_wav will output silence for -// all format conversion reading APIs (*_s16, *_s32, *_f32 APIs). -// - Fix some divide-by-zero errors. -// -// v0.7b - 2018-01-22 -// - Fix errors with seeking of compressed formats. -// - Fix compilation error when DR_WAV_NO_CONVERSION_API -// -// v0.7a - 2017-11-17 -// - Fix some GCC warnings. -// -// v0.7 - 2017-11-04 -// - Add writing APIs. -// -// v0.6 - 2017-08-16 -// - API CHANGE: Rename dr_* types to drwav_*. -// - Add support for custom implementations of malloc(), realloc(), etc. -// - Add support for Microsoft ADPCM. -// - Add support for IMA ADPCM (DVI, format code 0x11). -// - Optimizations to drwav_read_s16(). -// - Bug fixes. -// -// v0.5g - 2017-07-16 -// - Change underlying type for booleans to unsigned. -// -// v0.5f - 2017-04-04 -// - Fix a minor bug with drwav_open_and_read_s16() and family. -// -// v0.5e - 2016-12-29 -// - Added support for reading samples as signed 16-bit integers. Use the _s16() family of APIs for this. -// - Minor fixes to documentation. -// -// v0.5d - 2016-12-28 -// - Use drwav_int*/drwav_uint* sized types to improve compiler support. 
-// -// v0.5c - 2016-11-11 -// - Properly handle JUNK chunks that come before the FMT chunk. -// -// v0.5b - 2016-10-23 -// - A minor change to drwav_bool8 and drwav_bool32 types. -// -// v0.5a - 2016-10-11 -// - Fixed a bug with drwav_open_and_read() and family due to incorrect argument ordering. -// - Improve A-law and mu-law efficiency. -// -// v0.5 - 2016-09-29 -// - API CHANGE. Swap the order of "channels" and "sampleRate" parameters in drwav_open_and_read*(). Rationale for this is to -// keep it consistent with dr_audio and dr_flac. -// -// v0.4b - 2016-09-18 -// - Fixed a typo in documentation. -// -// v0.4a - 2016-09-18 -// - Fixed a typo. -// - Change date format to ISO 8601 (YYYY-MM-DD) -// -// v0.4 - 2016-07-13 -// - API CHANGE. Make onSeek consistent with dr_flac. -// - API CHANGE. Rename drwav_seek() to drwav_seek_to_sample() for clarity and consistency with dr_flac. -// - Added support for Sony Wave64. -// -// v0.3a - 2016-05-28 -// - API CHANGE. Return drwav_bool32 instead of int in onSeek callback. -// - Fixed a memory leak. -// -// v0.3 - 2016-05-22 -// - Lots of API changes for consistency. -// -// v0.2a - 2016-05-16 -// - Fixed Linux/GCC build. -// -// v0.2 - 2016-05-11 -// - Added support for reading data as signed 32-bit PCM for consistency with dr_flac. -// -// v0.1a - 2016-05-07 -// - Fixed a bug in drwav_open_file() where the file handle would not be closed if the loader failed to initialize. -// -// v0.1 - 2016-05-04 -// - Initial versioned release. 
+ return drwav__read_pcm_frames_and_close_s32(&wav, channelsOut, sampleRateOut, totalFrameCountOut); +} +#endif /* DR_WAV_NO_CONVERSION_API */ + + +void drwav_free(void* p, const drwav_allocation_callbacks* pAllocationCallbacks) +{ + if (pAllocationCallbacks != NULL) { + drwav__free_from_callbacks(p, pAllocationCallbacks); + } else { + drwav__free_default(p, NULL); + } +} + +#endif /* DR_WAV_IMPLEMENTATION */ + +/* +REVISION HISTORY +================ +v0.11.1 - 2019-10-07 + - Internal code clean up. + +v0.11.0 - 2019-10-06 + - API CHANGE: Add support for user defined memory allocation routines. This system allows the program to specify their own memory allocation + routines with a user data pointer for client-specific contextual data. This adds an extra parameter to the end of the following APIs: + - drwav_init() + - drwav_init_ex() + - drwav_init_file() + - drwav_init_file_ex() + - drwav_init_file_w() + - drwav_init_file_w_ex() + - drwav_init_memory() + - drwav_init_memory_ex() + - drwav_init_write() + - drwav_init_write_sequential() + - drwav_init_write_sequential_pcm_frames() + - drwav_init_file_write() + - drwav_init_file_write_sequential() + - drwav_init_file_write_sequential_pcm_frames() + - drwav_init_file_write_w() + - drwav_init_file_write_sequential_w() + - drwav_init_file_write_sequential_pcm_frames_w() + - drwav_init_memory_write() + - drwav_init_memory_write_sequential() + - drwav_init_memory_write_sequential_pcm_frames() + - drwav_open_and_read_pcm_frames_s16() + - drwav_open_and_read_pcm_frames_f32() + - drwav_open_and_read_pcm_frames_s32() + - drwav_open_file_and_read_pcm_frames_s16() + - drwav_open_file_and_read_pcm_frames_f32() + - drwav_open_file_and_read_pcm_frames_s32() + - drwav_open_file_and_read_pcm_frames_s16_w() + - drwav_open_file_and_read_pcm_frames_f32_w() + - drwav_open_file_and_read_pcm_frames_s32_w() + - drwav_open_memory_and_read_pcm_frames_s16() + - drwav_open_memory_and_read_pcm_frames_f32() + - 
drwav_open_memory_and_read_pcm_frames_s32() + Set this extra parameter to NULL to use defaults, which is the same as the previous behaviour. Setting this to NULL will use + DRWAV_MALLOC, DRWAV_REALLOC and DRWAV_FREE. + - Add support for reading and writing PCM frames in an explicit endianness. New APIs: + - drwav_read_pcm_frames_le() + - drwav_read_pcm_frames_be() + - drwav_read_pcm_frames_s16le() + - drwav_read_pcm_frames_s16be() + - drwav_read_pcm_frames_f32le() + - drwav_read_pcm_frames_f32be() + - drwav_read_pcm_frames_s32le() + - drwav_read_pcm_frames_s32be() + - drwav_write_pcm_frames_le() + - drwav_write_pcm_frames_be() + - Remove deprecated APIs. + - API CHANGE: The following APIs now return native-endian data. Previously they returned little-endian data. + - drwav_read_pcm_frames() + - drwav_read_pcm_frames_s16() + - drwav_read_pcm_frames_s32() + - drwav_read_pcm_frames_f32() + - drwav_open_and_read_pcm_frames_s16() + - drwav_open_and_read_pcm_frames_s32() + - drwav_open_and_read_pcm_frames_f32() + - drwav_open_file_and_read_pcm_frames_s16() + - drwav_open_file_and_read_pcm_frames_s32() + - drwav_open_file_and_read_pcm_frames_f32() + - drwav_open_file_and_read_pcm_frames_s16_w() + - drwav_open_file_and_read_pcm_frames_s32_w() + - drwav_open_file_and_read_pcm_frames_f32_w() + - drwav_open_memory_and_read_pcm_frames_s16() + - drwav_open_memory_and_read_pcm_frames_s32() + - drwav_open_memory_and_read_pcm_frames_f32() + +v0.10.1 - 2019-08-31 + - Correctly handle partial trailing ADPCM blocks. + +v0.10.0 - 2019-08-04 + - Remove deprecated APIs. + - Add wchar_t variants for file loading APIs: + drwav_init_file_w() + drwav_init_file_ex_w() + drwav_init_file_write_w() + drwav_init_file_write_sequential_w() + - Add drwav_target_write_size_bytes() which calculates the total size in bytes of a WAV file given a format and sample count. 
+ - Add APIs for specifying the PCM frame count instead of the sample count when opening in sequential write mode: + drwav_init_write_sequential_pcm_frames() + drwav_init_file_write_sequential_pcm_frames() + drwav_init_file_write_sequential_pcm_frames_w() + drwav_init_memory_write_sequential_pcm_frames() + - Deprecate drwav_open*() and drwav_close(): + drwav_open() + drwav_open_ex() + drwav_open_write() + drwav_open_write_sequential() + drwav_open_file() + drwav_open_file_ex() + drwav_open_file_write() + drwav_open_file_write_sequential() + drwav_open_memory() + drwav_open_memory_ex() + drwav_open_memory_write() + drwav_open_memory_write_sequential() + drwav_close() + - Minor documentation updates. + +v0.9.2 - 2019-05-21 + - Fix warnings. + +v0.9.1 - 2019-05-05 + - Add support for C89. + - Change license to choice of public domain or MIT-0. + +v0.9.0 - 2018-12-16 + - API CHANGE: Add new reading APIs for reading by PCM frames instead of samples. Old APIs have been deprecated and + will be removed in v0.10.0. 
Deprecated APIs and their replacements: + drwav_read() -> drwav_read_pcm_frames() + drwav_read_s16() -> drwav_read_pcm_frames_s16() + drwav_read_f32() -> drwav_read_pcm_frames_f32() + drwav_read_s32() -> drwav_read_pcm_frames_s32() + drwav_seek_to_sample() -> drwav_seek_to_pcm_frame() + drwav_write() -> drwav_write_pcm_frames() + drwav_open_and_read_s16() -> drwav_open_and_read_pcm_frames_s16() + drwav_open_and_read_f32() -> drwav_open_and_read_pcm_frames_f32() + drwav_open_and_read_s32() -> drwav_open_and_read_pcm_frames_s32() + drwav_open_file_and_read_s16() -> drwav_open_file_and_read_pcm_frames_s16() + drwav_open_file_and_read_f32() -> drwav_open_file_and_read_pcm_frames_f32() + drwav_open_file_and_read_s32() -> drwav_open_file_and_read_pcm_frames_s32() + drwav_open_memory_and_read_s16() -> drwav_open_memory_and_read_pcm_frames_s16() + drwav_open_memory_and_read_f32() -> drwav_open_memory_and_read_pcm_frames_f32() + drwav_open_memory_and_read_s32() -> drwav_open_memory_and_read_pcm_frames_s32() + drwav::totalSampleCount -> drwav::totalPCMFrameCount + - API CHANGE: Rename drwav_open_and_read_file_*() to drwav_open_file_and_read_*(). + - API CHANGE: Rename drwav_open_and_read_memory_*() to drwav_open_memory_and_read_*(). + - Add built-in support for smpl chunks. + - Add support for firing a callback for each chunk in the file at initialization time. + - This is enabled through the drwav_init_ex(), etc. family of APIs. + - Handle invalid FMT chunks more robustly. + +v0.8.5 - 2018-09-11 + - Const correctness. + - Fix a potential stack overflow. + +v0.8.4 - 2018-08-07 + - Improve 64-bit detection. + +v0.8.3 - 2018-08-05 + - Fix C++ build on older versions of GCC. + +v0.8.2 - 2018-08-02 + - Fix some big-endian bugs. + +v0.8.1 - 2018-06-29 + - Add support for sequential writing APIs. + - Disable seeking in write mode. + - Fix bugs with Wave64. + - Fix typos. + +v0.8 - 2018-04-27 + - Bug fix. + - Start using major.minor.revision versioning. 
+ +v0.7f - 2018-02-05 + - Restrict ADPCM formats to a maximum of 2 channels. + +v0.7e - 2018-02-02 + - Fix a crash. + +v0.7d - 2018-02-01 + - Fix a crash. + +v0.7c - 2018-02-01 + - Set drwav.bytesPerSample to 0 for all compressed formats. + - Fix a crash when reading 16-bit floating point WAV files. In this case dr_wav will output silence for + all format conversion reading APIs (*_s16, *_s32, *_f32 APIs). + - Fix some divide-by-zero errors. + +v0.7b - 2018-01-22 + - Fix errors with seeking of compressed formats. + - Fix compilation error when DR_WAV_NO_CONVERSION_API + +v0.7a - 2017-11-17 + - Fix some GCC warnings. + +v0.7 - 2017-11-04 + - Add writing APIs. + +v0.6 - 2017-08-16 + - API CHANGE: Rename dr_* types to drwav_*. + - Add support for custom implementations of malloc(), realloc(), etc. + - Add support for Microsoft ADPCM. + - Add support for IMA ADPCM (DVI, format code 0x11). + - Optimizations to drwav_read_s16(). + - Bug fixes. + +v0.5g - 2017-07-16 + - Change underlying type for booleans to unsigned. + +v0.5f - 2017-04-04 + - Fix a minor bug with drwav_open_and_read_s16() and family. + +v0.5e - 2016-12-29 + - Added support for reading samples as signed 16-bit integers. Use the _s16() family of APIs for this. + - Minor fixes to documentation. + +v0.5d - 2016-12-28 + - Use drwav_int* and drwav_uint* sized types to improve compiler support. + +v0.5c - 2016-11-11 + - Properly handle JUNK chunks that come before the FMT chunk. + +v0.5b - 2016-10-23 + - A minor change to drwav_bool8 and drwav_bool32 types. + +v0.5a - 2016-10-11 + - Fixed a bug with drwav_open_and_read() and family due to incorrect argument ordering. + - Improve A-law and mu-law efficiency. + +v0.5 - 2016-09-29 + - API CHANGE. Swap the order of "channels" and "sampleRate" parameters in drwav_open_and_read*(). Rationale for this is to + keep it consistent with dr_audio and dr_flac. + +v0.4b - 2016-09-18 + - Fixed a typo in documentation. + +v0.4a - 2016-09-18 + - Fixed a typo. 
+ - Change date format to ISO 8601 (YYYY-MM-DD) + +v0.4 - 2016-07-13 + - API CHANGE. Make onSeek consistent with dr_flac. + - API CHANGE. Rename drwav_seek() to drwav_seek_to_sample() for clarity and consistency with dr_flac. + - Added support for Sony Wave64. + +v0.3a - 2016-05-28 + - API CHANGE. Return drwav_bool32 instead of int in onSeek callback. + - Fixed a memory leak. + +v0.3 - 2016-05-22 + - Lots of API changes for consistency. +v0.2a - 2016-05-16 + - Fixed Linux/GCC build. + +v0.2 - 2016-05-11 + - Added support for reading data as signed 32-bit PCM for consistency with dr_flac. + +v0.1a - 2016-05-07 + - Fixed a bug in drwav_open_file() where the file handle would not be closed if the loader failed to initialize. + +v0.1 - 2016-05-04 + - Initial versioned release. +*/ /* +This software is available as a choice of the following licenses. Choose +whichever you prefer. + +=============================================================================== +ALTERNATIVE 1 - Public Domain (www.unlicense.org) +=============================================================================== This is free and unencumbered software released into the public domain. -Anyone is free to copy, modify, publish, use, compile, sell, or -distribute this software, either in source code form or as a compiled -binary, for any purpose, commercial or non-commercial, and by any -means. - -In jurisdictions that recognize copyright laws, the author or authors -of this software dedicate any and all copyright interest in the -software to the public domain. We make this dedication for the benefit -of the public at large and to the detriment of our heirs and -successors. We intend this dedication to be an overt act of -relinquishment in perpetuity of all present and future rights to this -software under copyright law. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -OTHER DEALINGS IN THE SOFTWARE. +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. + +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
For more information, please refer to <http://unlicense.org/> + +=============================================================================== +ALTERNATIVE 2 - MIT No Attribution +=============================================================================== +Copyright 2018 David Reid + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
*/ -// clang-format on diff --git a/include/kfr/io/impl/audiofile-impl.cpp b/include/kfr/io/impl/audiofile-impl.cpp @@ -28,16 +28,21 @@ CMT_PRAGMA_GNU(GCC diagnostic push) CMT_PRAGMA_GNU(GCC diagnostic ignored "-Wimplicit-fallthrough") -#if defined(KFR_ENABLE_WAV) && KFR_ENABLE_WAV +#ifndef KFR_DISABLE_WAV #define DR_WAV_NO_STDIO #define DR_WAV_NO_CONVERSION_API #define DR_WAV_IMPLEMENTATION #include "../dr/dr_wav.h" #endif -#if defined(KFR_ENABLE_FLAC) && KFR_ENABLE_FLAC +#ifndef KFR_DISABLE_FLAC #define DR_FLAC_IMPLEMENTATION #define DR_FLAC_NO_STDIO #include "../dr/dr_flac.h" #endif +#ifndef KFR_DISABLE_MP3 +#define DR_MP3_IMPLEMENTATION +#define DR_MP3_NO_STDIO +#include "../dr/dr_mp3.h" +#endif CMT_PRAGMA_GNU(GCC diagnostic pop) diff --git a/include/kfr/simd/impl/backend_generic.hpp b/include/kfr/simd/impl/backend_generic.hpp @@ -1090,8 +1090,15 @@ KFR_INTRINSIC simd<Tout, N> simd_allones() CMT_NOEXCEPT } /// @brief Converts input vector to vector with subtype Tout -template <typename Tout, typename Tin, size_t N, size_t Nout = (sizeof(Tin) * N / sizeof(Tout)), - KFR_ENABLE_IF(Nout == 1 || N == 1)> +template <typename Tout, typename Tin, size_t N, size_t Nout = (sizeof(Tin) * N / sizeof(Tout)) +#ifdef _MSC_VER + , + KFR_ENABLE_IF((Nout == 1 || N == 1) && !is_same<Tout, Tin>::value) +#else + , + KFR_ENABLE_IF(Nout == 1 || N == 1) +#endif + > KFR_INTRINSIC simd<Tout, Nout> simd_bitcast(simd_cvt_t<Tout, Tin, N>, const simd<Tin, N>& x) CMT_NOEXCEPT { not_optimized(CMT_FUNC_SIGNATURE); @@ -1099,8 +1106,15 @@ KFR_INTRINSIC simd<Tout, Nout> simd_bitcast(simd_cvt_t<Tout, Tin, N>, const simd } /// @brief Converts input vector to vector with subtype Tout -template <typename Tout, typename Tin, size_t N, size_t Nout = (sizeof(Tin) * N / sizeof(Tout)), - KFR_ENABLE_IF(Nout > 1 && N > 1)> +template <typename Tout, typename Tin, size_t N, size_t Nout = (sizeof(Tin) * N / sizeof(Tout)) +#ifdef _MSC_VER + , + KFR_ENABLE_IF(Nout > 1 && N > 1 && !is_same<Tout, Tin>::value) 
+#else + , + KFR_ENABLE_IF(Nout > 1 && N > 1) +#endif + > KFR_INTRINSIC simd<Tout, Nout> simd_bitcast(simd_cvt_t<Tout, Tin, N>, const simd<Tin, N>& x) CMT_NOEXCEPT { constexpr size_t Nlow = prev_poweroftwo(N - 1); diff --git a/include/kfr/simd/shuffle.hpp b/include/kfr/simd/shuffle.hpp @@ -144,12 +144,12 @@ KFR_INTRINSIC vec<T, Nout> extend(const vec<T, 1>& x) { return vec<T, Nout>(x.front()); } -template <size_t Nout, typename T, size_t N, KFR_ENABLE_IF(N != Nout)> +template <size_t Nout, typename T, size_t N, KFR_ENABLE_IF(N != Nout && N > 1)> KFR_INTRINSIC vec<T, Nout> extend(const vec<T, N>& x) { return x.shuffle(csizeseq<Nout>); } -template <size_t Nout, typename T, size_t N, KFR_ENABLE_IF(N == Nout)> +template <size_t Nout, typename T, size_t N, KFR_ENABLE_IF(N == Nout && N > 1)> constexpr KFR_INTRINSIC const vec<T, Nout>& extend(const vec<T, N>& x) { return x; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt @@ -20,6 +20,12 @@ cmake_minimum_required(VERSION 3.1) add_definitions(-DKFR_TESTING=1) add_definitions(-DKFR_SRC_DIR=\"${CMAKE_SOURCE_DIR}\") +if (MSVC) + link_libraries(-DEBUG) +else () + add_compile_options(-g) +endif () + if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") add_compile_options(-Wno-parentheses) endif () @@ -67,7 +73,8 @@ set(ALL_TESTS_CPP expression_test.cpp intrinsic_test.cpp io_test.cpp - ${KFR_UNITTEST_SRC}) + ${KFR_UNITTEST_SRC} + ) # set(ALL_TESTS_MERGED_CPP all_tests_merged.cpp) diff --git a/tests/asm_test.cpp b/tests/asm_test.cpp @@ -7,7 +7,9 @@ #define KFR_EXTENDED_TESTS #include <kfr/base.hpp> +#ifdef KFR_ENABLE_DFT #include <kfr/dft/impl/fft-impl.hpp> +#endif #include <kfr/io.hpp> #include <kfr/testo/console_colors.hpp> @@ -245,6 +247,8 @@ TEST_ASM_UIF(read, TEST_READ) TEST_ASM_UIF(write, TEST_WRITE) +#ifdef KFR_ENABLE_DFT + #define TEST_FFT_SPEC(ty, size) \ static intrinsics::fft_specialization<ty, size> fft__##ty##__##size(static_cast<size_t>(1 << size)); \ KFR_PUBLIC void asm__test__fft__##ty##__##size(complex<ty>* out, 
const complex<ty>* in, u8* temp) \ @@ -280,6 +284,8 @@ TEST_FFT_GEN(f64) #endif +#endif + TEST_ASM_F(sin, TEST_ASM_VTY1_F) TEST_ASM_F(cos, TEST_ASM_VTY1_F) diff --git a/tests/io_test.cpp b/tests/io_test.cpp @@ -15,7 +15,7 @@ using namespace kfr; namespace CMT_ARCH_NAME { -#if KFR_ENABLE_WAV +#ifndef KFR_DISABLE_WAV TEST(write_wav_file) { audio_writer_wav<float> writer(open_file_for_writing(KFR_FILEPATH("temp_audio_file.wav")), @@ -41,10 +41,10 @@ TEST(read_wav_file) } #endif -#if KFR_ENABLE_FLAC -DTEST(read_flac_file) +#ifndef KFR_DISABLE_FLAC +TEST(read_flac_file) { - audio_reader_flac<float> reader(open_file_for_reading(KFR_FILEPATH("../../tests/test-audio/sine.flac"))); + audio_reader_flac<float> reader(open_file_for_reading(KFR_FILEPATH(KFR_SRC_DIR "/tests/test-audio/sine.flac"))); CHECK(reader.format().channels == 2u); CHECK(reader.format().type == audio_sample_type::i32); CHECK(reader.format().samplerate == 44100); @@ -55,6 +55,22 @@ DTEST(read_flac_file) CHECK(absmaxof(data - render(sin(counter() * 0.01f), data.size())) < 0.0001f); } #endif + +#ifndef KFR_DISABLE_MP3 +TEST(read_mp3_file) +{ + audio_reader_mp3<float> reader(open_file_for_reading(KFR_FILEPATH(KFR_SRC_DIR "/tests/test-audio/sine.mp3"))); + CHECK(reader.format().channels == 2u); + CHECK(reader.format().type == audio_sample_type::i16); + CHECK(reader.format().samplerate == 44100); + univector<float> data(44100 * 2); + CHECK(reader.format().length >= data.size() / 2); + size_t rd = reader.read(data.data(), data.size()); + CHECK(rd == data.size()); + data = data.slice(2402, 2 * 44100); // MP3 format delay + CHECK(absmaxof(data - render(sin(counter() * 0.01f), data.size())) < 0.005f); +} +#endif } // namespace CMT_ARCH_NAME #ifndef KFR_NO_MAIN diff --git a/tests/test-audio/sine.mp3 b/tests/test-audio/sine.mp3 Binary files differ.