dr_flac.h (466848B)
1 /* 2 FLAC audio decoder. Choice of public domain or MIT-0. See license statements at the end of this file. 3 dr_flac - v0.12.2 - 2019-10-07 4 5 David Reid - [email protected] 6 */ 7 8 /* 9 RELEASE NOTES - v0.12.0 10 ======================= 11 Version 0.12.0 has breaking API changes including changes to the existing API and the removal of deprecated APIs. 12 13 14 Improved Client-Defined Memory Allocation 15 ----------------------------------------- 16 The main change with this release is the addition of a more flexible way of implementing custom memory allocation routines. The 17 existing system of DRFLAC_MALLOC, DRFLAC_REALLOC and DRFLAC_FREE are still in place and will be used by default when no custom 18 allocation callbacks are specified. 19 20 To use the new system, you pass in a pointer to a drflac_allocation_callbacks object to drflac_open() and family, like this: 21 22 void* my_malloc(size_t sz, void* pUserData) 23 { 24 return malloc(sz); 25 } 26 void* my_realloc(void* p, size_t sz, void* pUserData) 27 { 28 return realloc(p, sz); 29 } 30 void my_free(void* p, void* pUserData) 31 { 32 free(p); 33 } 34 35 ... 36 37 drflac_allocation_callbacks allocationCallbacks; 38 allocationCallbacks.pUserData = &myData; 39 allocationCallbacks.onMalloc = my_malloc; 40 allocationCallbacks.onRealloc = my_realloc; 41 allocationCallbacks.onFree = my_free; 42 drflac* pFlac = drflac_open_file("my_file.flac", &allocationCallbacks); 43 44 The advantage of this new system is that it allows you to specify user data which will be passed in to the allocation routines. 45 46 Passing in null for the allocation callbacks object will cause dr_flac to use defaults which is the same as DRFLAC_MALLOC, 47 DRFLAC_REALLOC and DRFLAC_FREE and the equivalent of how it worked in previous versions. 48 49 Every API that opens a drflac object now takes this extra parameter. 
These include the following: 50 51 drflac_open() 52 drflac_open_relaxed() 53 drflac_open_with_metadata() 54 drflac_open_with_metadata_relaxed() 55 drflac_open_file() 56 drflac_open_file_with_metadata() 57 drflac_open_memory() 58 drflac_open_memory_with_metadata() 59 drflac_open_and_read_pcm_frames_s32() 60 drflac_open_and_read_pcm_frames_s16() 61 drflac_open_and_read_pcm_frames_f32() 62 drflac_open_file_and_read_pcm_frames_s32() 63 drflac_open_file_and_read_pcm_frames_s16() 64 drflac_open_file_and_read_pcm_frames_f32() 65 drflac_open_memory_and_read_pcm_frames_s32() 66 drflac_open_memory_and_read_pcm_frames_s16() 67 drflac_open_memory_and_read_pcm_frames_f32() 68 69 70 71 Optimizations 72 ------------- 73 Seeking performance has been greatly improved. A new binary search based seeking algorithm has been introduced which significantly 74 improves performance over the brute force method which was used when no seek table was present. Seek table based seeking also takes 75 advantage of the new binary search seeking system to further improve performance there as well. Note that this depends on CRC which 76 means it will be disabled when DR_FLAC_NO_CRC is used. 77 78 The SSE4.1 pipeline has been cleaned up and optimized. You should see some improvements with decoding speed of 24-bit files in 79 particular. 16-bit streams should also see some improvement. 80 81 drflac_read_pcm_frames_s16() has been optimized. Previously this sat on top of drflac_read_pcm_frames_s32() and performed its s32 82 to s16 conversion in a second pass. This is now all done in a single pass. This includes SSE2 and ARM NEON optimized paths. 83 84 A minor optimization has been implemented for drflac_read_pcm_frames_s32(). This will now use an SSE2 optimized pipeline for stereo 85 channel reconstruction which is the last part of the decoding process. 86 87 The ARM build has seen a few improvements. 
The CLZ (count leading zeroes) and REV (byte swap) instructions are now used when 88 compiling with GCC and Clang which is achieved using inline assembly. The CLZ instruction requires ARM architecture version 5 at 89 compile time and the REV instruction requires ARM architecture version 6. 90 91 An ARM NEON optimized pipeline has been implemented. To enable this you'll need to add -mfpu=neon to the command line when compiling. 92 93 94 Removed APIs 95 ------------ 96 The following APIs were deprecated in version 0.11.0 and have been completely removed in version 0.12.0: 97 98 drflac_read_s32() -> drflac_read_pcm_frames_s32() 99 drflac_read_s16() -> drflac_read_pcm_frames_s16() 100 drflac_read_f32() -> drflac_read_pcm_frames_f32() 101 drflac_seek_to_sample() -> drflac_seek_to_pcm_frame() 102 drflac_open_and_decode_s32() -> drflac_open_and_read_pcm_frames_s32() 103 drflac_open_and_decode_s16() -> drflac_open_and_read_pcm_frames_s16() 104 drflac_open_and_decode_f32() -> drflac_open_and_read_pcm_frames_f32() 105 drflac_open_and_decode_file_s32() -> drflac_open_file_and_read_pcm_frames_s32() 106 drflac_open_and_decode_file_s16() -> drflac_open_file_and_read_pcm_frames_s16() 107 drflac_open_and_decode_file_f32() -> drflac_open_file_and_read_pcm_frames_f32() 108 drflac_open_and_decode_memory_s32() -> drflac_open_memory_and_read_pcm_frames_s32() 109 drflac_open_and_decode_memory_s16() -> drflac_open_memory_and_read_pcm_frames_s16() 110 drflac_open_and_decode_memory_f32() -> drflac_open_memory_and_read_pcm_frames_f32() 111 112 Prior versions of dr_flac operated on a per-sample basis whereas now it operates on PCM frames. The removed APIs all relate 113 to the old per-sample APIs. You now need to use the "pcm_frame" versions. 114 */ 115 116 117 /* 118 USAGE 119 ===== 120 dr_flac is a single-file library. To use it, do something like the following in one .c file. 
121 122 #define DR_FLAC_IMPLEMENTATION 123 #include "dr_flac.h" 124 125 You can then #include this file in other parts of the program as you would with any other header file. To decode audio data, 126 do something like the following: 127 128 drflac* pFlac = drflac_open_file("MySong.flac", NULL); 129 if (pFlac == NULL) { 130 // Failed to open FLAC file 131 } 132 133 drflac_int32* pSamples = malloc(pFlac->totalPCMFrameCount * pFlac->channels * sizeof(drflac_int32)); 134 drflac_uint64 numberOfPCMFramesActuallyRead = drflac_read_pcm_frames_s32(pFlac, pFlac->totalPCMFrameCount, pSamples); 135 136 The drflac object represents the decoder. It is a transparent type so all the information you need, such as the number of 137 channels and the bits per sample, should be directly accessible - just make sure you don't change their values. Samples are 138 always output as interleaved signed 32-bit PCM. In the example above a native FLAC stream was opened, however dr_flac has 139 seamless support for Ogg encapsulated FLAC streams as well. 140 141 You do not need to decode the entire stream in one go - you just specify how many samples you'd like at any given time and 142 the decoder will give you as many samples as it can, up to the amount requested. Later on when you need the next batch of 143 samples, just call it again. Example: 144 145 while (drflac_read_pcm_frames_s32(pFlac, chunkSizeInPCMFrames, pChunkSamples) > 0) { 146 do_something(); 147 } 148 149 You can seek to a specific PCM frame with drflac_seek_to_pcm_frame(). The given index counts PCM frames, not interleaved samples. So for example, 150 if you were to seek to the PCM frame at index 0 in a stereo stream, you'll be seeking to the first sample of both the left and right channels. 151 The PCM frame at index 1 will be the second sample of the left and right channels. The PCM frame at index 2 will be the third sample of 152 each channel, etc. 
153 154 155 If you just want to quickly decode an entire FLAC file in one go you can do something like this: 156 157 unsigned int channels; 158 unsigned int sampleRate; 159 drflac_uint64 totalPCMFrameCount; 160 drflac_int32* pSampleData = drflac_open_file_and_read_pcm_frames_s32("MySong.flac", &channels, &sampleRate, &totalPCMFrameCount, NULL); 161 if (pSampleData == NULL) { 162 // Failed to open and decode FLAC file. 163 } 164 165 ... 166 167 drflac_free(pSampleData); 168 169 170 You can read samples as signed 16-bit integer and 32-bit floating-point PCM with the *_s16() and *_f32() family of APIs 171 respectively, but note that these should be considered lossy. 172 173 174 If you need access to metadata (album art, etc.), use drflac_open_with_metadata(), drflac_open_file_with_metadata() or 175 drflac_open_memory_with_metadata(). The rationale for keeping these APIs separate is that they're slightly slower than the 176 normal versions and also just a little bit harder to use. 177 178 dr_flac reports metadata to the application through the use of a callback, and every metadata block is reported before 179 drflac_open_with_metadata() returns. 180 181 182 The main opening APIs (drflac_open(), etc.) will fail if the header is not present. This presents a problem in certain 183 scenarios such as broadcast style streams or internet radio where the header may not be present because the user has 184 started playback mid-stream. To handle this, use the relaxed APIs: drflac_open_relaxed() and drflac_open_with_metadata_relaxed(). 185 186 It is not recommended to use these APIs for file based streams because a missing header would usually indicate a 187 corrupt or perverse file. In addition, these APIs can take a long time to initialize because they may need to spend 188 a lot of time finding the first frame. 189 190 191 192 OPTIONS 193 ======= 194 #define these options before including this file. 195 196 #define DR_FLAC_NO_STDIO 197 Disable drflac_open_file() and family. 
198 199 #define DR_FLAC_NO_OGG 200 Disables support for Ogg/FLAC streams. 201 202 #define DR_FLAC_BUFFER_SIZE <number> 203 Defines the size of the internal buffer to store data from onRead(). This buffer is used to reduce the number of calls 204 back to the client for more data. Larger values means more memory, but better performance. My tests show diminishing 205 returns after about 4KB (which is the default). Consider reducing this if you have a very efficient implementation of 206 onRead(), or increase it if it's very inefficient. Must be a multiple of 8. 207 208 #define DR_FLAC_NO_CRC 209 Disables CRC checks. This will offer a performance boost when CRC is unnecessary. This will disable binary search seeking. 210 When seeking, the seek table will be used if available. Otherwise the seek will be performed using brute force. 211 212 #define DR_FLAC_NO_SIMD 213 Disables SIMD optimizations (SSE on x86/x64 architectures, NEON on ARM architectures). Use this if you are having 214 compatibility issues with your compiler. 215 216 217 218 QUICK NOTES 219 =========== 220 - dr_flac does not currently support changing the sample rate nor channel count mid stream. 221 - This has not been tested on big-endian architectures. 222 - dr_flac is not thread-safe, but its APIs can be called from any thread so long as you do your own synchronization. 223 - When using Ogg encapsulation, a corrupted metadata block will result in drflac_open_with_metadata() and drflac_open() 224 returning inconsistent samples. 
225 */ 226 227 #ifndef dr_flac_h 228 #define dr_flac_h 229 230 #include <stddef.h> 231 /* Sized integer types. MSVC older than version 1600 gets hand-written typedefs; every other compiler takes them from <stdint.h>. */ 232 #if defined(_MSC_VER) && _MSC_VER < 1600 233 typedef signed char drflac_int8; 234 typedef unsigned char drflac_uint8; 235 typedef signed short drflac_int16; 236 typedef unsigned short drflac_uint16; 237 typedef signed int drflac_int32; 238 typedef unsigned int drflac_uint32; 239 typedef signed __int64 drflac_int64; 240 typedef unsigned __int64 drflac_uint64; 241 #else 242 #include <stdint.h> 243 typedef int8_t drflac_int8; 244 typedef uint8_t drflac_uint8; 245 typedef int16_t drflac_int16; 246 typedef uint16_t drflac_uint16; 247 typedef int32_t drflac_int32; 248 typedef uint32_t drflac_uint32; 249 typedef int64_t drflac_int64; 250 typedef uint64_t drflac_uint64; 251 #endif 252 typedef drflac_uint8 drflac_bool8; 253 typedef drflac_uint32 drflac_bool32; 254 #define DRFLAC_TRUE 1 255 #define DRFLAC_FALSE 0 256 /* DRFLAC_DEPRECATED expands to the deprecation attribute of MSVC, GCC or Clang, and to nothing on any other compiler. */ 257 #if defined(_MSC_VER) && _MSC_VER >= 1700 /* Visual Studio 2012 */ 258 #define DRFLAC_DEPRECATED __declspec(deprecated) 259 #elif (defined(__GNUC__) && __GNUC__ >= 4) /* GCC 4 */ 260 #define DRFLAC_DEPRECATED __attribute__((deprecated)) 261 #elif defined(__has_feature) /* Clang */ 262 #if __has_feature(attribute_deprecated) 263 #define DRFLAC_DEPRECATED __attribute__((deprecated)) 264 #else 265 #define DRFLAC_DEPRECATED 266 #endif 267 #else 268 #define DRFLAC_DEPRECATED 269 #endif 270 271 /* 272 As data is read from the client it is placed into an internal buffer for fast access. This controls the 273 size of that buffer. Larger values mean more speed, but also more memory. In my testing there are diminishing 274 returns after about 4KB, but you can fiddle with this to suit your own needs. Must be a multiple of 8. 275 */ 276 #ifndef DR_FLAC_BUFFER_SIZE 277 #define DR_FLAC_BUFFER_SIZE 4096 278 #endif 279 280 #ifdef __cplusplus 281 extern "C" { 282 #endif 283 284 /* Check if we can enable 64-bit optimizations. 
*/ 285 #if defined(_WIN64) || defined(_LP64) || defined(__LP64__) 286 #define DRFLAC_64BIT 287 #endif 288 /* The cache word type: 64 bits wide when DRFLAC_64BIT is defined, 32 bits wide otherwise. */ 289 #ifdef DRFLAC_64BIT 290 typedef drflac_uint64 drflac_cache_t; 291 #else 292 typedef drflac_uint32 drflac_cache_t; 293 #endif 294 295 /* The various metadata block types. */ 296 #define DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO 0 297 #define DRFLAC_METADATA_BLOCK_TYPE_PADDING 1 298 #define DRFLAC_METADATA_BLOCK_TYPE_APPLICATION 2 299 #define DRFLAC_METADATA_BLOCK_TYPE_SEEKTABLE 3 300 #define DRFLAC_METADATA_BLOCK_TYPE_VORBIS_COMMENT 4 301 #define DRFLAC_METADATA_BLOCK_TYPE_CUESHEET 5 302 #define DRFLAC_METADATA_BLOCK_TYPE_PICTURE 6 303 #define DRFLAC_METADATA_BLOCK_TYPE_INVALID 127 304 305 /* The various picture types specified in the PICTURE block. */ 306 #define DRFLAC_PICTURE_TYPE_OTHER 0 307 #define DRFLAC_PICTURE_TYPE_FILE_ICON 1 308 #define DRFLAC_PICTURE_TYPE_OTHER_FILE_ICON 2 309 #define DRFLAC_PICTURE_TYPE_COVER_FRONT 3 310 #define DRFLAC_PICTURE_TYPE_COVER_BACK 4 311 #define DRFLAC_PICTURE_TYPE_LEAFLET_PAGE 5 312 #define DRFLAC_PICTURE_TYPE_MEDIA 6 313 #define DRFLAC_PICTURE_TYPE_LEAD_ARTIST 7 314 #define DRFLAC_PICTURE_TYPE_ARTIST 8 315 #define DRFLAC_PICTURE_TYPE_CONDUCTOR 9 316 #define DRFLAC_PICTURE_TYPE_BAND 10 317 #define DRFLAC_PICTURE_TYPE_COMPOSER 11 318 #define DRFLAC_PICTURE_TYPE_LYRICIST 12 319 #define DRFLAC_PICTURE_TYPE_RECORDING_LOCATION 13 320 #define DRFLAC_PICTURE_TYPE_DURING_RECORDING 14 321 #define DRFLAC_PICTURE_TYPE_DURING_PERFORMANCE 15 322 #define DRFLAC_PICTURE_TYPE_SCREEN_CAPTURE 16 323 #define DRFLAC_PICTURE_TYPE_BRIGHT_COLORED_FISH 17 324 #define DRFLAC_PICTURE_TYPE_ILLUSTRATION 18 325 #define DRFLAC_PICTURE_TYPE_BAND_LOGOTYPE 19 326 #define DRFLAC_PICTURE_TYPE_PUBLISHER_LOGOTYPE 20 327 /* The container a stream is stored in. */ 328 typedef enum 329 { 330 drflac_container_native, 331 drflac_container_ogg, 332 drflac_container_unknown 333 } drflac_container; 334 /* The position a seek offset is measured from. */ 335 typedef enum 336 { 337 drflac_seek_origin_start, 338 drflac_seek_origin_current 339 } 
drflac_seek_origin; 340 341 /* Packing is important on this structure because we map this directly to the raw data within the SEEKTABLE metadata block. */ 342 #pragma pack(2) 343 typedef struct 344 { 345 drflac_uint64 firstPCMFrame; 346 drflac_uint64 flacFrameOffset; /* The offset from the first byte of the header of the first frame. */ 347 drflac_uint16 pcmFrameCount; 348 } drflac_seekpoint; 349 #pragma pack() 350 /* The fields of a STREAMINFO metadata block. NOTE(review): the FLAC format stores the minimum and maximum frame size in bytes - confirm whether the InPCMFrames suffix on the two FrameSize members is accurate. */ 351 typedef struct 352 { 353 drflac_uint16 minBlockSizeInPCMFrames; 354 drflac_uint16 maxBlockSizeInPCMFrames; 355 drflac_uint32 minFrameSizeInPCMFrames; 356 drflac_uint32 maxFrameSizeInPCMFrames; 357 drflac_uint32 sampleRate; 358 drflac_uint8 channels; 359 drflac_uint8 bitsPerSample; 360 drflac_uint64 totalPCMFrameCount; 361 drflac_uint8 md5[16]; 362 } drflac_streaminfo; 363 /* A metadata block in the form handed to the drflac_meta_proc callback. */ 364 typedef struct 365 { 366 /* The metadata type. Use this to know how to interpret the data below. */ 367 drflac_uint32 type; 368 369 /* 370 A pointer to the raw data. This points to a temporary buffer so don't hold on to it. It's best to 371 not modify the contents of this buffer. Use the structures below for more meaningful and structured 372 information about the metadata. It's possible for this to be null. 373 */ 374 const void* pRawData; 375 376 /* The size in bytes of the block and the buffer pointed to by pRawData if it's non-NULL. 
*/ 377 drflac_uint32 rawDataSize; 378 /* Structured view of the block. Which member applies depends on the type member. */ 379 union 380 { 381 drflac_streaminfo streaminfo; 382 383 struct 384 { 385 int unused; 386 } padding; 387 388 struct 389 { 390 drflac_uint32 id; 391 const void* pData; 392 drflac_uint32 dataSize; 393 } application; 394 395 struct 396 { 397 drflac_uint32 seekpointCount; 398 const drflac_seekpoint* pSeekpoints; 399 } seektable; 400 401 struct 402 { 403 drflac_uint32 vendorLength; 404 const char* vendor; 405 drflac_uint32 commentCount; 406 const void* pComments; 407 } vorbis_comment; 408 409 struct 410 { 411 char catalog[128]; 412 drflac_uint64 leadInSampleCount; 413 drflac_bool32 isCD; 414 drflac_uint8 trackCount; 415 const void* pTrackData; 416 } cuesheet; 417 418 struct 419 { 420 drflac_uint32 type; 421 drflac_uint32 mimeLength; 422 const char* mime; 423 drflac_uint32 descriptionLength; 424 const char* description; 425 drflac_uint32 width; 426 drflac_uint32 height; 427 drflac_uint32 colorDepth; 428 drflac_uint32 indexColorCount; 429 drflac_uint32 pictureDataSize; 430 const drflac_uint8* pPictureData; 431 } picture; 432 } data; 433 } drflac_metadata; 434 435 436 /* 437 Callback for when data needs to be read from the client. 438 439 pUserData [in] The user data that was passed to drflac_open() and family. 440 pBufferOut [out] The output buffer. 441 bytesToRead [in] The number of bytes to read. 442 443 Returns the number of bytes actually read. 444 445 A return value of less than bytesToRead indicates the end of the stream. Do _not_ return from this callback until 446 either the entire bytesToRead is filled or you have reached the end of the stream. 447 */ 448 typedef size_t (* drflac_read_proc)(void* pUserData, void* pBufferOut, size_t bytesToRead); 449 450 /* 451 Callback for when data needs to be seeked. 452 453 pUserData [in] The user data that was passed to drflac_open() and family. 454 offset [in] The number of bytes to move, relative to the origin. Will never be negative. 
455 origin [in] The origin of the seek - the current position or the start of the stream. 456 457 Returns whether or not the seek was successful. 458 459 The offset will never be negative. Whether or not it is relative to the beginning or current position is determined 460 by the "origin" parameter which will be either drflac_seek_origin_start or drflac_seek_origin_current. 461 462 When seeking to a PCM frame using drflac_seek_to_pcm_frame(), dr_flac may call this with an offset beyond the end of 463 the FLAC stream. This needs to be detected and handled by returning DRFLAC_FALSE. 464 */ 465 typedef drflac_bool32 (* drflac_seek_proc)(void* pUserData, int offset, drflac_seek_origin origin); 466 467 /* 468 Callback for when a metadata block is read. 469 470 pUserData [in] The user data that was passed to drflac_open() and family. 471 pMetadata [in] A pointer to a structure containing the data of the metadata block. 472 473 Use pMetadata->type to determine which metadata block is being handled and how to read the data. 474 */ 475 typedef void (* drflac_meta_proc)(void* pUserData, drflac_metadata* pMetadata); 476 477 /* Client-defined memory allocation callbacks. pUserData is handed back as the last argument of each callback. */ 478 typedef struct 479 { 480 void* pUserData; 481 void* (* onMalloc)(size_t sz, void* pUserData); 482 void* (* onRealloc)(void* p, size_t sz, void* pUserData); 483 void (* onFree)(void* p, void* pUserData); 484 } drflac_allocation_callbacks; 485 486 /* Structure for internal use. Only used for decoders opened with drflac_open_memory. */ 487 typedef struct 488 { 489 const drflac_uint8* data; 490 size_t dataSize; 491 size_t currentReadPos; 492 } drflac__memory_stream; 493 494 /* Structure for internal use. Used for bit streaming. */ 495 typedef struct 496 { 497 /* The function to call when more data needs to be read. */ 498 drflac_read_proc onRead; 499 500 /* The function to call when the current read position needs to be moved. */ 501 drflac_seek_proc onSeek; 502 503 /* The user data to pass around to onRead and onSeek. 
*/ 504 void* pUserData; 505 506 507 /* 508 The number of unaligned bytes in the L2 cache. This will always be 0 until the end of the stream is hit. At the end of the 509 stream there will be a number of bytes that don't cleanly fit in an L1 cache line, so we use this variable to know whether 510 or not the bitstreamer needs to run on a slower path to read those last bytes. This will never be more than sizeof(drflac_cache_t). 511 */ 512 size_t unalignedByteCount; 513 514 /* The content of the unaligned bytes. */ 515 drflac_cache_t unalignedCache; 516 517 /* The index of the next valid cache line in the "L2" cache. */ 518 drflac_uint32 nextL2Line; 519 520 /* The number of bits that have been consumed by the cache. This is used to determine how many valid bits are remaining. */ 521 drflac_uint32 consumedBits; 522 523 /* 524 The cached data which was most recently read from the client. There are two levels of cache. Data flows as such: 525 Client -> L2 -> L1. The L2 -> L1 movement is aligned and runs on a fast path in just a few instructions. 526 */ 527 drflac_cache_t cacheL2[DR_FLAC_BUFFER_SIZE/sizeof(drflac_cache_t)]; 528 drflac_cache_t cache; 529 530 /* 531 CRC-16. This is updated whenever bits are read from the bit stream. Manually set this to 0 to reset the CRC. For FLAC, this 532 is reset to 0 at the beginning of each frame. 533 */ 534 drflac_uint16 crc16; 535 drflac_cache_t crc16Cache; /* A cache for optimizing CRC calculations. This is filled when the L1 cache is reloaded. */ 536 drflac_uint32 crc16CacheIgnoredBytes; /* The number of bytes to ignore when updating the CRC-16 from the CRC-16 cache. */ 537 } drflac_bs; 538 /* A single sub-frame. A frame holds one of these for each channel. */ 539 typedef struct 540 { 541 /* The type of the subframe: SUBFRAME_CONSTANT, SUBFRAME_VERBATIM, SUBFRAME_FIXED or SUBFRAME_LPC. */ 542 drflac_uint8 subframeType; 543 544 /* The number of wasted bits per sample as specified by the sub-frame header. 
*/ 545 drflac_uint8 wastedBitsPerSample; 546 547 /* The order to use for the prediction stage for SUBFRAME_FIXED and SUBFRAME_LPC. */ 548 drflac_uint8 lpcOrder; 549 550 /* A pointer to the buffer containing the decoded samples in the subframe. This pointer is an offset from drflac::pExtraData. */ 551 drflac_int32* pSamplesS32; 552 } drflac_subframe; 553 /* The header of a FLAC frame. */ 554 typedef struct 555 { 556 /* 557 If the stream uses variable block sizes, this will be set to the index of the first PCM frame. If fixed block sizes are used, this will 558 always be set to 0. 559 */ 560 drflac_uint64 pcmFrameNumber; 561 562 /* If the stream uses fixed block sizes, this will be set to the frame number. If variable block sizes are used, this will always be 0. */ 563 drflac_uint32 flacFrameNumber; 564 565 /* The sample rate of this frame. */ 566 drflac_uint32 sampleRate; 567 568 /* The number of PCM frames in each sub-frame within this frame. */ 569 drflac_uint16 blockSizeInPCMFrames; 570 571 /* 572 The channel assignment of this frame. This is not always set to the channel count. If interchannel decorrelation is being used this 573 will be set to DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE, DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE or DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE. 574 */ 575 drflac_uint8 channelAssignment; 576 577 /* The number of bits per sample within this frame. */ 578 drflac_uint8 bitsPerSample; 579 580 /* The frame's CRC. */ 581 drflac_uint8 crc8; 582 } drflac_frame_header; 583 /* A FLAC frame: its header, the read progress through it and its sub-frames. */ 584 typedef struct 585 { 586 /* The header. */ 587 drflac_frame_header header; 588 589 /* 590 The number of PCM frames left to be read in this FLAC frame. This is initially set to the block size. As PCM frames are read, 591 this will be decremented. When it reaches 0, the decoder will see this frame as fully consumed and load the next frame. 592 */ 593 drflac_uint32 pcmFramesRemaining; 594 595 /* The list of sub-frames within the frame. There is one sub-frame for each channel, and there's a maximum of 8 channels. 
*/ 596 drflac_subframe subframes[8]; 597 } drflac_frame; 598 /* The decoder object. The usage notes describe it as a transparent type: its members may be read by the application but should not be changed. */ 599 typedef struct 600 { 601 /* The function to call when a metadata block is read. */ 602 drflac_meta_proc onMeta; 603 604 /* The user data posted to the metadata callback function. */ 605 void* pUserDataMD; 606 607 /* Memory allocation callbacks. */ 608 drflac_allocation_callbacks allocationCallbacks; 609 610 611 /* The sample rate. Will be set to something like 44100. */ 612 drflac_uint32 sampleRate; 613 614 /* 615 The number of channels. This will be set to 1 for monaural streams, 2 for stereo, etc. Maximum 8. This is set based on the 616 value specified in the STREAMINFO block. 617 */ 618 drflac_uint8 channels; 619 620 /* The bits per sample. Will be set to something like 16, 24, etc. */ 621 drflac_uint8 bitsPerSample; 622 623 /* The maximum block size, in PCM frames. This number represents the number of samples in each channel (not combined). */ 624 drflac_uint16 maxBlockSizeInPCMFrames; 625 626 /* 627 The total number of PCM Frames making up the stream. Can be 0 in which case it's still a valid stream, but just means 628 the total PCM frame count is unknown. Likely the case with streams like internet radio. 629 */ 630 drflac_uint64 totalPCMFrameCount; 631 632 633 /* The container type. This is set based on whether or not the decoder was opened from a native or Ogg stream. */ 634 drflac_container container; 635 636 /* The number of seekpoints in the seektable. */ 637 drflac_uint32 seekpointCount; 638 639 640 /* Information about the frame the decoder is currently sitting on. */ 641 drflac_frame currentFLACFrame; 642 643 644 /* The index of the PCM frame the decoder is currently sitting on. This is only used for seeking. */ 645 drflac_uint64 currentPCMFrame; 646 647 /* The position of the first FLAC frame in the stream. This is only ever used for seeking. */ 648 drflac_uint64 firstFLACFramePosInBytes; 649 650 651 /* A hack to avoid a malloc() when opening a decoder with drflac_open_memory(). 
*/ 652 drflac__memory_stream memoryStream; 653 654 655 /* A pointer to the decoded sample data. This is an offset of pExtraData. */ 656 drflac_int32* pDecodedSamples; 657 658 /* A pointer to the seek table. This is an offset of pExtraData, or NULL if there is no seek table. */ 659 drflac_seekpoint* pSeekpoints; 660 661 /* Internal use only. Only used with Ogg containers. Points to a drflac_oggbs object. This is an offset of pExtraData. */ 662 void* _oggbs; 663 664 /* Internal use only. Used for profiling and testing different seeking modes. */ 665 drflac_bool32 _noSeekTableSeek : 1; 666 drflac_bool32 _noBinarySearchSeek : 1; 667 drflac_bool32 _noBruteForceSeek : 1; 668 669 /* The bit streamer. The raw FLAC data is fed through this object. */ 670 drflac_bs bs; 671 672 /* Variable length extra data. We attach this to the end of the object so we can avoid unnecessary mallocs. */ 673 drflac_uint8 pExtraData[1]; 674 } drflac; 675 676 /* 677 Opens a FLAC decoder. 678 679 onRead [in] The function to call when data needs to be read from the client. 680 onSeek [in] The function to call when the read position of the client data needs to move. 681 pUserData [in, optional] A pointer to application defined data that will be passed to onRead and onSeek. 682 pAllocationCallbacks [in, optional] A pointer to application defined callbacks for managing memory allocations. 683 684 Returns a pointer to an object representing the decoder. 685 686 Close the decoder with drflac_close(). 687 688 pAllocationCallbacks can be NULL in which case it will use DRFLAC_MALLOC, DRFLAC_REALLOC and DRFLAC_FREE. 689 690 This function will automatically detect whether or not you are attempting to open a native or Ogg encapsulated 691 FLAC, both of which should work seamlessly without any manual intervention. Ogg encapsulation also works with 692 multiplexed streams which basically means it can play FLAC encoded audio tracks in videos. 693 694 This is the lowest level function for opening a FLAC stream. 
You can also use drflac_open_file() and drflac_open_memory() 695 to open the stream from a file or from a block of memory respectively. 696 697 The STREAMINFO block must be present for this to succeed. Use drflac_open_relaxed() to open a FLAC stream where 698 the header may not be present. 699 700 See also: drflac_open_file(), drflac_open_memory(), drflac_open_with_metadata(), drflac_close() 701 */ 702 drflac* drflac_open(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks); 703 704 /* 705 The same as drflac_open(), except attempts to open the stream even when a header block is not present. 706 container [in] The container of the stream, either drflac_container_native or drflac_container_ogg. 707 Because the header is not necessarily available, the caller must explicitly define the container (Native or Ogg). Do 708 not set this to drflac_container_unknown - that is for internal use only. 709 710 Opening in relaxed mode will continue reading data from onRead until it finds a valid frame. If a frame is never 711 found it will continue forever. To abort, force your onRead callback to return 0, which dr_flac will use as an 712 indicator that the end of the stream was found. 713 */ 714 drflac* drflac_open_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks); 715 716 /* 717 Opens a FLAC decoder and notifies the caller of the metadata chunks (album art, etc.). 718 719 onRead [in] The function to call when data needs to be read from the client. 720 onSeek [in] The function to call when the read position of the client data needs to move. 721 onMeta [in] The function to call for every metadata block. 722 pUserData [in, optional] A pointer to application defined data that will be passed to onRead, onSeek and onMeta. 723 pAllocationCallbacks [in, optional] A pointer to application defined callbacks for managing memory allocations. 724 725 Returns a pointer to an object representing the decoder. 
726 727 Close the decoder with drflac_close(). 728 729 pAllocationCallbacks can be NULL in which case it will use DRFLAC_MALLOC, DRFLAC_REALLOC and DRFLAC_FREE. 730 731 This is slower than drflac_open(), so avoid this one if you don't need metadata. Internally, this will allocate and free 732 memory on the heap for every metadata block except for STREAMINFO and PADDING blocks. 733 734 The caller is notified of the metadata via the onMeta callback. All metadata blocks will be handled before the function 735 returns. 736 737 The STREAMINFO block must be present for this to succeed. Use drflac_open_with_metadata_relaxed() to open a FLAC 738 stream where the header may not be present. 739 740 Note that this will behave inconsistently with drflac_open() if the stream is an Ogg encapsulated stream and a metadata 741 block is corrupted. This is due to the way the Ogg stream recovers from corrupted pages. When drflac_open_with_metadata() 742 is being used, the open routine will try to read the contents of the metadata block, whereas drflac_open() will simply 743 seek past it (for the sake of efficiency). This inconsistency can result in different samples being returned depending on 744 whether or not the stream is being opened with metadata. 745 746 See also: drflac_open_file_with_metadata(), drflac_open_memory_with_metadata(), drflac_open(), drflac_close() 747 */ 748 drflac* drflac_open_with_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks); 749 750 /* 751 The same as drflac_open_with_metadata(), except attempts to open the stream even when a header block is not present. 
752 753 See also: drflac_open_with_metadata(), drflac_open_relaxed() 754 */ 755 drflac* drflac_open_with_metadata_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks); 756 757 /* 758 Closes the given FLAC decoder. 759 760 pFlac [in] The decoder to close. 761 762 This will destroy the decoder object. 763 */ 764 void drflac_close(drflac* pFlac); 765 766 767 /* 768 Reads sample data from the given FLAC decoder, output as interleaved signed 32-bit PCM. 769 770 pFlac [in] The decoder. 771 framesToRead [in] The number of PCM frames to read. 772 pBufferOut [out, optional] A pointer to the buffer that will receive the decoded samples. 773 774 Returns the number of PCM frames actually read. 775 776 pBufferOut can be null, in which case the call will act as a seek, and the return value will be the number of frames 777 seeked. When pBufferOut is not null it needs room for framesToRead * pFlac->channels samples, which is how the decoding example in the usage notes of this file sizes its buffer. 778 */ 779 drflac_uint64 drflac_read_pcm_frames_s32(drflac* pFlac, drflac_uint64 framesToRead, drflac_int32* pBufferOut); 780 781 /* 782 Same as drflac_read_pcm_frames_s32(), except outputs samples as 16-bit integer PCM rather than 32-bit. 783 784 Note that this is lossy for streams where the bits per sample is larger than 16. 785 */ 786 drflac_uint64 drflac_read_pcm_frames_s16(drflac* pFlac, drflac_uint64 framesToRead, drflac_int16* pBufferOut); 787 788 /* 789 Same as drflac_read_pcm_frames_s32(), except outputs samples as 32-bit floating-point PCM. 790 791 Note that this should be considered lossy due to the nature of floating point numbers not being able to exactly 792 represent every possible number. 793 */ 794 drflac_uint64 drflac_read_pcm_frames_f32(drflac* pFlac, drflac_uint64 framesToRead, float* pBufferOut); 795 796 /* 797 Seeks to the PCM frame at the given index. 798 799 pFlac [in] The decoder. 800 pcmFrameIndex [in] The index of the PCM frame to seek to. See notes below. 
801 802 Returns DRFLAC_TRUE if successful; DRFLAC_FALSE otherwise. 803 */ 804 drflac_bool32 drflac_seek_to_pcm_frame(drflac* pFlac, drflac_uint64 pcmFrameIndex); 805 806 807 808 #ifndef DR_FLAC_NO_STDIO 809 /* 810 Opens a FLAC decoder from the file at the given path. 811 812 filename [in] The path of the file to open, either absolute or relative to the current directory. 813 pAllocationCallbacks [in, optional] A pointer to application defined callbacks for managing memory allocations. 814 815 Returns a pointer to an object representing the decoder. 816 817 Close the decoder with drflac_close(). 818 819 This will hold a handle to the file until the decoder is closed with drflac_close(). Some platforms will restrict the 820 number of files a process can have open at any given time, so keep this mind if you have many decoders open at the 821 same time. 822 823 See also: drflac_open(), drflac_open_file_with_metadata(), drflac_close() 824 */ 825 drflac* drflac_open_file(const char* filename, const drflac_allocation_callbacks* pAllocationCallbacks); 826 827 /* 828 Opens a FLAC decoder from the file at the given path and notifies the caller of the metadata chunks (album art, etc.) 829 830 Look at the documentation for drflac_open_with_metadata() for more information on how metadata is handled. 831 */ 832 drflac* drflac_open_file_with_metadata(const char* filename, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks); 833 #endif 834 835 /* 836 Opens a FLAC decoder from a pre-allocated block of memory 837 838 This does not create a copy of the data. It is up to the application to ensure the buffer remains valid for 839 the lifetime of the decoder. 840 */ 841 drflac* drflac_open_memory(const void* data, size_t dataSize, const drflac_allocation_callbacks* pAllocationCallbacks); 842 843 /* 844 Opens a FLAC decoder from a pre-allocated block of memory and notifies the caller of the metadata chunks (album art, etc.) 
845 846 Look at the documentation for drflac_open_with_metadata() for more information on how metadata is handled. 847 */ 848 drflac* drflac_open_memory_with_metadata(const void* data, size_t dataSize, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks); 849 850 851 852 /* High Level APIs */ 853 854 /* 855 Opens a FLAC stream from the given callbacks and fully decodes it in a single operation. The return value is a 856 pointer to the sample data as interleaved signed 32-bit PCM. The returned data must be freed with drflac_free(). 857 858 You can pass in custom memory allocation callbacks via the pAllocationCallbacks parameter. This can be NULL in which 859 case it will use DRFLAC_MALLOC, DRFLAC_REALLOC and DRFLAC_FREE. 860 861 Sometimes a FLAC file won't keep track of the total sample count. In this situation the function will continuously 862 read samples into a dynamically sized buffer on the heap until no samples are left. 863 864 Do not call this function on a broadcast type of stream (like internet radio streams and whatnot). 865 */ 866 drflac_int32* drflac_open_and_read_pcm_frames_s32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks); 867 868 /* Same as drflac_open_and_read_pcm_frames_s32(), except returns signed 16-bit integer samples. */ 869 drflac_int16* drflac_open_and_read_pcm_frames_s16(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks); 870 871 /* Same as drflac_open_and_read_pcm_frames_s32(), except returns 32-bit floating-point samples. 
*/ 872 float* drflac_open_and_read_pcm_frames_f32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks); 873 874 #ifndef DR_FLAC_NO_STDIO 875 /* Same as drflac_open_and_read_pcm_frames_s32() except opens the decoder from a file. */ 876 drflac_int32* drflac_open_file_and_read_pcm_frames_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks); 877 878 /* Same as drflac_open_file_and_read_pcm_frames_s32(), except returns signed 16-bit integer samples. */ 879 drflac_int16* drflac_open_file_and_read_pcm_frames_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks); 880 881 /* Same as drflac_open_file_and_read_pcm_frames_s32(), except returns 32-bit floating-point samples. */ 882 float* drflac_open_file_and_read_pcm_frames_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks); 883 #endif 884 885 /* Same as drflac_open_and_read_pcm_frames_s32() except opens the decoder from a block of memory. */ 886 drflac_int32* drflac_open_memory_and_read_pcm_frames_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks); 887 888 /* Same as drflac_open_memory_and_read_pcm_frames_s32(), except returns signed 16-bit integer samples. 
*/ 889 drflac_int16* drflac_open_memory_and_read_pcm_frames_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks); 890 891 /* Same as drflac_open_memory_and_read_pcm_frames_s32(), except returns 32-bit floating-point samples. */ 892 float* drflac_open_memory_and_read_pcm_frames_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks); 893 894 /* 895 Frees memory that was allocated internally by dr_flac. 896 897 Set pAllocationCallbacks to the same object that was passed to drflac_open_*_and_read_pcm_frames_*(). If you originally passed in NULL, pass in NULL for this. 898 */ 899 void drflac_free(void* p, const drflac_allocation_callbacks* pAllocationCallbacks); 900 901 902 /* Structure representing an iterator for vorbis comments in a VORBIS_COMMENT metadata block. */ 903 typedef struct 904 { 905 drflac_uint32 countRemaining; 906 const char* pRunningData; 907 } drflac_vorbis_comment_iterator; 908 909 /* 910 Initializes a vorbis comment iterator. This can be used for iterating over the vorbis comments in a VORBIS_COMMENT 911 metadata block. 912 */ 913 void drflac_init_vorbis_comment_iterator(drflac_vorbis_comment_iterator* pIter, drflac_uint32 commentCount, const void* pComments); 914 915 /* 916 Goes to the next vorbis comment in the given iterator. If null is returned it means there are no more comments. The 917 returned string is NOT null terminated. 918 */ 919 const char* drflac_next_vorbis_comment(drflac_vorbis_comment_iterator* pIter, drflac_uint32* pCommentLengthOut); 920 921 922 /* Structure representing an iterator for cuesheet tracks in a CUESHEET metadata block. 
*/ 923 typedef struct 924 { 925 drflac_uint32 countRemaining; 926 const char* pRunningData; 927 } drflac_cuesheet_track_iterator; 928 929 /* Packing is important on this structure because we map this directly to the raw data within the CUESHEET metadata block. */ 930 #pragma pack(4) 931 typedef struct 932 { 933 drflac_uint64 offset; 934 drflac_uint8 index; 935 drflac_uint8 reserved[3]; 936 } drflac_cuesheet_track_index; 937 #pragma pack() 938 939 typedef struct 940 { 941 drflac_uint64 offset; 942 drflac_uint8 trackNumber; 943 char ISRC[12]; 944 drflac_bool8 isAudio; 945 drflac_bool8 preEmphasis; 946 drflac_uint8 indexCount; 947 const drflac_cuesheet_track_index* pIndexPoints; 948 } drflac_cuesheet_track; 949 950 /* 951 Initializes a cuesheet track iterator. This can be used for iterating over the cuesheet tracks in a CUESHEET metadata 952 block. 953 */ 954 void drflac_init_cuesheet_track_iterator(drflac_cuesheet_track_iterator* pIter, drflac_uint32 trackCount, const void* pTrackData); 955 956 /* Goes to the next cuesheet track in the given iterator. If DRFLAC_FALSE is returned it means there are no more tracks.
*/ 957 drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterator* pIter, drflac_cuesheet_track* pCuesheetTrack); 958 959 960 #ifdef __cplusplus 961 } 962 #endif 963 #endif /* dr_flac_h */ 964 965 966 /************************************************************************************************************************************************************ 967 ************************************************************************************************************************************************************ 968 969 IMPLEMENTATION 970 971 ************************************************************************************************************************************************************ 972 ************************************************************************************************************************************************************/ 973 #ifdef DR_FLAC_IMPLEMENTATION 974 975 /* Disable some annoying warnings. */ 976 #if defined(__GNUC__) 977 #pragma GCC diagnostic push 978 #if __GNUC__ >= 7 979 #pragma GCC diagnostic ignored "-Wimplicit-fallthrough" 980 #endif 981 #endif 982 983 #ifdef __linux__ 984 #ifndef _BSD_SOURCE 985 #define _BSD_SOURCE 986 #endif 987 #ifndef __USE_BSD 988 #define __USE_BSD 989 #endif 990 #include <endian.h> 991 #endif 992 993 #include <stdlib.h> 994 #include <string.h> 995 996 #ifdef _MSC_VER 997 #define DRFLAC_INLINE __forceinline 998 #elif defined(__GNUC__) 999 /* 1000 I've had a bug report where GCC is emitting warnings about functions possibly not being inlineable. This warning happens when 1001 the __attribute__((always_inline)) attribute is defined without an "inline" statement. I think therefore there must be some 1002 case where "__inline__" is not always defined, thus the compiler emitting these warnings. When using -std=c89 or -ansi on the 1003 command line, we cannot use the "inline" keyword and instead need to use "__inline__". 
In an attempt to work around this issue 1004 I am using "__inline__" only when we're compiling in strict ANSI mode. 1005 */ 1006 #if defined(__STRICT_ANSI__) 1007 #define DRFLAC_INLINE __inline__ __attribute__((always_inline)) 1008 #else 1009 #define DRFLAC_INLINE inline __attribute__((always_inline)) 1010 #endif 1011 #else 1012 #define DRFLAC_INLINE 1013 #endif 1014 1015 /* CPU architecture. */ 1016 #if defined(__x86_64__) || defined(_M_X64) 1017 #define DRFLAC_X64 1018 #elif defined(__i386) || defined(_M_IX86) 1019 #define DRFLAC_X86 1020 #elif defined(__arm__) || defined(_M_ARM) 1021 #define DRFLAC_ARM 1022 #endif 1023 1024 /* Intrinsics Support */ 1025 #if !defined(DR_FLAC_NO_SIMD) 1026 #if defined(DRFLAC_X64) || defined(DRFLAC_X86) 1027 #if defined(_MSC_VER) && !defined(__clang__) 1028 /* MSVC. */ 1029 #if _MSC_VER >= 1400 && !defined(DRFLAC_NO_SSE2) /* 2005 */ 1030 #define DRFLAC_SUPPORT_SSE2 1031 #endif 1032 #if _MSC_VER >= 1600 && !defined(DRFLAC_NO_SSE41) /* 2010 */ 1033 #define DRFLAC_SUPPORT_SSE41 1034 #endif 1035 #else 1036 /* Assume GNUC-style. */ 1037 #if defined(__SSE2__) && !defined(DRFLAC_NO_SSE2) 1038 #define DRFLAC_SUPPORT_SSE2 1039 #endif 1040 #if defined(__SSE4_1__) && !defined(DRFLAC_NO_SSE41) 1041 #define DRFLAC_SUPPORT_SSE41 1042 #endif 1043 #endif 1044 1045 /* If at this point we still haven't determined compiler support for the intrinsics just fall back to __has_include. 
*/ 1046 #if !defined(__GNUC__) && !defined(__clang__) && defined(__has_include) 1047 #if !defined(DRFLAC_SUPPORT_SSE2) && !defined(DRFLAC_NO_SSE2) && __has_include(<emmintrin.h>) 1048 #define DRFLAC_SUPPORT_SSE2 1049 #endif 1050 #if !defined(DRFLAC_SUPPORT_SSE41) && !defined(DRFLAC_NO_SSE41) && __has_include(<smmintrin.h>) 1051 #define DRFLAC_SUPPORT_SSE41 1052 #endif 1053 #endif 1054 1055 #if defined(DRFLAC_SUPPORT_SSE41) 1056 #include <smmintrin.h> 1057 #elif defined(DRFLAC_SUPPORT_SSE2) 1058 #include <emmintrin.h> 1059 #endif 1060 #endif 1061 1062 #if defined(DRFLAC_ARM) 1063 #if !defined(DRFLAC_NO_NEON) && (defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM64)) 1064 #define DRFLAC_SUPPORT_NEON 1065 #endif 1066 1067 /* Fall back to looking for the #include file. */ 1068 #if !defined(__GNUC__) && !defined(__clang__) && defined(__has_include) 1069 #if !defined(DRFLAC_SUPPORT_NEON) && !defined(DRFLAC_NO_NEON) && __has_include(<arm_neon.h>) 1070 #define DRFLAC_SUPPORT_NEON 1071 #endif 1072 #endif 1073 1074 #if defined(DRFLAC_SUPPORT_NEON) 1075 #include <arm_neon.h> 1076 #endif 1077 #endif 1078 #endif 1079 1080 /* Compile-time CPU feature support. */ 1081 #if !defined(DR_FLAC_NO_SIMD) && (defined(DRFLAC_X86) || defined(DRFLAC_X64)) 1082 #if defined(_MSC_VER) && !defined(__clang__) 1083 #if _MSC_VER >= 1400 1084 #include <intrin.h> 1085 static void drflac__cpuid(int info[4], int fid) 1086 { 1087 __cpuid(info, fid); 1088 } 1089 #else 1090 #define DRFLAC_NO_CPUID 1091 #endif 1092 #else 1093 #if defined(__GNUC__) || defined(__clang__) 1094 static void drflac__cpuid(int info[4], int fid) 1095 { 1096 /* 1097 It looks like the -fPIC option uses the ebx register which GCC complains about. We can work around this by just using a different register, the 1098 specific register of which I'm letting the compiler decide on. The "k" prefix is used to specify a 32-bit register. The {...} syntax is for 1099 supporting different assembly dialects. 
1100 1101 What's basically happening is that we're saving and restoring the ebx register manually. 1102 */ 1103 #if defined(DRFLAC_X86) && defined(__PIC__) 1104 __asm__ __volatile__ ( 1105 "xchg{l} {%%}ebx, %k1;" 1106 "cpuid;" 1107 "xchg{l} {%%}ebx, %k1;" 1108 : "=a"(info[0]), "=&r"(info[1]), "=c"(info[2]), "=d"(info[3]) : "a"(fid), "c"(0) 1109 ); 1110 #else 1111 __asm__ __volatile__ ( 1112 "cpuid" : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3]) : "a"(fid), "c"(0) 1113 ); 1114 #endif 1115 } 1116 #else 1117 #define DRFLAC_NO_CPUID 1118 #endif 1119 #endif 1120 #else 1121 #define DRFLAC_NO_CPUID 1122 #endif 1123 1124 static DRFLAC_INLINE drflac_bool32 drflac_has_sse2() 1125 { 1126 #if defined(DRFLAC_SUPPORT_SSE2) 1127 #if (defined(DRFLAC_X64) || defined(DRFLAC_X86)) && !defined(DRFLAC_NO_SSE2) 1128 #if defined(DRFLAC_X64) 1129 return DRFLAC_TRUE; /* 64-bit targets always support SSE2. */ 1130 #elif (defined(_M_IX86_FP) && _M_IX86_FP == 2) || defined(__SSE2__) 1131 return DRFLAC_TRUE; /* If the compiler is allowed to freely generate SSE2 code we can assume support. */ 1132 #else 1133 #if defined(DRFLAC_NO_CPUID) 1134 return DRFLAC_FALSE; 1135 #else 1136 int info[4]; 1137 drflac__cpuid(info, 1); 1138 return (info[3] & (1 << 26)) != 0; 1139 #endif 1140 #endif 1141 #else 1142 return DRFLAC_FALSE; /* SSE2 is only supported on x86 and x64 architectures. */ 1143 #endif 1144 #else 1145 return DRFLAC_FALSE; /* No compiler support. */ 1146 #endif 1147 } 1148 1149 static DRFLAC_INLINE drflac_bool32 drflac_has_sse41() 1150 { 1151 #if defined(DRFLAC_SUPPORT_SSE41) 1152 #if (defined(DRFLAC_X64) || defined(DRFLAC_X86)) && !defined(DRFLAC_NO_SSE41) 1153 #if defined(DRFLAC_X64) 1154 return DRFLAC_TRUE; /* 64-bit targets always support SSE4.1. */ 1155 #elif (defined(_M_IX86_FP) && _M_IX86_FP == 2) || defined(__SSE4_1__) 1156 return DRFLAC_TRUE; /* If the compiler is allowed to freely generate SSE41 code we can assume support. 
*/ 1157 #else 1158 #if defined(DRFLAC_NO_CPUID) 1159 return DRFLAC_FALSE; 1160 #else 1161 int info[4]; 1162 drflac__cpuid(info, 1); 1163 return (info[2] & (1 << 19)) != 0; 1164 #endif 1165 #endif 1166 #else 1167 return DRFLAC_FALSE; /* SSE41 is only supported on x86 and x64 architectures. */ 1168 #endif 1169 #else 1170 return DRFLAC_FALSE; /* No compiler support. */ 1171 #endif 1172 } 1173 1174 1175 #if defined(_MSC_VER) && _MSC_VER >= 1500 && (defined(DRFLAC_X86) || defined(DRFLAC_X64)) 1176 #define DRFLAC_HAS_LZCNT_INTRINSIC 1177 #elif (defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7))) 1178 #define DRFLAC_HAS_LZCNT_INTRINSIC 1179 #elif defined(__clang__) 1180 #if defined(__has_builtin) 1181 #if __has_builtin(__builtin_clzll) || __has_builtin(__builtin_clzl) 1182 #define DRFLAC_HAS_LZCNT_INTRINSIC 1183 #endif 1184 #endif 1185 #endif 1186 1187 #if defined(_MSC_VER) && _MSC_VER >= 1300 1188 #define DRFLAC_HAS_BYTESWAP16_INTRINSIC 1189 #define DRFLAC_HAS_BYTESWAP32_INTRINSIC 1190 #define DRFLAC_HAS_BYTESWAP64_INTRINSIC 1191 #elif defined(__clang__) 1192 #if defined(__has_builtin) 1193 #if __has_builtin(__builtin_bswap16) 1194 #define DRFLAC_HAS_BYTESWAP16_INTRINSIC 1195 #endif 1196 #if __has_builtin(__builtin_bswap32) 1197 #define DRFLAC_HAS_BYTESWAP32_INTRINSIC 1198 #endif 1199 #if __has_builtin(__builtin_bswap64) 1200 #define DRFLAC_HAS_BYTESWAP64_INTRINSIC 1201 #endif 1202 #endif 1203 #elif defined(__GNUC__) 1204 #if ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) 1205 #define DRFLAC_HAS_BYTESWAP32_INTRINSIC 1206 #define DRFLAC_HAS_BYTESWAP64_INTRINSIC 1207 #endif 1208 #if ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) 1209 #define DRFLAC_HAS_BYTESWAP16_INTRINSIC 1210 #endif 1211 #endif 1212 1213 1214 /* Standard library stuff. 
*/ 1215 /* Each helper below is only defined when the application has not already defined it, in which case the C runtime equivalent is used. */ #ifndef DRFLAC_ASSERT 1216 #include <assert.h> 1217 #define DRFLAC_ASSERT(expression) assert(expression) 1218 #endif 1219 #ifndef DRFLAC_MALLOC 1220 #define DRFLAC_MALLOC(sz) malloc((sz)) 1221 #endif 1222 #ifndef DRFLAC_REALLOC 1223 #define DRFLAC_REALLOC(p, sz) realloc((p), (sz)) 1224 #endif 1225 #ifndef DRFLAC_FREE 1226 #define DRFLAC_FREE(p) free((p)) 1227 #endif 1228 #ifndef DRFLAC_COPY_MEMORY 1229 #define DRFLAC_COPY_MEMORY(dst, src, sz) memcpy((dst), (src), (sz)) 1230 #endif 1231 #ifndef DRFLAC_ZERO_MEMORY 1232 #define DRFLAC_ZERO_MEMORY(p, sz) memset((p), 0, (sz)) 1233 #endif 1234 1235 #define DRFLAC_MAX_SIMD_VECTOR_SIZE 64 /* 64 for AVX-512 in the future. */ 1236 1237 /* Internal result codes: zero is success, every failure code is negative. */ typedef drflac_int32 drflac_result; 1238 #define DRFLAC_SUCCESS 0 1239 #define DRFLAC_ERROR -1 /* A generic error. */ 1240 #define DRFLAC_INVALID_ARGS -2 1241 #define DRFLAC_END_OF_STREAM -128 1242 #define DRFLAC_CRC_MISMATCH -129 1243 1244 #define DRFLAC_SUBFRAME_CONSTANT 0 1245 #define DRFLAC_SUBFRAME_VERBATIM 1 1246 #define DRFLAC_SUBFRAME_FIXED 8 1247 #define DRFLAC_SUBFRAME_LPC 32 1248 #define DRFLAC_SUBFRAME_RESERVED 255 1249 1250 #define DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE 0 1251 #define DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2 1 1252 1253 #define DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT 0 1254 #define DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE 8 1255 #define DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE 9 1256 #define DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE 10 1257 1258 /* Rounds x up to the next multiple of a. Both arguments are evaluated more than once, so avoid side effects. */ #define drflac_align(x, a) ((((x) + (a) - 1) / (a)) * (a)) 1259 1260 1261 /* CPU caps.
*/ 1262 /* DRFLAC_NO_THREAD_SANITIZE expands to a no_sanitize("thread") attribute when building with ThreadSanitizer and to nothing otherwise. */ #if defined(__has_feature) 1263 #if __has_feature(thread_sanitizer) 1264 #define DRFLAC_NO_THREAD_SANITIZE __attribute__((no_sanitize("thread"))) 1265 #else 1266 #define DRFLAC_NO_THREAD_SANITIZE 1267 #endif 1268 #else 1269 #define DRFLAC_NO_THREAD_SANITIZE 1270 #endif 1271 1272 /* Cached capability flags; they stay DRFLAC_FALSE until drflac__init_cpu_caps() has run. */ #if defined(DRFLAC_HAS_LZCNT_INTRINSIC) 1273 static drflac_bool32 drflac__gIsLZCNTSupported = DRFLAC_FALSE; 1274 #endif 1275 1276 #ifndef DRFLAC_NO_CPUID 1277 static drflac_bool32 drflac__gIsSSE2Supported = DRFLAC_FALSE; 1278 static drflac_bool32 drflac__gIsSSE41Supported = DRFLAC_FALSE; 1279 1280 /* 1281 Queries the CPU once and caches the LZCNT, SSE2 and SSE4.1 results in the globals above; later calls return immediately. 1281 I've had a bug report that Clang's ThreadSanitizer presents a warning in this function. Having reviewed this, this does 1282 actually make sense. However, since CPU caps should never differ for a running process, I don't think the trade off of 1283 complicating internal API's by passing around CPU caps versus just disabling the warnings is worthwhile. I'm therefore 1284 just going to disable these warnings. This is disabled via the DRFLAC_NO_THREAD_SANITIZE attribute. 1285 */ 1286 DRFLAC_NO_THREAD_SANITIZE static void drflac__init_cpu_caps() 1287 { 1288 static drflac_bool32 isCPUCapsInitialized = DRFLAC_FALSE; 1289 1290 if (!isCPUCapsInitialized) { 1291 int info[4] = {0}; 1292 1293 /* LZCNT: extended CPUID leaf 0x80000001, ECX bit 5. */ 1294 #if defined(DRFLAC_HAS_LZCNT_INTRINSIC) 1295 drflac__cpuid(info, 0x80000001); 1296 drflac__gIsLZCNTSupported = (info[2] & (1 << 5)) != 0; 1297 #endif 1298 1299 /* SSE2 */ 1300 drflac__gIsSSE2Supported = drflac_has_sse2(); 1301 1302 /* SSE4.1 */ 1303 drflac__gIsSSE41Supported = drflac_has_sse41(); 1304 1305 /* Initialized.
*/ 1306 isCPUCapsInitialized = DRFLAC_TRUE; 1307 } 1308 } 1309 #else 1310 static drflac_bool32 drflac__gIsNEONSupported = DRFLAC_FALSE; 1311 1312 /* Reports whether NEON code paths may be used. This is currently a compile-time decision only; there is no run-time probe. */ static DRFLAC_INLINE drflac_bool32 drflac__has_neon() 1313 { 1314 #if defined(DRFLAC_SUPPORT_NEON) 1315 #if defined(DRFLAC_ARM) && !defined(DRFLAC_NO_NEON) 1316 #if (defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM64)) 1317 return DRFLAC_TRUE; /* If the compiler is allowed to freely generate NEON code we can assume support. */ 1318 #else 1319 /* TODO: Runtime check. */ 1320 return DRFLAC_FALSE; 1321 #endif 1322 #else 1323 return DRFLAC_FALSE; /* NEON is only supported on ARM architectures. */ 1324 #endif 1325 #else 1326 return DRFLAC_FALSE; /* No compiler support. */ 1327 #endif 1328 } 1329 1330 /* Variant used when CPUID is unavailable: caches the NEON result and, on ARMv5+ builds that have the intrinsic, marks LZCNT as supported. */ DRFLAC_NO_THREAD_SANITIZE static void drflac__init_cpu_caps() 1331 { 1332 drflac__gIsNEONSupported = drflac__has_neon(); 1333 1334 #if defined(DRFLAC_HAS_LZCNT_INTRINSIC) && defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 5) 1335 drflac__gIsLZCNTSupported = DRFLAC_TRUE; 1336 #endif 1337 } 1338 #endif 1339 1340 1341 /* Endian Management */ 1342 /* Returns DRFLAC_TRUE on a little-endian host. Decided at compile time on x86/x64 or when __BYTE_ORDER says so; otherwise detected at run time by inspecting the first byte of an int holding 1. */ static DRFLAC_INLINE drflac_bool32 drflac__is_little_endian() 1343 { 1344 #if defined(DRFLAC_X86) || defined(DRFLAC_X64) 1345 return DRFLAC_TRUE; 1346 #elif defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && __BYTE_ORDER == __LITTLE_ENDIAN 1347 return DRFLAC_TRUE; 1348 #else 1349 int n = 1; 1350 return (*(char*)&n) == 1; 1351 #endif 1352 } 1353 1354 /* Reverses the byte order of a 16-bit value, using a compiler intrinsic when one was detected and shifts and masks otherwise. */ static DRFLAC_INLINE drflac_uint16 drflac__swap_endian_uint16(drflac_uint16 n) 1355 { 1356 #ifdef DRFLAC_HAS_BYTESWAP16_INTRINSIC 1357 #if defined(_MSC_VER) 1358 return _byteswap_ushort(n); 1359 #elif defined(__GNUC__) || defined(__clang__) 1360 return __builtin_bswap16(n); 1361 #else 1362 #error "This compiler does not support the byte swap intrinsic."
1363 #endif 1364 #else 1365 return ((n & 0xFF00) >> 8) | 1366 ((n & 0x00FF) << 8); 1367 #endif 1368 } 1369 1370 /* Reverses the byte order of a 32-bit value, using a compiler intrinsic (or the ARM "rev" instruction) when available and shifts and masks otherwise. */ static DRFLAC_INLINE drflac_uint32 drflac__swap_endian_uint32(drflac_uint32 n) 1371 { 1372 #ifdef DRFLAC_HAS_BYTESWAP32_INTRINSIC 1373 #if defined(_MSC_VER) 1374 return _byteswap_ulong(n); 1375 #elif defined(__GNUC__) || defined(__clang__) 1376 #if defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 6) && !defined(DRFLAC_64BIT) /* <-- 64-bit inline assembly has not been tested, so disabling for now. */ 1377 /* Inline assembly optimized implementation for ARM. In my testing, GCC does not generate optimized code with __builtin_bswap32(). */ 1378 drflac_uint32 r; 1379 __asm__ __volatile__ ( 1380 #if defined(DRFLAC_64BIT) 1381 "rev %w[out], %w[in]" : [out]"=r"(r) : [in]"r"(n) /* <-- This is untested. If someone in the community could test this, that would be appreciated! */ 1382 #else 1383 "rev %[out], %[in]" : [out]"=r"(r) : [in]"r"(n) 1384 #endif 1385 ); 1386 return r; 1387 #else 1388 return __builtin_bswap32(n); 1389 #endif 1390 #else 1391 #error "This compiler does not support the byte swap intrinsic." 1392 #endif 1393 #else 1394 return ((n & 0xFF000000) >> 24) | 1395 ((n & 0x00FF0000) >> 8) | 1396 ((n & 0x0000FF00) << 8) | 1397 ((n & 0x000000FF) << 24); 1398 #endif 1399 } 1400 1401 /* Reverses the byte order of a 64-bit value, using a compiler intrinsic when one was detected and shifts and masks otherwise. */ static DRFLAC_INLINE drflac_uint64 drflac__swap_endian_uint64(drflac_uint64 n) 1402 { 1403 #ifdef DRFLAC_HAS_BYTESWAP64_INTRINSIC 1404 #if defined(_MSC_VER) 1405 return _byteswap_uint64(n); 1406 #elif defined(__GNUC__) || defined(__clang__) 1407 return __builtin_bswap64(n); 1408 #else 1409 #error "This compiler does not support the byte swap intrinsic."
1410 #endif 1411 #else 1412 return ((n & (drflac_uint64)0xFF00000000000000) >> 56) | 1413 ((n & (drflac_uint64)0x00FF000000000000) >> 40) | 1414 ((n & (drflac_uint64)0x0000FF0000000000) >> 24) | 1415 ((n & (drflac_uint64)0x000000FF00000000) >> 8) | 1416 ((n & (drflac_uint64)0x00000000FF000000) << 8) | 1417 ((n & (drflac_uint64)0x0000000000FF0000) << 24) | 1418 ((n & (drflac_uint64)0x000000000000FF00) << 40) | 1419 ((n & (drflac_uint64)0x00000000000000FF) << 56); 1420 #endif 1421 } 1422 1423 1424 /* The be2host helpers convert a big-endian value to host byte order: the bytes are swapped on little-endian hosts and returned unchanged otherwise. */ static DRFLAC_INLINE drflac_uint16 drflac__be2host_16(drflac_uint16 n) 1425 { 1426 if (drflac__is_little_endian()) { 1427 return drflac__swap_endian_uint16(n); 1428 } 1429 1430 return n; 1431 } 1432 1433 static DRFLAC_INLINE drflac_uint32 drflac__be2host_32(drflac_uint32 n) 1434 { 1435 if (drflac__is_little_endian()) { 1436 return drflac__swap_endian_uint32(n); 1437 } 1438 1439 return n; 1440 } 1441 1442 static DRFLAC_INLINE drflac_uint64 drflac__be2host_64(drflac_uint64 n) 1443 { 1444 if (drflac__is_little_endian()) { 1445 return drflac__swap_endian_uint64(n); 1446 } 1447 1448 return n; 1449 } 1450 1451 1452 /* Converts a little-endian 32-bit value to host byte order: the bytes are swapped only on hosts that are not little-endian. */ static DRFLAC_INLINE drflac_uint32 drflac__le2host_32(drflac_uint32 n) 1453 { 1454 if (!drflac__is_little_endian()) { 1455 return drflac__swap_endian_uint32(n); 1456 } 1457 1458 return n; 1459 } 1460 1461 1462 /* Decodes a 32-bit synchsafe integer: the low 7 bits of each of the four bytes are packed together into a 28-bit result. */ static DRFLAC_INLINE drflac_uint32 drflac__unsynchsafe_32(drflac_uint32 n) 1463 { 1464 drflac_uint32 result = 0; 1465 result |= (n & 0x7F000000) >> 3; 1466 result |= (n & 0x007F0000) >> 2; 1467 result |= (n & 0x00007F00) >> 1; 1468 result |= (n & 0x0000007F) >> 0; 1469 1470 return result; 1471 } 1472 1473 1474 1475 /* The CRC code below is based on this document: http://zlib.net/crc_v3.txt */ 1476 static drflac_uint8 drflac__crc8_table[] = { 1477 0x00, 0x07, 0x0E, 0x09, 0x1C, 0x1B, 0x12, 0x15, 0x38, 0x3F, 0x36, 0x31, 0x24, 0x23, 0x2A, 0x2D, 1478 0x70, 0x77, 0x7E, 0x79, 0x6C, 0x6B, 0x62, 0x65, 0x48, 0x4F, 0x46, 0x41, 0x54, 0x53, 0x5A, 0x5D, 1479 0xE0, 0xE7, 0xEE,
0xE9, 0xFC, 0xFB, 0xF2, 0xF5, 0xD8, 0xDF, 0xD6, 0xD1, 0xC4, 0xC3, 0xCA, 0xCD, 1480 0x90, 0x97, 0x9E, 0x99, 0x8C, 0x8B, 0x82, 0x85, 0xA8, 0xAF, 0xA6, 0xA1, 0xB4, 0xB3, 0xBA, 0xBD, 1481 0xC7, 0xC0, 0xC9, 0xCE, 0xDB, 0xDC, 0xD5, 0xD2, 0xFF, 0xF8, 0xF1, 0xF6, 0xE3, 0xE4, 0xED, 0xEA, 1482 0xB7, 0xB0, 0xB9, 0xBE, 0xAB, 0xAC, 0xA5, 0xA2, 0x8F, 0x88, 0x81, 0x86, 0x93, 0x94, 0x9D, 0x9A, 1483 0x27, 0x20, 0x29, 0x2E, 0x3B, 0x3C, 0x35, 0x32, 0x1F, 0x18, 0x11, 0x16, 0x03, 0x04, 0x0D, 0x0A, 1484 0x57, 0x50, 0x59, 0x5E, 0x4B, 0x4C, 0x45, 0x42, 0x6F, 0x68, 0x61, 0x66, 0x73, 0x74, 0x7D, 0x7A, 1485 0x89, 0x8E, 0x87, 0x80, 0x95, 0x92, 0x9B, 0x9C, 0xB1, 0xB6, 0xBF, 0xB8, 0xAD, 0xAA, 0xA3, 0xA4, 1486 0xF9, 0xFE, 0xF7, 0xF0, 0xE5, 0xE2, 0xEB, 0xEC, 0xC1, 0xC6, 0xCF, 0xC8, 0xDD, 0xDA, 0xD3, 0xD4, 1487 0x69, 0x6E, 0x67, 0x60, 0x75, 0x72, 0x7B, 0x7C, 0x51, 0x56, 0x5F, 0x58, 0x4D, 0x4A, 0x43, 0x44, 1488 0x19, 0x1E, 0x17, 0x10, 0x05, 0x02, 0x0B, 0x0C, 0x21, 0x26, 0x2F, 0x28, 0x3D, 0x3A, 0x33, 0x34, 1489 0x4E, 0x49, 0x40, 0x47, 0x52, 0x55, 0x5C, 0x5B, 0x76, 0x71, 0x78, 0x7F, 0x6A, 0x6D, 0x64, 0x63, 1490 0x3E, 0x39, 0x30, 0x37, 0x22, 0x25, 0x2C, 0x2B, 0x06, 0x01, 0x08, 0x0F, 0x1A, 0x1D, 0x14, 0x13, 1491 0xAE, 0xA9, 0xA0, 0xA7, 0xB2, 0xB5, 0xBC, 0xBB, 0x96, 0x91, 0x98, 0x9F, 0x8A, 0x8D, 0x84, 0x83, 1492 0xDE, 0xD9, 0xD0, 0xD7, 0xC2, 0xC5, 0xCC, 0xCB, 0xE6, 0xE1, 0xE8, 0xEF, 0xFA, 0xFD, 0xF4, 0xF3 1493 }; 1494 1495 static drflac_uint16 drflac__crc16_table[] = { 1496 0x0000, 0x8005, 0x800F, 0x000A, 0x801B, 0x001E, 0x0014, 0x8011, 1497 0x8033, 0x0036, 0x003C, 0x8039, 0x0028, 0x802D, 0x8027, 0x0022, 1498 0x8063, 0x0066, 0x006C, 0x8069, 0x0078, 0x807D, 0x8077, 0x0072, 1499 0x0050, 0x8055, 0x805F, 0x005A, 0x804B, 0x004E, 0x0044, 0x8041, 1500 0x80C3, 0x00C6, 0x00CC, 0x80C9, 0x00D8, 0x80DD, 0x80D7, 0x00D2, 1501 0x00F0, 0x80F5, 0x80FF, 0x00FA, 0x80EB, 0x00EE, 0x00E4, 0x80E1, 1502 0x00A0, 0x80A5, 0x80AF, 0x00AA, 0x80BB, 0x00BE, 0x00B4, 0x80B1, 1503 0x8093, 0x0096, 0x009C, 0x8099, 0x0088, 0x808D, 0x8087, 
0x0082, 1504 0x8183, 0x0186, 0x018C, 0x8189, 0x0198, 0x819D, 0x8197, 0x0192, 1505 0x01B0, 0x81B5, 0x81BF, 0x01BA, 0x81AB, 0x01AE, 0x01A4, 0x81A1, 1506 0x01E0, 0x81E5, 0x81EF, 0x01EA, 0x81FB, 0x01FE, 0x01F4, 0x81F1, 1507 0x81D3, 0x01D6, 0x01DC, 0x81D9, 0x01C8, 0x81CD, 0x81C7, 0x01C2, 1508 0x0140, 0x8145, 0x814F, 0x014A, 0x815B, 0x015E, 0x0154, 0x8151, 1509 0x8173, 0x0176, 0x017C, 0x8179, 0x0168, 0x816D, 0x8167, 0x0162, 1510 0x8123, 0x0126, 0x012C, 0x8129, 0x0138, 0x813D, 0x8137, 0x0132, 1511 0x0110, 0x8115, 0x811F, 0x011A, 0x810B, 0x010E, 0x0104, 0x8101, 1512 0x8303, 0x0306, 0x030C, 0x8309, 0x0318, 0x831D, 0x8317, 0x0312, 1513 0x0330, 0x8335, 0x833F, 0x033A, 0x832B, 0x032E, 0x0324, 0x8321, 1514 0x0360, 0x8365, 0x836F, 0x036A, 0x837B, 0x037E, 0x0374, 0x8371, 1515 0x8353, 0x0356, 0x035C, 0x8359, 0x0348, 0x834D, 0x8347, 0x0342, 1516 0x03C0, 0x83C5, 0x83CF, 0x03CA, 0x83DB, 0x03DE, 0x03D4, 0x83D1, 1517 0x83F3, 0x03F6, 0x03FC, 0x83F9, 0x03E8, 0x83ED, 0x83E7, 0x03E2, 1518 0x83A3, 0x03A6, 0x03AC, 0x83A9, 0x03B8, 0x83BD, 0x83B7, 0x03B2, 1519 0x0390, 0x8395, 0x839F, 0x039A, 0x838B, 0x038E, 0x0384, 0x8381, 1520 0x0280, 0x8285, 0x828F, 0x028A, 0x829B, 0x029E, 0x0294, 0x8291, 1521 0x82B3, 0x02B6, 0x02BC, 0x82B9, 0x02A8, 0x82AD, 0x82A7, 0x02A2, 1522 0x82E3, 0x02E6, 0x02EC, 0x82E9, 0x02F8, 0x82FD, 0x82F7, 0x02F2, 1523 0x02D0, 0x82D5, 0x82DF, 0x02DA, 0x82CB, 0x02CE, 0x02C4, 0x82C1, 1524 0x8243, 0x0246, 0x024C, 0x8249, 0x0258, 0x825D, 0x8257, 0x0252, 1525 0x0270, 0x8275, 0x827F, 0x027A, 0x826B, 0x026E, 0x0264, 0x8261, 1526 0x0220, 0x8225, 0x822F, 0x022A, 0x823B, 0x023E, 0x0234, 0x8231, 1527 0x8213, 0x0216, 0x021C, 0x8219, 0x0208, 0x820D, 0x8207, 0x0202 1528 }; 1529 1530 /* Advances the running CRC-8 by one byte with a single table lookup. */ static DRFLAC_INLINE drflac_uint8 drflac_crc8_byte(drflac_uint8 crc, drflac_uint8 data) 1531 { 1532 return drflac__crc8_table[crc ^ data]; 1533 } 1534 1535 /* Feeds the low `count` bits of `data` (count <= 32, most significant bit first) into the running CRC-8 and returns the updated value. Always returns 0 when DR_FLAC_NO_CRC is defined. */ static DRFLAC_INLINE drflac_uint8 drflac_crc8(drflac_uint8 crc, drflac_uint32 data, drflac_uint32 count) 1536 { 1537 #ifdef DR_FLAC_NO_CRC 1538 (void)crc; 1539
(void)data; 1540 (void)count; 1541 return 0; 1542 #else 1543 #if 0 1544 /* REFERENCE (use of this implementation requires an explicit flush by doing "drflac_crc8(crc, 0, 8);") */ 1545 drflac_uint8 p = 0x07; 1546 for (int i = count-1; i >= 0; --i) { 1547 drflac_uint8 bit = (data & (1 << i)) >> i; 1548 if (crc & 0x80) { 1549 crc = ((crc << 1) | bit) ^ p; 1550 } else { 1551 crc = ((crc << 1) | bit); 1552 } 1553 } 1554 return crc; 1555 #else 1556 drflac_uint32 wholeBytes; 1557 drflac_uint32 leftoverBits; 1558 drflac_uint64 leftoverDataMask; 1559 1560 /* Indexed by leftoverBits: a mask selecting that many low bits of data. */ static drflac_uint64 leftoverDataMaskTable[8] = { 1561 0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F 1562 }; 1563 1564 DRFLAC_ASSERT(count <= 32); 1565 1566 wholeBytes = count >> 3; 1567 leftoverBits = count - (wholeBytes*8); 1568 leftoverDataMask = leftoverDataMaskTable[leftoverBits]; 1569 1570 /* Every case deliberately falls through: the whole bytes sitting above the leftover bits are consumed from most to least significant, then the leftover bits themselves. */ switch (wholeBytes) { 1571 case 4: crc = drflac_crc8_byte(crc, (drflac_uint8)((data & (0xFF000000UL << leftoverBits)) >> (24 + leftoverBits))); 1572 case 3: crc = drflac_crc8_byte(crc, (drflac_uint8)((data & (0x00FF0000UL << leftoverBits)) >> (16 + leftoverBits))); 1573 case 2: crc = drflac_crc8_byte(crc, (drflac_uint8)((data & (0x0000FF00UL << leftoverBits)) >> ( 8 + leftoverBits))); 1574 case 1: crc = drflac_crc8_byte(crc, (drflac_uint8)((data & (0x000000FFUL << leftoverBits)) >> ( 0 + leftoverBits))); 1575 case 0: if (leftoverBits > 0) crc = (crc << leftoverBits) ^ drflac__crc8_table[(crc >> (8 - leftoverBits)) ^ (data & leftoverDataMask)]; 1576 } 1577 return crc; 1578 #endif 1579 #endif 1580 } 1581 1582 /* Advances the running CRC-16 by one byte: the high byte of crc is combined with data to index the table, and the result is XORed with crc shifted left by 8. */ static DRFLAC_INLINE drflac_uint16 drflac_crc16_byte(drflac_uint16 crc, drflac_uint8 data) 1583 { 1584 return (crc << 8) ^ drflac__crc16_table[(drflac_uint8)(crc >> 8) ^ data]; 1585 } 1586 1587 /* Feeds the bytes of a cache word into the running CRC-16, most significant byte first; the four highest bytes are only processed when DRFLAC_64BIT is defined. */ static DRFLAC_INLINE drflac_uint16 drflac_crc16_cache(drflac_uint16 crc, drflac_cache_t data) 1588 { 1589 #ifdef DRFLAC_64BIT 1590 crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 56) & 0xFF)); 1591 crc = drflac_crc16_byte(crc,
(drflac_uint8)((data >> 48) & 0xFF)); 1592 crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 40) & 0xFF)); 1593 crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 32) & 0xFF)); 1594 #endif 1595 crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 24) & 0xFF)); 1596 crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 16) & 0xFF)); 1597 crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 8) & 0xFF)); 1598 crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 0) & 0xFF)); 1599 1600 return crc; 1601 } 1602 1603 static DRFLAC_INLINE drflac_uint16 drflac_crc16_bytes(drflac_uint16 crc, drflac_cache_t data, drflac_uint32 byteCount) 1604 { 1605 switch (byteCount) 1606 { 1607 #ifdef DRFLAC_64BIT 1608 case 8: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 56) & 0xFF)); 1609 case 7: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 48) & 0xFF)); 1610 case 6: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 40) & 0xFF)); 1611 case 5: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 32) & 0xFF)); 1612 #endif 1613 case 4: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 24) & 0xFF)); 1614 case 3: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 16) & 0xFF)); 1615 case 2: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 8) & 0xFF)); 1616 case 1: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 0) & 0xFF)); 1617 } 1618 1619 return crc; 1620 } 1621 1622 #if 0 1623 static DRFLAC_INLINE drflac_uint16 drflac_crc16__32bit(drflac_uint16 crc, drflac_uint32 data, drflac_uint32 count) 1624 { 1625 #ifdef DR_FLAC_NO_CRC 1626 (void)crc; 1627 (void)data; 1628 (void)count; 1629 return 0; 1630 #else 1631 #if 0 1632 /* REFERENCE (use of this implementation requires an explicit flush by doing "drflac_crc16(crc, 0, 16);") */ 1633 drflac_uint16 p = 0x8005; 1634 for (int i = count-1; i >= 0; --i) { 1635 drflac_uint16 bit = (data & (1ULL << i)) >> i; 1636 if (r & 0x8000) { 1637 r = ((r << 1) | bit) ^ p; 1638 } else { 1639 r = ((r << 1) | bit); 1640 } 1641 } 1642 
/*
Bit-granular CRC-16 routines. This whole section is compiled out and kept for reference only; the CRC-16 is accumulated a
byte at a time by drflac_crc16_cache() and drflac_crc16_bytes() instead.
*/
#if 0
/* Feeds the low `count` bits (at most 32) of a 32-bit value into a running CRC-16, most significant bit first. */
static DRFLAC_INLINE drflac_uint16 drflac_crc16__32bit(drflac_uint16 crc, drflac_uint32 data, drflac_uint32 count)
{
#ifdef DR_FLAC_NO_CRC
    (void)crc;
    (void)data;
    (void)count;
    return 0;
#else
#if 0
    /* REFERENCE (use of this implementation requires an explicit flush by doing "drflac_crc16(crc, 0, 16);") */
    drflac_uint16 p = 0x8005;
    for (int i = count-1; i >= 0; --i) {
        drflac_uint16 bit = (data & (1ULL << i)) >> i;
        if (crc & 0x8000) {
            crc = ((crc << 1) | bit) ^ p;
        } else {
            crc = ((crc << 1) | bit);
        }
    }

    return crc;
#else
    drflac_uint32 wholeBytes;
    drflac_uint32 leftoverBits;
    drflac_uint64 leftoverDataMask;

    static const drflac_uint64 leftoverDataMaskTable[8] = {
        0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F
    };

    DRFLAC_ASSERT(count <= 32);

    wholeBytes = count >> 3;
    leftoverBits = count & 7;
    leftoverDataMask = leftoverDataMaskTable[leftoverBits];

    switch (wholeBytes) {
        default:
        case 4: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (0xFF000000UL << leftoverBits)) >> (24 + leftoverBits)));  /* fallthrough */
        case 3: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (0x00FF0000UL << leftoverBits)) >> (16 + leftoverBits)));  /* fallthrough */
        case 2: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (0x0000FF00UL << leftoverBits)) >> ( 8 + leftoverBits)));  /* fallthrough */
        case 1: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (0x000000FFUL << leftoverBits)) >> ( 0 + leftoverBits)));  /* fallthrough */
        case 0: if (leftoverBits > 0) crc = (crc << leftoverBits) ^ drflac__crc16_table[(crc >> (16 - leftoverBits)) ^ (data & leftoverDataMask)];
    }
    return crc;
#endif
#endif
}

/* Feeds the low `count` bits (at most 64) of a 64-bit value into a running CRC-16, most significant bit first. */
static DRFLAC_INLINE drflac_uint16 drflac_crc16__64bit(drflac_uint16 crc, drflac_uint64 data, drflac_uint32 count)
{
#ifdef DR_FLAC_NO_CRC
    (void)crc;
    (void)data;
    (void)count;
    return 0;
#else
    drflac_uint32 wholeBytes;
    drflac_uint32 leftoverBits;
    drflac_uint64 leftoverDataMask;

    static const drflac_uint64 leftoverDataMaskTable[8] = {
        0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F
    };

    DRFLAC_ASSERT(count <= 64);

    wholeBytes = count >> 3;
    leftoverBits = count & 7;
    leftoverDataMask = leftoverDataMaskTable[leftoverBits];

    switch (wholeBytes) {
        default:
        /* The "<< 32" on the upper masks is required for C89 because it doesn't support 64-bit constants. Should be optimized out by a good compiler. */
        case 8: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0xFF000000 << 32) << leftoverBits)) >> (56 + leftoverBits)));  /* fallthrough */
        case 7: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x00FF0000 << 32) << leftoverBits)) >> (48 + leftoverBits)));  /* fallthrough */
        case 6: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x0000FF00 << 32) << leftoverBits)) >> (40 + leftoverBits)));  /* fallthrough */
        case 5: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x000000FF << 32) << leftoverBits)) >> (32 + leftoverBits)));  /* fallthrough */
        case 4: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0xFF000000      ) << leftoverBits)) >> (24 + leftoverBits)));  /* fallthrough */
        case 3: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x00FF0000      ) << leftoverBits)) >> (16 + leftoverBits)));  /* fallthrough */
        case 2: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x0000FF00      ) << leftoverBits)) >> ( 8 + leftoverBits)));  /* fallthrough */
        case 1: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x000000FF      ) << leftoverBits)) >> ( 0 + leftoverBits)));  /* fallthrough */
        case 0: if (leftoverBits > 0) crc = (crc << leftoverBits) ^ drflac__crc16_table[(crc >> (16 - leftoverBits)) ^ (data & leftoverDataMask)];
    }
    return crc;
#endif
}


/* Dispatches to the variant matching the width of the cache line for this build. */
static DRFLAC_INLINE drflac_uint16 drflac_crc16(drflac_uint16 crc, drflac_cache_t data, drflac_uint32 count)
{
#ifdef DRFLAC_64BIT
    return drflac_crc16__64bit(crc, data, count);
#else
    return drflac_crc16__32bit(crc, data, count);
#endif
}
#endif


/* Cache lines are converted from big-endian to host order with the routine matching the cache line width. */
#ifdef DRFLAC_64BIT
#define drflac__be2host__cache_line drflac__be2host_64
#else
#define drflac__be2host__cache_line drflac__be2host_32
#endif
It uses the notion of an L1 and L2 cache (borrowed from CPU architecture), where the L1 cache 1733 is a 32- or 64-bit unsigned integer (depending on whether or not a 32- or 64-bit build is being compiled) and the L2 is an 1734 array of "cache lines", with each cache line being the same size as the L1. The L2 is a buffer of about 4KB and is where data 1735 from onRead() is read into. 1736 */ 1737 #define DRFLAC_CACHE_L1_SIZE_BYTES(bs) (sizeof((bs)->cache)) 1738 #define DRFLAC_CACHE_L1_SIZE_BITS(bs) (sizeof((bs)->cache)*8) 1739 #define DRFLAC_CACHE_L1_BITS_REMAINING(bs) (DRFLAC_CACHE_L1_SIZE_BITS(bs) - (bs)->consumedBits) 1740 #define DRFLAC_CACHE_L1_SELECTION_MASK(_bitCount) (~((~(drflac_cache_t)0) >> (_bitCount))) 1741 #define DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, _bitCount) (DRFLAC_CACHE_L1_SIZE_BITS(bs) - (_bitCount)) 1742 #define DRFLAC_CACHE_L1_SELECT(bs, _bitCount) (((bs)->cache) & DRFLAC_CACHE_L1_SELECTION_MASK(_bitCount)) 1743 #define DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, _bitCount) (DRFLAC_CACHE_L1_SELECT((bs), (_bitCount)) >> DRFLAC_CACHE_L1_SELECTION_SHIFT((bs), (_bitCount))) 1744 #define DRFLAC_CACHE_L1_SELECT_AND_SHIFT_SAFE(bs, _bitCount)(DRFLAC_CACHE_L1_SELECT((bs), (_bitCount)) >> (DRFLAC_CACHE_L1_SELECTION_SHIFT((bs), (_bitCount)) & (DRFLAC_CACHE_L1_SIZE_BITS(bs)-1))) 1745 #define DRFLAC_CACHE_L2_SIZE_BYTES(bs) (sizeof((bs)->cacheL2)) 1746 #define DRFLAC_CACHE_L2_LINE_COUNT(bs) (DRFLAC_CACHE_L2_SIZE_BYTES(bs) / sizeof((bs)->cacheL2[0])) 1747 #define DRFLAC_CACHE_L2_LINES_REMAINING(bs) (DRFLAC_CACHE_L2_LINE_COUNT(bs) - (bs)->nextL2Line) 1748 1749 1750 #ifndef DR_FLAC_NO_CRC 1751 static DRFLAC_INLINE void drflac__reset_crc16(drflac_bs* bs) 1752 { 1753 bs->crc16 = 0; 1754 bs->crc16CacheIgnoredBytes = bs->consumedBits >> 3; 1755 } 1756 1757 static DRFLAC_INLINE void drflac__update_crc16(drflac_bs* bs) 1758 { 1759 if (bs->crc16CacheIgnoredBytes == 0) { 1760 bs->crc16 = drflac_crc16_cache(bs->crc16, bs->crc16Cache); 1761 } else { 1762 bs->crc16 = 
drflac_crc16_bytes(bs->crc16, bs->crc16Cache, DRFLAC_CACHE_L1_SIZE_BYTES(bs) - bs->crc16CacheIgnoredBytes); 1763 bs->crc16CacheIgnoredBytes = 0; 1764 } 1765 } 1766 1767 static DRFLAC_INLINE drflac_uint16 drflac__flush_crc16(drflac_bs* bs) 1768 { 1769 /* We should never be flushing in a situation where we are not aligned on a byte boundary. */ 1770 DRFLAC_ASSERT((DRFLAC_CACHE_L1_BITS_REMAINING(bs) & 7) == 0); 1771 1772 /* 1773 The bits that were read from the L1 cache need to be accumulated. The number of bytes needing to be accumulated is determined 1774 by the number of bits that have been consumed. 1775 */ 1776 if (DRFLAC_CACHE_L1_BITS_REMAINING(bs) == 0) { 1777 drflac__update_crc16(bs); 1778 } else { 1779 /* We only accumulate the consumed bits. */ 1780 bs->crc16 = drflac_crc16_bytes(bs->crc16, bs->crc16Cache >> DRFLAC_CACHE_L1_BITS_REMAINING(bs), (bs->consumedBits >> 3) - bs->crc16CacheIgnoredBytes); 1781 1782 /* 1783 The bits that we just accumulated should never be accumulated again. We need to keep track of how many bytes were accumulated 1784 so we can handle that later. 1785 */ 1786 bs->crc16CacheIgnoredBytes = bs->consumedBits >> 3; 1787 } 1788 1789 return bs->crc16; 1790 } 1791 #endif 1792 1793 static DRFLAC_INLINE drflac_bool32 drflac__reload_l1_cache_from_l2(drflac_bs* bs) 1794 { 1795 size_t bytesRead; 1796 size_t alignedL1LineCount; 1797 1798 /* Fast path. Try loading straight from L2. */ 1799 if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) { 1800 bs->cache = bs->cacheL2[bs->nextL2Line++]; 1801 return DRFLAC_TRUE; 1802 } 1803 1804 /* 1805 If we get here it means we've run out of data in the L2 cache. We'll need to fetch more from the client, if there's 1806 any left. 1807 */ 1808 if (bs->unalignedByteCount > 0) { 1809 return DRFLAC_FALSE; /* If we have any unaligned bytes it means there's no more aligned bytes left in the client. 
*/ 1810 } 1811 1812 bytesRead = bs->onRead(bs->pUserData, bs->cacheL2, DRFLAC_CACHE_L2_SIZE_BYTES(bs)); 1813 1814 bs->nextL2Line = 0; 1815 if (bytesRead == DRFLAC_CACHE_L2_SIZE_BYTES(bs)) { 1816 bs->cache = bs->cacheL2[bs->nextL2Line++]; 1817 return DRFLAC_TRUE; 1818 } 1819 1820 1821 /* 1822 If we get here it means we were unable to retrieve enough data to fill the entire L2 cache. It probably 1823 means we've just reached the end of the file. We need to move the valid data down to the end of the buffer 1824 and adjust the index of the next line accordingly. Also keep in mind that the L2 cache must be aligned to 1825 the size of the L1 so we'll need to seek backwards by any misaligned bytes. 1826 */ 1827 alignedL1LineCount = bytesRead / DRFLAC_CACHE_L1_SIZE_BYTES(bs); 1828 1829 /* We need to keep track of any unaligned bytes for later use. */ 1830 bs->unalignedByteCount = bytesRead - (alignedL1LineCount * DRFLAC_CACHE_L1_SIZE_BYTES(bs)); 1831 if (bs->unalignedByteCount > 0) { 1832 bs->unalignedCache = bs->cacheL2[alignedL1LineCount]; 1833 } 1834 1835 if (alignedL1LineCount > 0) { 1836 size_t offset = DRFLAC_CACHE_L2_LINE_COUNT(bs) - alignedL1LineCount; 1837 size_t i; 1838 for (i = alignedL1LineCount; i > 0; --i) { 1839 bs->cacheL2[i-1 + offset] = bs->cacheL2[i-1]; 1840 } 1841 1842 bs->nextL2Line = (drflac_uint32)offset; 1843 bs->cache = bs->cacheL2[bs->nextL2Line++]; 1844 return DRFLAC_TRUE; 1845 } else { 1846 /* If we get into this branch it means we weren't able to load any L1-aligned data. */ 1847 bs->nextL2Line = DRFLAC_CACHE_L2_LINE_COUNT(bs); 1848 return DRFLAC_FALSE; 1849 } 1850 } 1851 1852 static drflac_bool32 drflac__reload_cache(drflac_bs* bs) 1853 { 1854 size_t bytesRead; 1855 1856 #ifndef DR_FLAC_NO_CRC 1857 drflac__update_crc16(bs); 1858 #endif 1859 1860 /* Fast path. Try just moving the next value in the L2 cache to the L1 cache. 
*/ 1861 if (drflac__reload_l1_cache_from_l2(bs)) { 1862 bs->cache = drflac__be2host__cache_line(bs->cache); 1863 bs->consumedBits = 0; 1864 #ifndef DR_FLAC_NO_CRC 1865 bs->crc16Cache = bs->cache; 1866 #endif 1867 return DRFLAC_TRUE; 1868 } 1869 1870 /* Slow path. */ 1871 1872 /* 1873 If we get here it means we have failed to load the L1 cache from the L2. Likely we've just reached the end of the stream and the last 1874 few bytes did not meet the alignment requirements for the L2 cache. In this case we need to fall back to a slower path and read the 1875 data from the unaligned cache. 1876 */ 1877 bytesRead = bs->unalignedByteCount; 1878 if (bytesRead == 0) { 1879 bs->consumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs); /* <-- The stream has been exhausted, so marked the bits as consumed. */ 1880 return DRFLAC_FALSE; 1881 } 1882 1883 DRFLAC_ASSERT(bytesRead < DRFLAC_CACHE_L1_SIZE_BYTES(bs)); 1884 bs->consumedBits = (drflac_uint32)(DRFLAC_CACHE_L1_SIZE_BYTES(bs) - bytesRead) * 8; 1885 1886 bs->cache = drflac__be2host__cache_line(bs->unalignedCache); 1887 bs->cache &= DRFLAC_CACHE_L1_SELECTION_MASK(DRFLAC_CACHE_L1_BITS_REMAINING(bs)); /* <-- Make sure the consumed bits are always set to zero. Other parts of the library depend on this property. */ 1888 bs->unalignedByteCount = 0; /* <-- At this point the unaligned bytes have been moved into the cache and we thus have no more unaligned bytes. */ 1889 1890 #ifndef DR_FLAC_NO_CRC 1891 bs->crc16Cache = bs->cache >> bs->consumedBits; 1892 bs->crc16CacheIgnoredBytes = bs->consumedBits >> 3; 1893 #endif 1894 return DRFLAC_TRUE; 1895 } 1896 1897 static void drflac__reset_cache(drflac_bs* bs) 1898 { 1899 bs->nextL2Line = DRFLAC_CACHE_L2_LINE_COUNT(bs); /* <-- This clears the L2 cache. */ 1900 bs->consumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs); /* <-- This clears the L1 cache. */ 1901 bs->cache = 0; 1902 bs->unalignedByteCount = 0; /* <-- This clears the trailing unaligned bytes. 
*/ 1903 bs->unalignedCache = 0; 1904 1905 #ifndef DR_FLAC_NO_CRC 1906 bs->crc16Cache = 0; 1907 bs->crc16CacheIgnoredBytes = 0; 1908 #endif 1909 } 1910 1911 1912 static DRFLAC_INLINE drflac_bool32 drflac__read_uint32(drflac_bs* bs, unsigned int bitCount, drflac_uint32* pResultOut) 1913 { 1914 DRFLAC_ASSERT(bs != NULL); 1915 DRFLAC_ASSERT(pResultOut != NULL); 1916 DRFLAC_ASSERT(bitCount > 0); 1917 DRFLAC_ASSERT(bitCount <= 32); 1918 1919 if (bs->consumedBits == DRFLAC_CACHE_L1_SIZE_BITS(bs)) { 1920 if (!drflac__reload_cache(bs)) { 1921 return DRFLAC_FALSE; 1922 } 1923 } 1924 1925 if (bitCount <= DRFLAC_CACHE_L1_BITS_REMAINING(bs)) { 1926 /* 1927 If we want to load all 32-bits from a 32-bit cache we need to do it slightly differently because we can't do 1928 a 32-bit shift on a 32-bit integer. This will never be the case on 64-bit caches, so we can have a slightly 1929 more optimal solution for this. 1930 */ 1931 #ifdef DRFLAC_64BIT 1932 *pResultOut = (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCount); 1933 bs->consumedBits += bitCount; 1934 bs->cache <<= bitCount; 1935 #else 1936 if (bitCount < DRFLAC_CACHE_L1_SIZE_BITS(bs)) { 1937 *pResultOut = (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCount); 1938 bs->consumedBits += bitCount; 1939 bs->cache <<= bitCount; 1940 } else { 1941 /* Cannot shift by 32-bits, so need to do it differently. */ 1942 *pResultOut = (drflac_uint32)bs->cache; 1943 bs->consumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs); 1944 bs->cache = 0; 1945 } 1946 #endif 1947 1948 return DRFLAC_TRUE; 1949 } else { 1950 /* It straddles the cached data. It will never cover more than the next chunk. We just read the number in two parts and combine them. 
*/ 1951 drflac_uint32 bitCountHi = DRFLAC_CACHE_L1_BITS_REMAINING(bs); 1952 drflac_uint32 bitCountLo = bitCount - bitCountHi; 1953 drflac_uint32 resultHi = (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCountHi); 1954 1955 if (!drflac__reload_cache(bs)) { 1956 return DRFLAC_FALSE; 1957 } 1958 1959 *pResultOut = (resultHi << bitCountLo) | (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCountLo); 1960 bs->consumedBits += bitCountLo; 1961 bs->cache <<= bitCountLo; 1962 return DRFLAC_TRUE; 1963 } 1964 } 1965 1966 static drflac_bool32 drflac__read_int32(drflac_bs* bs, unsigned int bitCount, drflac_int32* pResult) 1967 { 1968 drflac_uint32 result; 1969 drflac_uint32 signbit; 1970 1971 DRFLAC_ASSERT(bs != NULL); 1972 DRFLAC_ASSERT(pResult != NULL); 1973 DRFLAC_ASSERT(bitCount > 0); 1974 DRFLAC_ASSERT(bitCount <= 32); 1975 1976 if (!drflac__read_uint32(bs, bitCount, &result)) { 1977 return DRFLAC_FALSE; 1978 } 1979 1980 signbit = ((result >> (bitCount-1)) & 0x01); 1981 result |= (~signbit + 1) << bitCount; 1982 1983 *pResult = (drflac_int32)result; 1984 return DRFLAC_TRUE; 1985 } 1986 1987 #ifdef DRFLAC_64BIT 1988 static drflac_bool32 drflac__read_uint64(drflac_bs* bs, unsigned int bitCount, drflac_uint64* pResultOut) 1989 { 1990 drflac_uint32 resultHi; 1991 drflac_uint32 resultLo; 1992 1993 DRFLAC_ASSERT(bitCount <= 64); 1994 DRFLAC_ASSERT(bitCount > 32); 1995 1996 if (!drflac__read_uint32(bs, bitCount - 32, &resultHi)) { 1997 return DRFLAC_FALSE; 1998 } 1999 2000 if (!drflac__read_uint32(bs, 32, &resultLo)) { 2001 return DRFLAC_FALSE; 2002 } 2003 2004 *pResultOut = (((drflac_uint64)resultHi) << 32) | ((drflac_uint64)resultLo); 2005 return DRFLAC_TRUE; 2006 } 2007 #endif 2008 2009 /* Function below is unused, but leaving it here in case I need to quickly add it again. 
*/ 2010 #if 0 2011 static drflac_bool32 drflac__read_int64(drflac_bs* bs, unsigned int bitCount, drflac_int64* pResultOut) 2012 { 2013 drflac_uint64 result; 2014 drflac_uint64 signbit; 2015 2016 DRFLAC_ASSERT(bitCount <= 64); 2017 2018 if (!drflac__read_uint64(bs, bitCount, &result)) { 2019 return DRFLAC_FALSE; 2020 } 2021 2022 signbit = ((result >> (bitCount-1)) & 0x01); 2023 result |= (~signbit + 1) << bitCount; 2024 2025 *pResultOut = (drflac_int64)result; 2026 return DRFLAC_TRUE; 2027 } 2028 #endif 2029 2030 static drflac_bool32 drflac__read_uint16(drflac_bs* bs, unsigned int bitCount, drflac_uint16* pResult) 2031 { 2032 drflac_uint32 result; 2033 2034 DRFLAC_ASSERT(bs != NULL); 2035 DRFLAC_ASSERT(pResult != NULL); 2036 DRFLAC_ASSERT(bitCount > 0); 2037 DRFLAC_ASSERT(bitCount <= 16); 2038 2039 if (!drflac__read_uint32(bs, bitCount, &result)) { 2040 return DRFLAC_FALSE; 2041 } 2042 2043 *pResult = (drflac_uint16)result; 2044 return DRFLAC_TRUE; 2045 } 2046 2047 #if 0 2048 static drflac_bool32 drflac__read_int16(drflac_bs* bs, unsigned int bitCount, drflac_int16* pResult) 2049 { 2050 drflac_int32 result; 2051 2052 DRFLAC_ASSERT(bs != NULL); 2053 DRFLAC_ASSERT(pResult != NULL); 2054 DRFLAC_ASSERT(bitCount > 0); 2055 DRFLAC_ASSERT(bitCount <= 16); 2056 2057 if (!drflac__read_int32(bs, bitCount, &result)) { 2058 return DRFLAC_FALSE; 2059 } 2060 2061 *pResult = (drflac_int16)result; 2062 return DRFLAC_TRUE; 2063 } 2064 #endif 2065 2066 static drflac_bool32 drflac__read_uint8(drflac_bs* bs, unsigned int bitCount, drflac_uint8* pResult) 2067 { 2068 drflac_uint32 result; 2069 2070 DRFLAC_ASSERT(bs != NULL); 2071 DRFLAC_ASSERT(pResult != NULL); 2072 DRFLAC_ASSERT(bitCount > 0); 2073 DRFLAC_ASSERT(bitCount <= 8); 2074 2075 if (!drflac__read_uint32(bs, bitCount, &result)) { 2076 return DRFLAC_FALSE; 2077 } 2078 2079 *pResult = (drflac_uint8)result; 2080 return DRFLAC_TRUE; 2081 } 2082 2083 static drflac_bool32 drflac__read_int8(drflac_bs* bs, unsigned int bitCount, 
drflac_int8* pResult) 2084 { 2085 drflac_int32 result; 2086 2087 DRFLAC_ASSERT(bs != NULL); 2088 DRFLAC_ASSERT(pResult != NULL); 2089 DRFLAC_ASSERT(bitCount > 0); 2090 DRFLAC_ASSERT(bitCount <= 8); 2091 2092 if (!drflac__read_int32(bs, bitCount, &result)) { 2093 return DRFLAC_FALSE; 2094 } 2095 2096 *pResult = (drflac_int8)result; 2097 return DRFLAC_TRUE; 2098 } 2099 2100 2101 static drflac_bool32 drflac__seek_bits(drflac_bs* bs, size_t bitsToSeek) 2102 { 2103 if (bitsToSeek <= DRFLAC_CACHE_L1_BITS_REMAINING(bs)) { 2104 bs->consumedBits += (drflac_uint32)bitsToSeek; 2105 bs->cache <<= bitsToSeek; 2106 return DRFLAC_TRUE; 2107 } else { 2108 /* It straddles the cached data. This function isn't called too frequently so I'm favouring simplicity here. */ 2109 bitsToSeek -= DRFLAC_CACHE_L1_BITS_REMAINING(bs); 2110 bs->consumedBits += DRFLAC_CACHE_L1_BITS_REMAINING(bs); 2111 bs->cache = 0; 2112 2113 /* Simple case. Seek in groups of the same number as bits that fit within a cache line. */ 2114 #ifdef DRFLAC_64BIT 2115 while (bitsToSeek >= DRFLAC_CACHE_L1_SIZE_BITS(bs)) { 2116 drflac_uint64 bin; 2117 if (!drflac__read_uint64(bs, DRFLAC_CACHE_L1_SIZE_BITS(bs), &bin)) { 2118 return DRFLAC_FALSE; 2119 } 2120 bitsToSeek -= DRFLAC_CACHE_L1_SIZE_BITS(bs); 2121 } 2122 #else 2123 while (bitsToSeek >= DRFLAC_CACHE_L1_SIZE_BITS(bs)) { 2124 drflac_uint32 bin; 2125 if (!drflac__read_uint32(bs, DRFLAC_CACHE_L1_SIZE_BITS(bs), &bin)) { 2126 return DRFLAC_FALSE; 2127 } 2128 bitsToSeek -= DRFLAC_CACHE_L1_SIZE_BITS(bs); 2129 } 2130 #endif 2131 2132 /* Whole leftover bytes. */ 2133 while (bitsToSeek >= 8) { 2134 drflac_uint8 bin; 2135 if (!drflac__read_uint8(bs, 8, &bin)) { 2136 return DRFLAC_FALSE; 2137 } 2138 bitsToSeek -= 8; 2139 } 2140 2141 /* Leftover bits. */ 2142 if (bitsToSeek > 0) { 2143 drflac_uint8 bin; 2144 if (!drflac__read_uint8(bs, (drflac_uint32)bitsToSeek, &bin)) { 2145 return DRFLAC_FALSE; 2146 } 2147 bitsToSeek = 0; /* <-- Necessary for the assert below. 
*/ 2148 } 2149 2150 DRFLAC_ASSERT(bitsToSeek == 0); 2151 return DRFLAC_TRUE; 2152 } 2153 } 2154 2155 2156 /* This function moves the bit streamer to the first bit after the sync code (bit 15 of the of the frame header). It will also update the CRC-16. */ 2157 static drflac_bool32 drflac__find_and_seek_to_next_sync_code(drflac_bs* bs) 2158 { 2159 DRFLAC_ASSERT(bs != NULL); 2160 2161 /* 2162 The sync code is always aligned to 8 bits. This is convenient for us because it means we can do byte-aligned movements. The first 2163 thing to do is align to the next byte. 2164 */ 2165 if (!drflac__seek_bits(bs, DRFLAC_CACHE_L1_BITS_REMAINING(bs) & 7)) { 2166 return DRFLAC_FALSE; 2167 } 2168 2169 for (;;) { 2170 drflac_uint8 hi; 2171 2172 #ifndef DR_FLAC_NO_CRC 2173 drflac__reset_crc16(bs); 2174 #endif 2175 2176 if (!drflac__read_uint8(bs, 8, &hi)) { 2177 return DRFLAC_FALSE; 2178 } 2179 2180 if (hi == 0xFF) { 2181 drflac_uint8 lo; 2182 if (!drflac__read_uint8(bs, 6, &lo)) { 2183 return DRFLAC_FALSE; 2184 } 2185 2186 if (lo == 0x3E) { 2187 return DRFLAC_TRUE; 2188 } else { 2189 if (!drflac__seek_bits(bs, DRFLAC_CACHE_L1_BITS_REMAINING(bs) & 7)) { 2190 return DRFLAC_FALSE; 2191 } 2192 } 2193 } 2194 } 2195 2196 /* Should never get here. 
*/ 2197 /*return DRFLAC_FALSE;*/ 2198 } 2199 2200 2201 #if defined(DRFLAC_HAS_LZCNT_INTRINSIC) 2202 #define DRFLAC_IMPLEMENT_CLZ_LZCNT 2203 #endif 2204 #if defined(_MSC_VER) && _MSC_VER >= 1400 && (defined(DRFLAC_X64) || defined(DRFLAC_X86)) 2205 #define DRFLAC_IMPLEMENT_CLZ_MSVC 2206 #endif 2207 2208 static DRFLAC_INLINE drflac_uint32 drflac__clz_software(drflac_cache_t x) 2209 { 2210 drflac_uint32 n; 2211 static drflac_uint32 clz_table_4[] = { 2212 0, 2213 4, 2214 3, 3, 2215 2, 2, 2, 2, 2216 1, 1, 1, 1, 1, 1, 1, 1 2217 }; 2218 2219 if (x == 0) { 2220 return sizeof(x)*8; 2221 } 2222 2223 n = clz_table_4[x >> (sizeof(x)*8 - 4)]; 2224 if (n == 0) { 2225 #ifdef DRFLAC_64BIT 2226 if ((x & ((drflac_uint64)0xFFFFFFFF << 32)) == 0) { n = 32; x <<= 32; } 2227 if ((x & ((drflac_uint64)0xFFFF0000 << 32)) == 0) { n += 16; x <<= 16; } 2228 if ((x & ((drflac_uint64)0xFF000000 << 32)) == 0) { n += 8; x <<= 8; } 2229 if ((x & ((drflac_uint64)0xF0000000 << 32)) == 0) { n += 4; x <<= 4; } 2230 #else 2231 if ((x & 0xFFFF0000) == 0) { n = 16; x <<= 16; } 2232 if ((x & 0xFF000000) == 0) { n += 8; x <<= 8; } 2233 if ((x & 0xF0000000) == 0) { n += 4; x <<= 4; } 2234 #endif 2235 n += clz_table_4[x >> (sizeof(x)*8 - 4)]; 2236 } 2237 2238 return n - 1; 2239 } 2240 2241 #ifdef DRFLAC_IMPLEMENT_CLZ_LZCNT 2242 static DRFLAC_INLINE drflac_bool32 drflac__is_lzcnt_supported() 2243 { 2244 /* Fast compile time check for ARM. */ 2245 #if defined(DRFLAC_HAS_LZCNT_INTRINSIC) && defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 5) 2246 return DRFLAC_TRUE; 2247 #else 2248 /* If the compiler itself does not support the intrinsic then we'll need to return false. 
#ifdef DRFLAC_IMPLEMENT_CLZ_LZCNT
/*
Count-leading-zeros by way of a compiler intrinsic or inline assembly. Which one gets used is decided at compile time by
the compiler and the target architecture. Compilation fails on compilers that provide neither.
*/
static DRFLAC_INLINE drflac_uint32 drflac__clz_lzcnt(drflac_cache_t x)
{
#if defined(_MSC_VER) && !defined(__clang__)
    /* MSVC intrinsics. */
    #ifdef DRFLAC_64BIT
        return (drflac_uint32)__lzcnt64(x);
    #else
        return (drflac_uint32)__lzcnt(x);
    #endif
#else
    #if defined(__GNUC__) || defined(__clang__)
        #if 0   /* defined(DRFLAC_X64) --- workaround for Clang bug */
            {
                drflac_uint64 leadingZeros;
                __asm__ __volatile__ (
                    "lzcnt{ %1, %0| %0, %1}" : "=r"(leadingZeros) : "r"(x) : "cc"
                );

                return (drflac_uint32)leadingZeros;
            }
        #elif 0 /* defined(DRFLAC_X86) --- workaround for Clang bug */
            {
                drflac_uint32 leadingZeros;
                __asm__ __volatile__ (
                    "lzcnt{l %1, %0| %0, %1}" : "=r"(leadingZeros) : "r"(x) : "cc"
                );

                return leadingZeros;
            }
        #elif defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 5) && !defined(DRFLAC_64BIT)   /* <-- I haven't tested 64-bit inline assembly, so only enabling this for the 32-bit build for now. */
            {
                unsigned int leadingZeros;
                __asm__ __volatile__ (
                #if defined(DRFLAC_64BIT)
                    "clz %w[out], %w[in]" : [out]"=r"(leadingZeros) : [in]"r"(x)   /* <-- This is untested. If someone in the community could test this, that would be appreciated! */
                #else
                    "clz %[out], %[in]" : [out]"=r"(leadingZeros) : [in]"r"(x)
                #endif
                );

                return leadingZeros;
            }
        #else
            /* Zero is handled up front rather than being passed to the builtins. */
            if (x == 0) {
                return sizeof(x)*8;
            }
            #ifdef DRFLAC_64BIT
                return (drflac_uint32)__builtin_clzll((drflac_uint64)x);
            #else
                return (drflac_uint32)__builtin_clzl((drflac_uint32)x);
            #endif
        #endif
    #else
        /* Unsupported compiler. */
        #error "This compiler does not support the lzcnt intrinsic."
    #endif
#endif
}
#endif
*/ 2318 2319 static DRFLAC_INLINE drflac_uint32 drflac__clz_msvc(drflac_cache_t x) 2320 { 2321 drflac_uint32 n; 2322 2323 if (x == 0) { 2324 return sizeof(x)*8; 2325 } 2326 2327 #ifdef DRFLAC_64BIT 2328 _BitScanReverse64((unsigned long*)&n, x); 2329 #else 2330 _BitScanReverse((unsigned long*)&n, x); 2331 #endif 2332 return sizeof(x)*8 - n - 1; 2333 } 2334 #endif 2335 2336 static DRFLAC_INLINE drflac_uint32 drflac__clz(drflac_cache_t x) 2337 { 2338 #ifdef DRFLAC_IMPLEMENT_CLZ_LZCNT 2339 if (drflac__is_lzcnt_supported()) { 2340 return drflac__clz_lzcnt(x); 2341 } else 2342 #endif 2343 { 2344 #ifdef DRFLAC_IMPLEMENT_CLZ_MSVC 2345 return drflac__clz_msvc(x); 2346 #else 2347 return drflac__clz_software(x); 2348 #endif 2349 } 2350 } 2351 2352 2353 static DRFLAC_INLINE drflac_bool32 drflac__seek_past_next_set_bit(drflac_bs* bs, unsigned int* pOffsetOut) 2354 { 2355 drflac_uint32 zeroCounter = 0; 2356 drflac_uint32 setBitOffsetPlus1; 2357 2358 while (bs->cache == 0) { 2359 zeroCounter += (drflac_uint32)DRFLAC_CACHE_L1_BITS_REMAINING(bs); 2360 if (!drflac__reload_cache(bs)) { 2361 return DRFLAC_FALSE; 2362 } 2363 } 2364 2365 setBitOffsetPlus1 = drflac__clz(bs->cache); 2366 setBitOffsetPlus1 += 1; 2367 2368 bs->consumedBits += setBitOffsetPlus1; 2369 bs->cache <<= setBitOffsetPlus1; 2370 2371 *pOffsetOut = zeroCounter + setBitOffsetPlus1 - 1; 2372 return DRFLAC_TRUE; 2373 } 2374 2375 2376 2377 static drflac_bool32 drflac__seek_to_byte(drflac_bs* bs, drflac_uint64 offsetFromStart) 2378 { 2379 DRFLAC_ASSERT(bs != NULL); 2380 DRFLAC_ASSERT(offsetFromStart > 0); 2381 2382 /* 2383 Seeking from the start is not quite as trivial as it sounds because the onSeek callback takes a signed 32-bit integer (which 2384 is intentional because it simplifies the implementation of the onSeek callbacks), however offsetFromStart is unsigned 64-bit. 2385 To resolve we just need to do an initial seek from the start, and then a series of offset seeks to make up the remainder. 
2386 */ 2387 if (offsetFromStart > 0x7FFFFFFF) { 2388 drflac_uint64 bytesRemaining = offsetFromStart; 2389 if (!bs->onSeek(bs->pUserData, 0x7FFFFFFF, drflac_seek_origin_start)) { 2390 return DRFLAC_FALSE; 2391 } 2392 bytesRemaining -= 0x7FFFFFFF; 2393 2394 while (bytesRemaining > 0x7FFFFFFF) { 2395 if (!bs->onSeek(bs->pUserData, 0x7FFFFFFF, drflac_seek_origin_current)) { 2396 return DRFLAC_FALSE; 2397 } 2398 bytesRemaining -= 0x7FFFFFFF; 2399 } 2400 2401 if (bytesRemaining > 0) { 2402 if (!bs->onSeek(bs->pUserData, (int)bytesRemaining, drflac_seek_origin_current)) { 2403 return DRFLAC_FALSE; 2404 } 2405 } 2406 } else { 2407 if (!bs->onSeek(bs->pUserData, (int)offsetFromStart, drflac_seek_origin_start)) { 2408 return DRFLAC_FALSE; 2409 } 2410 } 2411 2412 /* The cache should be reset to force a reload of fresh data from the client. */ 2413 drflac__reset_cache(bs); 2414 return DRFLAC_TRUE; 2415 } 2416 2417 2418 static drflac_result drflac__read_utf8_coded_number(drflac_bs* bs, drflac_uint64* pNumberOut, drflac_uint8* pCRCOut) 2419 { 2420 drflac_uint8 crc; 2421 drflac_uint64 result; 2422 unsigned char utf8[7] = {0}; 2423 int byteCount; 2424 int i; 2425 2426 DRFLAC_ASSERT(bs != NULL); 2427 DRFLAC_ASSERT(pNumberOut != NULL); 2428 DRFLAC_ASSERT(pCRCOut != NULL); 2429 2430 crc = *pCRCOut; 2431 2432 if (!drflac__read_uint8(bs, 8, utf8)) { 2433 *pNumberOut = 0; 2434 return DRFLAC_END_OF_STREAM; 2435 } 2436 crc = drflac_crc8(crc, utf8[0], 8); 2437 2438 if ((utf8[0] & 0x80) == 0) { 2439 *pNumberOut = utf8[0]; 2440 *pCRCOut = crc; 2441 return DRFLAC_SUCCESS; 2442 } 2443 2444 /*byteCount = 1;*/ 2445 if ((utf8[0] & 0xE0) == 0xC0) { 2446 byteCount = 2; 2447 } else if ((utf8[0] & 0xF0) == 0xE0) { 2448 byteCount = 3; 2449 } else if ((utf8[0] & 0xF8) == 0xF0) { 2450 byteCount = 4; 2451 } else if ((utf8[0] & 0xFC) == 0xF8) { 2452 byteCount = 5; 2453 } else if ((utf8[0] & 0xFE) == 0xFC) { 2454 byteCount = 6; 2455 } else if ((utf8[0] & 0xFF) == 0xFE) { 2456 byteCount = 7; 2457 } else { 
2458 *pNumberOut = 0; 2459 return DRFLAC_CRC_MISMATCH; /* Bad UTF-8 encoding. */ 2460 } 2461 2462 /* Read extra bytes. */ 2463 DRFLAC_ASSERT(byteCount > 1); 2464 2465 result = (drflac_uint64)(utf8[0] & (0xFF >> (byteCount + 1))); 2466 for (i = 1; i < byteCount; ++i) { 2467 if (!drflac__read_uint8(bs, 8, utf8 + i)) { 2468 *pNumberOut = 0; 2469 return DRFLAC_END_OF_STREAM; 2470 } 2471 crc = drflac_crc8(crc, utf8[i], 8); 2472 2473 result = (result << 6) | (utf8[i] & 0x3F); 2474 } 2475 2476 *pNumberOut = result; 2477 *pCRCOut = crc; 2478 return DRFLAC_SUCCESS; 2479 } 2480 2481 2482 2483 /* 2484 The next two functions are responsible for calculating the prediction. 2485 2486 When the bits per sample is >16 we need to use 64-bit integer arithmetic because otherwise we'll run out of precision. It's 2487 safe to assume this will be slower on 32-bit platforms so we use a more optimal solution when the bits per sample is <=16. 2488 */ 2489 static DRFLAC_INLINE drflac_int32 drflac__calculate_prediction_32(drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples) 2490 { 2491 drflac_int32 prediction = 0; 2492 2493 DRFLAC_ASSERT(order <= 32); 2494 2495 /* 32-bit version. */ 2496 2497 /* VC++ optimizes this to a single jmp. I've not yet verified this for other compilers. 
*/ 2498 switch (order) 2499 { 2500 case 32: prediction += coefficients[31] * pDecodedSamples[-32]; 2501 case 31: prediction += coefficients[30] * pDecodedSamples[-31]; 2502 case 30: prediction += coefficients[29] * pDecodedSamples[-30]; 2503 case 29: prediction += coefficients[28] * pDecodedSamples[-29]; 2504 case 28: prediction += coefficients[27] * pDecodedSamples[-28]; 2505 case 27: prediction += coefficients[26] * pDecodedSamples[-27]; 2506 case 26: prediction += coefficients[25] * pDecodedSamples[-26]; 2507 case 25: prediction += coefficients[24] * pDecodedSamples[-25]; 2508 case 24: prediction += coefficients[23] * pDecodedSamples[-24]; 2509 case 23: prediction += coefficients[22] * pDecodedSamples[-23]; 2510 case 22: prediction += coefficients[21] * pDecodedSamples[-22]; 2511 case 21: prediction += coefficients[20] * pDecodedSamples[-21]; 2512 case 20: prediction += coefficients[19] * pDecodedSamples[-20]; 2513 case 19: prediction += coefficients[18] * pDecodedSamples[-19]; 2514 case 18: prediction += coefficients[17] * pDecodedSamples[-18]; 2515 case 17: prediction += coefficients[16] * pDecodedSamples[-17]; 2516 case 16: prediction += coefficients[15] * pDecodedSamples[-16]; 2517 case 15: prediction += coefficients[14] * pDecodedSamples[-15]; 2518 case 14: prediction += coefficients[13] * pDecodedSamples[-14]; 2519 case 13: prediction += coefficients[12] * pDecodedSamples[-13]; 2520 case 12: prediction += coefficients[11] * pDecodedSamples[-12]; 2521 case 11: prediction += coefficients[10] * pDecodedSamples[-11]; 2522 case 10: prediction += coefficients[ 9] * pDecodedSamples[-10]; 2523 case 9: prediction += coefficients[ 8] * pDecodedSamples[- 9]; 2524 case 8: prediction += coefficients[ 7] * pDecodedSamples[- 8]; 2525 case 7: prediction += coefficients[ 6] * pDecodedSamples[- 7]; 2526 case 6: prediction += coefficients[ 5] * pDecodedSamples[- 6]; 2527 case 5: prediction += coefficients[ 4] * pDecodedSamples[- 5]; 2528 case 4: prediction += coefficients[ 
/*
Calculates the prediction for the sample located at pDecodedSamples[0] using a 64-bit accumulator.

    order           - Number of predictor taps. Asserted to be <= 32. An order of 0 produces a prediction of 0.
    shift           - Right shift applied to the accumulated sum of products.
    coefficients    - Predictor coefficients. Elements [0, order) are read.
    pDecodedSamples - Points at the sample being predicted. Elements [-order, -1] are read.

Returns the accumulated sum of coefficients[k] * pDecodedSamples[-k-1], shifted right by `shift` and truncated to 32 bits.

Exactly one of the two implementations below is compiled, selected by DRFLAC_64BIT. Both compute the same sum; they
differ only in how the loop is unrolled.
*/
static DRFLAC_INLINE drflac_int32 drflac__calculate_prediction_64(drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples)
{
    drflac_int64 prediction;

    DRFLAC_ASSERT(order <= 32);

    /* 64-bit version. */

    /* This method is faster on the 32-bit build when compiling with VC++. See note below. */
#ifndef DRFLAC_64BIT
    /*
    Hand-unrolled paths for orders 1 through 12, with a generic loop for everything else (including order 0).
    NOTE(review): the branches are not in numeric order; presumably they are ordered by expected frequency - confirm.
    */
    if (order == 8)
    {
        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
        prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4];
        prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5];
        prediction += coefficients[5] * (drflac_int64)pDecodedSamples[-6];
        prediction += coefficients[6] * (drflac_int64)pDecodedSamples[-7];
        prediction += coefficients[7] * (drflac_int64)pDecodedSamples[-8];
    }
    else if (order == 7)
    {
        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
        prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4];
        prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5];
        prediction += coefficients[5] * (drflac_int64)pDecodedSamples[-6];
        prediction += coefficients[6] * (drflac_int64)pDecodedSamples[-7];
    }
    else if (order == 3)
    {
        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
    }
    else if (order == 6)
    {
        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
        prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4];
        prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5];
        prediction += coefficients[5] * (drflac_int64)pDecodedSamples[-6];
    }
    else if (order == 5)
    {
        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
        prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4];
        prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5];
    }
    else if (order == 4)
    {
        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
        prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4];
    }
    else if (order == 12)
    {
        prediction  = coefficients[0]  * (drflac_int64)pDecodedSamples[-1];
        prediction += coefficients[1]  * (drflac_int64)pDecodedSamples[-2];
        prediction += coefficients[2]  * (drflac_int64)pDecodedSamples[-3];
        prediction += coefficients[3]  * (drflac_int64)pDecodedSamples[-4];
        prediction += coefficients[4]  * (drflac_int64)pDecodedSamples[-5];
        prediction += coefficients[5]  * (drflac_int64)pDecodedSamples[-6];
        prediction += coefficients[6]  * (drflac_int64)pDecodedSamples[-7];
        prediction += coefficients[7]  * (drflac_int64)pDecodedSamples[-8];
        prediction += coefficients[8]  * (drflac_int64)pDecodedSamples[-9];
        prediction += coefficients[9]  * (drflac_int64)pDecodedSamples[-10];
        prediction += coefficients[10] * (drflac_int64)pDecodedSamples[-11];
        prediction += coefficients[11] * (drflac_int64)pDecodedSamples[-12];
    }
    else if (order == 2)
    {
        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
    }
    else if (order == 1)
    {
        prediction = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
    }
    else if (order == 10)
    {
        prediction  = coefficients[0]  * (drflac_int64)pDecodedSamples[-1];
        prediction += coefficients[1]  * (drflac_int64)pDecodedSamples[-2];
        prediction += coefficients[2]  * (drflac_int64)pDecodedSamples[-3];
        prediction += coefficients[3]  * (drflac_int64)pDecodedSamples[-4];
        prediction += coefficients[4]  * (drflac_int64)pDecodedSamples[-5];
        prediction += coefficients[5]  * (drflac_int64)pDecodedSamples[-6];
        prediction += coefficients[6]  * (drflac_int64)pDecodedSamples[-7];
        prediction += coefficients[7]  * (drflac_int64)pDecodedSamples[-8];
        prediction += coefficients[8]  * (drflac_int64)pDecodedSamples[-9];
        prediction += coefficients[9]  * (drflac_int64)pDecodedSamples[-10];
    }
    else if (order == 9)
    {
        prediction  = coefficients[0]  * (drflac_int64)pDecodedSamples[-1];
        prediction += coefficients[1]  * (drflac_int64)pDecodedSamples[-2];
        prediction += coefficients[2]  * (drflac_int64)pDecodedSamples[-3];
        prediction += coefficients[3]  * (drflac_int64)pDecodedSamples[-4];
        prediction += coefficients[4]  * (drflac_int64)pDecodedSamples[-5];
        prediction += coefficients[5]  * (drflac_int64)pDecodedSamples[-6];
        prediction += coefficients[6]  * (drflac_int64)pDecodedSamples[-7];
        prediction += coefficients[7]  * (drflac_int64)pDecodedSamples[-8];
        prediction += coefficients[8]  * (drflac_int64)pDecodedSamples[-9];
    }
    else if (order == 11)
    {
        prediction  = coefficients[0]  * (drflac_int64)pDecodedSamples[-1];
        prediction += coefficients[1]  * (drflac_int64)pDecodedSamples[-2];
        prediction += coefficients[2]  * (drflac_int64)pDecodedSamples[-3];
        prediction += coefficients[3]  * (drflac_int64)pDecodedSamples[-4];
        prediction += coefficients[4]  * (drflac_int64)pDecodedSamples[-5];
        prediction += coefficients[5]  * (drflac_int64)pDecodedSamples[-6];
        prediction += coefficients[6]  * (drflac_int64)pDecodedSamples[-7];
        prediction += coefficients[7]  * (drflac_int64)pDecodedSamples[-8];
        prediction += coefficients[8]  * (drflac_int64)pDecodedSamples[-9];
        prediction += coefficients[9]  * (drflac_int64)pDecodedSamples[-10];
        prediction += coefficients[10] * (drflac_int64)pDecodedSamples[-11];
    }
    else
    {
        /* Generic path: order 0 and orders 13 through 32. */
        int j;

        prediction = 0;
        for (j = 0; j < (int)order; ++j) {
            prediction += coefficients[j] * (drflac_int64)pDecodedSamples[-j-1];
        }
    }
#endif

    /*
    VC++ optimizes this to a single jmp instruction, but only the 64-bit build. The 32-bit build generates less efficient code for some
    reason. The ugly version above is faster so we'll just switch between the two depending on the target platform.
    */
#ifdef DRFLAC_64BIT
    prediction = 0;
    /* Every case intentionally falls through so that entering at `order` accumulates exactly `order` taps. */
    switch (order)
    {
    case 32: prediction += coefficients[31] * (drflac_int64)pDecodedSamples[-32];
    case 31: prediction += coefficients[30] * (drflac_int64)pDecodedSamples[-31];
    case 30: prediction += coefficients[29] * (drflac_int64)pDecodedSamples[-30];
    case 29: prediction += coefficients[28] * (drflac_int64)pDecodedSamples[-29];
    case 28: prediction += coefficients[27] * (drflac_int64)pDecodedSamples[-28];
    case 27: prediction += coefficients[26] * (drflac_int64)pDecodedSamples[-27];
    case 26: prediction += coefficients[25] * (drflac_int64)pDecodedSamples[-26];
    case 25: prediction += coefficients[24] * (drflac_int64)pDecodedSamples[-25];
    case 24: prediction += coefficients[23] * (drflac_int64)pDecodedSamples[-24];
    case 23: prediction += coefficients[22] * (drflac_int64)pDecodedSamples[-23];
    case 22: prediction += coefficients[21] * (drflac_int64)pDecodedSamples[-22];
    case 21: prediction += coefficients[20] * (drflac_int64)pDecodedSamples[-21];
    case 20: prediction += coefficients[19] * (drflac_int64)pDecodedSamples[-20];
    case 19: prediction += coefficients[18] * (drflac_int64)pDecodedSamples[-19];
    case 18: prediction += coefficients[17] * (drflac_int64)pDecodedSamples[-18];
    case 17: prediction += coefficients[16] * (drflac_int64)pDecodedSamples[-17];
    case 16: prediction += coefficients[15] * (drflac_int64)pDecodedSamples[-16];
    case 15: prediction += coefficients[14] * (drflac_int64)pDecodedSamples[-15];
    case 14: prediction += coefficients[13] * (drflac_int64)pDecodedSamples[-14];
    case 13: prediction += coefficients[12] * (drflac_int64)pDecodedSamples[-13];
    case 12: prediction += coefficients[11] * (drflac_int64)pDecodedSamples[-12];
    case 11: prediction += coefficients[10] * (drflac_int64)pDecodedSamples[-11];
    case 10: prediction += coefficients[ 9] * (drflac_int64)pDecodedSamples[-10];
    case  9: prediction += coefficients[ 8] * (drflac_int64)pDecodedSamples[- 9];
    case  8: prediction += coefficients[ 7] * (drflac_int64)pDecodedSamples[- 8];
    case  7: prediction += coefficients[ 6] * (drflac_int64)pDecodedSamples[- 7];
    case  6: prediction += coefficients[ 5] * (drflac_int64)pDecodedSamples[- 6];
    case  5: prediction += coefficients[ 4] * (drflac_int64)pDecodedSamples[- 5];
    case  4: prediction += coefficients[ 3] * (drflac_int64)pDecodedSamples[- 4];
    case  3: prediction += coefficients[ 2] * (drflac_int64)pDecodedSamples[- 3];
    case  2: prediction += coefficients[ 1] * (drflac_int64)pDecodedSamples[- 2];
    case  1: prediction += coefficients[ 0] * (drflac_int64)pDecodedSamples[- 1];
    }
#endif

    /* The shifted 64-bit sum is truncated to 32 bits on return. */
    return (drflac_int32)(prediction >> shift);
}
#if 0
/*
Reference decoder for Rice coded residuals followed by sample reconstruction. Compiled out; kept purely as a readable
description of the algorithm and not tuned for speed.

For each of the `count` output samples:
    1) Zero bits are counted one at a time until a set bit is read (the unary part).
    2) `riceParam` further bits are read (the binary part). Nothing is read when riceParam is 0.
    3) The two parts are merged and the low bit is unfolded into the sign.
    4) The prediction computed from the previously written samples is added and the result stored in pSamplesOut[i].

Returns DRFLAC_FALSE as soon as a bit read fails, DRFLAC_TRUE otherwise.

NOTE(review): the 64-bit prediction is selected with ">= 32" here whereas drflac__decode_samples_with_residual__rice__scalar()
uses "> 32" - confirm which threshold is intended.
*/
static drflac_bool32 drflac__decode_samples_with_residual__rice__reference(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
{
    drflac_uint32 iSample;

    DRFLAC_ASSERT(bs != NULL);
    DRFLAC_ASSERT(count > 0);
    DRFLAC_ASSERT(pSamplesOut != NULL);

    for (iSample = 0; iSample < count; iSample += 1) {
        drflac_uint32 unaryCount = 0;
        drflac_uint32 residual;
        drflac_int32 predicted;
        drflac_uint8 bit;

        /* Unary part: count zero bits up to (and consuming) the terminating set bit. */
        for (;;) {
            if (!drflac__read_uint8(bs, 1, &bit)) {
                return DRFLAC_FALSE;
            }
            if (bit != 0) {
                break;
            }
            unaryCount += 1;
        }

        /* Binary part. */
        if (riceParam == 0) {
            residual = 0;
        } else if (!drflac__read_uint32(bs, riceParam, &residual)) {
            return DRFLAC_FALSE;
        }

        /* Merge the two parts, then unfold the low bit into the sign. */
        residual |= (unaryCount << riceParam);
        residual = (residual & 0x01) ? ~(residual >> 1) : (residual >> 1);

        /* Add the prediction formed from the samples already written before this one. */
        if (bitsPerSample+shift >= 32) {
            predicted = drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + iSample);
        } else {
            predicted = drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + iSample);
        }

        pSamplesOut[iSample] = residual + predicted;
    }

    return DRFLAC_TRUE;
}
#endif
#if 0
/*
Reference reader for the two parts of a single Rice coded value. Compiled out.

    pZeroCounterOut   - Receives the number of zero bits read before the first set bit.
    pRiceParamPartOut - Receives the `riceParam` bits that follow the set bit, or 0 when riceParam is 0.

Returns DRFLAC_FALSE if a bit read fails, in which case neither output is written.
*/
static drflac_bool32 drflac__read_rice_parts__reference(drflac_bs* bs, drflac_uint8 riceParam, drflac_uint32* pZeroCounterOut, drflac_uint32* pRiceParamPartOut)
{
    drflac_uint32 unaryCount = 0;
    drflac_uint32 binaryPart;
    drflac_uint8 bit;

    /* Unary part: one bit at a time until a set bit shows up. */
    for (;;) {
        if (!drflac__read_uint8(bs, 1, &bit)) {
            return DRFLAC_FALSE;
        }
        if (bit != 0) {
            break;
        }
        unaryCount += 1;
    }

    /* Binary part. Nothing is read from the stream when riceParam is 0. */
    if (riceParam == 0) {
        binaryPart = 0;
    } else if (!drflac__read_uint32(bs, riceParam, &binaryPart)) {
        return DRFLAC_FALSE;
    }

    pZeroCounterOut[0]   = unaryCount;
    pRiceParamPartOut[0] = binaryPart;

    return DRFLAC_TRUE;
}
#endif
#if 0
/*
Cache based reader for the two parts of a single Rice coded value. Compiled out.

    pZeroCounterOut   - Receives the number of zero bits preceding the first set bit.
    pRiceParamPartOut - Receives the bits extracted after that set bit.

riceParam is asserted to be greater than 0. Returns DRFLAC_FALSE if the cache cannot be reloaded, in which case the
outputs are not written.
*/
static DRFLAC_INLINE drflac_bool32 drflac__read_rice_parts(drflac_bs* bs, drflac_uint8 riceParam, drflac_uint32* pZeroCounterOut, drflac_uint32* pRiceParamPartOut)
{
    drflac_cache_t riceParamMask;
    drflac_uint32 zeroCounter;
    drflac_uint32 setBitOffsetPlus1;
    drflac_uint32 riceParamPart;
    drflac_uint32 riceLength;

    DRFLAC_ASSERT(riceParam > 0);   /* <-- riceParam should never be 0. drflac__read_rice_parts__param_equals_zero() should be used instead for this case. */

    riceParamMask = DRFLAC_CACHE_L1_SELECTION_MASK(riceParam);

    /* A cache of 0 holds no set bit: credit everything that remains to the zero counter and reload. */
    zeroCounter = 0;
    while (bs->cache == 0) {
        zeroCounter += (drflac_uint32)DRFLAC_CACHE_L1_BITS_REMAINING(bs);
        if (!drflac__reload_cache(bs)) {
            return DRFLAC_FALSE;
        }
    }

    /* The cache is non-zero here. drflac__clz() presumably returns the number of leading zero bits - confirm. */
    setBitOffsetPlus1 = drflac__clz(bs->cache);
    zeroCounter += setBitOffsetPlus1;
    setBitOffsetPlus1 += 1;     /* Also step over the set bit itself. */

    /* Total number of bits this value occupies from the current cache position. */
    riceLength = setBitOffsetPlus1 + riceParam;
    if (riceLength < DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
        /* Everything is available in the current cache line. */
        riceParamPart = (drflac_uint32)((bs->cache & (riceParamMask >> setBitOffsetPlus1)) >> DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, riceLength));

        bs->consumedBits += riceLength;
        bs->cache <<= riceLength;
    } else {
        drflac_uint32 bitCountLo;
        drflac_cache_t resultHi;

        bs->consumedBits += riceLength;
        bs->cache <<= setBitOffsetPlus1 & (DRFLAC_CACHE_L1_SIZE_BITS(bs)-1);    /* <-- Equivalent to "if (setBitOffsetPlus1 < DRFLAC_CACHE_L1_SIZE_BITS(bs)) { bs->cache <<= setBitOffsetPlus1; }" */

        /* It straddles the cached data. It will never cover more than the next chunk. We just read the number in two parts and combine them. */
        bitCountLo = bs->consumedBits - DRFLAC_CACHE_L1_SIZE_BITS(bs);
        resultHi = DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, riceParam);  /* <-- Use DRFLAC_CACHE_L1_SELECT_AND_SHIFT_SAFE() if ever this function allows riceParam=0. */

        if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
            /* Fast path. The next line is taken straight from the L2 cache. */
#ifndef DR_FLAC_NO_CRC
            drflac__update_crc16(bs);
#endif
            bs->cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]);
            bs->consumedBits = 0;
#ifndef DR_FLAC_NO_CRC
            bs->crc16Cache = bs->cache;
#endif
        } else {
            /* Slow path. We need to fetch more data from the client. */
            if (!drflac__reload_cache(bs)) {
                return DRFLAC_FALSE;
            }
        }

        /* Combine the part taken from the old line with the part at the head of the new line. */
        riceParamPart = (drflac_uint32)(resultHi | DRFLAC_CACHE_L1_SELECT_AND_SHIFT_SAFE(bs, bitCountLo));

        bs->consumedBits += bitCountLo;
        bs->cache <<= bitCountLo;
    }

    pZeroCounterOut[0] = zeroCounter;
    pRiceParamPartOut[0] = riceParamPart;

    return DRFLAC_TRUE;
}
#endif
/*
Reads the two parts of a single Rice coded value from the bit stream.

    pZeroCounterOut   - Receives the number of zero bits preceding the first set bit.
    pRiceParamPartOut - Receives the next riceParam+1 bits starting at that set bit. The set bit is included, so the
                        caller is expected to mask the value down to riceParam bits.

Returns DRFLAC_FALSE if the cache cannot be reloaded. pZeroCounterOut[0] may already have been written at that point,
and the working copies of the cache state are not written back to `bs`.
*/
static DRFLAC_INLINE drflac_bool32 drflac__read_rice_parts_x1(drflac_bs* bs, drflac_uint8 riceParam, drflac_uint32* pZeroCounterOut, drflac_uint32* pRiceParamPartOut)
{
    drflac_uint32  riceParamPlus1 = riceParam + 1;
    /*drflac_cache_t riceParamPlus1Mask  = DRFLAC_CACHE_L1_SELECTION_MASK(riceParamPlus1);*/
    drflac_uint32  riceParamPlus1Shift = DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, riceParamPlus1);
    /* Largest consumed-bit count at which riceParam+1 bits still fit in the current cache line. */
    drflac_uint32  riceParamPlus1MaxConsumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs) - riceParamPlus1;

    /*
    The idea here is to use local variables for the cache in an attempt to encourage the compiler to store them in registers. I have
    no idea how this will work in practice...
    */
    drflac_cache_t bs_cache = bs->cache;
    drflac_uint32  bs_consumedBits = bs->consumedBits;

    /*
    The first thing to do is find the first set bit. Most likely a bit will be set in the current cache line. A result equal to
    the width of the cache in bits is treated as "no bit set".
    */
    drflac_uint32  lzcount = drflac__clz(bs_cache);
    if (lzcount < sizeof(bs_cache)*8) {
        pZeroCounterOut[0] = lzcount;

        /*
        It is most likely that the riceParam part (which comes after the zero counter) is also on this cache line. When extracting
        this, we include the set bit from the unary coded part because it simplifies cache management. This bit will be handled
        outside of this function at a higher level.

        The label below is also the re-entry point for the "no bits set" branch once it has found a line containing a set bit.
        */
    extract_rice_param_part:
        bs_cache       <<= lzcount;
        bs_consumedBits += lzcount;

        if (bs_consumedBits <= riceParamPlus1MaxConsumedBits) {
            /* Getting here means the rice parameter part is wholly contained within the current cache line. */
            pRiceParamPartOut[0] = (drflac_uint32)(bs_cache >> riceParamPlus1Shift);
            bs_cache       <<= riceParamPlus1;
            bs_consumedBits += riceParamPlus1;
        } else {
            drflac_uint32 riceParamPartHi;
            drflac_uint32 riceParamPartLo;
            drflac_uint32 riceParamPartLoBitCount;

            /*
            Getting here means the rice parameter part straddles the cache line. We need to read from the tail of the current cache
            line, reload the cache, and then combine it with the head of the next cache line.
            */

            /* Grab the high part of the rice parameter part. */
            riceParamPartHi = (drflac_uint32)(bs_cache >> riceParamPlus1Shift);

            /* Before reloading the cache we need to grab the size in bits of the low part. */
            riceParamPartLoBitCount = bs_consumedBits - riceParamPlus1MaxConsumedBits;
            DRFLAC_ASSERT(riceParamPartLoBitCount > 0 && riceParamPartLoBitCount < 32);

            /* Now reload the cache. */
            if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
            #ifndef DR_FLAC_NO_CRC
                drflac__update_crc16(bs);
            #endif
                bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]);
                bs_consumedBits = riceParamPartLoBitCount;  /* The low part is accounted for up front; the matching shift happens below. */
            #ifndef DR_FLAC_NO_CRC
                bs->crc16Cache = bs_cache;
            #endif
            } else {
                /* Slow path. We need to fetch more data from the client. */
                if (!drflac__reload_cache(bs)) {
                    return DRFLAC_FALSE;
                }

                bs_cache = bs->cache;
                bs_consumedBits = bs->consumedBits + riceParamPartLoBitCount;
            }

            /* We should now have enough information to construct the rice parameter part. */
            riceParamPartLo = (drflac_uint32)(bs_cache >> (DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, riceParamPartLoBitCount)));
            pRiceParamPartOut[0] = riceParamPartHi | riceParamPartLo;

            bs_cache <<= riceParamPartLoBitCount;
        }
    } else {
        /*
        Getting here means there are no bits set on the cache line. This is a less optimal case because we just wasted a call
        to drflac__clz() and we need to reload the cache.
        */
        /* Every unconsumed bit of the current line counts as a zero. */
        drflac_uint32 zeroCounter = (drflac_uint32)(DRFLAC_CACHE_L1_SIZE_BITS(bs) - bs_consumedBits);
        for (;;) {
            if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
            #ifndef DR_FLAC_NO_CRC
                drflac__update_crc16(bs);
            #endif
                bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]);
                bs_consumedBits = 0;
            #ifndef DR_FLAC_NO_CRC
                bs->crc16Cache = bs_cache;
            #endif
            } else {
                /* Slow path. We need to fetch more data from the client. */
                if (!drflac__reload_cache(bs)) {
                    return DRFLAC_FALSE;
                }

                bs_cache = bs->cache;
                bs_consumedBits = bs->consumedBits;
            }

            lzcount = drflac__clz(bs_cache);
            zeroCounter += lzcount;

            /* Stop as soon as a line containing a set bit has been loaded. */
            if (lzcount < sizeof(bs_cache)*8) {
                break;
            }
        }

        pZeroCounterOut[0] = zeroCounter;
        goto extract_rice_param_part;
    }

    /* Make sure the cache is restored at the end of it all. */
    bs->cache = bs_cache;
    bs->consumedBits = bs_consumedBits;

    return DRFLAC_TRUE;
}
/*
Skips over a single Rice coded value without producing any output. The zero run, the set bit that terminates it and the
riceParam bits that follow are consumed from the bit stream.

Returns DRFLAC_FALSE if the cache cannot be reloaded. In that case the working copies of the cache state are not written
back to `bs`.
*/
static DRFLAC_INLINE drflac_bool32 drflac__seek_rice_parts(drflac_bs* bs, drflac_uint8 riceParam)
{
    drflac_uint32  riceParamPlus1 = riceParam + 1;
    /* Largest consumed-bit count at which riceParam+1 bits still fit in the current cache line. */
    drflac_uint32  riceParamPlus1MaxConsumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs) - riceParamPlus1;

    /*
    The idea here is to use local variables for the cache in an attempt to encourage the compiler to store them in registers. I have
    no idea how this will work in practice...
    */
    drflac_cache_t bs_cache = bs->cache;
    drflac_uint32  bs_consumedBits = bs->consumedBits;

    /*
    The first thing to do is find the first set bit. Most likely a bit will be set in the current cache line. A result equal to
    the width of the cache in bits is treated as "no bit set".
    */
    drflac_uint32  lzcount = drflac__clz(bs_cache);
    if (lzcount < sizeof(bs_cache)*8) {
        /*
        It is most likely that the riceParam part (which comes after the zero counter) is also on this cache line. The set bit
        from the unary coded part is skipped together with the riceParam bits because it simplifies cache management.

        The label below is also the re-entry point for the "no bits set" branch once it has found a line containing a set bit.
        */
    extract_rice_param_part:
        bs_cache       <<= lzcount;
        bs_consumedBits += lzcount;

        if (bs_consumedBits <= riceParamPlus1MaxConsumedBits) {
            /* Getting here means the rice parameter part is wholly contained within the current cache line. */
            bs_cache       <<= riceParamPlus1;
            bs_consumedBits += riceParamPlus1;
        } else {
            /*
            Getting here means the rice parameter part straddles the cache line. We need to move on to the next cache line and
            then skip whatever part of the value sits at its head.
            */

            /* Before reloading the cache we need to grab the size in bits of the low part. */
            drflac_uint32 riceParamPartLoBitCount = bs_consumedBits - riceParamPlus1MaxConsumedBits;
            DRFLAC_ASSERT(riceParamPartLoBitCount > 0 && riceParamPartLoBitCount < 32);

            /* Now reload the cache. */
            if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
            #ifndef DR_FLAC_NO_CRC
                drflac__update_crc16(bs);
            #endif
                bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]);
                bs_consumedBits = riceParamPartLoBitCount;  /* The low part is accounted for up front; the matching shift happens below. */
            #ifndef DR_FLAC_NO_CRC
                bs->crc16Cache = bs_cache;
            #endif
            } else {
                /* Slow path. We need to fetch more data from the client. */
                if (!drflac__reload_cache(bs)) {
                    return DRFLAC_FALSE;
                }

                bs_cache = bs->cache;
                bs_consumedBits = bs->consumedBits + riceParamPartLoBitCount;
            }

            bs_cache <<= riceParamPartLoBitCount;
        }
    } else {
        /*
        Getting here means there are no bits set on the cache line. This is a less optimal case because we just wasted a call
        to drflac__clz() and we need to reload the cache.
        */
        for (;;) {
            if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
            #ifndef DR_FLAC_NO_CRC
                drflac__update_crc16(bs);
            #endif
                bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]);
                bs_consumedBits = 0;
            #ifndef DR_FLAC_NO_CRC
                bs->crc16Cache = bs_cache;
            #endif
            } else {
                /* Slow path. We need to fetch more data from the client. */
                if (!drflac__reload_cache(bs)) {
                    return DRFLAC_FALSE;
                }

                bs_cache = bs->cache;
                bs_consumedBits = bs->consumedBits;
            }

            /* Stop as soon as a line containing a set bit has been loaded. */
            lzcount = drflac__clz(bs_cache);
            if (lzcount < sizeof(bs_cache)*8) {
                break;
            }
        }

        goto extract_rice_param_part;
    }

    /* Make sure the cache is restored at the end of it all. */
    bs->cache = bs_cache;
    bs->consumedBits = bs_consumedBits;

    return DRFLAC_TRUE;
}
*/ 3077 if (!drflac__reload_cache(bs)) { 3078 return DRFLAC_FALSE; 3079 } 3080 3081 bs_cache = bs->cache; 3082 bs_consumedBits = bs->consumedBits; 3083 } 3084 3085 lzcount = drflac__clz(bs_cache); 3086 if (lzcount < sizeof(bs_cache)*8) { 3087 break; 3088 } 3089 } 3090 3091 goto extract_rice_param_part; 3092 } 3093 3094 /* Make sure the cache is restored at the end of it all. */ 3095 bs->cache = bs_cache; 3096 bs->consumedBits = bs_consumedBits; 3097 3098 return DRFLAC_TRUE; 3099 } 3100 3101 3102 static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar_zeroorder(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) 3103 { 3104 drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF}; 3105 drflac_uint32 zeroCountPart0; 3106 drflac_uint32 riceParamPart0; 3107 drflac_uint32 riceParamMask; 3108 drflac_uint32 i; 3109 3110 DRFLAC_ASSERT(bs != NULL); 3111 DRFLAC_ASSERT(count > 0); 3112 DRFLAC_ASSERT(pSamplesOut != NULL); 3113 3114 (void)bitsPerSample; 3115 (void)order; 3116 (void)shift; 3117 (void)coefficients; 3118 3119 riceParamMask = (drflac_uint32)~((~0UL) << riceParam); 3120 3121 i = 0; 3122 while (i < count) { 3123 /* Rice extraction. */ 3124 if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0)) { 3125 return DRFLAC_FALSE; 3126 } 3127 3128 /* Rice reconstruction. 
*/ 3129 riceParamPart0 &= riceParamMask; 3130 riceParamPart0 |= (zeroCountPart0 << riceParam); 3131 riceParamPart0 = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01]; 3132 3133 pSamplesOut[i] = riceParamPart0; 3134 3135 i += 1; 3136 } 3137 3138 return DRFLAC_TRUE; 3139 } 3140 3141 static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) 3142 { 3143 drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF}; 3144 drflac_uint32 zeroCountPart0; 3145 drflac_uint32 zeroCountPart1; 3146 drflac_uint32 zeroCountPart2; 3147 drflac_uint32 zeroCountPart3; 3148 drflac_uint32 riceParamPart0; 3149 drflac_uint32 riceParamPart1; 3150 drflac_uint32 riceParamPart2; 3151 drflac_uint32 riceParamPart3; 3152 drflac_uint32 riceParamMask; 3153 const drflac_int32* pSamplesOutEnd; 3154 drflac_uint32 i; 3155 3156 DRFLAC_ASSERT(bs != NULL); 3157 DRFLAC_ASSERT(count > 0); 3158 DRFLAC_ASSERT(pSamplesOut != NULL); 3159 3160 if (order == 0) { 3161 return drflac__decode_samples_with_residual__rice__scalar_zeroorder(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut); 3162 } 3163 3164 riceParamMask = (drflac_uint32)~((~0UL) << riceParam); 3165 pSamplesOutEnd = pSamplesOut + (count & ~3); 3166 3167 if (bitsPerSample+shift > 32) { 3168 while (pSamplesOut < pSamplesOutEnd) { 3169 /* 3170 Rice extraction. It's faster to do this one at a time against local variables than it is to use the x4 version 3171 against an array. Not sure why, but perhaps it's making more efficient use of registers? 
3172 */ 3173 if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0) || 3174 !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart1, &riceParamPart1) || 3175 !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart2, &riceParamPart2) || 3176 !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart3, &riceParamPart3)) { 3177 return DRFLAC_FALSE; 3178 } 3179 3180 riceParamPart0 &= riceParamMask; 3181 riceParamPart1 &= riceParamMask; 3182 riceParamPart2 &= riceParamMask; 3183 riceParamPart3 &= riceParamMask; 3184 3185 riceParamPart0 |= (zeroCountPart0 << riceParam); 3186 riceParamPart1 |= (zeroCountPart1 << riceParam); 3187 riceParamPart2 |= (zeroCountPart2 << riceParam); 3188 riceParamPart3 |= (zeroCountPart3 << riceParam); 3189 3190 riceParamPart0 = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01]; 3191 riceParamPart1 = (riceParamPart1 >> 1) ^ t[riceParamPart1 & 0x01]; 3192 riceParamPart2 = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01]; 3193 riceParamPart3 = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01]; 3194 3195 pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 0); 3196 pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 1); 3197 pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 2); 3198 pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 3); 3199 3200 pSamplesOut += 4; 3201 } 3202 } else { 3203 while (pSamplesOut < pSamplesOutEnd) { 3204 if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0) || 3205 !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart1, &riceParamPart1) || 3206 !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart2, &riceParamPart2) || 3207 !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart3, &riceParamPart3)) { 3208 return DRFLAC_FALSE; 3209 } 
3210 3211 riceParamPart0 &= riceParamMask; 3212 riceParamPart1 &= riceParamMask; 3213 riceParamPart2 &= riceParamMask; 3214 riceParamPart3 &= riceParamMask; 3215 3216 riceParamPart0 |= (zeroCountPart0 << riceParam); 3217 riceParamPart1 |= (zeroCountPart1 << riceParam); 3218 riceParamPart2 |= (zeroCountPart2 << riceParam); 3219 riceParamPart3 |= (zeroCountPart3 << riceParam); 3220 3221 riceParamPart0 = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01]; 3222 riceParamPart1 = (riceParamPart1 >> 1) ^ t[riceParamPart1 & 0x01]; 3223 riceParamPart2 = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01]; 3224 riceParamPart3 = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01]; 3225 3226 pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 0); 3227 pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 1); 3228 pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 2); 3229 pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 3); 3230 3231 pSamplesOut += 4; 3232 } 3233 } 3234 3235 i = (count & ~3); 3236 while (i < count) { 3237 /* Rice extraction. */ 3238 if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0)) { 3239 return DRFLAC_FALSE; 3240 } 3241 3242 /* Rice reconstruction. */ 3243 riceParamPart0 &= riceParamMask; 3244 riceParamPart0 |= (zeroCountPart0 << riceParam); 3245 riceParamPart0 = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01]; 3246 /*riceParamPart0 = (riceParamPart0 >> 1) ^ (~(riceParamPart0 & 0x01) + 1);*/ 3247 3248 /* Sample reconstruction. 
*/ 3249 if (bitsPerSample+shift > 32) { 3250 pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 0); 3251 } else { 3252 pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 0); 3253 } 3254 3255 i += 1; 3256 pSamplesOut += 1; 3257 } 3258 3259 return DRFLAC_TRUE; 3260 } 3261 3262 #if defined(DRFLAC_SUPPORT_SSE2) 3263 static DRFLAC_INLINE __m128i drflac__mm_packs_interleaved_epi32(__m128i a, __m128i b) 3264 { 3265 __m128i r; 3266 3267 /* Pack. */ 3268 r = _mm_packs_epi32(a, b); 3269 3270 /* a3a2 a1a0 b3b2 b1b0 -> a3a2 b3b2 a1a0 b1b0 */ 3271 r = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 1, 2, 0)); 3272 3273 /* a3a2 b3b2 a1a0 b1b0 -> a3b3 a2b2 a1b1 a0b0 */ 3274 r = _mm_shufflehi_epi16(r, _MM_SHUFFLE(3, 1, 2, 0)); 3275 r = _mm_shufflelo_epi16(r, _MM_SHUFFLE(3, 1, 2, 0)); 3276 3277 return r; 3278 } 3279 #endif 3280 3281 #if defined(DRFLAC_SUPPORT_SSE41) 3282 static DRFLAC_INLINE __m128i drflac__mm_not_si128(__m128i a) 3283 { 3284 return _mm_xor_si128(a, _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); 3285 } 3286 3287 static DRFLAC_INLINE __m128i drflac__mm_hadd_epi32(__m128i x) 3288 { 3289 __m128i x64 = _mm_add_epi32(x, _mm_shuffle_epi32(x, _MM_SHUFFLE(1, 0, 3, 2))); 3290 __m128i x32 = _mm_shufflelo_epi16(x64, _MM_SHUFFLE(1, 0, 3, 2)); 3291 return _mm_add_epi32(x64, x32); 3292 } 3293 3294 static DRFLAC_INLINE __m128i drflac__mm_hadd_epi64(__m128i x) 3295 { 3296 return _mm_add_epi64(x, _mm_shuffle_epi32(x, _MM_SHUFFLE(1, 0, 3, 2))); 3297 } 3298 3299 static DRFLAC_INLINE __m128i drflac__mm_srai_epi64(__m128i x, int count) 3300 { 3301 /* 3302 To simplify this we are assuming count < 32. This restriction allows us to work on a low side and a high side. The low side 3303 is shifted with zero bits, whereas the right side is shifted with sign bits. 
3304 */ 3305 __m128i lo = _mm_srli_epi64(x, count); 3306 __m128i hi = _mm_srai_epi32(x, count); 3307 3308 hi = _mm_and_si128(hi, _mm_set_epi32(0xFFFFFFFF, 0, 0xFFFFFFFF, 0)); /* The high part needs to have the low part cleared. */ 3309 3310 return _mm_or_si128(lo, hi); 3311 } 3312 3313 static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41_32(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) 3314 { 3315 int i; 3316 drflac_uint32 riceParamMask; 3317 drflac_int32* pDecodedSamples = pSamplesOut; 3318 drflac_int32* pDecodedSamplesEnd = pSamplesOut + (count & ~3); 3319 drflac_uint32 zeroCountParts0; 3320 drflac_uint32 zeroCountParts1; 3321 drflac_uint32 zeroCountParts2; 3322 drflac_uint32 zeroCountParts3; 3323 drflac_uint32 riceParamParts0; 3324 drflac_uint32 riceParamParts1; 3325 drflac_uint32 riceParamParts2; 3326 drflac_uint32 riceParamParts3; 3327 __m128i coefficients128_0; 3328 __m128i coefficients128_4; 3329 __m128i coefficients128_8; 3330 __m128i samples128_0; 3331 __m128i samples128_4; 3332 __m128i samples128_8; 3333 __m128i riceParamMask128; 3334 3335 const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF}; 3336 3337 riceParamMask = (drflac_uint32)~((~0UL) << riceParam); 3338 riceParamMask128 = _mm_set1_epi32(riceParamMask); 3339 3340 /* Pre-load. */ 3341 coefficients128_0 = _mm_setzero_si128(); 3342 coefficients128_4 = _mm_setzero_si128(); 3343 coefficients128_8 = _mm_setzero_si128(); 3344 3345 samples128_0 = _mm_setzero_si128(); 3346 samples128_4 = _mm_setzero_si128(); 3347 samples128_8 = _mm_setzero_si128(); 3348 3349 /* 3350 Pre-loading the coefficients and prior samples is annoying because we need to ensure we don't try reading more than 3351 what's available in the input buffers. It would be conenient to use a fall-through switch to do this, but this results 3352 in strict aliasing warnings with GCC. 
To work around this I'm just doing something hacky. This feels a bit convoluted 3353 so I think there's opportunity for this to be simplified. 3354 */ 3355 #if 1 3356 { 3357 int runningOrder = order; 3358 3359 /* 0 - 3. */ 3360 if (runningOrder >= 4) { 3361 coefficients128_0 = _mm_loadu_si128((const __m128i*)(coefficients + 0)); 3362 samples128_0 = _mm_loadu_si128((const __m128i*)(pSamplesOut - 4)); 3363 runningOrder -= 4; 3364 } else { 3365 switch (runningOrder) { 3366 case 3: coefficients128_0 = _mm_set_epi32(0, coefficients[2], coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], pSamplesOut[-3], 0); break; 3367 case 2: coefficients128_0 = _mm_set_epi32(0, 0, coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], 0, 0); break; 3368 case 1: coefficients128_0 = _mm_set_epi32(0, 0, 0, coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], 0, 0, 0); break; 3369 } 3370 runningOrder = 0; 3371 } 3372 3373 /* 4 - 7 */ 3374 if (runningOrder >= 4) { 3375 coefficients128_4 = _mm_loadu_si128((const __m128i*)(coefficients + 4)); 3376 samples128_4 = _mm_loadu_si128((const __m128i*)(pSamplesOut - 8)); 3377 runningOrder -= 4; 3378 } else { 3379 switch (runningOrder) { 3380 case 3: coefficients128_4 = _mm_set_epi32(0, coefficients[6], coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], pSamplesOut[-7], 0); break; 3381 case 2: coefficients128_4 = _mm_set_epi32(0, 0, coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], 0, 0); break; 3382 case 1: coefficients128_4 = _mm_set_epi32(0, 0, 0, coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], 0, 0, 0); break; 3383 } 3384 runningOrder = 0; 3385 } 3386 3387 /* 8 - 11 */ 3388 if (runningOrder == 4) { 3389 coefficients128_8 = _mm_loadu_si128((const __m128i*)(coefficients + 8)); 3390 samples128_8 = _mm_loadu_si128((const __m128i*)(pSamplesOut 
- 12)); 3391 runningOrder -= 4; 3392 } else { 3393 switch (runningOrder) { 3394 case 3: coefficients128_8 = _mm_set_epi32(0, coefficients[10], coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], pSamplesOut[-11], 0); break; 3395 case 2: coefficients128_8 = _mm_set_epi32(0, 0, coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], 0, 0); break; 3396 case 1: coefficients128_8 = _mm_set_epi32(0, 0, 0, coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], 0, 0, 0); break; 3397 } 3398 runningOrder = 0; 3399 } 3400 3401 /* Coefficients need to be shuffled for our streaming algorithm below to work. Samples are already in the correct order from the loading routine above. */ 3402 coefficients128_0 = _mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(0, 1, 2, 3)); 3403 coefficients128_4 = _mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(0, 1, 2, 3)); 3404 coefficients128_8 = _mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(0, 1, 2, 3)); 3405 } 3406 #else 3407 /* This causes strict-aliasing warnings with GCC. 
*/ 3408 switch (order) 3409 { 3410 case 12: ((drflac_int32*)&coefficients128_8)[0] = coefficients[11]; ((drflac_int32*)&samples128_8)[0] = pDecodedSamples[-12]; 3411 case 11: ((drflac_int32*)&coefficients128_8)[1] = coefficients[10]; ((drflac_int32*)&samples128_8)[1] = pDecodedSamples[-11]; 3412 case 10: ((drflac_int32*)&coefficients128_8)[2] = coefficients[ 9]; ((drflac_int32*)&samples128_8)[2] = pDecodedSamples[-10]; 3413 case 9: ((drflac_int32*)&coefficients128_8)[3] = coefficients[ 8]; ((drflac_int32*)&samples128_8)[3] = pDecodedSamples[- 9]; 3414 case 8: ((drflac_int32*)&coefficients128_4)[0] = coefficients[ 7]; ((drflac_int32*)&samples128_4)[0] = pDecodedSamples[- 8]; 3415 case 7: ((drflac_int32*)&coefficients128_4)[1] = coefficients[ 6]; ((drflac_int32*)&samples128_4)[1] = pDecodedSamples[- 7]; 3416 case 6: ((drflac_int32*)&coefficients128_4)[2] = coefficients[ 5]; ((drflac_int32*)&samples128_4)[2] = pDecodedSamples[- 6]; 3417 case 5: ((drflac_int32*)&coefficients128_4)[3] = coefficients[ 4]; ((drflac_int32*)&samples128_4)[3] = pDecodedSamples[- 5]; 3418 case 4: ((drflac_int32*)&coefficients128_0)[0] = coefficients[ 3]; ((drflac_int32*)&samples128_0)[0] = pDecodedSamples[- 4]; 3419 case 3: ((drflac_int32*)&coefficients128_0)[1] = coefficients[ 2]; ((drflac_int32*)&samples128_0)[1] = pDecodedSamples[- 3]; 3420 case 2: ((drflac_int32*)&coefficients128_0)[2] = coefficients[ 1]; ((drflac_int32*)&samples128_0)[2] = pDecodedSamples[- 2]; 3421 case 1: ((drflac_int32*)&coefficients128_0)[3] = coefficients[ 0]; ((drflac_int32*)&samples128_0)[3] = pDecodedSamples[- 1]; 3422 } 3423 #endif 3424 3425 /* For this version we are doing one sample at a time. 
*/ 3426 while (pDecodedSamples < pDecodedSamplesEnd) { 3427 __m128i prediction128; 3428 __m128i zeroCountPart128; 3429 __m128i riceParamPart128; 3430 3431 if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0) || 3432 !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts1, &riceParamParts1) || 3433 !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts2, &riceParamParts2) || 3434 !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts3, &riceParamParts3)) { 3435 return DRFLAC_FALSE; 3436 } 3437 3438 zeroCountPart128 = _mm_set_epi32(zeroCountParts3, zeroCountParts2, zeroCountParts1, zeroCountParts0); 3439 riceParamPart128 = _mm_set_epi32(riceParamParts3, riceParamParts2, riceParamParts1, riceParamParts0); 3440 3441 riceParamPart128 = _mm_and_si128(riceParamPart128, riceParamMask128); 3442 riceParamPart128 = _mm_or_si128(riceParamPart128, _mm_slli_epi32(zeroCountPart128, riceParam)); 3443 riceParamPart128 = _mm_xor_si128(_mm_srli_epi32(riceParamPart128, 1), _mm_add_epi32(drflac__mm_not_si128(_mm_and_si128(riceParamPart128, _mm_set1_epi32(0x01))), _mm_set1_epi32(0x01))); /* <-- SSE2 compatible */ 3444 /*riceParamPart128 = _mm_xor_si128(_mm_srli_epi32(riceParamPart128, 1), _mm_mullo_epi32(_mm_and_si128(riceParamPart128, _mm_set1_epi32(0x01)), _mm_set1_epi32(0xFFFFFFFF)));*/ /* <-- Only supported from SSE4.1 and is slower in my testing... */ 3445 3446 if (order <= 4) { 3447 for (i = 0; i < 4; i += 1) { 3448 prediction128 = _mm_mullo_epi32(coefficients128_0, samples128_0); 3449 3450 /* Horizontal add and shift. 
*/ 3451 prediction128 = drflac__mm_hadd_epi32(prediction128); 3452 prediction128 = _mm_srai_epi32(prediction128, shift); 3453 prediction128 = _mm_add_epi32(riceParamPart128, prediction128); 3454 3455 samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4); 3456 riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4); 3457 } 3458 } else if (order <= 8) { 3459 for (i = 0; i < 4; i += 1) { 3460 prediction128 = _mm_mullo_epi32(coefficients128_4, samples128_4); 3461 prediction128 = _mm_add_epi32(prediction128, _mm_mullo_epi32(coefficients128_0, samples128_0)); 3462 3463 /* Horizontal add and shift. */ 3464 prediction128 = drflac__mm_hadd_epi32(prediction128); 3465 prediction128 = _mm_srai_epi32(prediction128, shift); 3466 prediction128 = _mm_add_epi32(riceParamPart128, prediction128); 3467 3468 samples128_4 = _mm_alignr_epi8(samples128_0, samples128_4, 4); 3469 samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4); 3470 riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4); 3471 } 3472 } else { 3473 for (i = 0; i < 4; i += 1) { 3474 prediction128 = _mm_mullo_epi32(coefficients128_8, samples128_8); 3475 prediction128 = _mm_add_epi32(prediction128, _mm_mullo_epi32(coefficients128_4, samples128_4)); 3476 prediction128 = _mm_add_epi32(prediction128, _mm_mullo_epi32(coefficients128_0, samples128_0)); 3477 3478 /* Horizontal add and shift. */ 3479 prediction128 = drflac__mm_hadd_epi32(prediction128); 3480 prediction128 = _mm_srai_epi32(prediction128, shift); 3481 prediction128 = _mm_add_epi32(riceParamPart128, prediction128); 3482 3483 samples128_8 = _mm_alignr_epi8(samples128_4, samples128_8, 4); 3484 samples128_4 = _mm_alignr_epi8(samples128_0, samples128_4, 4); 3485 samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4); 3486 riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4); 3487 } 3488 } 3489 3490 /* We store samples in groups of 4. 
*/ 3491 _mm_storeu_si128((__m128i*)pDecodedSamples, samples128_0); 3492 pDecodedSamples += 4; 3493 } 3494 3495 /* Make sure we process the last few samples. */ 3496 i = (count & ~3); 3497 while (i < (int)count) { 3498 /* Rice extraction. */ 3499 if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0)) { 3500 return DRFLAC_FALSE; 3501 } 3502 3503 /* Rice reconstruction. */ 3504 riceParamParts0 &= riceParamMask; 3505 riceParamParts0 |= (zeroCountParts0 << riceParam); 3506 riceParamParts0 = (riceParamParts0 >> 1) ^ t[riceParamParts0 & 0x01]; 3507 3508 /* Sample reconstruction. */ 3509 pDecodedSamples[0] = riceParamParts0 + drflac__calculate_prediction_32(order, shift, coefficients, pDecodedSamples); 3510 3511 i += 1; 3512 pDecodedSamples += 1; 3513 } 3514 3515 return DRFLAC_TRUE; 3516 } 3517 3518 static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41_64(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) 3519 { 3520 int i; 3521 drflac_uint32 riceParamMask; 3522 drflac_int32* pDecodedSamples = pSamplesOut; 3523 drflac_int32* pDecodedSamplesEnd = pSamplesOut + (count & ~3); 3524 drflac_uint32 zeroCountParts0; 3525 drflac_uint32 zeroCountParts1; 3526 drflac_uint32 zeroCountParts2; 3527 drflac_uint32 zeroCountParts3; 3528 drflac_uint32 riceParamParts0; 3529 drflac_uint32 riceParamParts1; 3530 drflac_uint32 riceParamParts2; 3531 drflac_uint32 riceParamParts3; 3532 __m128i coefficients128_0; 3533 __m128i coefficients128_4; 3534 __m128i coefficients128_8; 3535 __m128i samples128_0; 3536 __m128i samples128_4; 3537 __m128i samples128_8; 3538 __m128i prediction128; 3539 __m128i riceParamMask128; 3540 3541 const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF}; 3542 3543 DRFLAC_ASSERT(order <= 12); 3544 3545 riceParamMask = (drflac_uint32)~((~0UL) << riceParam); 3546 riceParamMask128 = _mm_set1_epi32(riceParamMask); 3547 3548 
prediction128 = _mm_setzero_si128(); 3549 3550 /* Pre-load. */ 3551 coefficients128_0 = _mm_setzero_si128(); 3552 coefficients128_4 = _mm_setzero_si128(); 3553 coefficients128_8 = _mm_setzero_si128(); 3554 3555 samples128_0 = _mm_setzero_si128(); 3556 samples128_4 = _mm_setzero_si128(); 3557 samples128_8 = _mm_setzero_si128(); 3558 3559 #if 1 3560 { 3561 int runningOrder = order; 3562 3563 /* 0 - 3. */ 3564 if (runningOrder >= 4) { 3565 coefficients128_0 = _mm_loadu_si128((const __m128i*)(coefficients + 0)); 3566 samples128_0 = _mm_loadu_si128((const __m128i*)(pSamplesOut - 4)); 3567 runningOrder -= 4; 3568 } else { 3569 switch (runningOrder) { 3570 case 3: coefficients128_0 = _mm_set_epi32(0, coefficients[2], coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], pSamplesOut[-3], 0); break; 3571 case 2: coefficients128_0 = _mm_set_epi32(0, 0, coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], 0, 0); break; 3572 case 1: coefficients128_0 = _mm_set_epi32(0, 0, 0, coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], 0, 0, 0); break; 3573 } 3574 runningOrder = 0; 3575 } 3576 3577 /* 4 - 7 */ 3578 if (runningOrder >= 4) { 3579 coefficients128_4 = _mm_loadu_si128((const __m128i*)(coefficients + 4)); 3580 samples128_4 = _mm_loadu_si128((const __m128i*)(pSamplesOut - 8)); 3581 runningOrder -= 4; 3582 } else { 3583 switch (runningOrder) { 3584 case 3: coefficients128_4 = _mm_set_epi32(0, coefficients[6], coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], pSamplesOut[-7], 0); break; 3585 case 2: coefficients128_4 = _mm_set_epi32(0, 0, coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], 0, 0); break; 3586 case 1: coefficients128_4 = _mm_set_epi32(0, 0, 0, coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], 0, 0, 0); break; 3587 } 3588 runningOrder = 0; 3589 } 3590 3591 /* 
8 - 11 */ 3592 if (runningOrder == 4) { 3593 coefficients128_8 = _mm_loadu_si128((const __m128i*)(coefficients + 8)); 3594 samples128_8 = _mm_loadu_si128((const __m128i*)(pSamplesOut - 12)); 3595 runningOrder -= 4; 3596 } else { 3597 switch (runningOrder) { 3598 case 3: coefficients128_8 = _mm_set_epi32(0, coefficients[10], coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], pSamplesOut[-11], 0); break; 3599 case 2: coefficients128_8 = _mm_set_epi32(0, 0, coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], 0, 0); break; 3600 case 1: coefficients128_8 = _mm_set_epi32(0, 0, 0, coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], 0, 0, 0); break; 3601 } 3602 runningOrder = 0; 3603 } 3604 3605 /* Coefficients need to be shuffled for our streaming algorithm below to work. Samples are already in the correct order from the loading routine above. */ 3606 coefficients128_0 = _mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(0, 1, 2, 3)); 3607 coefficients128_4 = _mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(0, 1, 2, 3)); 3608 coefficients128_8 = _mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(0, 1, 2, 3)); 3609 } 3610 #else 3611 switch (order) 3612 { 3613 case 12: ((drflac_int32*)&coefficients128_8)[0] = coefficients[11]; ((drflac_int32*)&samples128_8)[0] = pDecodedSamples[-12]; 3614 case 11: ((drflac_int32*)&coefficients128_8)[1] = coefficients[10]; ((drflac_int32*)&samples128_8)[1] = pDecodedSamples[-11]; 3615 case 10: ((drflac_int32*)&coefficients128_8)[2] = coefficients[ 9]; ((drflac_int32*)&samples128_8)[2] = pDecodedSamples[-10]; 3616 case 9: ((drflac_int32*)&coefficients128_8)[3] = coefficients[ 8]; ((drflac_int32*)&samples128_8)[3] = pDecodedSamples[- 9]; 3617 case 8: ((drflac_int32*)&coefficients128_4)[0] = coefficients[ 7]; ((drflac_int32*)&samples128_4)[0] = pDecodedSamples[- 8]; 3618 case 7: ((drflac_int32*)&coefficients128_4)[1] = coefficients[ 6]; 
((drflac_int32*)&samples128_4)[1] = pDecodedSamples[- 7]; 3619 case 6: ((drflac_int32*)&coefficients128_4)[2] = coefficients[ 5]; ((drflac_int32*)&samples128_4)[2] = pDecodedSamples[- 6]; 3620 case 5: ((drflac_int32*)&coefficients128_4)[3] = coefficients[ 4]; ((drflac_int32*)&samples128_4)[3] = pDecodedSamples[- 5]; 3621 case 4: ((drflac_int32*)&coefficients128_0)[0] = coefficients[ 3]; ((drflac_int32*)&samples128_0)[0] = pDecodedSamples[- 4]; 3622 case 3: ((drflac_int32*)&coefficients128_0)[1] = coefficients[ 2]; ((drflac_int32*)&samples128_0)[1] = pDecodedSamples[- 3]; 3623 case 2: ((drflac_int32*)&coefficients128_0)[2] = coefficients[ 1]; ((drflac_int32*)&samples128_0)[2] = pDecodedSamples[- 2]; 3624 case 1: ((drflac_int32*)&coefficients128_0)[3] = coefficients[ 0]; ((drflac_int32*)&samples128_0)[3] = pDecodedSamples[- 1]; 3625 } 3626 #endif 3627 3628 /* For this version we are doing one sample at a time. */ 3629 while (pDecodedSamples < pDecodedSamplesEnd) { 3630 __m128i zeroCountPart128; 3631 __m128i riceParamPart128; 3632 3633 if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0) || 3634 !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts1, &riceParamParts1) || 3635 !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts2, &riceParamParts2) || 3636 !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts3, &riceParamParts3)) { 3637 return DRFLAC_FALSE; 3638 } 3639 3640 zeroCountPart128 = _mm_set_epi32(zeroCountParts3, zeroCountParts2, zeroCountParts1, zeroCountParts0); 3641 riceParamPart128 = _mm_set_epi32(riceParamParts3, riceParamParts2, riceParamParts1, riceParamParts0); 3642 3643 riceParamPart128 = _mm_and_si128(riceParamPart128, riceParamMask128); 3644 riceParamPart128 = _mm_or_si128(riceParamPart128, _mm_slli_epi32(zeroCountPart128, riceParam)); 3645 riceParamPart128 = _mm_xor_si128(_mm_srli_epi32(riceParamPart128, 1), _mm_add_epi32(drflac__mm_not_si128(_mm_and_si128(riceParamPart128, _mm_set1_epi32(1))), 
_mm_set1_epi32(1))); 3646 3647 for (i = 0; i < 4; i += 1) { 3648 prediction128 = _mm_xor_si128(prediction128, prediction128); /* Reset to 0. */ 3649 3650 switch (order) 3651 { 3652 case 12: 3653 case 11: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(1, 1, 0, 0)), _mm_shuffle_epi32(samples128_8, _MM_SHUFFLE(1, 1, 0, 0)))); 3654 case 10: 3655 case 9: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(3, 3, 2, 2)), _mm_shuffle_epi32(samples128_8, _MM_SHUFFLE(3, 3, 2, 2)))); 3656 case 8: 3657 case 7: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(1, 1, 0, 0)), _mm_shuffle_epi32(samples128_4, _MM_SHUFFLE(1, 1, 0, 0)))); 3658 case 6: 3659 case 5: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(3, 3, 2, 2)), _mm_shuffle_epi32(samples128_4, _MM_SHUFFLE(3, 3, 2, 2)))); 3660 case 4: 3661 case 3: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(1, 1, 0, 0)), _mm_shuffle_epi32(samples128_0, _MM_SHUFFLE(1, 1, 0, 0)))); 3662 case 2: 3663 case 1: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(3, 3, 2, 2)), _mm_shuffle_epi32(samples128_0, _MM_SHUFFLE(3, 3, 2, 2)))); 3664 } 3665 3666 /* Horizontal add and shift. */ 3667 prediction128 = drflac__mm_hadd_epi64(prediction128); 3668 prediction128 = drflac__mm_srai_epi64(prediction128, shift); 3669 prediction128 = _mm_add_epi32(riceParamPart128, prediction128); 3670 3671 /* Our value should be sitting in prediction128[0]. We need to combine this with our SSE samples. 
*/ 3672 samples128_8 = _mm_alignr_epi8(samples128_4, samples128_8, 4); 3673 samples128_4 = _mm_alignr_epi8(samples128_0, samples128_4, 4); 3674 samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4); 3675 3676 /* Slide our rice parameter down so that the value in position 0 contains the next one to process. */ 3677 riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4); 3678 } 3679 3680 /* We store samples in groups of 4. */ 3681 _mm_storeu_si128((__m128i*)pDecodedSamples, samples128_0); 3682 pDecodedSamples += 4; 3683 } 3684 3685 /* Make sure we process the last few samples. */ 3686 i = (count & ~3); 3687 while (i < (int)count) { 3688 /* Rice extraction. */ 3689 if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0)) { 3690 return DRFLAC_FALSE; 3691 } 3692 3693 /* Rice reconstruction. */ 3694 riceParamParts0 &= riceParamMask; 3695 riceParamParts0 |= (zeroCountParts0 << riceParam); 3696 riceParamParts0 = (riceParamParts0 >> 1) ^ t[riceParamParts0 & 0x01]; 3697 3698 /* Sample reconstruction. */ 3699 pDecodedSamples[0] = riceParamParts0 + drflac__calculate_prediction_64(order, shift, coefficients, pDecodedSamples); 3700 3701 i += 1; 3702 pDecodedSamples += 1; 3703 } 3704 3705 return DRFLAC_TRUE; 3706 } 3707 3708 static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) 3709 { 3710 DRFLAC_ASSERT(bs != NULL); 3711 DRFLAC_ASSERT(count > 0); 3712 DRFLAC_ASSERT(pSamplesOut != NULL); 3713 3714 /* In my testing the order is rarely > 12, so in this case I'm going to simplify the SSE implementation by only handling order <= 12. 
*/ 3715 if (order > 0 && order <= 12) { 3716 if (bitsPerSample+shift > 32) { 3717 return drflac__decode_samples_with_residual__rice__sse41_64(bs, count, riceParam, order, shift, coefficients, pSamplesOut); 3718 } else { 3719 return drflac__decode_samples_with_residual__rice__sse41_32(bs, count, riceParam, order, shift, coefficients, pSamplesOut); 3720 } 3721 } else { 3722 return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut); 3723 } 3724 } 3725 #endif 3726 3727 #if defined(DRFLAC_SUPPORT_NEON) 3728 static DRFLAC_INLINE void drflac__vst2q_s32(drflac_int32* p, int32x4x2_t x) 3729 { 3730 vst1q_s32(p+0, x.val[0]); 3731 vst1q_s32(p+4, x.val[1]); 3732 } 3733 3734 static DRFLAC_INLINE void drflac__vst2q_f32(float* p, float32x4x2_t x) 3735 { 3736 vst1q_f32(p+0, x.val[0]); 3737 vst1q_f32(p+4, x.val[1]); 3738 } 3739 3740 static DRFLAC_INLINE void drflac__vst2q_s16(drflac_int16* p, int16x4x2_t x) 3741 { 3742 vst1q_s16(p, vcombine_s16(x.val[0], x.val[1])); 3743 } 3744 3745 static DRFLAC_INLINE int32x4_t drflac__vdupq_n_s32x4(drflac_int32 x3, drflac_int32 x2, drflac_int32 x1, drflac_int32 x0) 3746 { 3747 drflac_int32 x[4]; 3748 x[3] = x3; 3749 x[2] = x2; 3750 x[1] = x1; 3751 x[0] = x0; 3752 return vld1q_s32(x); 3753 } 3754 3755 static DRFLAC_INLINE int32x4_t drflac__valignrq_s32_1(int32x4_t a, int32x4_t b) 3756 { 3757 /* Equivalent to SSE's _mm_alignr_epi8(a, b, 4) */ 3758 3759 /* Reference */ 3760 /*return drflac__vdupq_n_s32x4( 3761 vgetq_lane_s32(a, 0), 3762 vgetq_lane_s32(b, 3), 3763 vgetq_lane_s32(b, 2), 3764 vgetq_lane_s32(b, 1) 3765 );*/ 3766 3767 return vextq_s32(b, a, 1); 3768 } 3769 3770 static DRFLAC_INLINE uint32x4_t drflac__valignrq_u32_1(uint32x4_t a, uint32x4_t b) 3771 { 3772 /* Equivalent to SSE's _mm_alignr_epi8(a, b, 4) */ 3773 3774 /* Reference */ 3775 /*return drflac__vdupq_n_s32x4( 3776 vgetq_lane_s32(a, 0), 3777 vgetq_lane_s32(b, 3), 3778 vgetq_lane_s32(b, 2), 3779 
vgetq_lane_s32(b, 1) 3780 );*/ 3781 3782 return vextq_u32(b, a, 1); 3783 } 3784 3785 static DRFLAC_INLINE int32x2_t drflac__vhaddq_s32(int32x4_t x) 3786 { 3787 /* The sum must end up in position 0. */ 3788 3789 /* Reference */ 3790 /*return vdupq_n_s32( 3791 vgetq_lane_s32(x, 3) + 3792 vgetq_lane_s32(x, 2) + 3793 vgetq_lane_s32(x, 1) + 3794 vgetq_lane_s32(x, 0) 3795 );*/ 3796 3797 int32x2_t r = vadd_s32(vget_high_s32(x), vget_low_s32(x)); 3798 return vpadd_s32(r, r); 3799 } 3800 3801 static DRFLAC_INLINE int64x1_t drflac__vhaddq_s64(int64x2_t x) 3802 { 3803 return vadd_s64(vget_high_s64(x), vget_low_s64(x)); 3804 } 3805 3806 static DRFLAC_INLINE int32x4_t drflac__vrevq_s32(int32x4_t x) 3807 { 3808 /* Reference */ 3809 /*return drflac__vdupq_n_s32x4( 3810 vgetq_lane_s32(x, 0), 3811 vgetq_lane_s32(x, 1), 3812 vgetq_lane_s32(x, 2), 3813 vgetq_lane_s32(x, 3) 3814 );*/ 3815 3816 return vrev64q_s32(vcombine_s32(vget_high_s32(x), vget_low_s32(x))); 3817 } 3818 3819 static DRFLAC_INLINE int32x4_t drflac__vnotq_s32(int32x4_t x) 3820 { 3821 return veorq_s32(x, vdupq_n_s32(0xFFFFFFFF)); 3822 } 3823 3824 static DRFLAC_INLINE uint32x4_t drflac__vnotq_u32(uint32x4_t x) 3825 { 3826 return veorq_u32(x, vdupq_n_u32(0xFFFFFFFF)); 3827 } 3828 3829 static drflac_bool32 drflac__decode_samples_with_residual__rice__neon_32(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) 3830 { 3831 int i; 3832 drflac_uint32 riceParamMask; 3833 drflac_int32* pDecodedSamples = pSamplesOut; 3834 drflac_int32* pDecodedSamplesEnd = pSamplesOut + (count & ~3); 3835 drflac_uint32 zeroCountParts[4]; 3836 drflac_uint32 riceParamParts[4]; 3837 int32x4_t coefficients128_0; 3838 int32x4_t coefficients128_4; 3839 int32x4_t coefficients128_8; 3840 int32x4_t samples128_0; 3841 int32x4_t samples128_4; 3842 int32x4_t samples128_8; 3843 uint32x4_t riceParamMask128; 3844 int32x4_t riceParam128; 3845 int32x2_t 
shift64; 3846 uint32x4_t one128; 3847 3848 const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF}; 3849 3850 riceParamMask = ~((~0UL) << riceParam); 3851 riceParamMask128 = vdupq_n_u32(riceParamMask); 3852 3853 riceParam128 = vdupq_n_s32(riceParam); 3854 shift64 = vdup_n_s32(-shift); /* Negate the shift because we'll be doing a variable shift using vshlq_s32(). */ 3855 one128 = vdupq_n_u32(1); 3856 3857 /* 3858 Pre-loading the coefficients and prior samples is annoying because we need to ensure we don't try reading more than 3859 what's available in the input buffers. It would be conenient to use a fall-through switch to do this, but this results 3860 in strict aliasing warnings with GCC. To work around this I'm just doing something hacky. This feels a bit convoluted 3861 so I think there's opportunity for this to be simplified. 3862 */ 3863 { 3864 int runningOrder = order; 3865 drflac_int32 tempC[4] = {0, 0, 0, 0}; 3866 drflac_int32 tempS[4] = {0, 0, 0, 0}; 3867 3868 /* 0 - 3. */ 3869 if (runningOrder >= 4) { 3870 coefficients128_0 = vld1q_s32(coefficients + 0); 3871 samples128_0 = vld1q_s32(pSamplesOut - 4); 3872 runningOrder -= 4; 3873 } else { 3874 switch (runningOrder) { 3875 case 3: tempC[2] = coefficients[2]; tempS[1] = pSamplesOut[-3]; /* fallthrough */ 3876 case 2: tempC[1] = coefficients[1]; tempS[2] = pSamplesOut[-2]; /* fallthrough */ 3877 case 1: tempC[0] = coefficients[0]; tempS[3] = pSamplesOut[-1]; /* fallthrough */ 3878 } 3879 3880 coefficients128_0 = vld1q_s32(tempC); 3881 samples128_0 = vld1q_s32(tempS); 3882 runningOrder = 0; 3883 } 3884 3885 /* 4 - 7 */ 3886 if (runningOrder >= 4) { 3887 coefficients128_4 = vld1q_s32(coefficients + 4); 3888 samples128_4 = vld1q_s32(pSamplesOut - 8); 3889 runningOrder -= 4; 3890 } else { 3891 switch (runningOrder) { 3892 case 3: tempC[2] = coefficients[6]; tempS[1] = pSamplesOut[-7]; /* fallthrough */ 3893 case 2: tempC[1] = coefficients[5]; tempS[2] = pSamplesOut[-6]; /* fallthrough */ 3894 case 1: tempC[0] = 
coefficients[4]; tempS[3] = pSamplesOut[-5]; /* fallthrough */ 3895 } 3896 3897 coefficients128_4 = vld1q_s32(tempC); 3898 samples128_4 = vld1q_s32(tempS); 3899 runningOrder = 0; 3900 } 3901 3902 /* 8 - 11 */ 3903 if (runningOrder == 4) { 3904 coefficients128_8 = vld1q_s32(coefficients + 8); 3905 samples128_8 = vld1q_s32(pSamplesOut - 12); 3906 runningOrder -= 4; 3907 } else { 3908 switch (runningOrder) { 3909 case 3: tempC[2] = coefficients[10]; tempS[1] = pSamplesOut[-11]; /* fallthrough */ 3910 case 2: tempC[1] = coefficients[ 9]; tempS[2] = pSamplesOut[-10]; /* fallthrough */ 3911 case 1: tempC[0] = coefficients[ 8]; tempS[3] = pSamplesOut[- 9]; /* fallthrough */ 3912 } 3913 3914 coefficients128_8 = vld1q_s32(tempC); 3915 samples128_8 = vld1q_s32(tempS); 3916 runningOrder = 0; 3917 } 3918 3919 /* Coefficients need to be shuffled for our streaming algorithm below to work. Samples are already in the correct order from the loading routine above. */ 3920 coefficients128_0 = drflac__vrevq_s32(coefficients128_0); 3921 coefficients128_4 = drflac__vrevq_s32(coefficients128_4); 3922 coefficients128_8 = drflac__vrevq_s32(coefficients128_8); 3923 } 3924 3925 /* For this version we are doing one sample at a time. 
*/ 3926 while (pDecodedSamples < pDecodedSamplesEnd) { 3927 int32x4_t prediction128; 3928 int32x2_t prediction64; 3929 uint32x4_t zeroCountPart128; 3930 uint32x4_t riceParamPart128; 3931 3932 if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[0], &riceParamParts[0]) || 3933 !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[1], &riceParamParts[1]) || 3934 !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[2], &riceParamParts[2]) || 3935 !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[3], &riceParamParts[3])) { 3936 return DRFLAC_FALSE; 3937 } 3938 3939 zeroCountPart128 = vld1q_u32(zeroCountParts); 3940 riceParamPart128 = vld1q_u32(riceParamParts); 3941 3942 riceParamPart128 = vandq_u32(riceParamPart128, riceParamMask128); 3943 riceParamPart128 = vorrq_u32(riceParamPart128, vshlq_u32(zeroCountPart128, riceParam128)); 3944 riceParamPart128 = veorq_u32(vshrq_n_u32(riceParamPart128, 1), vaddq_u32(drflac__vnotq_u32(vandq_u32(riceParamPart128, one128)), one128)); 3945 3946 if (order <= 4) { 3947 for (i = 0; i < 4; i += 1) { 3948 prediction128 = vmulq_s32(coefficients128_0, samples128_0); 3949 3950 /* Horizontal add and shift. */ 3951 prediction64 = drflac__vhaddq_s32(prediction128); 3952 prediction64 = vshl_s32(prediction64, shift64); 3953 prediction64 = vadd_s32(prediction64, vget_low_s32(vreinterpretq_s32_u32(riceParamPart128))); 3954 3955 samples128_0 = drflac__valignrq_s32_1(vcombine_s32(prediction64, vdup_n_s32(0)), samples128_0); 3956 riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128); 3957 } 3958 } else if (order <= 8) { 3959 for (i = 0; i < 4; i += 1) { 3960 prediction128 = vmulq_s32(coefficients128_4, samples128_4); 3961 prediction128 = vmlaq_s32(prediction128, coefficients128_0, samples128_0); 3962 3963 /* Horizontal add and shift. 
*/ 3964 prediction64 = drflac__vhaddq_s32(prediction128); 3965 prediction64 = vshl_s32(prediction64, shift64); 3966 prediction64 = vadd_s32(prediction64, vget_low_s32(vreinterpretq_s32_u32(riceParamPart128))); 3967 3968 samples128_4 = drflac__valignrq_s32_1(samples128_0, samples128_4); 3969 samples128_0 = drflac__valignrq_s32_1(vcombine_s32(prediction64, vdup_n_s32(0)), samples128_0); 3970 riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128); 3971 } 3972 } else { 3973 for (i = 0; i < 4; i += 1) { 3974 prediction128 = vmulq_s32(coefficients128_8, samples128_8); 3975 prediction128 = vmlaq_s32(prediction128, coefficients128_4, samples128_4); 3976 prediction128 = vmlaq_s32(prediction128, coefficients128_0, samples128_0); 3977 3978 /* Horizontal add and shift. */ 3979 prediction64 = drflac__vhaddq_s32(prediction128); 3980 prediction64 = vshl_s32(prediction64, shift64); 3981 prediction64 = vadd_s32(prediction64, vget_low_s32(vreinterpretq_s32_u32(riceParamPart128))); 3982 3983 samples128_8 = drflac__valignrq_s32_1(samples128_4, samples128_8); 3984 samples128_4 = drflac__valignrq_s32_1(samples128_0, samples128_4); 3985 samples128_0 = drflac__valignrq_s32_1(vcombine_s32(prediction64, vdup_n_s32(0)), samples128_0); 3986 riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128); 3987 } 3988 } 3989 3990 /* We store samples in groups of 4. */ 3991 vst1q_s32(pDecodedSamples, samples128_0); 3992 pDecodedSamples += 4; 3993 } 3994 3995 /* Make sure we process the last few samples. */ 3996 i = (count & ~3); 3997 while (i < (int)count) { 3998 /* Rice extraction. */ 3999 if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[0], &riceParamParts[0])) { 4000 return DRFLAC_FALSE; 4001 } 4002 4003 /* Rice reconstruction. */ 4004 riceParamParts[0] &= riceParamMask; 4005 riceParamParts[0] |= (zeroCountParts[0] << riceParam); 4006 riceParamParts[0] = (riceParamParts[0] >> 1) ^ t[riceParamParts[0] & 0x01]; 4007 4008 /* Sample reconstruction. 
*/ 4009 pDecodedSamples[0] = riceParamParts[0] + drflac__calculate_prediction_32(order, shift, coefficients, pDecodedSamples); 4010 4011 i += 1; 4012 pDecodedSamples += 1; 4013 } 4014 4015 return DRFLAC_TRUE; 4016 } 4017 4018 static drflac_bool32 drflac__decode_samples_with_residual__rice__neon_64(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) 4019 { 4020 int i; 4021 drflac_uint32 riceParamMask; 4022 drflac_int32* pDecodedSamples = pSamplesOut; 4023 drflac_int32* pDecodedSamplesEnd = pSamplesOut + (count & ~3); 4024 drflac_uint32 zeroCountParts[4]; 4025 drflac_uint32 riceParamParts[4]; 4026 int32x4_t coefficients128_0; 4027 int32x4_t coefficients128_4; 4028 int32x4_t coefficients128_8; 4029 int32x4_t samples128_0; 4030 int32x4_t samples128_4; 4031 int32x4_t samples128_8; 4032 uint32x4_t riceParamMask128; 4033 int32x4_t riceParam128; 4034 int64x1_t shift64; 4035 uint32x4_t one128; 4036 4037 const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF}; 4038 4039 riceParamMask = ~((~0UL) << riceParam); 4040 riceParamMask128 = vdupq_n_u32(riceParamMask); 4041 4042 riceParam128 = vdupq_n_s32(riceParam); 4043 shift64 = vdup_n_s64(-shift); /* Negate the shift because we'll be doing a variable shift using vshlq_s32(). */ 4044 one128 = vdupq_n_u32(1); 4045 4046 /* 4047 Pre-loading the coefficients and prior samples is annoying because we need to ensure we don't try reading more than 4048 what's available in the input buffers. It would be conenient to use a fall-through switch to do this, but this results 4049 in strict aliasing warnings with GCC. To work around this I'm just doing something hacky. This feels a bit convoluted 4050 so I think there's opportunity for this to be simplified. 4051 */ 4052 { 4053 int runningOrder = order; 4054 drflac_int32 tempC[4] = {0, 0, 0, 0}; 4055 drflac_int32 tempS[4] = {0, 0, 0, 0}; 4056 4057 /* 0 - 3. 
*/ 4058 if (runningOrder >= 4) { 4059 coefficients128_0 = vld1q_s32(coefficients + 0); 4060 samples128_0 = vld1q_s32(pSamplesOut - 4); 4061 runningOrder -= 4; 4062 } else { 4063 switch (runningOrder) { 4064 case 3: tempC[2] = coefficients[2]; tempS[1] = pSamplesOut[-3]; /* fallthrough */ 4065 case 2: tempC[1] = coefficients[1]; tempS[2] = pSamplesOut[-2]; /* fallthrough */ 4066 case 1: tempC[0] = coefficients[0]; tempS[3] = pSamplesOut[-1]; /* fallthrough */ 4067 } 4068 4069 coefficients128_0 = vld1q_s32(tempC); 4070 samples128_0 = vld1q_s32(tempS); 4071 runningOrder = 0; 4072 } 4073 4074 /* 4 - 7 */ 4075 if (runningOrder >= 4) { 4076 coefficients128_4 = vld1q_s32(coefficients + 4); 4077 samples128_4 = vld1q_s32(pSamplesOut - 8); 4078 runningOrder -= 4; 4079 } else { 4080 switch (runningOrder) { 4081 case 3: tempC[2] = coefficients[6]; tempS[1] = pSamplesOut[-7]; /* fallthrough */ 4082 case 2: tempC[1] = coefficients[5]; tempS[2] = pSamplesOut[-6]; /* fallthrough */ 4083 case 1: tempC[0] = coefficients[4]; tempS[3] = pSamplesOut[-5]; /* fallthrough */ 4084 } 4085 4086 coefficients128_4 = vld1q_s32(tempC); 4087 samples128_4 = vld1q_s32(tempS); 4088 runningOrder = 0; 4089 } 4090 4091 /* 8 - 11 */ 4092 if (runningOrder == 4) { 4093 coefficients128_8 = vld1q_s32(coefficients + 8); 4094 samples128_8 = vld1q_s32(pSamplesOut - 12); 4095 runningOrder -= 4; 4096 } else { 4097 switch (runningOrder) { 4098 case 3: tempC[2] = coefficients[10]; tempS[1] = pSamplesOut[-11]; /* fallthrough */ 4099 case 2: tempC[1] = coefficients[ 9]; tempS[2] = pSamplesOut[-10]; /* fallthrough */ 4100 case 1: tempC[0] = coefficients[ 8]; tempS[3] = pSamplesOut[- 9]; /* fallthrough */ 4101 } 4102 4103 coefficients128_8 = vld1q_s32(tempC); 4104 samples128_8 = vld1q_s32(tempS); 4105 runningOrder = 0; 4106 } 4107 4108 /* Coefficients need to be shuffled for our streaming algorithm below to work. Samples are already in the correct order from the loading routine above. 
*/ 4109 coefficients128_0 = drflac__vrevq_s32(coefficients128_0); 4110 coefficients128_4 = drflac__vrevq_s32(coefficients128_4); 4111 coefficients128_8 = drflac__vrevq_s32(coefficients128_8); 4112 } 4113 4114 /* For this version we are doing one sample at a time. */ 4115 while (pDecodedSamples < pDecodedSamplesEnd) { 4116 int64x2_t prediction128; 4117 uint32x4_t zeroCountPart128; 4118 uint32x4_t riceParamPart128; 4119 4120 if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[0], &riceParamParts[0]) || 4121 !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[1], &riceParamParts[1]) || 4122 !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[2], &riceParamParts[2]) || 4123 !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[3], &riceParamParts[3])) { 4124 return DRFLAC_FALSE; 4125 } 4126 4127 zeroCountPart128 = vld1q_u32(zeroCountParts); 4128 riceParamPart128 = vld1q_u32(riceParamParts); 4129 4130 riceParamPart128 = vandq_u32(riceParamPart128, riceParamMask128); 4131 riceParamPart128 = vorrq_u32(riceParamPart128, vshlq_u32(zeroCountPart128, riceParam128)); 4132 riceParamPart128 = veorq_u32(vshrq_n_u32(riceParamPart128, 1), vaddq_u32(drflac__vnotq_u32(vandq_u32(riceParamPart128, one128)), one128)); 4133 4134 for (i = 0; i < 4; i += 1) { 4135 int64x1_t prediction64; 4136 4137 prediction128 = veorq_s64(prediction128, prediction128); /* Reset to 0. 
*/ 4138 switch (order) 4139 { 4140 case 12: 4141 case 11: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_low_s32(coefficients128_8), vget_low_s32(samples128_8))); 4142 case 10: 4143 case 9: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_high_s32(coefficients128_8), vget_high_s32(samples128_8))); 4144 case 8: 4145 case 7: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_low_s32(coefficients128_4), vget_low_s32(samples128_4))); 4146 case 6: 4147 case 5: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_high_s32(coefficients128_4), vget_high_s32(samples128_4))); 4148 case 4: 4149 case 3: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_low_s32(coefficients128_0), vget_low_s32(samples128_0))); 4150 case 2: 4151 case 1: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_high_s32(coefficients128_0), vget_high_s32(samples128_0))); 4152 } 4153 4154 /* Horizontal add and shift. */ 4155 prediction64 = drflac__vhaddq_s64(prediction128); 4156 prediction64 = vshl_s64(prediction64, shift64); 4157 prediction64 = vadd_s64(prediction64, vdup_n_s64(vgetq_lane_u32(riceParamPart128, 0))); 4158 4159 /* Our value should be sitting in prediction64[0]. We need to combine this with our SSE samples. */ 4160 samples128_8 = drflac__valignrq_s32_1(samples128_4, samples128_8); 4161 samples128_4 = drflac__valignrq_s32_1(samples128_0, samples128_4); 4162 samples128_0 = drflac__valignrq_s32_1(vcombine_s32(vreinterpret_s32_s64(prediction64), vdup_n_s32(0)), samples128_0); 4163 4164 /* Slide our rice parameter down so that the value in position 0 contains the next one to process. */ 4165 riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128); 4166 } 4167 4168 /* We store samples in groups of 4. */ 4169 vst1q_s32(pDecodedSamples, samples128_0); 4170 pDecodedSamples += 4; 4171 } 4172 4173 /* Make sure we process the last few samples. */ 4174 i = (count & ~3); 4175 while (i < (int)count) { 4176 /* Rice extraction. 
*/ 4177 if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[0], &riceParamParts[0])) { 4178 return DRFLAC_FALSE; 4179 } 4180 4181 /* Rice reconstruction. */ 4182 riceParamParts[0] &= riceParamMask; 4183 riceParamParts[0] |= (zeroCountParts[0] << riceParam); 4184 riceParamParts[0] = (riceParamParts[0] >> 1) ^ t[riceParamParts[0] & 0x01]; 4185 4186 /* Sample reconstruction. */ 4187 pDecodedSamples[0] = riceParamParts[0] + drflac__calculate_prediction_64(order, shift, coefficients, pDecodedSamples); 4188 4189 i += 1; 4190 pDecodedSamples += 1; 4191 } 4192 4193 return DRFLAC_TRUE; 4194 } 4195 4196 static drflac_bool32 drflac__decode_samples_with_residual__rice__neon(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) 4197 { 4198 DRFLAC_ASSERT(bs != NULL); 4199 DRFLAC_ASSERT(count > 0); 4200 DRFLAC_ASSERT(pSamplesOut != NULL); 4201 4202 /* In my testing the order is rarely > 12, so in this case I'm going to simplify the NEON implementation by only handling order <= 12. 
*/ 4203 if (order > 0 && order <= 12) { 4204 if (bitsPerSample+shift > 32) { 4205 return drflac__decode_samples_with_residual__rice__neon_64(bs, count, riceParam, order, shift, coefficients, pSamplesOut); 4206 } else { 4207 return drflac__decode_samples_with_residual__rice__neon_32(bs, count, riceParam, order, shift, coefficients, pSamplesOut); 4208 } 4209 } else { 4210 return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut); 4211 } 4212 } 4213 #endif 4214 4215 static drflac_bool32 drflac__decode_samples_with_residual__rice(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) 4216 { 4217 #if defined(DRFLAC_SUPPORT_SSE41) 4218 if (drflac__gIsSSE41Supported) { 4219 return drflac__decode_samples_with_residual__rice__sse41(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut); 4220 } else 4221 #elif defined(DRFLAC_SUPPORT_NEON) 4222 if (drflac__gIsNEONSupported) { 4223 return drflac__decode_samples_with_residual__rice__neon(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut); 4224 } else 4225 #endif 4226 { 4227 /* Scalar fallback. */ 4228 #if 0 4229 return drflac__decode_samples_with_residual__rice__reference(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut); 4230 #else 4231 return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut); 4232 #endif 4233 } 4234 } 4235 4236 /* Reads and seeks past a string of residual values as Rice codes. The decoder should be sitting on the first bit of the Rice codes. 
*/ 4237 static drflac_bool32 drflac__read_and_seek_residual__rice(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam) 4238 { 4239 drflac_uint32 i; 4240 4241 DRFLAC_ASSERT(bs != NULL); 4242 DRFLAC_ASSERT(count > 0); 4243 4244 for (i = 0; i < count; ++i) { 4245 if (!drflac__seek_rice_parts(bs, riceParam)) { 4246 return DRFLAC_FALSE; 4247 } 4248 } 4249 4250 return DRFLAC_TRUE; 4251 } 4252 4253 static drflac_bool32 drflac__decode_samples_with_residual__unencoded(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 unencodedBitsPerSample, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) 4254 { 4255 drflac_uint32 i; 4256 4257 DRFLAC_ASSERT(bs != NULL); 4258 DRFLAC_ASSERT(count > 0); 4259 DRFLAC_ASSERT(unencodedBitsPerSample <= 31); /* <-- unencodedBitsPerSample is a 5 bit number, so cannot exceed 31. */ 4260 DRFLAC_ASSERT(pSamplesOut != NULL); 4261 4262 for (i = 0; i < count; ++i) { 4263 if (unencodedBitsPerSample > 0) { 4264 if (!drflac__read_int32(bs, unencodedBitsPerSample, pSamplesOut + i)) { 4265 return DRFLAC_FALSE; 4266 } 4267 } else { 4268 pSamplesOut[i] = 0; 4269 } 4270 4271 if (bitsPerSample >= 24) { 4272 pSamplesOut[i] += drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + i); 4273 } else { 4274 pSamplesOut[i] += drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + i); 4275 } 4276 } 4277 4278 return DRFLAC_TRUE; 4279 } 4280 4281 4282 /* 4283 Reads and decodes the residual for the sub-frame the decoder is currently sitting on. This function should be called 4284 when the decoder is sitting at the very start of the RESIDUAL block. The first <order> residuals will be ignored. The 4285 <blockSize> and <order> parameters are used to determine how many residual values need to be decoded. 
4286 */ 4287 static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 blockSize, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples) 4288 { 4289 drflac_uint8 residualMethod; 4290 drflac_uint8 partitionOrder; 4291 drflac_uint32 samplesInPartition; 4292 drflac_uint32 partitionsRemaining; 4293 4294 DRFLAC_ASSERT(bs != NULL); 4295 DRFLAC_ASSERT(blockSize != 0); 4296 DRFLAC_ASSERT(pDecodedSamples != NULL); /* <-- Should we allow NULL, in which case we just seek past the residual rather than do a full decode? */ 4297 4298 if (!drflac__read_uint8(bs, 2, &residualMethod)) { 4299 return DRFLAC_FALSE; 4300 } 4301 4302 if (residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE && residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) { 4303 return DRFLAC_FALSE; /* Unknown or unsupported residual coding method. */ 4304 } 4305 4306 /* Ignore the first <order> values. */ 4307 pDecodedSamples += order; 4308 4309 if (!drflac__read_uint8(bs, 4, &partitionOrder)) { 4310 return DRFLAC_FALSE; 4311 } 4312 4313 /* 4314 From the FLAC spec: 4315 The Rice partition order in a Rice-coded residual section must be less than or equal to 8. 4316 */ 4317 if (partitionOrder > 8) { 4318 return DRFLAC_FALSE; 4319 } 4320 4321 /* Validation check. 
*/ 4322 if ((blockSize / (1 << partitionOrder)) <= order) { 4323 return DRFLAC_FALSE; 4324 } 4325 4326 samplesInPartition = (blockSize / (1 << partitionOrder)) - order; 4327 partitionsRemaining = (1 << partitionOrder); 4328 for (;;) { 4329 drflac_uint8 riceParam = 0; 4330 if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE) { 4331 if (!drflac__read_uint8(bs, 4, &riceParam)) { 4332 return DRFLAC_FALSE; 4333 } 4334 if (riceParam == 15) { 4335 riceParam = 0xFF; 4336 } 4337 } else if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) { 4338 if (!drflac__read_uint8(bs, 5, &riceParam)) { 4339 return DRFLAC_FALSE; 4340 } 4341 if (riceParam == 31) { 4342 riceParam = 0xFF; 4343 } 4344 } 4345 4346 if (riceParam != 0xFF) { 4347 if (!drflac__decode_samples_with_residual__rice(bs, bitsPerSample, samplesInPartition, riceParam, order, shift, coefficients, pDecodedSamples)) { 4348 return DRFLAC_FALSE; 4349 } 4350 } else { 4351 unsigned char unencodedBitsPerSample = 0; 4352 if (!drflac__read_uint8(bs, 5, &unencodedBitsPerSample)) { 4353 return DRFLAC_FALSE; 4354 } 4355 4356 if (!drflac__decode_samples_with_residual__unencoded(bs, bitsPerSample, samplesInPartition, unencodedBitsPerSample, order, shift, coefficients, pDecodedSamples)) { 4357 return DRFLAC_FALSE; 4358 } 4359 } 4360 4361 pDecodedSamples += samplesInPartition; 4362 4363 if (partitionsRemaining == 1) { 4364 break; 4365 } 4366 4367 partitionsRemaining -= 1; 4368 4369 if (partitionOrder != 0) { 4370 samplesInPartition = blockSize / (1 << partitionOrder); 4371 } 4372 } 4373 4374 return DRFLAC_TRUE; 4375 } 4376 4377 /* 4378 Reads and seeks past the residual for the sub-frame the decoder is currently sitting on. This function should be called 4379 when the decoder is sitting at the very start of the RESIDUAL block. The first <order> residuals will be set to 0. The 4380 <blockSize> and <order> parameters are used to determine how many residual values need to be decoded. 
4381 */ 4382 static drflac_bool32 drflac__read_and_seek_residual(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 order) 4383 { 4384 drflac_uint8 residualMethod; 4385 drflac_uint8 partitionOrder; 4386 drflac_uint32 samplesInPartition; 4387 drflac_uint32 partitionsRemaining; 4388 4389 DRFLAC_ASSERT(bs != NULL); 4390 DRFLAC_ASSERT(blockSize != 0); 4391 4392 if (!drflac__read_uint8(bs, 2, &residualMethod)) { 4393 return DRFLAC_FALSE; 4394 } 4395 4396 if (residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE && residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) { 4397 return DRFLAC_FALSE; /* Unknown or unsupported residual coding method. */ 4398 } 4399 4400 if (!drflac__read_uint8(bs, 4, &partitionOrder)) { 4401 return DRFLAC_FALSE; 4402 } 4403 4404 /* 4405 From the FLAC spec: 4406 The Rice partition order in a Rice-coded residual section must be less than or equal to 8. 4407 */ 4408 if (partitionOrder > 8) { 4409 return DRFLAC_FALSE; 4410 } 4411 4412 /* Validation check. 
*/ 4413 if ((blockSize / (1 << partitionOrder)) <= order) { 4414 return DRFLAC_FALSE; 4415 } 4416 4417 samplesInPartition = (blockSize / (1 << partitionOrder)) - order; 4418 partitionsRemaining = (1 << partitionOrder); 4419 for (;;) 4420 { 4421 drflac_uint8 riceParam = 0; 4422 if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE) { 4423 if (!drflac__read_uint8(bs, 4, &riceParam)) { 4424 return DRFLAC_FALSE; 4425 } 4426 if (riceParam == 15) { 4427 riceParam = 0xFF; 4428 } 4429 } else if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) { 4430 if (!drflac__read_uint8(bs, 5, &riceParam)) { 4431 return DRFLAC_FALSE; 4432 } 4433 if (riceParam == 31) { 4434 riceParam = 0xFF; 4435 } 4436 } 4437 4438 if (riceParam != 0xFF) { 4439 if (!drflac__read_and_seek_residual__rice(bs, samplesInPartition, riceParam)) { 4440 return DRFLAC_FALSE; 4441 } 4442 } else { 4443 unsigned char unencodedBitsPerSample = 0; 4444 if (!drflac__read_uint8(bs, 5, &unencodedBitsPerSample)) { 4445 return DRFLAC_FALSE; 4446 } 4447 4448 if (!drflac__seek_bits(bs, unencodedBitsPerSample * samplesInPartition)) { 4449 return DRFLAC_FALSE; 4450 } 4451 } 4452 4453 4454 if (partitionsRemaining == 1) { 4455 break; 4456 } 4457 4458 partitionsRemaining -= 1; 4459 samplesInPartition = blockSize / (1 << partitionOrder); 4460 } 4461 4462 return DRFLAC_TRUE; 4463 } 4464 4465 4466 static drflac_bool32 drflac__decode_samples__constant(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 subframeBitsPerSample, drflac_int32* pDecodedSamples) 4467 { 4468 drflac_uint32 i; 4469 4470 /* Only a single sample needs to be decoded here. */ 4471 drflac_int32 sample; 4472 if (!drflac__read_int32(bs, subframeBitsPerSample, &sample)) { 4473 return DRFLAC_FALSE; 4474 } 4475 4476 /* 4477 We don't really need to expand this, but it does simplify the process of reading samples. If this becomes a performance issue (unlikely) 4478 we'll want to look at a more efficient way. 
4479 */ 4480 for (i = 0; i < blockSize; ++i) { 4481 pDecodedSamples[i] = sample; 4482 } 4483 4484 return DRFLAC_TRUE; 4485 } 4486 4487 static drflac_bool32 drflac__decode_samples__verbatim(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 subframeBitsPerSample, drflac_int32* pDecodedSamples) 4488 { 4489 drflac_uint32 i; 4490 4491 for (i = 0; i < blockSize; ++i) { 4492 drflac_int32 sample; 4493 if (!drflac__read_int32(bs, subframeBitsPerSample, &sample)) { 4494 return DRFLAC_FALSE; 4495 } 4496 4497 pDecodedSamples[i] = sample; 4498 } 4499 4500 return DRFLAC_TRUE; 4501 } 4502 4503 static drflac_bool32 drflac__decode_samples__fixed(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 subframeBitsPerSample, drflac_uint8 lpcOrder, drflac_int32* pDecodedSamples) 4504 { 4505 drflac_uint32 i; 4506 4507 static drflac_int32 lpcCoefficientsTable[5][4] = { 4508 {0, 0, 0, 0}, 4509 {1, 0, 0, 0}, 4510 {2, -1, 0, 0}, 4511 {3, -3, 1, 0}, 4512 {4, -6, 4, -1} 4513 }; 4514 4515 /* Warm up samples and coefficients. */ 4516 for (i = 0; i < lpcOrder; ++i) { 4517 drflac_int32 sample; 4518 if (!drflac__read_int32(bs, subframeBitsPerSample, &sample)) { 4519 return DRFLAC_FALSE; 4520 } 4521 4522 pDecodedSamples[i] = sample; 4523 } 4524 4525 if (!drflac__decode_samples_with_residual(bs, subframeBitsPerSample, blockSize, lpcOrder, 0, lpcCoefficientsTable[lpcOrder], pDecodedSamples)) { 4526 return DRFLAC_FALSE; 4527 } 4528 4529 return DRFLAC_TRUE; 4530 } 4531 4532 static drflac_bool32 drflac__decode_samples__lpc(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 bitsPerSample, drflac_uint8 lpcOrder, drflac_int32* pDecodedSamples) 4533 { 4534 drflac_uint8 i; 4535 drflac_uint8 lpcPrecision; 4536 drflac_int8 lpcShift; 4537 drflac_int32 coefficients[32]; 4538 4539 /* Warm up samples. 
*/ 4540 for (i = 0; i < lpcOrder; ++i) { 4541 drflac_int32 sample; 4542 if (!drflac__read_int32(bs, bitsPerSample, &sample)) { 4543 return DRFLAC_FALSE; 4544 } 4545 4546 pDecodedSamples[i] = sample; 4547 } 4548 4549 if (!drflac__read_uint8(bs, 4, &lpcPrecision)) { 4550 return DRFLAC_FALSE; 4551 } 4552 if (lpcPrecision == 15) { 4553 return DRFLAC_FALSE; /* Invalid. */ 4554 } 4555 lpcPrecision += 1; 4556 4557 if (!drflac__read_int8(bs, 5, &lpcShift)) { 4558 return DRFLAC_FALSE; 4559 } 4560 4561 DRFLAC_ZERO_MEMORY(coefficients, sizeof(coefficients)); 4562 for (i = 0; i < lpcOrder; ++i) { 4563 if (!drflac__read_int32(bs, lpcPrecision, coefficients + i)) { 4564 return DRFLAC_FALSE; 4565 } 4566 } 4567 4568 if (!drflac__decode_samples_with_residual(bs, bitsPerSample, blockSize, lpcOrder, lpcShift, coefficients, pDecodedSamples)) { 4569 return DRFLAC_FALSE; 4570 } 4571 4572 return DRFLAC_TRUE; 4573 } 4574 4575 4576 static drflac_bool32 drflac__read_next_flac_frame_header(drflac_bs* bs, drflac_uint8 streaminfoBitsPerSample, drflac_frame_header* header) 4577 { 4578 const drflac_uint32 sampleRateTable[12] = {0, 88200, 176400, 192000, 8000, 16000, 22050, 24000, 32000, 44100, 48000, 96000}; 4579 const drflac_uint8 bitsPerSampleTable[8] = {0, 8, 12, (drflac_uint8)-1, 16, 20, 24, (drflac_uint8)-1}; /* -1 = reserved. */ 4580 4581 DRFLAC_ASSERT(bs != NULL); 4582 DRFLAC_ASSERT(header != NULL); 4583 4584 /* Keep looping until we find a valid sync code. 
*/ 4585 for (;;) { 4586 drflac_uint8 crc8 = 0xCE; /* 0xCE = drflac_crc8(0, 0x3FFE, 14); */ 4587 drflac_uint8 reserved = 0; 4588 drflac_uint8 blockingStrategy = 0; 4589 drflac_uint8 blockSize = 0; 4590 drflac_uint8 sampleRate = 0; 4591 drflac_uint8 channelAssignment = 0; 4592 drflac_uint8 bitsPerSample = 0; 4593 drflac_bool32 isVariableBlockSize; 4594 4595 if (!drflac__find_and_seek_to_next_sync_code(bs)) { 4596 return DRFLAC_FALSE; 4597 } 4598 4599 if (!drflac__read_uint8(bs, 1, &reserved)) { 4600 return DRFLAC_FALSE; 4601 } 4602 if (reserved == 1) { 4603 continue; 4604 } 4605 crc8 = drflac_crc8(crc8, reserved, 1); 4606 4607 if (!drflac__read_uint8(bs, 1, &blockingStrategy)) { 4608 return DRFLAC_FALSE; 4609 } 4610 crc8 = drflac_crc8(crc8, blockingStrategy, 1); 4611 4612 if (!drflac__read_uint8(bs, 4, &blockSize)) { 4613 return DRFLAC_FALSE; 4614 } 4615 if (blockSize == 0) { 4616 continue; 4617 } 4618 crc8 = drflac_crc8(crc8, blockSize, 4); 4619 4620 if (!drflac__read_uint8(bs, 4, &sampleRate)) { 4621 return DRFLAC_FALSE; 4622 } 4623 crc8 = drflac_crc8(crc8, sampleRate, 4); 4624 4625 if (!drflac__read_uint8(bs, 4, &channelAssignment)) { 4626 return DRFLAC_FALSE; 4627 } 4628 if (channelAssignment > 10) { 4629 continue; 4630 } 4631 crc8 = drflac_crc8(crc8, channelAssignment, 4); 4632 4633 if (!drflac__read_uint8(bs, 3, &bitsPerSample)) { 4634 return DRFLAC_FALSE; 4635 } 4636 if (bitsPerSample == 3 || bitsPerSample == 7) { 4637 continue; 4638 } 4639 crc8 = drflac_crc8(crc8, bitsPerSample, 3); 4640 4641 4642 if (!drflac__read_uint8(bs, 1, &reserved)) { 4643 return DRFLAC_FALSE; 4644 } 4645 if (reserved == 1) { 4646 continue; 4647 } 4648 crc8 = drflac_crc8(crc8, reserved, 1); 4649 4650 4651 isVariableBlockSize = blockingStrategy == 1; 4652 if (isVariableBlockSize) { 4653 drflac_uint64 pcmFrameNumber; 4654 drflac_result result = drflac__read_utf8_coded_number(bs, &pcmFrameNumber, &crc8); 4655 if (result != DRFLAC_SUCCESS) { 4656 if (result == DRFLAC_END_OF_STREAM) { 4657 
return DRFLAC_FALSE; 4658 } else { 4659 continue; 4660 } 4661 } 4662 header->flacFrameNumber = 0; 4663 header->pcmFrameNumber = pcmFrameNumber; 4664 } else { 4665 drflac_uint64 flacFrameNumber = 0; 4666 drflac_result result = drflac__read_utf8_coded_number(bs, &flacFrameNumber, &crc8); 4667 if (result != DRFLAC_SUCCESS) { 4668 if (result == DRFLAC_END_OF_STREAM) { 4669 return DRFLAC_FALSE; 4670 } else { 4671 continue; 4672 } 4673 } 4674 header->flacFrameNumber = (drflac_uint32)flacFrameNumber; /* <-- Safe cast. */ 4675 header->pcmFrameNumber = 0; 4676 } 4677 4678 4679 if (blockSize == 1) { 4680 header->blockSizeInPCMFrames = 192; 4681 } else if (blockSize >= 2 && blockSize <= 5) { 4682 header->blockSizeInPCMFrames = 576 * (1 << (blockSize - 2)); 4683 } else if (blockSize == 6) { 4684 if (!drflac__read_uint16(bs, 8, &header->blockSizeInPCMFrames)) { 4685 return DRFLAC_FALSE; 4686 } 4687 crc8 = drflac_crc8(crc8, header->blockSizeInPCMFrames, 8); 4688 header->blockSizeInPCMFrames += 1; 4689 } else if (blockSize == 7) { 4690 if (!drflac__read_uint16(bs, 16, &header->blockSizeInPCMFrames)) { 4691 return DRFLAC_FALSE; 4692 } 4693 crc8 = drflac_crc8(crc8, header->blockSizeInPCMFrames, 16); 4694 header->blockSizeInPCMFrames += 1; 4695 } else { 4696 header->blockSizeInPCMFrames = 256 * (1 << (blockSize - 8)); 4697 } 4698 4699 4700 if (sampleRate <= 11) { 4701 header->sampleRate = sampleRateTable[sampleRate]; 4702 } else if (sampleRate == 12) { 4703 if (!drflac__read_uint32(bs, 8, &header->sampleRate)) { 4704 return DRFLAC_FALSE; 4705 } 4706 crc8 = drflac_crc8(crc8, header->sampleRate, 8); 4707 header->sampleRate *= 1000; 4708 } else if (sampleRate == 13) { 4709 if (!drflac__read_uint32(bs, 16, &header->sampleRate)) { 4710 return DRFLAC_FALSE; 4711 } 4712 crc8 = drflac_crc8(crc8, header->sampleRate, 16); 4713 } else if (sampleRate == 14) { 4714 if (!drflac__read_uint32(bs, 16, &header->sampleRate)) { 4715 return DRFLAC_FALSE; 4716 } 4717 crc8 = drflac_crc8(crc8, 
header->sampleRate, 16); 4718 header->sampleRate *= 10; 4719 } else { 4720 continue; /* Invalid. Assume an invalid block. */ 4721 } 4722 4723 4724 header->channelAssignment = channelAssignment; 4725 4726 header->bitsPerSample = bitsPerSampleTable[bitsPerSample]; 4727 if (header->bitsPerSample == 0) { 4728 header->bitsPerSample = streaminfoBitsPerSample; 4729 } 4730 4731 if (!drflac__read_uint8(bs, 8, &header->crc8)) { 4732 return DRFLAC_FALSE; 4733 } 4734 4735 #ifndef DR_FLAC_NO_CRC 4736 if (header->crc8 != crc8) { 4737 continue; /* CRC mismatch. Loop back to the top and find the next sync code. */ 4738 } 4739 #endif 4740 return DRFLAC_TRUE; 4741 } 4742 } 4743 4744 static drflac_bool32 drflac__read_subframe_header(drflac_bs* bs, drflac_subframe* pSubframe) 4745 { 4746 drflac_uint8 header; 4747 int type; 4748 4749 if (!drflac__read_uint8(bs, 8, &header)) { 4750 return DRFLAC_FALSE; 4751 } 4752 4753 /* First bit should always be 0. */ 4754 if ((header & 0x80) != 0) { 4755 return DRFLAC_FALSE; 4756 } 4757 4758 type = (header & 0x7E) >> 1; 4759 if (type == 0) { 4760 pSubframe->subframeType = DRFLAC_SUBFRAME_CONSTANT; 4761 } else if (type == 1) { 4762 pSubframe->subframeType = DRFLAC_SUBFRAME_VERBATIM; 4763 } else { 4764 if ((type & 0x20) != 0) { 4765 pSubframe->subframeType = DRFLAC_SUBFRAME_LPC; 4766 pSubframe->lpcOrder = (type & 0x1F) + 1; 4767 } else if ((type & 0x08) != 0) { 4768 pSubframe->subframeType = DRFLAC_SUBFRAME_FIXED; 4769 pSubframe->lpcOrder = (type & 0x07); 4770 if (pSubframe->lpcOrder > 4) { 4771 pSubframe->subframeType = DRFLAC_SUBFRAME_RESERVED; 4772 pSubframe->lpcOrder = 0; 4773 } 4774 } else { 4775 pSubframe->subframeType = DRFLAC_SUBFRAME_RESERVED; 4776 } 4777 } 4778 4779 if (pSubframe->subframeType == DRFLAC_SUBFRAME_RESERVED) { 4780 return DRFLAC_FALSE; 4781 } 4782 4783 /* Wasted bits per sample. 
*/ 4784 pSubframe->wastedBitsPerSample = 0; 4785 if ((header & 0x01) == 1) { 4786 unsigned int wastedBitsPerSample; 4787 if (!drflac__seek_past_next_set_bit(bs, &wastedBitsPerSample)) { 4788 return DRFLAC_FALSE; 4789 } 4790 pSubframe->wastedBitsPerSample = (unsigned char)wastedBitsPerSample + 1; 4791 } 4792 4793 return DRFLAC_TRUE; 4794 } 4795 4796 static drflac_bool32 drflac__decode_subframe(drflac_bs* bs, drflac_frame* frame, int subframeIndex, drflac_int32* pDecodedSamplesOut) 4797 { 4798 drflac_subframe* pSubframe; 4799 drflac_uint32 subframeBitsPerSample; 4800 4801 DRFLAC_ASSERT(bs != NULL); 4802 DRFLAC_ASSERT(frame != NULL); 4803 4804 pSubframe = frame->subframes + subframeIndex; 4805 if (!drflac__read_subframe_header(bs, pSubframe)) { 4806 return DRFLAC_FALSE; 4807 } 4808 4809 /* Side channels require an extra bit per sample. Took a while to figure that one out... */ 4810 subframeBitsPerSample = frame->header.bitsPerSample; 4811 if ((frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE || frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE) && subframeIndex == 1) { 4812 subframeBitsPerSample += 1; 4813 } else if (frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE && subframeIndex == 0) { 4814 subframeBitsPerSample += 1; 4815 } 4816 4817 /* Need to handle wasted bits per sample. 
*/ 4818 if (pSubframe->wastedBitsPerSample >= subframeBitsPerSample) { 4819 return DRFLAC_FALSE; 4820 } 4821 subframeBitsPerSample -= pSubframe->wastedBitsPerSample; 4822 4823 pSubframe->pSamplesS32 = pDecodedSamplesOut; 4824 4825 switch (pSubframe->subframeType) 4826 { 4827 case DRFLAC_SUBFRAME_CONSTANT: 4828 { 4829 drflac__decode_samples__constant(bs, frame->header.blockSizeInPCMFrames, subframeBitsPerSample, pSubframe->pSamplesS32); 4830 } break; 4831 4832 case DRFLAC_SUBFRAME_VERBATIM: 4833 { 4834 drflac__decode_samples__verbatim(bs, frame->header.blockSizeInPCMFrames, subframeBitsPerSample, pSubframe->pSamplesS32); 4835 } break; 4836 4837 case DRFLAC_SUBFRAME_FIXED: 4838 { 4839 drflac__decode_samples__fixed(bs, frame->header.blockSizeInPCMFrames, subframeBitsPerSample, pSubframe->lpcOrder, pSubframe->pSamplesS32); 4840 } break; 4841 4842 case DRFLAC_SUBFRAME_LPC: 4843 { 4844 drflac__decode_samples__lpc(bs, frame->header.blockSizeInPCMFrames, subframeBitsPerSample, pSubframe->lpcOrder, pSubframe->pSamplesS32); 4845 } break; 4846 4847 default: return DRFLAC_FALSE; 4848 } 4849 4850 return DRFLAC_TRUE; 4851 } 4852 4853 static drflac_bool32 drflac__seek_subframe(drflac_bs* bs, drflac_frame* frame, int subframeIndex) 4854 { 4855 drflac_subframe* pSubframe; 4856 drflac_uint32 subframeBitsPerSample; 4857 4858 DRFLAC_ASSERT(bs != NULL); 4859 DRFLAC_ASSERT(frame != NULL); 4860 4861 pSubframe = frame->subframes + subframeIndex; 4862 if (!drflac__read_subframe_header(bs, pSubframe)) { 4863 return DRFLAC_FALSE; 4864 } 4865 4866 /* Side channels require an extra bit per sample. Took a while to figure that one out... 
*/ 4867 subframeBitsPerSample = frame->header.bitsPerSample; 4868 if ((frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE || frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE) && subframeIndex == 1) { 4869 subframeBitsPerSample += 1; 4870 } else if (frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE && subframeIndex == 0) { 4871 subframeBitsPerSample += 1; 4872 } 4873 4874 /* Need to handle wasted bits per sample. */ 4875 if (pSubframe->wastedBitsPerSample >= subframeBitsPerSample) { 4876 return DRFLAC_FALSE; 4877 } 4878 subframeBitsPerSample -= pSubframe->wastedBitsPerSample; 4879 4880 pSubframe->pSamplesS32 = NULL; 4881 4882 switch (pSubframe->subframeType) 4883 { 4884 case DRFLAC_SUBFRAME_CONSTANT: 4885 { 4886 if (!drflac__seek_bits(bs, subframeBitsPerSample)) { 4887 return DRFLAC_FALSE; 4888 } 4889 } break; 4890 4891 case DRFLAC_SUBFRAME_VERBATIM: 4892 { 4893 unsigned int bitsToSeek = frame->header.blockSizeInPCMFrames * subframeBitsPerSample; 4894 if (!drflac__seek_bits(bs, bitsToSeek)) { 4895 return DRFLAC_FALSE; 4896 } 4897 } break; 4898 4899 case DRFLAC_SUBFRAME_FIXED: 4900 { 4901 unsigned int bitsToSeek = pSubframe->lpcOrder * subframeBitsPerSample; 4902 if (!drflac__seek_bits(bs, bitsToSeek)) { 4903 return DRFLAC_FALSE; 4904 } 4905 4906 if (!drflac__read_and_seek_residual(bs, frame->header.blockSizeInPCMFrames, pSubframe->lpcOrder)) { 4907 return DRFLAC_FALSE; 4908 } 4909 } break; 4910 4911 case DRFLAC_SUBFRAME_LPC: 4912 { 4913 unsigned char lpcPrecision; 4914 4915 unsigned int bitsToSeek = pSubframe->lpcOrder * subframeBitsPerSample; 4916 if (!drflac__seek_bits(bs, bitsToSeek)) { 4917 return DRFLAC_FALSE; 4918 } 4919 4920 if (!drflac__read_uint8(bs, 4, &lpcPrecision)) { 4921 return DRFLAC_FALSE; 4922 } 4923 if (lpcPrecision == 15) { 4924 return DRFLAC_FALSE; /* Invalid. */ 4925 } 4926 lpcPrecision += 1; 4927 4928 4929 bitsToSeek = (pSubframe->lpcOrder * lpcPrecision) + 5; /* +5 for shift. 
*/ 4930 if (!drflac__seek_bits(bs, bitsToSeek)) { 4931 return DRFLAC_FALSE; 4932 } 4933 4934 if (!drflac__read_and_seek_residual(bs, frame->header.blockSizeInPCMFrames, pSubframe->lpcOrder)) { 4935 return DRFLAC_FALSE; 4936 } 4937 } break; 4938 4939 default: return DRFLAC_FALSE; 4940 } 4941 4942 return DRFLAC_TRUE; 4943 } 4944 4945 4946 static DRFLAC_INLINE drflac_uint8 drflac__get_channel_count_from_channel_assignment(drflac_int8 channelAssignment) 4947 { 4948 drflac_uint8 lookup[] = {1, 2, 3, 4, 5, 6, 7, 8, 2, 2, 2}; 4949 4950 DRFLAC_ASSERT(channelAssignment <= 10); 4951 return lookup[channelAssignment]; 4952 } 4953 4954 static drflac_result drflac__decode_flac_frame(drflac* pFlac) 4955 { 4956 int channelCount; 4957 int i; 4958 drflac_uint8 paddingSizeInBits; 4959 drflac_uint16 desiredCRC16; 4960 #ifndef DR_FLAC_NO_CRC 4961 drflac_uint16 actualCRC16; 4962 #endif 4963 4964 /* This function should be called while the stream is sitting on the first byte after the frame header. */ 4965 DRFLAC_ZERO_MEMORY(pFlac->currentFLACFrame.subframes, sizeof(pFlac->currentFLACFrame.subframes)); 4966 4967 /* The frame block size must never be larger than the maximum block size defined by the FLAC stream. */ 4968 if (pFlac->currentFLACFrame.header.blockSizeInPCMFrames > pFlac->maxBlockSizeInPCMFrames) { 4969 return DRFLAC_ERROR; 4970 } 4971 4972 /* The number of channels in the frame must match the channel count from the STREAMINFO block. 
*/ 4973 channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment); 4974 if (channelCount != (int)pFlac->channels) { 4975 return DRFLAC_ERROR; 4976 } 4977 4978 for (i = 0; i < channelCount; ++i) { 4979 if (!drflac__decode_subframe(&pFlac->bs, &pFlac->currentFLACFrame, i, pFlac->pDecodedSamples + (pFlac->currentFLACFrame.header.blockSizeInPCMFrames * i))) { 4980 return DRFLAC_ERROR; 4981 } 4982 } 4983 4984 paddingSizeInBits = DRFLAC_CACHE_L1_BITS_REMAINING(&pFlac->bs) & 7; 4985 if (paddingSizeInBits > 0) { 4986 drflac_uint8 padding = 0; 4987 if (!drflac__read_uint8(&pFlac->bs, paddingSizeInBits, &padding)) { 4988 return DRFLAC_END_OF_STREAM; 4989 } 4990 } 4991 4992 #ifndef DR_FLAC_NO_CRC 4993 actualCRC16 = drflac__flush_crc16(&pFlac->bs); 4994 #endif 4995 if (!drflac__read_uint16(&pFlac->bs, 16, &desiredCRC16)) { 4996 return DRFLAC_END_OF_STREAM; 4997 } 4998 4999 #ifndef DR_FLAC_NO_CRC 5000 if (actualCRC16 != desiredCRC16) { 5001 return DRFLAC_CRC_MISMATCH; /* CRC mismatch. */ 5002 } 5003 #endif 5004 5005 pFlac->currentFLACFrame.pcmFramesRemaining = pFlac->currentFLACFrame.header.blockSizeInPCMFrames; 5006 5007 return DRFLAC_SUCCESS; 5008 } 5009 5010 static drflac_result drflac__seek_flac_frame(drflac* pFlac) 5011 { 5012 int channelCount; 5013 int i; 5014 drflac_uint16 desiredCRC16; 5015 #ifndef DR_FLAC_NO_CRC 5016 drflac_uint16 actualCRC16; 5017 #endif 5018 5019 channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment); 5020 for (i = 0; i < channelCount; ++i) { 5021 if (!drflac__seek_subframe(&pFlac->bs, &pFlac->currentFLACFrame, i)) { 5022 return DRFLAC_ERROR; 5023 } 5024 } 5025 5026 /* Padding. */ 5027 if (!drflac__seek_bits(&pFlac->bs, DRFLAC_CACHE_L1_BITS_REMAINING(&pFlac->bs) & 7)) { 5028 return DRFLAC_ERROR; 5029 } 5030 5031 /* CRC. 
*/ 5032 #ifndef DR_FLAC_NO_CRC 5033 actualCRC16 = drflac__flush_crc16(&pFlac->bs); 5034 #endif 5035 if (!drflac__read_uint16(&pFlac->bs, 16, &desiredCRC16)) { 5036 return DRFLAC_END_OF_STREAM; 5037 } 5038 5039 #ifndef DR_FLAC_NO_CRC 5040 if (actualCRC16 != desiredCRC16) { 5041 return DRFLAC_CRC_MISMATCH; /* CRC mismatch. */ 5042 } 5043 #endif 5044 5045 return DRFLAC_SUCCESS; 5046 } 5047 5048 static drflac_bool32 drflac__read_and_decode_next_flac_frame(drflac* pFlac) 5049 { 5050 DRFLAC_ASSERT(pFlac != NULL); 5051 5052 for (;;) { 5053 drflac_result result; 5054 5055 if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) { 5056 return DRFLAC_FALSE; 5057 } 5058 5059 result = drflac__decode_flac_frame(pFlac); 5060 if (result != DRFLAC_SUCCESS) { 5061 if (result == DRFLAC_CRC_MISMATCH) { 5062 continue; /* CRC mismatch. Skip to the next frame. */ 5063 } else { 5064 return DRFLAC_FALSE; 5065 } 5066 } 5067 5068 return DRFLAC_TRUE; 5069 } 5070 } 5071 5072 static void drflac__get_pcm_frame_range_of_current_flac_frame(drflac* pFlac, drflac_uint64* pFirstPCMFrame, drflac_uint64* pLastPCMFrame) 5073 { 5074 drflac_uint64 firstPCMFrame; 5075 drflac_uint64 lastPCMFrame; 5076 5077 DRFLAC_ASSERT(pFlac != NULL); 5078 5079 firstPCMFrame = pFlac->currentFLACFrame.header.pcmFrameNumber; 5080 if (firstPCMFrame == 0) { 5081 firstPCMFrame = pFlac->currentFLACFrame.header.flacFrameNumber * pFlac->maxBlockSizeInPCMFrames; 5082 } 5083 5084 lastPCMFrame = firstPCMFrame + (pFlac->currentFLACFrame.header.blockSizeInPCMFrames); 5085 if (lastPCMFrame > 0) { 5086 lastPCMFrame -= 1; /* Needs to be zero based. 
*/ 5087 } 5088 5089 if (pFirstPCMFrame) { 5090 *pFirstPCMFrame = firstPCMFrame; 5091 } 5092 if (pLastPCMFrame) { 5093 *pLastPCMFrame = lastPCMFrame; 5094 } 5095 } 5096 5097 static drflac_bool32 drflac__seek_to_first_frame(drflac* pFlac) 5098 { 5099 drflac_bool32 result; 5100 5101 DRFLAC_ASSERT(pFlac != NULL); 5102 5103 result = drflac__seek_to_byte(&pFlac->bs, pFlac->firstFLACFramePosInBytes); 5104 5105 DRFLAC_ZERO_MEMORY(&pFlac->currentFLACFrame, sizeof(pFlac->currentFLACFrame)); 5106 pFlac->currentPCMFrame = 0; 5107 5108 return result; 5109 } 5110 5111 static DRFLAC_INLINE drflac_result drflac__seek_to_next_flac_frame(drflac* pFlac) 5112 { 5113 /* This function should only ever be called while the decoder is sitting on the first byte past the FRAME_HEADER section. */ 5114 DRFLAC_ASSERT(pFlac != NULL); 5115 return drflac__seek_flac_frame(pFlac); 5116 } 5117 5118 5119 drflac_uint64 drflac__seek_forward_by_pcm_frames(drflac* pFlac, drflac_uint64 pcmFramesToSeek) 5120 { 5121 drflac_uint64 pcmFramesRead = 0; 5122 while (pcmFramesToSeek > 0) { 5123 if (pFlac->currentFLACFrame.pcmFramesRemaining == 0) { 5124 if (!drflac__read_and_decode_next_flac_frame(pFlac)) { 5125 break; /* Couldn't read the next frame, so just break from the loop and return. */ 5126 } 5127 } else { 5128 if (pFlac->currentFLACFrame.pcmFramesRemaining > pcmFramesToSeek) { 5129 pcmFramesRead += pcmFramesToSeek; 5130 pFlac->currentFLACFrame.pcmFramesRemaining -= (drflac_uint32)pcmFramesToSeek; /* <-- Safe cast. Will always be < currentFrame.pcmFramesRemaining < 65536. 
*/ 5131 pcmFramesToSeek = 0; 5132 } else { 5133 pcmFramesRead += pFlac->currentFLACFrame.pcmFramesRemaining; 5134 pcmFramesToSeek -= pFlac->currentFLACFrame.pcmFramesRemaining; 5135 pFlac->currentFLACFrame.pcmFramesRemaining = 0; 5136 } 5137 } 5138 } 5139 5140 pFlac->currentPCMFrame += pcmFramesRead; 5141 return pcmFramesRead; 5142 } 5143 5144 5145 static drflac_bool32 drflac__seek_to_pcm_frame__brute_force(drflac* pFlac, drflac_uint64 pcmFrameIndex) 5146 { 5147 drflac_bool32 isMidFrame = DRFLAC_FALSE; 5148 drflac_uint64 runningPCMFrameCount; 5149 5150 DRFLAC_ASSERT(pFlac != NULL); 5151 5152 /* If we are seeking forward we start from the current position. Otherwise we need to start all the way from the start of the file. */ 5153 if (pcmFrameIndex >= pFlac->currentPCMFrame) { 5154 /* Seeking forward. Need to seek from the current position. */ 5155 runningPCMFrameCount = pFlac->currentPCMFrame; 5156 5157 /* The frame header for the first frame may not yet have been read. We need to do that if necessary. */ 5158 if (pFlac->currentPCMFrame == 0 && pFlac->currentFLACFrame.pcmFramesRemaining == 0) { 5159 if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) { 5160 return DRFLAC_FALSE; 5161 } 5162 } else { 5163 isMidFrame = DRFLAC_TRUE; 5164 } 5165 } else { 5166 /* Seeking backwards. Need to seek from the start of the file. */ 5167 runningPCMFrameCount = 0; 5168 5169 /* Move back to the start. */ 5170 if (!drflac__seek_to_first_frame(pFlac)) { 5171 return DRFLAC_FALSE; 5172 } 5173 5174 /* Decode the first frame in preparation for sample-exact seeking below. */ 5175 if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) { 5176 return DRFLAC_FALSE; 5177 } 5178 } 5179 5180 /* 5181 We need to as quickly as possible find the frame that contains the target sample. To do this, we iterate over each frame and inspect its 5182 header. 
If based on the header we can determine that the frame contains the sample, we do a full decode of that frame. 5183 */ 5184 for (;;) { 5185 drflac_uint64 pcmFrameCountInThisFLACFrame; 5186 drflac_uint64 firstPCMFrameInFLACFrame = 0; 5187 drflac_uint64 lastPCMFrameInFLACFrame = 0; 5188 5189 drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &firstPCMFrameInFLACFrame, &lastPCMFrameInFLACFrame); 5190 5191 pcmFrameCountInThisFLACFrame = (lastPCMFrameInFLACFrame - firstPCMFrameInFLACFrame) + 1; 5192 if (pcmFrameIndex < (runningPCMFrameCount + pcmFrameCountInThisFLACFrame)) { 5193 /* 5194 The sample should be in this frame. We need to fully decode it, however if it's an invalid frame (a CRC mismatch), we need to pretend 5195 it never existed and keep iterating. 5196 */ 5197 drflac_uint64 pcmFramesToDecode = pcmFrameIndex - runningPCMFrameCount; 5198 5199 if (!isMidFrame) { 5200 drflac_result result = drflac__decode_flac_frame(pFlac); 5201 if (result == DRFLAC_SUCCESS) { 5202 /* The frame is valid. We just need to skip over some samples to ensure it's sample-exact. */ 5203 return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode; /* <-- If this fails, something bad has happened (it should never fail). */ 5204 } else { 5205 if (result == DRFLAC_CRC_MISMATCH) { 5206 goto next_iteration; /* CRC mismatch. Pretend this frame never existed. */ 5207 } else { 5208 return DRFLAC_FALSE; 5209 } 5210 } 5211 } else { 5212 /* We started seeking mid-frame which means we need to skip the frame decoding part. */ 5213 return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode; 5214 } 5215 } else { 5216 /* 5217 It's not in this frame. We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this 5218 frame never existed and leave the running sample count untouched. 
5219 */ 5220 if (!isMidFrame) { 5221 drflac_result result = drflac__seek_to_next_flac_frame(pFlac); 5222 if (result == DRFLAC_SUCCESS) { 5223 runningPCMFrameCount += pcmFrameCountInThisFLACFrame; 5224 } else { 5225 if (result == DRFLAC_CRC_MISMATCH) { 5226 goto next_iteration; /* CRC mismatch. Pretend this frame never existed. */ 5227 } else { 5228 return DRFLAC_FALSE; 5229 } 5230 } 5231 } else { 5232 /* 5233 We started seeking mid-frame which means we need to seek by reading to the end of the frame instead of with 5234 drflac__seek_to_next_flac_frame() which only works if the decoder is sitting on the byte just after the frame header. 5235 */ 5236 runningPCMFrameCount += pFlac->currentFLACFrame.pcmFramesRemaining; 5237 pFlac->currentFLACFrame.pcmFramesRemaining = 0; 5238 isMidFrame = DRFLAC_FALSE; 5239 } 5240 5241 /* If we are seeking to the end of the file and we've just hit it, we're done. */ 5242 if (pcmFrameIndex == pFlac->totalPCMFrameCount && runningPCMFrameCount == pFlac->totalPCMFrameCount) { 5243 return DRFLAC_TRUE; 5244 } 5245 } 5246 5247 next_iteration: 5248 /* Grab the next frame in preparation for the next iteration. */ 5249 if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) { 5250 return DRFLAC_FALSE; 5251 } 5252 } 5253 } 5254 5255 5256 #if !defined(DR_FLAC_NO_CRC) 5257 /* 5258 We use an average compression ratio to determine our approximate start location. FLAC files are generally about 50%-70% the size of their 5259 uncompressed counterparts so we'll use this as a basis. I'm going to split the middle and use a factor of 0.6 to determine the starting 5260 location. 
5261 */ 5262 #define DRFLAC_BINARY_SEARCH_APPROX_COMPRESSION_RATIO 0.6f 5263 5264 static drflac_bool32 drflac__seek_to_approximate_flac_frame_to_byte(drflac* pFlac, drflac_uint64 targetByte, drflac_uint64 rangeLo, drflac_uint64 rangeHi, drflac_uint64* pLastSuccessfulSeekOffset) 5265 { 5266 DRFLAC_ASSERT(pFlac != NULL); 5267 DRFLAC_ASSERT(pLastSuccessfulSeekOffset != NULL); 5268 DRFLAC_ASSERT(targetByte >= rangeLo); 5269 DRFLAC_ASSERT(targetByte <= rangeHi); 5270 5271 *pLastSuccessfulSeekOffset = pFlac->firstFLACFramePosInBytes; 5272 5273 for (;;) { 5274 /* When seeking to a byte, failure probably means we've attempted to seek beyond the end of the stream. To counter this we just halve it each attempt. */ 5275 if (!drflac__seek_to_byte(&pFlac->bs, targetByte)) { 5276 /* If we couldn't even seek to the first byte in the stream we have a problem. Just abandon the whole thing. */ 5277 if (targetByte == 0) { 5278 drflac__seek_to_first_frame(pFlac); /* Try to recover. */ 5279 return DRFLAC_FALSE; 5280 } 5281 5282 /* Halve the byte location and continue. */ 5283 targetByte = rangeLo + ((rangeHi - rangeLo)/2); 5284 rangeHi = targetByte; 5285 } else { 5286 /* Getting here should mean that we have seeked to an appropriate byte. */ 5287 5288 /* Clear the details of the FLAC frame so we don't misreport data. */ 5289 DRFLAC_ZERO_MEMORY(&pFlac->currentFLACFrame, sizeof(pFlac->currentFLACFrame)); 5290 5291 /* 5292 Now seek to the next FLAC frame. We need to decode the entire frame (not just the header) because it's possible for the header to incorrectly pass the 5293 CRC check and return bad data. We need to decode the entire frame to be more certain. Although this seems unlikely, this has happened to me in testing 5294 to it needs to stay this way for now. 5295 */ 5296 #if 1 5297 if (!drflac__read_and_decode_next_flac_frame(pFlac)) { 5298 /* Halve the byte location and continue. 
*/ 5299 targetByte = rangeLo + ((rangeHi - rangeLo)/2); 5300 rangeHi = targetByte; 5301 } else { 5302 break; 5303 } 5304 #else 5305 if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) { 5306 /* Halve the byte location and continue. */ 5307 targetByte = rangeLo + ((rangeHi - rangeLo)/2); 5308 rangeHi = targetByte; 5309 } else { 5310 break; 5311 } 5312 #endif 5313 } 5314 } 5315 5316 /* The current PCM frame needs to be updated based on the frame we just seeked to. */ 5317 drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &pFlac->currentPCMFrame, NULL); 5318 5319 DRFLAC_ASSERT(targetByte <= rangeHi); 5320 5321 *pLastSuccessfulSeekOffset = targetByte; 5322 return DRFLAC_TRUE; 5323 } 5324 5325 static drflac_bool32 drflac__decode_flac_frame_and_seek_forward_by_pcm_frames(drflac* pFlac, drflac_uint64 offset) 5326 { 5327 /* This section of code would be used if we were only decoding the FLAC frame header when calling drflac__seek_to_approximate_flac_frame_to_byte(). */ 5328 #if 0 5329 if (drflac__decode_flac_frame(pFlac) != DRFLAC_SUCCESS) { 5330 /* We failed to decode this frame which may be due to it being corrupt. We'll just use the next valid FLAC frame. */ 5331 if (drflac__read_and_decode_next_flac_frame(pFlac) == DRFLAC_FALSE) { 5332 return DRFLAC_FALSE; 5333 } 5334 } 5335 #endif 5336 5337 return drflac__seek_forward_by_pcm_frames(pFlac, offset) == offset; 5338 } 5339 5340 5341 static drflac_bool32 drflac__seek_to_pcm_frame__binary_search_internal(drflac* pFlac, drflac_uint64 pcmFrameIndex, drflac_uint64 byteRangeLo, drflac_uint64 byteRangeHi) 5342 { 5343 /* This assumes pFlac->currentPCMFrame is sitting on byteRangeLo upon entry. 
*/ 5344 5345 drflac_uint64 targetByte; 5346 drflac_uint64 pcmRangeLo = pFlac->totalPCMFrameCount; 5347 drflac_uint64 pcmRangeHi = 0; 5348 drflac_uint64 lastSuccessfulSeekOffset = (drflac_uint64)-1; 5349 drflac_uint64 closestSeekOffsetBeforeTargetPCMFrame = byteRangeLo; 5350 drflac_uint32 seekForwardThreshold = (pFlac->maxBlockSizeInPCMFrames != 0) ? pFlac->maxBlockSizeInPCMFrames*2 : 4096; 5351 5352 targetByte = byteRangeLo + (drflac_uint64)((pcmFrameIndex - pFlac->currentPCMFrame) * pFlac->channels * pFlac->bitsPerSample/8 * DRFLAC_BINARY_SEARCH_APPROX_COMPRESSION_RATIO); 5353 if (targetByte > byteRangeHi) { 5354 targetByte = byteRangeHi; 5355 } 5356 5357 for (;;) { 5358 if (drflac__seek_to_approximate_flac_frame_to_byte(pFlac, targetByte, byteRangeLo, byteRangeHi, &lastSuccessfulSeekOffset)) { 5359 /* We found a FLAC frame. We need to check if it contains the sample we're looking for. */ 5360 drflac_uint64 newPCMRangeLo; 5361 drflac_uint64 newPCMRangeHi; 5362 drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &newPCMRangeLo, &newPCMRangeHi); 5363 5364 /* If we selected the same frame, it means we should be pretty close. Just decode the rest. */ 5365 if (pcmRangeLo == newPCMRangeLo) { 5366 if (!drflac__seek_to_approximate_flac_frame_to_byte(pFlac, closestSeekOffsetBeforeTargetPCMFrame, closestSeekOffsetBeforeTargetPCMFrame, byteRangeHi, &lastSuccessfulSeekOffset)) { 5367 break; /* Failed to seek to closest frame. */ 5368 } 5369 5370 if (drflac__decode_flac_frame_and_seek_forward_by_pcm_frames(pFlac, pcmFrameIndex - pFlac->currentPCMFrame)) { 5371 return DRFLAC_TRUE; 5372 } else { 5373 break; /* Failed to seek forward. */ 5374 } 5375 } 5376 5377 pcmRangeLo = newPCMRangeLo; 5378 pcmRangeHi = newPCMRangeHi; 5379 5380 if (pcmRangeLo <= pcmFrameIndex && pcmRangeHi >= pcmFrameIndex) { 5381 /* The target PCM frame is in this FLAC frame. 
*/ 5382 if (drflac__decode_flac_frame_and_seek_forward_by_pcm_frames(pFlac, pcmFrameIndex - pFlac->currentPCMFrame) ) { 5383 return DRFLAC_TRUE; 5384 } else { 5385 break; /* Failed to seek to FLAC frame. */ 5386 } 5387 } else { 5388 const float approxCompressionRatio = (lastSuccessfulSeekOffset - pFlac->firstFLACFramePosInBytes) / (pcmRangeLo * pFlac->channels * pFlac->bitsPerSample/8.0f); 5389 5390 if (pcmRangeLo > pcmFrameIndex) { 5391 /* We seeked too far forward. We need to move our target byte backward and try again. */ 5392 byteRangeHi = lastSuccessfulSeekOffset; 5393 if (byteRangeLo > byteRangeHi) { 5394 byteRangeLo = byteRangeHi; 5395 } 5396 5397 /*targetByte = lastSuccessfulSeekOffset - (drflac_uint64)((pcmRangeLo-pcmFrameIndex) * pFlac->channels * pFlac->bitsPerSample/8 * approxCompressionRatio);*/ 5398 targetByte = byteRangeLo + ((byteRangeHi - byteRangeLo) / 2); 5399 if (targetByte < byteRangeLo) { 5400 targetByte = byteRangeLo; 5401 } 5402 } else /*if (pcmRangeHi < pcmFrameIndex)*/ { 5403 /* We didn't seek far enough. We need to move our target byte forward and try again. */ 5404 5405 /* If we're close enough we can just seek forward. */ 5406 if ((pcmFrameIndex - pcmRangeLo) < seekForwardThreshold) { 5407 if (drflac__decode_flac_frame_and_seek_forward_by_pcm_frames(pFlac, pcmFrameIndex - pFlac->currentPCMFrame)) { 5408 return DRFLAC_TRUE; 5409 } else { 5410 break; /* Failed to seek to FLAC frame. 
*/ 5411 } 5412 } else { 5413 byteRangeLo = lastSuccessfulSeekOffset; 5414 if (byteRangeHi < byteRangeLo) { 5415 byteRangeHi = byteRangeLo; 5416 } 5417 5418 /*targetByte = byteRangeLo + (drflac_uint64)((pcmFrameIndex-pcmRangeLo) * pFlac->channels * pFlac->bitsPerSample/8 * approxCompressionRatio);*/ 5419 targetByte = lastSuccessfulSeekOffset + (drflac_uint64)((pcmFrameIndex-pcmRangeLo) * pFlac->channels * pFlac->bitsPerSample/8 * approxCompressionRatio); 5420 /*targetByte = byteRangeLo + ((byteRangeHi - byteRangeLo) / 2);*/ 5421 5422 if (targetByte > byteRangeHi) { 5423 targetByte = byteRangeHi; 5424 } 5425 5426 if (closestSeekOffsetBeforeTargetPCMFrame < lastSuccessfulSeekOffset) { 5427 closestSeekOffsetBeforeTargetPCMFrame = lastSuccessfulSeekOffset; 5428 } 5429 } 5430 } 5431 } 5432 } else { 5433 /* Getting here is really bad. We just recover as best we can, but moving to the first frame in the stream, and then abort. */ 5434 break; 5435 } 5436 } 5437 5438 drflac__seek_to_first_frame(pFlac); /* <-- Try to recover. */ 5439 return DRFLAC_FALSE; 5440 } 5441 5442 static drflac_bool32 drflac__seek_to_pcm_frame__binary_search(drflac* pFlac, drflac_uint64 pcmFrameIndex) 5443 { 5444 drflac_uint64 byteRangeLo; 5445 drflac_uint64 byteRangeHi; 5446 drflac_uint32 seekForwardThreshold = (pFlac->maxBlockSizeInPCMFrames != 0) ? pFlac->maxBlockSizeInPCMFrames*2 : 4096; 5447 5448 /* Our algorithm currently assumes the PCM frame */ 5449 if (drflac__seek_to_first_frame(pFlac) == DRFLAC_FALSE) { 5450 return DRFLAC_FALSE; 5451 } 5452 5453 /* If we're close enough to the start, just move to the start and seek forward. */ 5454 if (pcmFrameIndex < seekForwardThreshold) { 5455 return drflac__seek_forward_by_pcm_frames(pFlac, pcmFrameIndex) == pcmFrameIndex; 5456 } 5457 5458 /* 5459 Our starting byte range is the byte position of the first FLAC frame and the approximate end of the file as if it were completely uncompressed. 
This ensures 5460 the entire file is included, even though most of the time it'll exceed the end of the actual stream. This is OK as the frame searching logic will handle it. 5461 */ 5462 byteRangeLo = pFlac->firstFLACFramePosInBytes; 5463 byteRangeHi = pFlac->firstFLACFramePosInBytes + (drflac_uint64)(pFlac->totalPCMFrameCount * pFlac->channels * pFlac->bitsPerSample/8); 5464 5465 return drflac__seek_to_pcm_frame__binary_search_internal(pFlac, pcmFrameIndex, byteRangeLo, byteRangeHi); 5466 } 5467 #endif /* !DR_FLAC_NO_CRC */ 5468 5469 static drflac_bool32 drflac__seek_to_pcm_frame__seek_table(drflac* pFlac, drflac_uint64 pcmFrameIndex) 5470 { 5471 drflac_uint32 iClosestSeekpoint = 0; 5472 drflac_bool32 isMidFrame = DRFLAC_FALSE; 5473 drflac_uint64 runningPCMFrameCount; 5474 drflac_uint32 iSeekpoint; 5475 5476 5477 DRFLAC_ASSERT(pFlac != NULL); 5478 5479 if (pFlac->pSeekpoints == NULL || pFlac->seekpointCount == 0) { 5480 return DRFLAC_FALSE; 5481 } 5482 5483 for (iSeekpoint = 0; iSeekpoint < pFlac->seekpointCount; ++iSeekpoint) { 5484 if (pFlac->pSeekpoints[iSeekpoint].firstPCMFrame >= pcmFrameIndex) { 5485 break; 5486 } 5487 5488 iClosestSeekpoint = iSeekpoint; 5489 } 5490 5491 #if !defined(DR_FLAC_NO_CRC) 5492 /* At this point we should know the closest seek point. We can use a binary search for this. We need to know the total sample count for this. */ 5493 if (pFlac->totalPCMFrameCount > 0) { 5494 drflac_uint64 byteRangeLo; 5495 drflac_uint64 byteRangeHi; 5496 5497 byteRangeHi = pFlac->firstFLACFramePosInBytes + (drflac_uint64)(pFlac->totalPCMFrameCount * pFlac->channels * pFlac->bitsPerSample/8); 5498 byteRangeLo = pFlac->firstFLACFramePosInBytes + pFlac->pSeekpoints[iClosestSeekpoint].flacFrameOffset; 5499 5500 if (iClosestSeekpoint < pFlac->seekpointCount-1) { 5501 if (pFlac->pSeekpoints[iClosestSeekpoint+1].firstPCMFrame != (((drflac_uint64)0xFFFFFFFF << 32) | 0xFFFFFFFF)) { /* Is it a placeholder seekpoint. 
*/ 5502 byteRangeHi = pFlac->firstFLACFramePosInBytes + pFlac->pSeekpoints[iClosestSeekpoint+1].flacFrameOffset-1; /* Must be zero based. */ 5503 } 5504 } 5505 5506 if (drflac__seek_to_byte(&pFlac->bs, pFlac->firstFLACFramePosInBytes + pFlac->pSeekpoints[iClosestSeekpoint].flacFrameOffset)) { 5507 if (drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) { 5508 drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &pFlac->currentPCMFrame, NULL); 5509 5510 if (drflac__seek_to_pcm_frame__binary_search_internal(pFlac, pcmFrameIndex, byteRangeLo, byteRangeHi)) { 5511 return DRFLAC_TRUE; 5512 } 5513 } 5514 } 5515 } 5516 #endif /* !DR_FLAC_NO_CRC */ 5517 5518 /* Getting here means we need to use a slower algorithm because the binary search method failed or cannot be used. */ 5519 5520 /* 5521 If we are seeking forward and the closest seekpoint is _before_ the current sample, we just seek forward from where we are. Otherwise we start seeking 5522 from the seekpoint's first sample. 5523 */ 5524 if (pcmFrameIndex >= pFlac->currentPCMFrame && pFlac->pSeekpoints[iClosestSeekpoint].firstPCMFrame <= pFlac->currentPCMFrame) { 5525 /* Optimized case. Just seek forward from where we are. */ 5526 runningPCMFrameCount = pFlac->currentPCMFrame; 5527 5528 /* The frame header for the first frame may not yet have been read. We need to do that if necessary. */ 5529 if (pFlac->currentPCMFrame == 0 && pFlac->currentFLACFrame.pcmFramesRemaining == 0) { 5530 if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) { 5531 return DRFLAC_FALSE; 5532 } 5533 } else { 5534 isMidFrame = DRFLAC_TRUE; 5535 } 5536 } else { 5537 /* Slower case. Seek to the start of the seekpoint and then seek forward from there. 
*/ 5538 runningPCMFrameCount = pFlac->pSeekpoints[iClosestSeekpoint].firstPCMFrame; 5539 5540 if (!drflac__seek_to_byte(&pFlac->bs, pFlac->firstFLACFramePosInBytes + pFlac->pSeekpoints[iClosestSeekpoint].flacFrameOffset)) { 5541 return DRFLAC_FALSE; 5542 } 5543 5544 /* Grab the frame the seekpoint is sitting on in preparation for the sample-exact seeking below. */ 5545 if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) { 5546 return DRFLAC_FALSE; 5547 } 5548 } 5549 5550 for (;;) { 5551 drflac_uint64 pcmFrameCountInThisFLACFrame; 5552 drflac_uint64 firstPCMFrameInFLACFrame = 0; 5553 drflac_uint64 lastPCMFrameInFLACFrame = 0; 5554 5555 drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &firstPCMFrameInFLACFrame, &lastPCMFrameInFLACFrame); 5556 5557 pcmFrameCountInThisFLACFrame = (lastPCMFrameInFLACFrame - firstPCMFrameInFLACFrame) + 1; 5558 if (pcmFrameIndex < (runningPCMFrameCount + pcmFrameCountInThisFLACFrame)) { 5559 /* 5560 The sample should be in this frame. We need to fully decode it, but if it's an invalid frame (a CRC mismatch) we need to pretend 5561 it never existed and keep iterating. 5562 */ 5563 drflac_uint64 pcmFramesToDecode = pcmFrameIndex - runningPCMFrameCount; 5564 5565 if (!isMidFrame) { 5566 drflac_result result = drflac__decode_flac_frame(pFlac); 5567 if (result == DRFLAC_SUCCESS) { 5568 /* The frame is valid. We just need to skip over some samples to ensure it's sample-exact. */ 5569 return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode; /* <-- If this fails, something bad has happened (it should never fail). */ 5570 } else { 5571 if (result == DRFLAC_CRC_MISMATCH) { 5572 goto next_iteration; /* CRC mismatch. Pretend this frame never existed. */ 5573 } else { 5574 return DRFLAC_FALSE; 5575 } 5576 } 5577 } else { 5578 /* We started seeking mid-frame which means we need to skip the frame decoding part. 
*/ 5579 return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode; 5580 } 5581 } else { 5582 /* 5583 It's not in this frame. We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this 5584 frame never existed and leave the running sample count untouched. 5585 */ 5586 if (!isMidFrame) { 5587 drflac_result result = drflac__seek_to_next_flac_frame(pFlac); 5588 if (result == DRFLAC_SUCCESS) { 5589 runningPCMFrameCount += pcmFrameCountInThisFLACFrame; 5590 } else { 5591 if (result == DRFLAC_CRC_MISMATCH) { 5592 goto next_iteration; /* CRC mismatch. Pretend this frame never existed. */ 5593 } else { 5594 return DRFLAC_FALSE; 5595 } 5596 } 5597 } else { 5598 /* 5599 We started seeking mid-frame which means we need to seek by reading to the end of the frame instead of with 5600 drflac__seek_to_next_flac_frame() which only works if the decoder is sitting on the byte just after the frame header. 5601 */ 5602 runningPCMFrameCount += pFlac->currentFLACFrame.pcmFramesRemaining; 5603 pFlac->currentFLACFrame.pcmFramesRemaining = 0; 5604 isMidFrame = DRFLAC_FALSE; 5605 } 5606 5607 /* If we are seeking to the end of the file and we've just hit it, we're done. */ 5608 if (pcmFrameIndex == pFlac->totalPCMFrameCount && runningPCMFrameCount == pFlac->totalPCMFrameCount) { 5609 return DRFLAC_TRUE; 5610 } 5611 } 5612 5613 next_iteration: 5614 /* Grab the next frame in preparation for the next iteration. */ 5615 if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) { 5616 return DRFLAC_FALSE; 5617 } 5618 } 5619 } 5620 5621 5622 #ifndef DR_FLAC_NO_OGG 5623 typedef struct 5624 { 5625 drflac_uint8 capturePattern[4]; /* Should be "OggS" */ 5626 drflac_uint8 structureVersion; /* Always 0. 
*/ 5627 drflac_uint8 headerType; 5628 drflac_uint64 granulePosition; 5629 drflac_uint32 serialNumber; 5630 drflac_uint32 sequenceNumber; 5631 drflac_uint32 checksum; 5632 drflac_uint8 segmentCount; 5633 drflac_uint8 segmentTable[255]; 5634 } drflac_ogg_page_header; 5635 #endif 5636 5637 typedef struct 5638 { 5639 drflac_read_proc onRead; 5640 drflac_seek_proc onSeek; 5641 drflac_meta_proc onMeta; 5642 drflac_container container; 5643 void* pUserData; 5644 void* pUserDataMD; 5645 drflac_uint32 sampleRate; 5646 drflac_uint8 channels; 5647 drflac_uint8 bitsPerSample; 5648 drflac_uint64 totalPCMFrameCount; 5649 drflac_uint16 maxBlockSizeInPCMFrames; 5650 drflac_uint64 runningFilePos; 5651 drflac_bool32 hasStreamInfoBlock; 5652 drflac_bool32 hasMetadataBlocks; 5653 drflac_bs bs; /* <-- A bit streamer is required for loading data during initialization. */ 5654 drflac_frame_header firstFrameHeader; /* <-- The header of the first frame that was read during relaxed initalization. Only set if there is no STREAMINFO block. 
*/ 5655 5656 #ifndef DR_FLAC_NO_OGG 5657 drflac_uint32 oggSerial; 5658 drflac_uint64 oggFirstBytePos; 5659 drflac_ogg_page_header oggBosHeader; 5660 #endif 5661 } drflac_init_info; 5662 5663 static DRFLAC_INLINE void drflac__decode_block_header(drflac_uint32 blockHeader, drflac_uint8* isLastBlock, drflac_uint8* blockType, drflac_uint32* blockSize) 5664 { 5665 blockHeader = drflac__be2host_32(blockHeader); 5666 *isLastBlock = (blockHeader & 0x80000000UL) >> 31; 5667 *blockType = (blockHeader & 0x7F000000UL) >> 24; 5668 *blockSize = (blockHeader & 0x00FFFFFFUL); 5669 } 5670 5671 static DRFLAC_INLINE drflac_bool32 drflac__read_and_decode_block_header(drflac_read_proc onRead, void* pUserData, drflac_uint8* isLastBlock, drflac_uint8* blockType, drflac_uint32* blockSize) 5672 { 5673 drflac_uint32 blockHeader; 5674 if (onRead(pUserData, &blockHeader, 4) != 4) { 5675 return DRFLAC_FALSE; 5676 } 5677 5678 drflac__decode_block_header(blockHeader, isLastBlock, blockType, blockSize); 5679 return DRFLAC_TRUE; 5680 } 5681 5682 drflac_bool32 drflac__read_streaminfo(drflac_read_proc onRead, void* pUserData, drflac_streaminfo* pStreamInfo) 5683 { 5684 drflac_uint32 blockSizes; 5685 drflac_uint64 frameSizes = 0; 5686 drflac_uint64 importantProps; 5687 drflac_uint8 md5[16]; 5688 5689 /* min/max block size. */ 5690 if (onRead(pUserData, &blockSizes, 4) != 4) { 5691 return DRFLAC_FALSE; 5692 } 5693 5694 /* min/max frame size. */ 5695 if (onRead(pUserData, &frameSizes, 6) != 6) { 5696 return DRFLAC_FALSE; 5697 } 5698 5699 /* Sample rate, channels, bits per sample and total sample count. 
*/ 5700 if (onRead(pUserData, &importantProps, 8) != 8) { 5701 return DRFLAC_FALSE; 5702 } 5703 5704 /* MD5 */ 5705 if (onRead(pUserData, md5, sizeof(md5)) != sizeof(md5)) { 5706 return DRFLAC_FALSE; 5707 } 5708 5709 blockSizes = drflac__be2host_32(blockSizes); 5710 frameSizes = drflac__be2host_64(frameSizes); 5711 importantProps = drflac__be2host_64(importantProps); 5712 5713 pStreamInfo->minBlockSizeInPCMFrames = (blockSizes & 0xFFFF0000) >> 16; 5714 pStreamInfo->maxBlockSizeInPCMFrames = (blockSizes & 0x0000FFFF); 5715 pStreamInfo->minFrameSizeInPCMFrames = (drflac_uint32)((frameSizes & (((drflac_uint64)0x00FFFFFF << 16) << 24)) >> 40); 5716 pStreamInfo->maxFrameSizeInPCMFrames = (drflac_uint32)((frameSizes & (((drflac_uint64)0x00FFFFFF << 16) << 0)) >> 16); 5717 pStreamInfo->sampleRate = (drflac_uint32)((importantProps & (((drflac_uint64)0x000FFFFF << 16) << 28)) >> 44); 5718 pStreamInfo->channels = (drflac_uint8 )((importantProps & (((drflac_uint64)0x0000000E << 16) << 24)) >> 41) + 1; 5719 pStreamInfo->bitsPerSample = (drflac_uint8 )((importantProps & (((drflac_uint64)0x0000001F << 16) << 20)) >> 36) + 1; 5720 pStreamInfo->totalPCMFrameCount = ((importantProps & ((((drflac_uint64)0x0000000F << 16) << 16) | 0xFFFFFFFF))); 5721 DRFLAC_COPY_MEMORY(pStreamInfo->md5, md5, sizeof(md5)); 5722 5723 return DRFLAC_TRUE; 5724 } 5725 5726 5727 static void* drflac__malloc_default(size_t sz, void* pUserData) 5728 { 5729 (void)pUserData; 5730 return DRFLAC_MALLOC(sz); 5731 } 5732 5733 static void* drflac__realloc_default(void* p, size_t sz, void* pUserData) 5734 { 5735 (void)pUserData; 5736 return DRFLAC_REALLOC(p, sz); 5737 } 5738 5739 static void drflac__free_default(void* p, void* pUserData) 5740 { 5741 (void)pUserData; 5742 DRFLAC_FREE(p); 5743 } 5744 5745 5746 static void* drflac__malloc_from_callbacks(size_t sz, const drflac_allocation_callbacks* pAllocationCallbacks) 5747 { 5748 if (pAllocationCallbacks == NULL) { 5749 return NULL; 5750 } 5751 5752 if 
(pAllocationCallbacks->onMalloc != NULL) { 5753 return pAllocationCallbacks->onMalloc(sz, pAllocationCallbacks->pUserData); 5754 } 5755 5756 /* Try using realloc(). */ 5757 if (pAllocationCallbacks->onRealloc != NULL) { 5758 return pAllocationCallbacks->onRealloc(NULL, sz, pAllocationCallbacks->pUserData); 5759 } 5760 5761 return NULL; 5762 } 5763 5764 static void* drflac__realloc_from_callbacks(void* p, size_t szNew, size_t szOld, const drflac_allocation_callbacks* pAllocationCallbacks) 5765 { 5766 if (pAllocationCallbacks == NULL) { 5767 return NULL; 5768 } 5769 5770 if (pAllocationCallbacks->onRealloc != NULL) { 5771 return pAllocationCallbacks->onRealloc(p, szNew, pAllocationCallbacks->pUserData); 5772 } 5773 5774 /* Try emulating realloc() in terms of malloc()/free(). */ 5775 if (pAllocationCallbacks->onMalloc != NULL && pAllocationCallbacks->onFree != NULL) { 5776 void* p2; 5777 5778 p2 = pAllocationCallbacks->onMalloc(szNew, pAllocationCallbacks->pUserData); 5779 if (p2 == NULL) { 5780 return NULL; 5781 } 5782 5783 DRFLAC_COPY_MEMORY(p2, p, szOld); 5784 pAllocationCallbacks->onFree(p, pAllocationCallbacks->pUserData); 5785 5786 return p2; 5787 } 5788 5789 return NULL; 5790 } 5791 5792 static void drflac__free_from_callbacks(void* p, const drflac_allocation_callbacks* pAllocationCallbacks) 5793 { 5794 if (p == NULL || pAllocationCallbacks == NULL) { 5795 return; 5796 } 5797 5798 if (pAllocationCallbacks->onFree != NULL) { 5799 pAllocationCallbacks->onFree(p, pAllocationCallbacks->pUserData); 5800 } 5801 } 5802 5803 5804 drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_uint64* pFirstFramePos, drflac_uint64* pSeektablePos, drflac_uint32* pSeektableSize, drflac_allocation_callbacks* pAllocationCallbacks) 5805 { 5806 /* 5807 We want to keep track of the byte position in the stream of the seektable. 
At the time of calling this function we know that 5808 we'll be sitting on byte 42. 5809 */ 5810 drflac_uint64 runningFilePos = 42; 5811 drflac_uint64 seektablePos = 0; 5812 drflac_uint32 seektableSize = 0; 5813 5814 for (;;) { 5815 drflac_metadata metadata; 5816 drflac_uint8 isLastBlock = 0; 5817 drflac_uint8 blockType; 5818 drflac_uint32 blockSize; 5819 if (!drflac__read_and_decode_block_header(onRead, pUserData, &isLastBlock, &blockType, &blockSize)) { 5820 return DRFLAC_FALSE; 5821 } 5822 runningFilePos += 4; 5823 5824 metadata.type = blockType; 5825 metadata.pRawData = NULL; 5826 metadata.rawDataSize = 0; 5827 5828 switch (blockType) 5829 { 5830 case DRFLAC_METADATA_BLOCK_TYPE_APPLICATION: 5831 { 5832 if (blockSize < 4) { 5833 return DRFLAC_FALSE; 5834 } 5835 5836 if (onMeta) { 5837 void* pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks); 5838 if (pRawData == NULL) { 5839 return DRFLAC_FALSE; 5840 } 5841 5842 if (onRead(pUserData, pRawData, blockSize) != blockSize) { 5843 drflac__free_from_callbacks(pRawData, pAllocationCallbacks); 5844 return DRFLAC_FALSE; 5845 } 5846 5847 metadata.pRawData = pRawData; 5848 metadata.rawDataSize = blockSize; 5849 metadata.data.application.id = drflac__be2host_32(*(drflac_uint32*)pRawData); 5850 metadata.data.application.pData = (const void*)((drflac_uint8*)pRawData + sizeof(drflac_uint32)); 5851 metadata.data.application.dataSize = blockSize - sizeof(drflac_uint32); 5852 onMeta(pUserDataMD, &metadata); 5853 5854 drflac__free_from_callbacks(pRawData, pAllocationCallbacks); 5855 } 5856 } break; 5857 5858 case DRFLAC_METADATA_BLOCK_TYPE_SEEKTABLE: 5859 { 5860 seektablePos = runningFilePos; 5861 seektableSize = blockSize; 5862 5863 if (onMeta) { 5864 drflac_uint32 iSeekpoint; 5865 void* pRawData; 5866 5867 pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks); 5868 if (pRawData == NULL) { 5869 return DRFLAC_FALSE; 5870 } 5871 5872 if (onRead(pUserData, pRawData, blockSize) != blockSize) 
{ 5873 drflac__free_from_callbacks(pRawData, pAllocationCallbacks); 5874 return DRFLAC_FALSE; 5875 } 5876 5877 metadata.pRawData = pRawData; 5878 metadata.rawDataSize = blockSize; 5879 metadata.data.seektable.seekpointCount = blockSize/sizeof(drflac_seekpoint); 5880 metadata.data.seektable.pSeekpoints = (const drflac_seekpoint*)pRawData; 5881 5882 /* Endian swap. */ 5883 for (iSeekpoint = 0; iSeekpoint < metadata.data.seektable.seekpointCount; ++iSeekpoint) { 5884 drflac_seekpoint* pSeekpoint = (drflac_seekpoint*)pRawData + iSeekpoint; 5885 pSeekpoint->firstPCMFrame = drflac__be2host_64(pSeekpoint->firstPCMFrame); 5886 pSeekpoint->flacFrameOffset = drflac__be2host_64(pSeekpoint->flacFrameOffset); 5887 pSeekpoint->pcmFrameCount = drflac__be2host_16(pSeekpoint->pcmFrameCount); 5888 } 5889 5890 onMeta(pUserDataMD, &metadata); 5891 5892 drflac__free_from_callbacks(pRawData, pAllocationCallbacks); 5893 } 5894 } break; 5895 5896 case DRFLAC_METADATA_BLOCK_TYPE_VORBIS_COMMENT: 5897 { 5898 if (blockSize < 8) { 5899 return DRFLAC_FALSE; 5900 } 5901 5902 if (onMeta) { 5903 void* pRawData; 5904 const char* pRunningData; 5905 const char* pRunningDataEnd; 5906 drflac_uint32 i; 5907 5908 pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks); 5909 if (pRawData == NULL) { 5910 return DRFLAC_FALSE; 5911 } 5912 5913 if (onRead(pUserData, pRawData, blockSize) != blockSize) { 5914 drflac__free_from_callbacks(pRawData, pAllocationCallbacks); 5915 return DRFLAC_FALSE; 5916 } 5917 5918 metadata.pRawData = pRawData; 5919 metadata.rawDataSize = blockSize; 5920 5921 pRunningData = (const char*)pRawData; 5922 pRunningDataEnd = (const char*)pRawData + blockSize; 5923 5924 metadata.data.vorbis_comment.vendorLength = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; 5925 5926 /* Need space for the rest of the block */ 5927 if ((pRunningDataEnd - pRunningData) - 4 < (drflac_int64)metadata.data.vorbis_comment.vendorLength) { /* <-- Note the order of 
operations to avoid overflow to a valid value */ 5928 drflac__free_from_callbacks(pRawData, pAllocationCallbacks); 5929 return DRFLAC_FALSE; 5930 } 5931 metadata.data.vorbis_comment.vendor = pRunningData; pRunningData += metadata.data.vorbis_comment.vendorLength; 5932 metadata.data.vorbis_comment.commentCount = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; 5933 5934 /* Need space for 'commentCount' comments after the block, which at minimum is a drflac_uint32 per comment */ 5935 if ((pRunningDataEnd - pRunningData) / sizeof(drflac_uint32) < metadata.data.vorbis_comment.commentCount) { /* <-- Note the order of operations to avoid overflow to a valid value */ 5936 drflac__free_from_callbacks(pRawData, pAllocationCallbacks); 5937 return DRFLAC_FALSE; 5938 } 5939 metadata.data.vorbis_comment.pComments = pRunningData; 5940 5941 /* Check that the comments section is valid before passing it to the callback */ 5942 for (i = 0; i < metadata.data.vorbis_comment.commentCount; ++i) { 5943 drflac_uint32 commentLength; 5944 5945 if (pRunningDataEnd - pRunningData < 4) { 5946 drflac__free_from_callbacks(pRawData, pAllocationCallbacks); 5947 return DRFLAC_FALSE; 5948 } 5949 5950 commentLength = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; 5951 if (pRunningDataEnd - pRunningData < (drflac_int64)commentLength) { /* <-- Note the order of operations to avoid overflow to a valid value */ 5952 drflac__free_from_callbacks(pRawData, pAllocationCallbacks); 5953 return DRFLAC_FALSE; 5954 } 5955 pRunningData += commentLength; 5956 } 5957 5958 onMeta(pUserDataMD, &metadata); 5959 5960 drflac__free_from_callbacks(pRawData, pAllocationCallbacks); 5961 } 5962 } break; 5963 5964 case DRFLAC_METADATA_BLOCK_TYPE_CUESHEET: 5965 { 5966 if (blockSize < 396) { 5967 return DRFLAC_FALSE; 5968 } 5969 5970 if (onMeta) { 5971 void* pRawData; 5972 const char* pRunningData; 5973 const char* pRunningDataEnd; 5974 drflac_uint8 iTrack; 5975 
drflac_uint8 iIndex; 5976 5977 pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks); 5978 if (pRawData == NULL) { 5979 return DRFLAC_FALSE; 5980 } 5981 5982 if (onRead(pUserData, pRawData, blockSize) != blockSize) { 5983 drflac__free_from_callbacks(pRawData, pAllocationCallbacks); 5984 return DRFLAC_FALSE; 5985 } 5986 5987 metadata.pRawData = pRawData; 5988 metadata.rawDataSize = blockSize; 5989 5990 pRunningData = (const char*)pRawData; 5991 pRunningDataEnd = (const char*)pRawData + blockSize; 5992 5993 DRFLAC_COPY_MEMORY(metadata.data.cuesheet.catalog, pRunningData, 128); pRunningData += 128; 5994 metadata.data.cuesheet.leadInSampleCount = drflac__be2host_64(*(const drflac_uint64*)pRunningData); pRunningData += 8; 5995 metadata.data.cuesheet.isCD = (pRunningData[0] & 0x80) != 0; pRunningData += 259; 5996 metadata.data.cuesheet.trackCount = pRunningData[0]; pRunningData += 1; 5997 metadata.data.cuesheet.pTrackData = pRunningData; 5998 5999 /* Check that the cuesheet tracks are valid before passing it to the callback */ 6000 for (iTrack = 0; iTrack < metadata.data.cuesheet.trackCount; ++iTrack) { 6001 drflac_uint8 indexCount; 6002 drflac_uint32 indexPointSize; 6003 6004 if (pRunningDataEnd - pRunningData < 36) { 6005 drflac__free_from_callbacks(pRawData, pAllocationCallbacks); 6006 return DRFLAC_FALSE; 6007 } 6008 6009 /* Skip to the index point count */ 6010 pRunningData += 35; 6011 indexCount = pRunningData[0]; pRunningData += 1; 6012 indexPointSize = indexCount * sizeof(drflac_cuesheet_track_index); 6013 if (pRunningDataEnd - pRunningData < (drflac_int64)indexPointSize) { 6014 drflac__free_from_callbacks(pRawData, pAllocationCallbacks); 6015 return DRFLAC_FALSE; 6016 } 6017 6018 /* Endian swap. 
*/ 6019 for (iIndex = 0; iIndex < indexCount; ++iIndex) { 6020 drflac_cuesheet_track_index* pTrack = (drflac_cuesheet_track_index*)pRunningData; 6021 pRunningData += sizeof(drflac_cuesheet_track_index); 6022 pTrack->offset = drflac__be2host_64(pTrack->offset); 6023 } 6024 } 6025 6026 onMeta(pUserDataMD, &metadata); 6027 6028 drflac__free_from_callbacks(pRawData, pAllocationCallbacks); 6029 } 6030 } break; 6031 6032 case DRFLAC_METADATA_BLOCK_TYPE_PICTURE: 6033 { 6034 if (blockSize < 32) { 6035 return DRFLAC_FALSE; 6036 } 6037 6038 if (onMeta) { 6039 void* pRawData; 6040 const char* pRunningData; 6041 const char* pRunningDataEnd; 6042 6043 pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks); 6044 if (pRawData == NULL) { 6045 return DRFLAC_FALSE; 6046 } 6047 6048 if (onRead(pUserData, pRawData, blockSize) != blockSize) { 6049 drflac__free_from_callbacks(pRawData, pAllocationCallbacks); 6050 return DRFLAC_FALSE; 6051 } 6052 6053 metadata.pRawData = pRawData; 6054 metadata.rawDataSize = blockSize; 6055 6056 pRunningData = (const char*)pRawData; 6057 pRunningDataEnd = (const char*)pRawData + blockSize; 6058 6059 metadata.data.picture.type = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; 6060 metadata.data.picture.mimeLength = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; 6061 6062 /* Need space for the rest of the block */ 6063 if ((pRunningDataEnd - pRunningData) - 24 < (drflac_int64)metadata.data.picture.mimeLength) { /* <-- Note the order of operations to avoid overflow to a valid value */ 6064 drflac__free_from_callbacks(pRawData, pAllocationCallbacks); 6065 return DRFLAC_FALSE; 6066 } 6067 metadata.data.picture.mime = pRunningData; pRunningData += metadata.data.picture.mimeLength; 6068 metadata.data.picture.descriptionLength = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; 6069 6070 /* Need space for the rest of the block */ 6071 if ((pRunningDataEnd - 
pRunningData) - 20 < (drflac_int64)metadata.data.picture.descriptionLength) { /* <-- Note the order of operations to avoid overflow to a valid value */ 6072 drflac__free_from_callbacks(pRawData, pAllocationCallbacks); 6073 return DRFLAC_FALSE; 6074 } 6075 metadata.data.picture.description = pRunningData; pRunningData += metadata.data.picture.descriptionLength; 6076 metadata.data.picture.width = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; 6077 metadata.data.picture.height = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; 6078 metadata.data.picture.colorDepth = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; 6079 metadata.data.picture.indexColorCount = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; 6080 metadata.data.picture.pictureDataSize = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; 6081 metadata.data.picture.pPictureData = (const drflac_uint8*)pRunningData; 6082 6083 /* Need space for the picture after the block */ 6084 if (pRunningDataEnd - pRunningData < (drflac_int64)metadata.data.picture.pictureDataSize) { /* <-- Note the order of operations to avoid overflow to a valid value */ 6085 drflac__free_from_callbacks(pRawData, pAllocationCallbacks); 6086 return DRFLAC_FALSE; 6087 } 6088 6089 onMeta(pUserDataMD, &metadata); 6090 6091 drflac__free_from_callbacks(pRawData, pAllocationCallbacks); 6092 } 6093 } break; 6094 6095 case DRFLAC_METADATA_BLOCK_TYPE_PADDING: 6096 { 6097 if (onMeta) { 6098 metadata.data.padding.unused = 0; 6099 6100 /* Padding doesn't have anything meaningful in it, so just skip over it, but make sure the caller is aware of it by firing the callback. */ 6101 if (!onSeek(pUserData, blockSize, drflac_seek_origin_current)) { 6102 isLastBlock = DRFLAC_TRUE; /* An error occurred while seeking. Attempt to recover by treating this as the last block which will in turn terminate the loop. 
*/ 6103 } else { 6104 onMeta(pUserDataMD, &metadata); 6105 } 6106 } 6107 } break; 6108 6109 case DRFLAC_METADATA_BLOCK_TYPE_INVALID: 6110 { 6111 /* Invalid chunk. Just skip over this one. */ 6112 if (onMeta) { 6113 if (!onSeek(pUserData, blockSize, drflac_seek_origin_current)) { 6114 isLastBlock = DRFLAC_TRUE; /* An error occurred while seeking. Attempt to recover by treating this as the last block which will in turn terminate the loop. */ 6115 } 6116 } 6117 } break; 6118 6119 default: 6120 { 6121 /* 6122 It's an unknown chunk, but not necessarily invalid. There's a chance more metadata blocks might be defined later on, so we 6123 can at the very least report the chunk to the application and let it look at the raw data. 6124 */ 6125 if (onMeta) { 6126 void* pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks); 6127 if (pRawData == NULL) { 6128 return DRFLAC_FALSE; 6129 } 6130 6131 if (onRead(pUserData, pRawData, blockSize) != blockSize) { 6132 drflac__free_from_callbacks(pRawData, pAllocationCallbacks); 6133 return DRFLAC_FALSE; 6134 } 6135 6136 metadata.pRawData = pRawData; 6137 metadata.rawDataSize = blockSize; 6138 onMeta(pUserDataMD, &metadata); 6139 6140 drflac__free_from_callbacks(pRawData, pAllocationCallbacks); 6141 } 6142 } break; 6143 } 6144 6145 /* If we're not handling metadata, just skip over the block. If we are, it will have been handled earlier in the switch statement above. 
*/ 6146 if (onMeta == NULL && blockSize > 0) { 6147 if (!onSeek(pUserData, blockSize, drflac_seek_origin_current)) { 6148 isLastBlock = DRFLAC_TRUE; 6149 } 6150 } 6151 6152 runningFilePos += blockSize; 6153 if (isLastBlock) { 6154 break; 6155 } 6156 } 6157 6158 *pSeektablePos = seektablePos; 6159 *pSeektableSize = seektableSize; 6160 *pFirstFramePos = runningFilePos; 6161 6162 return DRFLAC_TRUE; 6163 } 6164 6165 drflac_bool32 drflac__init_private__native(drflac_init_info* pInit, drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_bool32 relaxed) 6166 { 6167 /* Pre Condition: The bit stream should be sitting just past the 4-byte id header. */ 6168 6169 drflac_uint8 isLastBlock; 6170 drflac_uint8 blockType; 6171 drflac_uint32 blockSize; 6172 6173 (void)onSeek; 6174 6175 pInit->container = drflac_container_native; 6176 6177 /* The first metadata block should be the STREAMINFO block. */ 6178 if (!drflac__read_and_decode_block_header(onRead, pUserData, &isLastBlock, &blockType, &blockSize)) { 6179 return DRFLAC_FALSE; 6180 } 6181 6182 if (blockType != DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO || blockSize != 34) { 6183 if (!relaxed) { 6184 /* We're opening in strict mode and the first block is not the STREAMINFO block. Error. */ 6185 return DRFLAC_FALSE; 6186 } else { 6187 /* 6188 Relaxed mode. To open from here we need to just find the first frame and set the sample rate, etc. to whatever is defined 6189 for that frame. 6190 */ 6191 pInit->hasStreamInfoBlock = DRFLAC_FALSE; 6192 pInit->hasMetadataBlocks = DRFLAC_FALSE; 6193 6194 if (!drflac__read_next_flac_frame_header(&pInit->bs, 0, &pInit->firstFrameHeader)) { 6195 return DRFLAC_FALSE; /* Couldn't find a frame. */ 6196 } 6197 6198 if (pInit->firstFrameHeader.bitsPerSample == 0) { 6199 return DRFLAC_FALSE; /* Failed to initialize because the first frame depends on the STREAMINFO block, which does not exist. 
*/ 6200 } 6201 6202 pInit->sampleRate = pInit->firstFrameHeader.sampleRate; 6203 pInit->channels = drflac__get_channel_count_from_channel_assignment(pInit->firstFrameHeader.channelAssignment); 6204 pInit->bitsPerSample = pInit->firstFrameHeader.bitsPerSample; 6205 pInit->maxBlockSizeInPCMFrames = 65535; /* <-- See notes here: https://xiph.org/flac/format.html#metadata_block_streaminfo */ 6206 return DRFLAC_TRUE; 6207 } 6208 } else { 6209 drflac_streaminfo streaminfo; 6210 if (!drflac__read_streaminfo(onRead, pUserData, &streaminfo)) { 6211 return DRFLAC_FALSE; 6212 } 6213 6214 pInit->hasStreamInfoBlock = DRFLAC_TRUE; 6215 pInit->sampleRate = streaminfo.sampleRate; 6216 pInit->channels = streaminfo.channels; 6217 pInit->bitsPerSample = streaminfo.bitsPerSample; 6218 pInit->totalPCMFrameCount = streaminfo.totalPCMFrameCount; 6219 pInit->maxBlockSizeInPCMFrames = streaminfo.maxBlockSizeInPCMFrames; /* Don't care about the min block size - only the max (used for determining the size of the memory allocation). */ 6220 pInit->hasMetadataBlocks = !isLastBlock; 6221 6222 if (onMeta) { 6223 drflac_metadata metadata; 6224 metadata.type = DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO; 6225 metadata.pRawData = NULL; 6226 metadata.rawDataSize = 0; 6227 metadata.data.streaminfo = streaminfo; 6228 onMeta(pUserDataMD, &metadata); 6229 } 6230 6231 return DRFLAC_TRUE; 6232 } 6233 } 6234 6235 #ifndef DR_FLAC_NO_OGG 6236 #define DRFLAC_OGG_MAX_PAGE_SIZE 65307 6237 #define DRFLAC_OGG_CAPTURE_PATTERN_CRC32 1605413199 /* CRC-32 of "OggS". 
*/
6238
/*
Selects what drflac_oggbs__goto_next_page() does when the CRC computed over a page differs from the checksum stored in that
page's header: "recover" skips the page and keeps searching, "fail" still advances to the next valid page but reports failure
to the caller. The distinction only matters when DR_FLAC_NO_CRC is not defined.
*/
6239 typedef enum
6240 {
6241 drflac_ogg_recover_on_crc_mismatch,
6242 drflac_ogg_fail_on_crc_mismatch
6243 } drflac_ogg_crc_mismatch_recovery;
6244
6245 #ifndef DR_FLAC_NO_CRC
/*
256-entry lookup table consumed by drflac_crc32_byte(), which indexes it with the top byte of the running CRC XORed with the
next data byte. Only compiled when DR_FLAC_NO_CRC is not defined.
*/
6246 static drflac_uint32 drflac__crc32_table[] = {
6247 0x00000000L, 0x04C11DB7L, 0x09823B6EL, 0x0D4326D9L,
6248 0x130476DCL, 0x17C56B6BL, 0x1A864DB2L, 0x1E475005L,
6249 0x2608EDB8L, 0x22C9F00FL, 0x2F8AD6D6L, 0x2B4BCB61L,
6250 0x350C9B64L, 0x31CD86D3L, 0x3C8EA00AL, 0x384FBDBDL,
6251 0x4C11DB70L, 0x48D0C6C7L, 0x4593E01EL, 0x4152FDA9L,
6252 0x5F15ADACL, 0x5BD4B01BL, 0x569796C2L, 0x52568B75L,
6253 0x6A1936C8L, 0x6ED82B7FL, 0x639B0DA6L, 0x675A1011L,
6254 0x791D4014L, 0x7DDC5DA3L, 0x709F7B7AL, 0x745E66CDL,
6255 0x9823B6E0L, 0x9CE2AB57L, 0x91A18D8EL, 0x95609039L,
6256 0x8B27C03CL, 0x8FE6DD8BL, 0x82A5FB52L, 0x8664E6E5L,
6257 0xBE2B5B58L, 0xBAEA46EFL, 0xB7A96036L, 0xB3687D81L,
6258 0xAD2F2D84L, 0xA9EE3033L, 0xA4AD16EAL, 0xA06C0B5DL,
6259 0xD4326D90L, 0xD0F37027L, 0xDDB056FEL, 0xD9714B49L,
6260 0xC7361B4CL, 0xC3F706FBL, 0xCEB42022L, 0xCA753D95L,
6261 0xF23A8028L, 0xF6FB9D9FL, 0xFBB8BB46L, 0xFF79A6F1L,
6262 0xE13EF6F4L, 0xE5FFEB43L, 0xE8BCCD9AL, 0xEC7DD02DL,
6263 0x34867077L, 0x30476DC0L, 0x3D044B19L, 0x39C556AEL,
6264 0x278206ABL, 0x23431B1CL, 0x2E003DC5L, 0x2AC12072L,
6265 0x128E9DCFL, 0x164F8078L, 0x1B0CA6A1L, 0x1FCDBB16L,
6266 0x018AEB13L, 0x054BF6A4L, 0x0808D07DL, 0x0CC9CDCAL,
6267 0x7897AB07L, 0x7C56B6B0L, 0x71159069L, 0x75D48DDEL,
6268 0x6B93DDDBL, 0x6F52C06CL, 0x6211E6B5L, 0x66D0FB02L,
6269 0x5E9F46BFL, 0x5A5E5B08L, 0x571D7DD1L, 0x53DC6066L,
6270 0x4D9B3063L, 0x495A2DD4L, 0x44190B0DL, 0x40D816BAL,
6271 0xACA5C697L, 0xA864DB20L, 0xA527FDF9L, 0xA1E6E04EL,
6272 0xBFA1B04BL, 0xBB60ADFCL, 0xB6238B25L, 0xB2E29692L,
6273 0x8AAD2B2FL, 0x8E6C3698L, 0x832F1041L, 0x87EE0DF6L,
6274 0x99A95DF3L, 0x9D684044L, 0x902B669DL, 0x94EA7B2AL,
6275 0xE0B41DE7L, 0xE4750050L, 0xE9362689L, 0xEDF73B3EL,
6276 0xF3B06B3BL, 0xF771768CL, 0xFA325055L, 0xFEF34DE2L,
6277 0xC6BCF05FL, 0xC27DEDE8L, 0xCF3ECB31L, 0xCBFFD686L,
6278 0xD5B88683L, 0xD1799B34L, 0xDC3ABDEDL, 0xD8FBA05AL,
6279 0x690CE0EEL, 0x6DCDFD59L, 0x608EDB80L, 0x644FC637L,
6280 0x7A089632L, 0x7EC98B85L, 0x738AAD5CL, 0x774BB0EBL,
6281 0x4F040D56L, 0x4BC510E1L, 0x46863638L, 0x42472B8FL,
6282 0x5C007B8AL, 0x58C1663DL, 0x558240E4L, 0x51435D53L,
6283 0x251D3B9EL, 0x21DC2629L, 0x2C9F00F0L, 0x285E1D47L,
6284 0x36194D42L, 0x32D850F5L, 0x3F9B762CL, 0x3B5A6B9BL,
6285 0x0315D626L, 0x07D4CB91L, 0x0A97ED48L, 0x0E56F0FFL,
6286 0x1011A0FAL, 0x14D0BD4DL, 0x19939B94L, 0x1D528623L,
6287 0xF12F560EL, 0xF5EE4BB9L, 0xF8AD6D60L, 0xFC6C70D7L,
6288 0xE22B20D2L, 0xE6EA3D65L, 0xEBA91BBCL, 0xEF68060BL,
6289 0xD727BBB6L, 0xD3E6A601L, 0xDEA580D8L, 0xDA649D6FL,
6290 0xC423CD6AL, 0xC0E2D0DDL, 0xCDA1F604L, 0xC960EBB3L,
6291 0xBD3E8D7EL, 0xB9FF90C9L, 0xB4BCB610L, 0xB07DABA7L,
6292 0xAE3AFBA2L, 0xAAFBE615L, 0xA7B8C0CCL, 0xA379DD7BL,
6293 0x9B3660C6L, 0x9FF77D71L, 0x92B45BA8L, 0x9675461FL,
6294 0x8832161AL, 0x8CF30BADL, 0x81B02D74L, 0x857130C3L,
6295 0x5D8A9099L, 0x594B8D2EL, 0x5408ABF7L, 0x50C9B640L,
6296 0x4E8EE645L, 0x4A4FFBF2L, 0x470CDD2BL, 0x43CDC09CL,
6297 0x7B827D21L, 0x7F436096L, 0x7200464FL, 0x76C15BF8L,
6298 0x68860BFDL, 0x6C47164AL, 0x61043093L, 0x65C52D24L,
6299 0x119B4BE9L, 0x155A565EL, 0x18197087L, 0x1CD86D30L,
6300 0x029F3D35L, 0x065E2082L, 0x0B1D065BL, 0x0FDC1BECL,
6301 0x3793A651L, 0x3352BBE6L, 0x3E119D3FL, 0x3AD08088L,
6302 0x2497D08DL, 0x2056CD3AL, 0x2D15EBE3L, 0x29D4F654L,
6303 0xC5A92679L, 0xC1683BCEL, 0xCC2B1D17L, 0xC8EA00A0L,
6304 0xD6AD50A5L, 0xD26C4D12L, 0xDF2F6BCBL, 0xDBEE767CL,
6305 0xE3A1CBC1L, 0xE760D676L, 0xEA23F0AFL, 0xEEE2ED18L,
6306 0xF0A5BD1DL, 0xF464A0AAL, 0xF9278673L, 0xFDE69BC4L,
6307 0x89B8FD09L, 0x8D79E0BEL, 0x803AC667L, 0x84FBDBD0L,
6308 0x9ABC8BD5L, 0x9E7D9662L, 0x933EB0BBL, 0x97FFAD0CL,
6309 0xAFB010B1L, 0xAB710D06L, 0xA6322BDFL, 0xA2F33668L,
6310 0xBCB4666DL, 0xB8757BDAL, 0xB5365D03L, 0xB1F740B4L
6311 };
6312 #endif
6313
6314 static DRFLAC_INLINE drflac_uint32 drflac_crc32_byte(drflac_uint32 crc32,
drflac_uint8 data) 6315 { 6316 #ifndef DR_FLAC_NO_CRC 6317 return (crc32 << 8) ^ drflac__crc32_table[(drflac_uint8)((crc32 >> 24) & 0xFF) ^ data]; 6318 #else 6319 (void)data; 6320 return crc32; 6321 #endif 6322 } 6323 6324 #if 0 6325 static DRFLAC_INLINE drflac_uint32 drflac_crc32_uint32(drflac_uint32 crc32, drflac_uint32 data) 6326 { 6327 crc32 = drflac_crc32_byte(crc32, (drflac_uint8)((data >> 24) & 0xFF)); 6328 crc32 = drflac_crc32_byte(crc32, (drflac_uint8)((data >> 16) & 0xFF)); 6329 crc32 = drflac_crc32_byte(crc32, (drflac_uint8)((data >> 8) & 0xFF)); 6330 crc32 = drflac_crc32_byte(crc32, (drflac_uint8)((data >> 0) & 0xFF)); 6331 return crc32; 6332 } 6333 6334 static DRFLAC_INLINE drflac_uint32 drflac_crc32_uint64(drflac_uint32 crc32, drflac_uint64 data) 6335 { 6336 crc32 = drflac_crc32_uint32(crc32, (drflac_uint32)((data >> 32) & 0xFFFFFFFF)); 6337 crc32 = drflac_crc32_uint32(crc32, (drflac_uint32)((data >> 0) & 0xFFFFFFFF)); 6338 return crc32; 6339 } 6340 #endif 6341 6342 static DRFLAC_INLINE drflac_uint32 drflac_crc32_buffer(drflac_uint32 crc32, drflac_uint8* pData, drflac_uint32 dataSize) 6343 { 6344 /* This can be optimized. 
*/ 6345 drflac_uint32 i; 6346 for (i = 0; i < dataSize; ++i) { 6347 crc32 = drflac_crc32_byte(crc32, pData[i]); 6348 } 6349 return crc32; 6350 } 6351 6352 6353 static DRFLAC_INLINE drflac_bool32 drflac_ogg__is_capture_pattern(drflac_uint8 pattern[4]) 6354 { 6355 return pattern[0] == 'O' && pattern[1] == 'g' && pattern[2] == 'g' && pattern[3] == 'S'; 6356 } 6357 6358 static DRFLAC_INLINE drflac_uint32 drflac_ogg__get_page_header_size(drflac_ogg_page_header* pHeader) 6359 { 6360 return 27 + pHeader->segmentCount; 6361 } 6362 6363 static DRFLAC_INLINE drflac_uint32 drflac_ogg__get_page_body_size(drflac_ogg_page_header* pHeader) 6364 { 6365 drflac_uint32 pageBodySize = 0; 6366 int i; 6367 6368 for (i = 0; i < pHeader->segmentCount; ++i) { 6369 pageBodySize += pHeader->segmentTable[i]; 6370 } 6371 6372 return pageBodySize; 6373 } 6374 6375 drflac_result drflac_ogg__read_page_header_after_capture_pattern(drflac_read_proc onRead, void* pUserData, drflac_ogg_page_header* pHeader, drflac_uint32* pBytesRead, drflac_uint32* pCRC32) 6376 { 6377 drflac_uint8 data[23]; 6378 drflac_uint32 i; 6379 6380 DRFLAC_ASSERT(*pCRC32 == DRFLAC_OGG_CAPTURE_PATTERN_CRC32); 6381 6382 if (onRead(pUserData, data, 23) != 23) { 6383 return DRFLAC_END_OF_STREAM; 6384 } 6385 *pBytesRead += 23; 6386 6387 pHeader->structureVersion = data[0]; 6388 pHeader->headerType = data[1]; 6389 DRFLAC_COPY_MEMORY(&pHeader->granulePosition, &data[ 2], 8); 6390 DRFLAC_COPY_MEMORY(&pHeader->serialNumber, &data[10], 4); 6391 DRFLAC_COPY_MEMORY(&pHeader->sequenceNumber, &data[14], 4); 6392 DRFLAC_COPY_MEMORY(&pHeader->checksum, &data[18], 4); 6393 pHeader->segmentCount = data[22]; 6394 6395 /* Calculate the CRC. Note that for the calculation the checksum part of the page needs to be set to 0. 
*/ 6396 data[18] = 0; 6397 data[19] = 0; 6398 data[20] = 0; 6399 data[21] = 0; 6400 6401 for (i = 0; i < 23; ++i) { 6402 *pCRC32 = drflac_crc32_byte(*pCRC32, data[i]); 6403 } 6404 6405 6406 if (onRead(pUserData, pHeader->segmentTable, pHeader->segmentCount) != pHeader->segmentCount) { 6407 return DRFLAC_END_OF_STREAM; 6408 } 6409 *pBytesRead += pHeader->segmentCount; 6410 6411 for (i = 0; i < pHeader->segmentCount; ++i) { 6412 *pCRC32 = drflac_crc32_byte(*pCRC32, pHeader->segmentTable[i]); 6413 } 6414 6415 return DRFLAC_SUCCESS; 6416 } 6417 6418 drflac_result drflac_ogg__read_page_header(drflac_read_proc onRead, void* pUserData, drflac_ogg_page_header* pHeader, drflac_uint32* pBytesRead, drflac_uint32* pCRC32) 6419 { 6420 drflac_uint8 id[4]; 6421 6422 *pBytesRead = 0; 6423 6424 if (onRead(pUserData, id, 4) != 4) { 6425 return DRFLAC_END_OF_STREAM; 6426 } 6427 *pBytesRead += 4; 6428 6429 /* We need to read byte-by-byte until we find the OggS capture pattern. */ 6430 for (;;) { 6431 if (drflac_ogg__is_capture_pattern(id)) { 6432 drflac_result result; 6433 6434 *pCRC32 = DRFLAC_OGG_CAPTURE_PATTERN_CRC32; 6435 6436 result = drflac_ogg__read_page_header_after_capture_pattern(onRead, pUserData, pHeader, pBytesRead, pCRC32); 6437 if (result == DRFLAC_SUCCESS) { 6438 return DRFLAC_SUCCESS; 6439 } else { 6440 if (result == DRFLAC_CRC_MISMATCH) { 6441 continue; 6442 } else { 6443 return result; 6444 } 6445 } 6446 } else { 6447 /* The first 4 bytes did not equal the capture pattern. Read the next byte and try again. */ 6448 id[0] = id[1]; 6449 id[1] = id[2]; 6450 id[2] = id[3]; 6451 if (onRead(pUserData, &id[3], 1) != 1) { 6452 return DRFLAC_END_OF_STREAM; 6453 } 6454 *pBytesRead += 1; 6455 } 6456 } 6457 } 6458 6459 6460 /* 6461 The main part of the Ogg encapsulation is the conversion from the physical Ogg bitstream to the native FLAC bitstream. It works 6462 in three general stages: Ogg Physical Bitstream -> Ogg/FLAC Logical Bitstream -> FLAC Native Bitstream. 
dr_flac is designed 6463 in such a way that the core sections assume everything is delivered in native format. Therefore, for each encapsulation type 6464 dr_flac is supporting there needs to be a layer sitting on top of the onRead and onSeek callbacks that ensures the bits read from 6465 the physical Ogg bitstream are converted and delivered in native FLAC format. 6466 */ 6467 typedef struct 6468 { 6469 drflac_read_proc onRead; /* The original onRead callback from drflac_open() and family. */ 6470 drflac_seek_proc onSeek; /* The original onSeek callback from drflac_open() and family. */ 6471 void* pUserData; /* The user data passed on onRead and onSeek. This is the user data that was passed on drflac_open() and family. */ 6472 drflac_uint64 currentBytePos; /* The position of the byte we are sitting on in the physical byte stream. Used for efficient seeking. */ 6473 drflac_uint64 firstBytePos; /* The position of the first byte in the physical bitstream. Points to the start of the "OggS" identifier of the FLAC bos page. */ 6474 drflac_uint32 serialNumber; /* The serial number of the FLAC audio pages. This is determined by the initial header page that was read during initialization. */ 6475 drflac_ogg_page_header bosPageHeader; /* Used for seeking. 
*/ 6476 drflac_ogg_page_header currentPageHeader; 6477 drflac_uint32 bytesRemainingInPage; 6478 drflac_uint32 pageDataSize; 6479 drflac_uint8 pageData[DRFLAC_OGG_MAX_PAGE_SIZE]; 6480 } drflac_oggbs; /* oggbs = Ogg Bitstream */ 6481 6482 static size_t drflac_oggbs__read_physical(drflac_oggbs* oggbs, void* bufferOut, size_t bytesToRead) 6483 { 6484 size_t bytesActuallyRead = oggbs->onRead(oggbs->pUserData, bufferOut, bytesToRead); 6485 oggbs->currentBytePos += bytesActuallyRead; 6486 6487 return bytesActuallyRead; 6488 } 6489 6490 static drflac_bool32 drflac_oggbs__seek_physical(drflac_oggbs* oggbs, drflac_uint64 offset, drflac_seek_origin origin) 6491 { 6492 if (origin == drflac_seek_origin_start) { 6493 if (offset <= 0x7FFFFFFF) { 6494 if (!oggbs->onSeek(oggbs->pUserData, (int)offset, drflac_seek_origin_start)) { 6495 return DRFLAC_FALSE; 6496 } 6497 oggbs->currentBytePos = offset; 6498 6499 return DRFLAC_TRUE; 6500 } else { 6501 if (!oggbs->onSeek(oggbs->pUserData, 0x7FFFFFFF, drflac_seek_origin_start)) { 6502 return DRFLAC_FALSE; 6503 } 6504 oggbs->currentBytePos = offset; 6505 6506 return drflac_oggbs__seek_physical(oggbs, offset - 0x7FFFFFFF, drflac_seek_origin_current); 6507 } 6508 } else { 6509 while (offset > 0x7FFFFFFF) { 6510 if (!oggbs->onSeek(oggbs->pUserData, 0x7FFFFFFF, drflac_seek_origin_current)) { 6511 return DRFLAC_FALSE; 6512 } 6513 oggbs->currentBytePos += 0x7FFFFFFF; 6514 offset -= 0x7FFFFFFF; 6515 } 6516 6517 if (!oggbs->onSeek(oggbs->pUserData, (int)offset, drflac_seek_origin_current)) { /* <-- Safe cast thanks to the loop above. 
*/ 6518 return DRFLAC_FALSE; 6519 } 6520 oggbs->currentBytePos += offset; 6521 6522 return DRFLAC_TRUE; 6523 } 6524 } 6525 6526 static drflac_bool32 drflac_oggbs__goto_next_page(drflac_oggbs* oggbs, drflac_ogg_crc_mismatch_recovery recoveryMethod) 6527 { 6528 drflac_ogg_page_header header; 6529 for (;;) { 6530 drflac_uint32 crc32 = 0; 6531 drflac_uint32 bytesRead; 6532 drflac_uint32 pageBodySize; 6533 #ifndef DR_FLAC_NO_CRC 6534 drflac_uint32 actualCRC32; 6535 #endif 6536 6537 if (drflac_ogg__read_page_header(oggbs->onRead, oggbs->pUserData, &header, &bytesRead, &crc32) != DRFLAC_SUCCESS) { 6538 return DRFLAC_FALSE; 6539 } 6540 oggbs->currentBytePos += bytesRead; 6541 6542 pageBodySize = drflac_ogg__get_page_body_size(&header); 6543 if (pageBodySize > DRFLAC_OGG_MAX_PAGE_SIZE) { 6544 continue; /* Invalid page size. Assume it's corrupted and just move to the next page. */ 6545 } 6546 6547 if (header.serialNumber != oggbs->serialNumber) { 6548 /* It's not a FLAC page. Skip it. */ 6549 if (pageBodySize > 0 && !drflac_oggbs__seek_physical(oggbs, pageBodySize, drflac_seek_origin_current)) { 6550 return DRFLAC_FALSE; 6551 } 6552 continue; 6553 } 6554 6555 6556 /* We need to read the entire page and then do a CRC check on it. If there's a CRC mismatch we need to skip this page. */ 6557 if (drflac_oggbs__read_physical(oggbs, oggbs->pageData, pageBodySize) != pageBodySize) { 6558 return DRFLAC_FALSE; 6559 } 6560 oggbs->pageDataSize = pageBodySize; 6561 6562 #ifndef DR_FLAC_NO_CRC 6563 actualCRC32 = drflac_crc32_buffer(crc32, oggbs->pageData, oggbs->pageDataSize); 6564 if (actualCRC32 != header.checksum) { 6565 if (recoveryMethod == drflac_ogg_recover_on_crc_mismatch) { 6566 continue; /* CRC mismatch. Skip this page. */ 6567 } else { 6568 /* 6569 Even though we are failing on a CRC mismatch, we still want our stream to be in a good state. 
Therefore we 6570 go to the next valid page to ensure we're in a good state, but return false to let the caller know that the 6571 seek did not fully complete. 6572 */ 6573 drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch); 6574 return DRFLAC_FALSE; 6575 } 6576 } 6577 #else 6578 (void)recoveryMethod; /* <-- Silence a warning. */ 6579 #endif 6580 6581 oggbs->currentPageHeader = header; 6582 oggbs->bytesRemainingInPage = pageBodySize; 6583 return DRFLAC_TRUE; 6584 } 6585 } 6586 6587 /* Function below is unused at the moment, but I might be re-adding it later. */ 6588 #if 0 6589 static drflac_uint8 drflac_oggbs__get_current_segment_index(drflac_oggbs* oggbs, drflac_uint8* pBytesRemainingInSeg) 6590 { 6591 drflac_uint32 bytesConsumedInPage = drflac_ogg__get_page_body_size(&oggbs->currentPageHeader) - oggbs->bytesRemainingInPage; 6592 drflac_uint8 iSeg = 0; 6593 drflac_uint32 iByte = 0; 6594 while (iByte < bytesConsumedInPage) { 6595 drflac_uint8 segmentSize = oggbs->currentPageHeader.segmentTable[iSeg]; 6596 if (iByte + segmentSize > bytesConsumedInPage) { 6597 break; 6598 } else { 6599 iSeg += 1; 6600 iByte += segmentSize; 6601 } 6602 } 6603 6604 *pBytesRemainingInSeg = oggbs->currentPageHeader.segmentTable[iSeg] - (drflac_uint8)(bytesConsumedInPage - iByte); 6605 return iSeg; 6606 } 6607 6608 static drflac_bool32 drflac_oggbs__seek_to_next_packet(drflac_oggbs* oggbs) 6609 { 6610 /* The current packet ends when we get to the segment with a lacing value of < 255 which is not at the end of a page. 
*/ 6611 for (;;) { 6612 drflac_bool32 atEndOfPage = DRFLAC_FALSE; 6613 6614 drflac_uint8 bytesRemainingInSeg; 6615 drflac_uint8 iFirstSeg = drflac_oggbs__get_current_segment_index(oggbs, &bytesRemainingInSeg); 6616 6617 drflac_uint32 bytesToEndOfPacketOrPage = bytesRemainingInSeg; 6618 for (drflac_uint8 iSeg = iFirstSeg; iSeg < oggbs->currentPageHeader.segmentCount; ++iSeg) { 6619 drflac_uint8 segmentSize = oggbs->currentPageHeader.segmentTable[iSeg]; 6620 if (segmentSize < 255) { 6621 if (iSeg == oggbs->currentPageHeader.segmentCount-1) { 6622 atEndOfPage = DRFLAC_TRUE; 6623 } 6624 6625 break; 6626 } 6627 6628 bytesToEndOfPacketOrPage += segmentSize; 6629 } 6630 6631 /* 6632 At this point we will have found either the packet or the end of the page. If were at the end of the page we'll 6633 want to load the next page and keep searching for the end of the packet. 6634 */ 6635 drflac_oggbs__seek_physical(oggbs, bytesToEndOfPacketOrPage, drflac_seek_origin_current); 6636 oggbs->bytesRemainingInPage -= bytesToEndOfPacketOrPage; 6637 6638 if (atEndOfPage) { 6639 /* 6640 We're potentially at the next packet, but we need to check the next page first to be sure because the packet may 6641 straddle pages. 6642 */ 6643 if (!drflac_oggbs__goto_next_page(oggbs)) { 6644 return DRFLAC_FALSE; 6645 } 6646 6647 /* If it's a fresh packet it most likely means we're at the next packet. */ 6648 if ((oggbs->currentPageHeader.headerType & 0x01) == 0) { 6649 return DRFLAC_TRUE; 6650 } 6651 } else { 6652 /* We're at the next packet. */ 6653 return DRFLAC_TRUE; 6654 } 6655 } 6656 } 6657 6658 static drflac_bool32 drflac_oggbs__seek_to_next_frame(drflac_oggbs* oggbs) 6659 { 6660 /* The bitstream should be sitting on the first byte just after the header of the frame. */ 6661 6662 /* What we're actually doing here is seeking to the start of the next packet. 
*/ 6663 return drflac_oggbs__seek_to_next_packet(oggbs); 6664 } 6665 #endif 6666 6667 static size_t drflac__on_read_ogg(void* pUserData, void* bufferOut, size_t bytesToRead) 6668 { 6669 drflac_oggbs* oggbs = (drflac_oggbs*)pUserData; 6670 drflac_uint8* pRunningBufferOut = (drflac_uint8*)bufferOut; 6671 size_t bytesRead = 0; 6672 6673 DRFLAC_ASSERT(oggbs != NULL); 6674 DRFLAC_ASSERT(pRunningBufferOut != NULL); 6675 6676 /* Reading is done page-by-page. If we've run out of bytes in the page we need to move to the next one. */ 6677 while (bytesRead < bytesToRead) { 6678 size_t bytesRemainingToRead = bytesToRead - bytesRead; 6679 6680 if (oggbs->bytesRemainingInPage >= bytesRemainingToRead) { 6681 DRFLAC_COPY_MEMORY(pRunningBufferOut, oggbs->pageData + (oggbs->pageDataSize - oggbs->bytesRemainingInPage), bytesRemainingToRead); 6682 bytesRead += bytesRemainingToRead; 6683 oggbs->bytesRemainingInPage -= (drflac_uint32)bytesRemainingToRead; 6684 break; 6685 } 6686 6687 /* If we get here it means some of the requested data is contained in the next pages. */ 6688 if (oggbs->bytesRemainingInPage > 0) { 6689 DRFLAC_COPY_MEMORY(pRunningBufferOut, oggbs->pageData + (oggbs->pageDataSize - oggbs->bytesRemainingInPage), oggbs->bytesRemainingInPage); 6690 bytesRead += oggbs->bytesRemainingInPage; 6691 pRunningBufferOut += oggbs->bytesRemainingInPage; 6692 oggbs->bytesRemainingInPage = 0; 6693 } 6694 6695 DRFLAC_ASSERT(bytesRemainingToRead > 0); 6696 if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch)) { 6697 break; /* Failed to go to the next page. Might have simply hit the end of the stream. */ 6698 } 6699 } 6700 6701 return bytesRead; 6702 } 6703 6704 static drflac_bool32 drflac__on_seek_ogg(void* pUserData, int offset, drflac_seek_origin origin) 6705 { 6706 drflac_oggbs* oggbs = (drflac_oggbs*)pUserData; 6707 int bytesSeeked = 0; 6708 6709 DRFLAC_ASSERT(oggbs != NULL); 6710 DRFLAC_ASSERT(offset >= 0); /* <-- Never seek backwards. 
*/ 6711 6712 /* Seeking is always forward which makes things a lot simpler. */ 6713 if (origin == drflac_seek_origin_start) { 6714 if (!drflac_oggbs__seek_physical(oggbs, (int)oggbs->firstBytePos, drflac_seek_origin_start)) { 6715 return DRFLAC_FALSE; 6716 } 6717 6718 if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_fail_on_crc_mismatch)) { 6719 return DRFLAC_FALSE; 6720 } 6721 6722 return drflac__on_seek_ogg(pUserData, offset, drflac_seek_origin_current); 6723 } 6724 6725 DRFLAC_ASSERT(origin == drflac_seek_origin_current); 6726 6727 while (bytesSeeked < offset) { 6728 int bytesRemainingToSeek = offset - bytesSeeked; 6729 DRFLAC_ASSERT(bytesRemainingToSeek >= 0); 6730 6731 if (oggbs->bytesRemainingInPage >= (size_t)bytesRemainingToSeek) { 6732 bytesSeeked += bytesRemainingToSeek; 6733 (void)bytesSeeked; /* <-- Silence a dead store warning emitted by Clang Static Analyzer. */ 6734 oggbs->bytesRemainingInPage -= bytesRemainingToSeek; 6735 break; 6736 } 6737 6738 /* If we get here it means some of the requested data is contained in the next pages. */ 6739 if (oggbs->bytesRemainingInPage > 0) { 6740 bytesSeeked += (int)oggbs->bytesRemainingInPage; 6741 oggbs->bytesRemainingInPage = 0; 6742 } 6743 6744 DRFLAC_ASSERT(bytesRemainingToSeek > 0); 6745 if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_fail_on_crc_mismatch)) { 6746 /* Failed to go to the next page. We either hit the end of the stream or had a CRC mismatch. */ 6747 return DRFLAC_FALSE; 6748 } 6749 } 6750 6751 return DRFLAC_TRUE; 6752 } 6753 6754 6755 drflac_bool32 drflac_ogg__seek_to_pcm_frame(drflac* pFlac, drflac_uint64 pcmFrameIndex) 6756 { 6757 drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs; 6758 drflac_uint64 originalBytePos; 6759 drflac_uint64 runningGranulePosition; 6760 drflac_uint64 runningFrameBytePos; 6761 drflac_uint64 runningPCMFrameCount; 6762 6763 DRFLAC_ASSERT(oggbs != NULL); 6764 6765 originalBytePos = oggbs->currentBytePos; /* For recovery. Points to the OggS identifier. 
*/ 6766 6767 /* First seek to the first frame. */ 6768 if (!drflac__seek_to_byte(&pFlac->bs, pFlac->firstFLACFramePosInBytes)) { 6769 return DRFLAC_FALSE; 6770 } 6771 oggbs->bytesRemainingInPage = 0; 6772 6773 runningGranulePosition = 0; 6774 for (;;) { 6775 if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch)) { 6776 drflac_oggbs__seek_physical(oggbs, originalBytePos, drflac_seek_origin_start); 6777 return DRFLAC_FALSE; /* Never did find that sample... */ 6778 } 6779 6780 runningFrameBytePos = oggbs->currentBytePos - drflac_ogg__get_page_header_size(&oggbs->currentPageHeader) - oggbs->pageDataSize; 6781 if (oggbs->currentPageHeader.granulePosition >= pcmFrameIndex) { 6782 break; /* The sample is somewhere in the previous page. */ 6783 } 6784 6785 /* 6786 At this point we know the sample is not in the previous page. It could possibly be in this page. For simplicity we 6787 disregard any pages that do not begin a fresh packet. 6788 */ 6789 if ((oggbs->currentPageHeader.headerType & 0x01) == 0) { /* <-- Is it a fresh page? */ 6790 if (oggbs->currentPageHeader.segmentTable[0] >= 2) { 6791 drflac_uint8 firstBytesInPage[2]; 6792 firstBytesInPage[0] = oggbs->pageData[0]; 6793 firstBytesInPage[1] = oggbs->pageData[1]; 6794 6795 if ((firstBytesInPage[0] == 0xFF) && (firstBytesInPage[1] & 0xFC) == 0xF8) { /* <-- Does the page begin with a frame's sync code? */ 6796 runningGranulePosition = oggbs->currentPageHeader.granulePosition; 6797 } 6798 6799 continue; 6800 } 6801 } 6802 } 6803 6804 /* 6805 We found the page that that is closest to the sample, so now we need to find it. The first thing to do is seek to the 6806 start of that page. In the loop above we checked that it was a fresh page which means this page is also the start of 6807 a new frame. This property means that after we've seeked to the page we can immediately start looping over frames until 6808 we find the one containing the target sample. 
6809 */ 6810 if (!drflac_oggbs__seek_physical(oggbs, runningFrameBytePos, drflac_seek_origin_start)) { 6811 return DRFLAC_FALSE; 6812 } 6813 if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch)) { 6814 return DRFLAC_FALSE; 6815 } 6816 6817 /* 6818 At this point we'll be sitting on the first byte of the frame header of the first frame in the page. We just keep 6819 looping over these frames until we find the one containing the sample we're after. 6820 */ 6821 runningPCMFrameCount = runningGranulePosition; 6822 for (;;) { 6823 /* 6824 There are two ways to find the sample and seek past irrelevant frames: 6825 1) Use the native FLAC decoder. 6826 2) Use Ogg's framing system. 6827 6828 Both of these options have their own pros and cons. Using the native FLAC decoder is slower because it needs to 6829 do a full decode of the frame. Using Ogg's framing system is faster, but more complicated and involves some code 6830 duplication for the decoding of frame headers. 6831 6832 Another thing to consider is that using the Ogg framing system will perform direct seeking of the physical Ogg 6833 bitstream. This is important to consider because it means we cannot read data from the drflac_bs object using the 6834 standard drflac__*() APIs because that will read in extra data for its own internal caching which in turn breaks 6835 the positioning of the read pointer of the physical Ogg bitstream. Therefore, anything that would normally be read 6836 using the native FLAC decoding APIs, such as drflac__read_next_flac_frame_header(), need to be re-implemented so as to 6837 avoid the use of the drflac_bs object. 6838 6839 Considering these issues, I have decided to use the slower native FLAC decoding method for the following reasons: 6840 1) Seeking is already partially accelerated using Ogg's paging system in the code block above. 6841 2) Seeking in an Ogg encapsulated FLAC stream is probably quite uncommon. 6842 3) Simplicity. 
6843 */ 6844 drflac_uint64 firstPCMFrameInFLACFrame = 0; 6845 drflac_uint64 lastPCMFrameInFLACFrame = 0; 6846 drflac_uint64 pcmFrameCountInThisFrame; 6847 6848 if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) { 6849 return DRFLAC_FALSE; 6850 } 6851 6852 drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &firstPCMFrameInFLACFrame, &lastPCMFrameInFLACFrame); 6853 6854 pcmFrameCountInThisFrame = (lastPCMFrameInFLACFrame - firstPCMFrameInFLACFrame) + 1; 6855 6856 /* If we are seeking to the end of the file and we've just hit it, we're done. */ 6857 if (pcmFrameIndex == pFlac->totalPCMFrameCount && (runningPCMFrameCount + pcmFrameCountInThisFrame) == pFlac->totalPCMFrameCount) { 6858 drflac_result result = drflac__decode_flac_frame(pFlac); 6859 if (result == DRFLAC_SUCCESS) { 6860 pFlac->currentPCMFrame = pcmFrameIndex; 6861 pFlac->currentFLACFrame.pcmFramesRemaining = 0; 6862 return DRFLAC_TRUE; 6863 } else { 6864 return DRFLAC_FALSE; 6865 } 6866 } 6867 6868 if (pcmFrameIndex < (runningPCMFrameCount + pcmFrameCountInThisFrame)) { 6869 /* 6870 The sample should be in this FLAC frame. We need to fully decode it, however if it's an invalid frame (a CRC mismatch), we need to pretend 6871 it never existed and keep iterating. 6872 */ 6873 drflac_result result = drflac__decode_flac_frame(pFlac); 6874 if (result == DRFLAC_SUCCESS) { 6875 /* The frame is valid. We just need to skip over some samples to ensure it's sample-exact. */ 6876 drflac_uint64 pcmFramesToDecode = (size_t)(pcmFrameIndex - runningPCMFrameCount); /* <-- Safe cast because the maximum number of samples in a frame is 65535. */ 6877 if (pcmFramesToDecode == 0) { 6878 return DRFLAC_TRUE; 6879 } 6880 6881 pFlac->currentPCMFrame = runningPCMFrameCount; 6882 6883 return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode; /* <-- If this fails, something bad has happened (it should never fail). 
/*
Initialization path for Ogg encapsulated streams.

Walks the beginning-of-stream (BOS) pages of the physical Ogg stream looking for the one that carries the FLAC mapping header. When
found, the STREAMINFO block inside it is decoded into pInit (sample rate, channels, bits per sample, total PCM frame count, max block
size), the optional onMeta callback is fired for it, and the serial number / header of that page are recorded so the Ogg bit stream
object can be created later.

Pre-condition: the stream is positioned just past the 4-byte "OggS" capture pattern of the first page.

Returns DRFLAC_TRUE when a FLAC logical stream was found. Returns DRFLAC_FALSE on any read/seek failure, when a non-BOS page is reached
before a FLAC header was seen, or when a page that identifies itself as FLAC is malformed. The `relaxed` argument is unused.
*/
drflac_bool32 drflac__init_private__ogg(drflac_init_info* pInit, drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_bool32 relaxed)
{
    drflac_ogg_page_header header;
    drflac_uint32 crc32 = DRFLAC_OGG_CAPTURE_PATTERN_CRC32;
    drflac_uint32 bytesRead = 0;

    /* Pre Condition: The bit stream should be sitting just past the 4-byte OggS capture pattern. */
    (void)relaxed;

    pInit->container = drflac_container_ogg;
    pInit->oggFirstBytePos = 0;

    /*
    We'll get here if the first 4 bytes of the stream were the OggS capture pattern, however it doesn't necessarily mean the
    stream includes FLAC encoded audio. To check for this we need to scan the beginning-of-stream page markers and check if
    any match the FLAC specification. Important to keep in mind that the stream may be multiplexed.
    */
    if (drflac_ogg__read_page_header_after_capture_pattern(onRead, pUserData, &header, &bytesRead, &crc32) != DRFLAC_SUCCESS) {
        return DRFLAC_FALSE;
    }
    pInit->runningFilePos += bytesRead;

    for (;;) {
        int pageBodySize;

        /* Once we're past the beginning-of-stream pages without having found a FLAC header there is no FLAC stream. Fail. */
        if ((header.headerType & 0x02) == 0) {
            return DRFLAC_FALSE;
        }

        /*
        Check if it's a FLAC header. The bytes consumed below add up to the 51 being tested for: 1 (packet type) + 4 ("FLAC") +
        2 (mapping version) + 2 (header packet count) + 4 ("fLaC") + the metadata block header (presumably 4 bytes) + 34 (STREAMINFO).
        */
        pageBodySize = drflac_ogg__get_page_body_size(&header);
        if (pageBodySize == 51) {   /* 51 = the lacing value of the FLAC header packet. */
            /* It could be a FLAC page... */
            drflac_uint32 bytesRemainingInPage = pageBodySize;
            drflac_uint8 packetType;

            if (onRead(pUserData, &packetType, 1) != 1) {
                return DRFLAC_FALSE;
            }

            bytesRemainingInPage -= 1;
            if (packetType == 0x7F) {
                /* Increasingly more likely to be a FLAC page... */
                drflac_uint8 sig[4];
                if (onRead(pUserData, sig, 4) != 4) {
                    return DRFLAC_FALSE;
                }

                bytesRemainingInPage -= 4;
                if (sig[0] == 'F' && sig[1] == 'L' && sig[2] == 'A' && sig[3] == 'C') {
                    /* Almost certainly a FLAC page... From here on any inconsistency is a hard failure rather than a skip. */
                    drflac_uint8 mappingVersion[2];
                    if (onRead(pUserData, mappingVersion, 2) != 2) {
                        return DRFLAC_FALSE;
                    }

                    if (mappingVersion[0] != 1) {
                        return DRFLAC_FALSE;   /* Only supporting version 1.x of the Ogg mapping. */
                    }

                    /*
                    The next 2 bytes are the non-audio packets, not including this one. We don't care about this because we're going to
                    be handling it in a generic way based on the serial number and packet types.
                    */
                    if (!onSeek(pUserData, 2, drflac_seek_origin_current)) {
                        return DRFLAC_FALSE;
                    }

                    /* Expecting the native FLAC signature "fLaC". */
                    if (onRead(pUserData, sig, 4) != 4) {
                        return DRFLAC_FALSE;
                    }

                    if (sig[0] == 'f' && sig[1] == 'L' && sig[2] == 'a' && sig[3] == 'C') {
                        /* The remaining data in the page should be the STREAMINFO block. */
                        drflac_streaminfo streaminfo;
                        drflac_uint8 isLastBlock;
                        drflac_uint8 blockType;
                        drflac_uint32 blockSize;
                        if (!drflac__read_and_decode_block_header(onRead, pUserData, &isLastBlock, &blockType, &blockSize)) {
                            return DRFLAC_FALSE;
                        }

                        if (blockType != DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO || blockSize != 34) {
                            return DRFLAC_FALSE;    /* Invalid block type. First block must be the STREAMINFO block. */
                        }

                        if (drflac__read_streaminfo(onRead, pUserData, &streaminfo)) {
                            /* Success! */
                            pInit->hasStreamInfoBlock      = DRFLAC_TRUE;
                            pInit->sampleRate              = streaminfo.sampleRate;
                            pInit->channels                = streaminfo.channels;
                            pInit->bitsPerSample           = streaminfo.bitsPerSample;
                            pInit->totalPCMFrameCount      = streaminfo.totalPCMFrameCount;
                            pInit->maxBlockSizeInPCMFrames = streaminfo.maxBlockSizeInPCMFrames;
                            pInit->hasMetadataBlocks       = !isLastBlock;  /* Overwritten with DRFLAC_TRUE after the loop. */

                            if (onMeta) {
                                drflac_metadata metadata;
                                metadata.type = DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO;
                                metadata.pRawData = NULL;
                                metadata.rawDataSize = 0;
                                metadata.data.streaminfo = streaminfo;
                                onMeta(pUserDataMD, &metadata);
                            }

                            /*
                            The whole page body has now been consumed. 79 is the size of this page: the 51-byte body plus its
                            header (presumably the 27-byte fixed part plus a 1-entry segment table - TODO confirm).
                            */
                            pInit->runningFilePos += pageBodySize;
                            pInit->oggFirstBytePos = pInit->runningFilePos - 79;    /* Subtracting 79 will place us right on top of the "OggS" identifier of the FLAC bos page. */
                            pInit->oggSerial = header.serialNumber;
                            pInit->oggBosHeader = header;
                            break;
                        } else {
                            /* Failed to read STREAMINFO block. Aww, so close... */
                            return DRFLAC_FALSE;
                        }
                    } else {
                        /* Invalid file. */
                        return DRFLAC_FALSE;
                    }
                } else {
                    /* Not a FLAC header. Skip the part of the page body we have not read yet. */
                    if (!onSeek(pUserData, bytesRemainingInPage, drflac_seek_origin_current)) {
                        return DRFLAC_FALSE;
                    }
                }
            } else {
                /* Not a FLAC header. Skip the part of the page body we have not read yet and move on to the next page. */
                if (!onSeek(pUserData, bytesRemainingInPage, drflac_seek_origin_current)) {
                    return DRFLAC_FALSE;
                }
            }
        } else {
            /* Wrong body size for a FLAC header page. Nothing of the body has been read, so skip all of it. */
            if (!onSeek(pUserData, pageBodySize, drflac_seek_origin_current)) {
                return DRFLAC_FALSE;
            }
        }

        /* Whichever skip path was taken, the stream is now at the end of this page's body. */
        pInit->runningFilePos += pageBodySize;


        /* Read the header of the next page. */
        if (drflac_ogg__read_page_header(onRead, pUserData, &header, &bytesRead, &crc32) != DRFLAC_SUCCESS) {
            return DRFLAC_FALSE;
        }
        pInit->runningFilePos += bytesRead;
    }

    /*
    If we get here it means we found a FLAC audio stream. We should be sitting on the first byte of the header of the next page. The next
    packets in the FLAC logical stream contain the metadata. The only thing left to do in the initialization phase for Ogg is to create the
    Ogg bitstream object.
    */
    pInit->hasMetadataBlocks = DRFLAC_TRUE;   /* <-- Always have at least VORBIS_COMMENT metadata block. */
    return DRFLAC_TRUE;
}
*/ 7123 } 7124 pInit->runningFilePos += headerSize; 7125 } else { 7126 break; 7127 } 7128 } 7129 7130 if (id[0] == 'f' && id[1] == 'L' && id[2] == 'a' && id[3] == 'C') { 7131 return drflac__init_private__native(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed); 7132 } 7133 #ifndef DR_FLAC_NO_OGG 7134 if (id[0] == 'O' && id[1] == 'g' && id[2] == 'g' && id[3] == 'S') { 7135 return drflac__init_private__ogg(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed); 7136 } 7137 #endif 7138 7139 /* If we get here it means we likely don't have a header. Try opening in relaxed mode, if applicable. */ 7140 if (relaxed) { 7141 if (container == drflac_container_native) { 7142 return drflac__init_private__native(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed); 7143 } 7144 #ifndef DR_FLAC_NO_OGG 7145 if (container == drflac_container_ogg) { 7146 return drflac__init_private__ogg(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed); 7147 } 7148 #endif 7149 } 7150 7151 /* Unsupported container. 
*/ 7152 return DRFLAC_FALSE; 7153 } 7154 7155 void drflac__init_from_info(drflac* pFlac, drflac_init_info* pInit) 7156 { 7157 DRFLAC_ASSERT(pFlac != NULL); 7158 DRFLAC_ASSERT(pInit != NULL); 7159 7160 DRFLAC_ZERO_MEMORY(pFlac, sizeof(*pFlac)); 7161 pFlac->bs = pInit->bs; 7162 pFlac->onMeta = pInit->onMeta; 7163 pFlac->pUserDataMD = pInit->pUserDataMD; 7164 pFlac->maxBlockSizeInPCMFrames = pInit->maxBlockSizeInPCMFrames; 7165 pFlac->sampleRate = pInit->sampleRate; 7166 pFlac->channels = (drflac_uint8)pInit->channels; 7167 pFlac->bitsPerSample = (drflac_uint8)pInit->bitsPerSample; 7168 pFlac->totalPCMFrameCount = pInit->totalPCMFrameCount; 7169 pFlac->container = pInit->container; 7170 } 7171 7172 7173 drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, void* pUserDataMD, const drflac_allocation_callbacks* pAllocationCallbacks) 7174 { 7175 drflac_init_info init; 7176 drflac_uint32 allocationSize; 7177 drflac_uint32 wholeSIMDVectorCountPerChannel; 7178 drflac_uint32 decodedSamplesAllocationSize; 7179 #ifndef DR_FLAC_NO_OGG 7180 drflac_oggbs oggbs; 7181 #endif 7182 drflac_uint64 firstFramePos; 7183 drflac_uint64 seektablePos; 7184 drflac_uint32 seektableSize; 7185 drflac_allocation_callbacks allocationCallbacks; 7186 drflac* pFlac; 7187 7188 /* CPU support first. */ 7189 drflac__init_cpu_caps(); 7190 7191 if (!drflac__init_private(&init, onRead, onSeek, onMeta, container, pUserData, pUserDataMD)) { 7192 return NULL; 7193 } 7194 7195 if (pAllocationCallbacks != NULL) { 7196 allocationCallbacks = *pAllocationCallbacks; 7197 if (allocationCallbacks.onFree == NULL || (allocationCallbacks.onMalloc == NULL && allocationCallbacks.onRealloc == NULL)) { 7198 return NULL; /* Invalid allocation callbacks. 
*/ 7199 } 7200 } else { 7201 allocationCallbacks.pUserData = NULL; 7202 allocationCallbacks.onMalloc = drflac__malloc_default; 7203 allocationCallbacks.onRealloc = drflac__realloc_default; 7204 allocationCallbacks.onFree = drflac__free_default; 7205 } 7206 7207 7208 /* 7209 The size of the allocation for the drflac object needs to be large enough to fit the following: 7210 1) The main members of the drflac structure 7211 2) A block of memory large enough to store the decoded samples of the largest frame in the stream 7212 3) If the container is Ogg, a drflac_oggbs object 7213 7214 The complicated part of the allocation is making sure there's enough room the decoded samples, taking into consideration 7215 the different SIMD instruction sets. 7216 */ 7217 allocationSize = sizeof(drflac); 7218 7219 /* 7220 The allocation size for decoded frames depends on the number of 32-bit integers that fit inside the largest SIMD vector 7221 we are supporting. 7222 */ 7223 if ((init.maxBlockSizeInPCMFrames % (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32))) == 0) { 7224 wholeSIMDVectorCountPerChannel = (init.maxBlockSizeInPCMFrames / (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32))); 7225 } else { 7226 wholeSIMDVectorCountPerChannel = (init.maxBlockSizeInPCMFrames / (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32))) + 1; 7227 } 7228 7229 decodedSamplesAllocationSize = wholeSIMDVectorCountPerChannel * DRFLAC_MAX_SIMD_VECTOR_SIZE * init.channels; 7230 7231 allocationSize += decodedSamplesAllocationSize; 7232 allocationSize += DRFLAC_MAX_SIMD_VECTOR_SIZE; /* Allocate extra bytes to ensure we have enough for alignment. */ 7233 7234 #ifndef DR_FLAC_NO_OGG 7235 /* There's additional data required for Ogg streams. 
*/ 7236 if (init.container == drflac_container_ogg) { 7237 allocationSize += sizeof(drflac_oggbs); 7238 } 7239 7240 DRFLAC_ZERO_MEMORY(&oggbs, sizeof(oggbs)); 7241 if (init.container == drflac_container_ogg) { 7242 oggbs.onRead = onRead; 7243 oggbs.onSeek = onSeek; 7244 oggbs.pUserData = pUserData; 7245 oggbs.currentBytePos = init.oggFirstBytePos; 7246 oggbs.firstBytePos = init.oggFirstBytePos; 7247 oggbs.serialNumber = init.oggSerial; 7248 oggbs.bosPageHeader = init.oggBosHeader; 7249 oggbs.bytesRemainingInPage = 0; 7250 } 7251 #endif 7252 7253 /* 7254 This part is a bit awkward. We need to load the seektable so that it can be referenced in-memory, but I want the drflac object to 7255 consist of only a single heap allocation. To this, the size of the seek table needs to be known, which we determine when reading 7256 and decoding the metadata. 7257 */ 7258 firstFramePos = 42; /* <-- We know we are at byte 42 at this point. */ 7259 seektablePos = 0; 7260 seektableSize = 0; 7261 if (init.hasMetadataBlocks) { 7262 drflac_read_proc onReadOverride = onRead; 7263 drflac_seek_proc onSeekOverride = onSeek; 7264 void* pUserDataOverride = pUserData; 7265 7266 #ifndef DR_FLAC_NO_OGG 7267 if (init.container == drflac_container_ogg) { 7268 onReadOverride = drflac__on_read_ogg; 7269 onSeekOverride = drflac__on_seek_ogg; 7270 pUserDataOverride = (void*)&oggbs; 7271 } 7272 #endif 7273 7274 if (!drflac__read_and_decode_metadata(onReadOverride, onSeekOverride, onMeta, pUserDataOverride, pUserDataMD, &firstFramePos, &seektablePos, &seektableSize, &allocationCallbacks)) { 7275 return NULL; 7276 } 7277 7278 allocationSize += seektableSize; 7279 } 7280 7281 7282 pFlac = (drflac*)drflac__malloc_from_callbacks(allocationSize, &allocationCallbacks); 7283 drflac__init_from_info(pFlac, &init); 7284 pFlac->allocationCallbacks = allocationCallbacks; 7285 pFlac->pDecodedSamples = (drflac_int32*)drflac_align((size_t)pFlac->pExtraData, DRFLAC_MAX_SIMD_VECTOR_SIZE); 7286 7287 #ifndef 
DR_FLAC_NO_OGG 7288 if (init.container == drflac_container_ogg) { 7289 drflac_oggbs* pInternalOggbs = (drflac_oggbs*)((drflac_uint8*)pFlac->pDecodedSamples + decodedSamplesAllocationSize + seektableSize); 7290 *pInternalOggbs = oggbs; 7291 7292 /* The Ogg bistream needs to be layered on top of the original bitstream. */ 7293 pFlac->bs.onRead = drflac__on_read_ogg; 7294 pFlac->bs.onSeek = drflac__on_seek_ogg; 7295 pFlac->bs.pUserData = (void*)pInternalOggbs; 7296 pFlac->_oggbs = (void*)pInternalOggbs; 7297 } 7298 #endif 7299 7300 pFlac->firstFLACFramePosInBytes = firstFramePos; 7301 7302 /* NOTE: Seektables are not currently compatible with Ogg encapsulation (Ogg has its own accelerated seeking system). I may change this later, so I'm leaving this here for now. */ 7303 #ifndef DR_FLAC_NO_OGG 7304 if (init.container == drflac_container_ogg) 7305 { 7306 pFlac->pSeekpoints = NULL; 7307 pFlac->seekpointCount = 0; 7308 } 7309 else 7310 #endif 7311 { 7312 /* If we have a seektable we need to load it now, making sure we move back to where we were previously. */ 7313 if (seektablePos != 0) { 7314 pFlac->seekpointCount = seektableSize / sizeof(*pFlac->pSeekpoints); 7315 pFlac->pSeekpoints = (drflac_seekpoint*)((drflac_uint8*)pFlac->pDecodedSamples + decodedSamplesAllocationSize); 7316 7317 /* Seek to the seektable, then just read directly into our seektable buffer. */ 7318 if (pFlac->bs.onSeek(pFlac->bs.pUserData, (int)seektablePos, drflac_seek_origin_start)) { 7319 if (pFlac->bs.onRead(pFlac->bs.pUserData, pFlac->pSeekpoints, seektableSize) == seektableSize) { 7320 /* Endian swap. 
*/ 7321 drflac_uint32 iSeekpoint; 7322 for (iSeekpoint = 0; iSeekpoint < pFlac->seekpointCount; ++iSeekpoint) { 7323 pFlac->pSeekpoints[iSeekpoint].firstPCMFrame = drflac__be2host_64(pFlac->pSeekpoints[iSeekpoint].firstPCMFrame); 7324 pFlac->pSeekpoints[iSeekpoint].flacFrameOffset = drflac__be2host_64(pFlac->pSeekpoints[iSeekpoint].flacFrameOffset); 7325 pFlac->pSeekpoints[iSeekpoint].pcmFrameCount = drflac__be2host_16(pFlac->pSeekpoints[iSeekpoint].pcmFrameCount); 7326 } 7327 } else { 7328 /* Failed to read the seektable. Pretend we don't have one. */ 7329 pFlac->pSeekpoints = NULL; 7330 pFlac->seekpointCount = 0; 7331 } 7332 7333 /* We need to seek back to where we were. If this fails it's a critical error. */ 7334 if (!pFlac->bs.onSeek(pFlac->bs.pUserData, (int)pFlac->firstFLACFramePosInBytes, drflac_seek_origin_start)) { 7335 drflac__free_from_callbacks(pFlac, &allocationCallbacks); 7336 return NULL; 7337 } 7338 } else { 7339 /* Failed to seek to the seektable. Ominous sign, but for now we can just pretend we don't have one. */ 7340 pFlac->pSeekpoints = NULL; 7341 pFlac->seekpointCount = 0; 7342 } 7343 } 7344 } 7345 7346 7347 /* 7348 If we get here, but don't have a STREAMINFO block, it means we've opened the stream in relaxed mode and need to decode 7349 the first frame. 
7350 */ 7351 if (!init.hasStreamInfoBlock) { 7352 pFlac->currentFLACFrame.header = init.firstFrameHeader; 7353 do 7354 { 7355 drflac_result result = drflac__decode_flac_frame(pFlac); 7356 if (result == DRFLAC_SUCCESS) { 7357 break; 7358 } else { 7359 if (result == DRFLAC_CRC_MISMATCH) { 7360 if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) { 7361 drflac__free_from_callbacks(pFlac, &allocationCallbacks); 7362 return NULL; 7363 } 7364 continue; 7365 } else { 7366 drflac__free_from_callbacks(pFlac, &allocationCallbacks); 7367 return NULL; 7368 } 7369 } 7370 } while (1); 7371 } 7372 7373 return pFlac; 7374 } 7375 7376 7377 7378 #ifndef DR_FLAC_NO_STDIO 7379 #include <stdio.h> 7380 7381 static size_t drflac__on_read_stdio(void* pUserData, void* bufferOut, size_t bytesToRead) 7382 { 7383 return fread(bufferOut, 1, bytesToRead, (FILE*)pUserData); 7384 } 7385 7386 static drflac_bool32 drflac__on_seek_stdio(void* pUserData, int offset, drflac_seek_origin origin) 7387 { 7388 DRFLAC_ASSERT(offset >= 0); /* <-- Never seek backwards. */ 7389 7390 return fseek((FILE*)pUserData, offset, (origin == drflac_seek_origin_current) ? 
SEEK_CUR : SEEK_SET) == 0; 7391 } 7392 7393 static FILE* drflac__fopen(const char* filename) 7394 { 7395 FILE* pFile; 7396 #if defined(_MSC_VER) && _MSC_VER >= 1400 7397 if (fopen_s(&pFile, filename, "rb") != 0) { 7398 return NULL; 7399 } 7400 #else 7401 pFile = fopen(filename, "rb"); 7402 if (pFile == NULL) { 7403 return NULL; 7404 } 7405 #endif 7406 7407 return pFile; 7408 } 7409 7410 7411 drflac* drflac_open_file(const char* filename, const drflac_allocation_callbacks* pAllocationCallbacks) 7412 { 7413 drflac* pFlac; 7414 FILE* pFile; 7415 7416 pFile = drflac__fopen(filename); 7417 if (pFile == NULL) { 7418 return NULL; 7419 } 7420 7421 pFlac = drflac_open(drflac__on_read_stdio, drflac__on_seek_stdio, (void*)pFile, pAllocationCallbacks); 7422 if (pFlac == NULL) { 7423 fclose(pFile); 7424 return NULL; 7425 } 7426 7427 return pFlac; 7428 } 7429 7430 drflac* drflac_open_file_with_metadata(const char* filename, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks) 7431 { 7432 drflac* pFlac; 7433 FILE* pFile; 7434 7435 pFile = drflac__fopen(filename); 7436 if (pFile == NULL) { 7437 return NULL; 7438 } 7439 7440 pFlac = drflac_open_with_metadata_private(drflac__on_read_stdio, drflac__on_seek_stdio, onMeta, drflac_container_unknown, (void*)pFile, pUserData, pAllocationCallbacks); 7441 if (pFlac == NULL) { 7442 fclose(pFile); 7443 return pFlac; 7444 } 7445 7446 return pFlac; 7447 } 7448 #endif /* DR_FLAC_NO_STDIO */ 7449 7450 static size_t drflac__on_read_memory(void* pUserData, void* bufferOut, size_t bytesToRead) 7451 { 7452 drflac__memory_stream* memoryStream = (drflac__memory_stream*)pUserData; 7453 size_t bytesRemaining; 7454 7455 DRFLAC_ASSERT(memoryStream != NULL); 7456 DRFLAC_ASSERT(memoryStream->dataSize >= memoryStream->currentReadPos); 7457 7458 bytesRemaining = memoryStream->dataSize - memoryStream->currentReadPos; 7459 if (bytesToRead > bytesRemaining) { 7460 bytesToRead = bytesRemaining; 7461 } 7462 7463 if 
(bytesToRead > 0) { 7464 DRFLAC_COPY_MEMORY(bufferOut, memoryStream->data + memoryStream->currentReadPos, bytesToRead); 7465 memoryStream->currentReadPos += bytesToRead; 7466 } 7467 7468 return bytesToRead; 7469 } 7470 7471 static drflac_bool32 drflac__on_seek_memory(void* pUserData, int offset, drflac_seek_origin origin) 7472 { 7473 drflac__memory_stream* memoryStream = (drflac__memory_stream*)pUserData; 7474 7475 DRFLAC_ASSERT(memoryStream != NULL); 7476 DRFLAC_ASSERT(offset >= 0); /* <-- Never seek backwards. */ 7477 7478 if (offset > (drflac_int64)memoryStream->dataSize) { 7479 return DRFLAC_FALSE; 7480 } 7481 7482 if (origin == drflac_seek_origin_current) { 7483 if (memoryStream->currentReadPos + offset <= memoryStream->dataSize) { 7484 memoryStream->currentReadPos += offset; 7485 } else { 7486 return DRFLAC_FALSE; /* Trying to seek too far forward. */ 7487 } 7488 } else { 7489 if ((drflac_uint32)offset <= memoryStream->dataSize) { 7490 memoryStream->currentReadPos = offset; 7491 } else { 7492 return DRFLAC_FALSE; /* Trying to seek too far forward. */ 7493 } 7494 } 7495 7496 return DRFLAC_TRUE; 7497 } 7498 7499 drflac* drflac_open_memory(const void* data, size_t dataSize, const drflac_allocation_callbacks* pAllocationCallbacks) 7500 { 7501 drflac__memory_stream memoryStream; 7502 drflac* pFlac; 7503 7504 memoryStream.data = (const unsigned char*)data; 7505 memoryStream.dataSize = dataSize; 7506 memoryStream.currentReadPos = 0; 7507 pFlac = drflac_open(drflac__on_read_memory, drflac__on_seek_memory, &memoryStream, pAllocationCallbacks); 7508 if (pFlac == NULL) { 7509 return NULL; 7510 } 7511 7512 pFlac->memoryStream = memoryStream; 7513 7514 /* This is an awful hack... 
*/ 7515 #ifndef DR_FLAC_NO_OGG 7516 if (pFlac->container == drflac_container_ogg) 7517 { 7518 drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs; 7519 oggbs->pUserData = &pFlac->memoryStream; 7520 } 7521 else 7522 #endif 7523 { 7524 pFlac->bs.pUserData = &pFlac->memoryStream; 7525 } 7526 7527 return pFlac; 7528 } 7529 7530 drflac* drflac_open_memory_with_metadata(const void* data, size_t dataSize, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks) 7531 { 7532 drflac__memory_stream memoryStream; 7533 drflac* pFlac; 7534 7535 memoryStream.data = (const unsigned char*)data; 7536 memoryStream.dataSize = dataSize; 7537 memoryStream.currentReadPos = 0; 7538 pFlac = drflac_open_with_metadata_private(drflac__on_read_memory, drflac__on_seek_memory, onMeta, drflac_container_unknown, &memoryStream, pUserData, pAllocationCallbacks); 7539 if (pFlac == NULL) { 7540 return NULL; 7541 } 7542 7543 pFlac->memoryStream = memoryStream; 7544 7545 /* This is an awful hack... 
*/ 7546 #ifndef DR_FLAC_NO_OGG 7547 if (pFlac->container == drflac_container_ogg) 7548 { 7549 drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs; 7550 oggbs->pUserData = &pFlac->memoryStream; 7551 } 7552 else 7553 #endif 7554 { 7555 pFlac->bs.pUserData = &pFlac->memoryStream; 7556 } 7557 7558 return pFlac; 7559 } 7560 7561 7562 7563 drflac* drflac_open(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks) 7564 { 7565 return drflac_open_with_metadata_private(onRead, onSeek, NULL, drflac_container_unknown, pUserData, pUserData, pAllocationCallbacks); 7566 } 7567 drflac* drflac_open_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks) 7568 { 7569 return drflac_open_with_metadata_private(onRead, onSeek, NULL, container, pUserData, pUserData, pAllocationCallbacks); 7570 } 7571 7572 drflac* drflac_open_with_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks) 7573 { 7574 return drflac_open_with_metadata_private(onRead, onSeek, onMeta, drflac_container_unknown, pUserData, pUserData, pAllocationCallbacks); 7575 } 7576 drflac* drflac_open_with_metadata_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks) 7577 { 7578 return drflac_open_with_metadata_private(onRead, onSeek, onMeta, container, pUserData, pUserData, pAllocationCallbacks); 7579 } 7580 7581 void drflac_close(drflac* pFlac) 7582 { 7583 if (pFlac == NULL) { 7584 return; 7585 } 7586 7587 #ifndef DR_FLAC_NO_STDIO 7588 /* 7589 If we opened the file with drflac_open_file() we will want to close the file handle. We can know whether or not drflac_open_file() 7590 was used by looking at the callbacks. 
7591 */ 7592 if (pFlac->bs.onRead == drflac__on_read_stdio) { 7593 fclose((FILE*)pFlac->bs.pUserData); 7594 } 7595 7596 #ifndef DR_FLAC_NO_OGG 7597 /* Need to clean up Ogg streams a bit differently due to the way the bit streaming is chained. */ 7598 if (pFlac->container == drflac_container_ogg) { 7599 drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs; 7600 DRFLAC_ASSERT(pFlac->bs.onRead == drflac__on_read_ogg); 7601 7602 if (oggbs->onRead == drflac__on_read_stdio) { 7603 fclose((FILE*)oggbs->pUserData); 7604 } 7605 } 7606 #endif 7607 #endif 7608 7609 drflac__free_from_callbacks(pFlac, &pFlac->allocationCallbacks); 7610 } 7611 7612 7613 #if 0 7614 static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) 7615 { 7616 drflac_uint64 i; 7617 for (i = 0; i < frameCount; ++i) { 7618 drflac_int32 left = pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); 7619 drflac_int32 side = pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); 7620 drflac_int32 right = left - side; 7621 7622 pOutputSamples[i*2+0] = left; 7623 pOutputSamples[i*2+1] = right; 7624 } 7625 } 7626 #endif 7627 7628 static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) 7629 { 7630 drflac_uint64 i; 7631 drflac_uint64 frameCount4 = frameCount >> 2; 7632 7633 drflac_int32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 7634 drflac_int32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 7635 for (i = 0; i < frameCount4; ++i) { 7636 
drflac_int32 left0 = pInputSamples0[i*4+0] << shift0; 7637 drflac_int32 left1 = pInputSamples0[i*4+1] << shift0; 7638 drflac_int32 left2 = pInputSamples0[i*4+2] << shift0; 7639 drflac_int32 left3 = pInputSamples0[i*4+3] << shift0; 7640 7641 drflac_int32 side0 = pInputSamples1[i*4+0] << shift1; 7642 drflac_int32 side1 = pInputSamples1[i*4+1] << shift1; 7643 drflac_int32 side2 = pInputSamples1[i*4+2] << shift1; 7644 drflac_int32 side3 = pInputSamples1[i*4+3] << shift1; 7645 7646 drflac_int32 right0 = left0 - side0; 7647 drflac_int32 right1 = left1 - side1; 7648 drflac_int32 right2 = left2 - side2; 7649 drflac_int32 right3 = left3 - side3; 7650 7651 pOutputSamples[i*8+0] = left0; 7652 pOutputSamples[i*8+1] = right0; 7653 pOutputSamples[i*8+2] = left1; 7654 pOutputSamples[i*8+3] = right1; 7655 pOutputSamples[i*8+4] = left2; 7656 pOutputSamples[i*8+5] = right2; 7657 pOutputSamples[i*8+6] = left3; 7658 pOutputSamples[i*8+7] = right3; 7659 } 7660 7661 for (i = (frameCount4 << 2); i < frameCount; ++i) { 7662 int left = pInputSamples0[i] << shift0; 7663 int side = pInputSamples1[i] << shift1; 7664 int right = left - side; 7665 7666 pOutputSamples[i*2+0] = left; 7667 pOutputSamples[i*2+1] = right; 7668 } 7669 } 7670 7671 #if defined(DRFLAC_SUPPORT_SSE2) 7672 static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) 7673 { 7674 drflac_uint64 frameCount4; 7675 drflac_int32 shift0; 7676 drflac_int32 shift1; 7677 drflac_uint64 i; 7678 7679 DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); 7680 7681 frameCount4 = frameCount >> 2; 7682 7683 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); 7684 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); 7685 7686 for (i = 0; i < frameCount4; ++i) { 7687 __m128i left = 
_mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0); 7688 __m128i side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1); 7689 __m128i right = _mm_sub_epi32(left, side); 7690 7691 _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right)); 7692 _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right)); 7693 } 7694 7695 for (i = (frameCount4 << 2); i < frameCount; ++i) { 7696 drflac_int32 left = pInputSamples0[i] << shift0; 7697 drflac_int32 side = pInputSamples1[i] << shift1; 7698 drflac_int32 right = left - side; 7699 7700 pOutputSamples[i*2+0] = left; 7701 pOutputSamples[i*2+1] = right; 7702 } 7703 } 7704 #endif 7705 7706 #if defined(DRFLAC_SUPPORT_NEON) 7707 static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) 7708 { 7709 drflac_uint64 frameCount4; 7710 drflac_int32 shift0; 7711 drflac_int32 shift1; 7712 drflac_uint64 i; 7713 int32x4_t shift0_4; 7714 int32x4_t shift1_4; 7715 7716 DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); 7717 7718 frameCount4 = frameCount >> 2; 7719 7720 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); 7721 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); 7722 7723 shift0_4 = vdupq_n_s32(shift0); 7724 shift1_4 = vdupq_n_s32(shift1); 7725 7726 for (i = 0; i < frameCount4; ++i) { 7727 int32x4_t left; 7728 int32x4_t side; 7729 int32x4_t right; 7730 7731 left = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), shift0_4); 7732 side = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), shift1_4); 7733 right = vsubq_s32(left, side); 7734 7735 drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right)); 7736 } 7737 7738 for (i = (frameCount4 << 2); i < frameCount; ++i) { 7739 
drflac_int32 left = pInputSamples0[i] << shift0; 7740 drflac_int32 side = pInputSamples1[i] << shift1; 7741 drflac_int32 right = left - side; 7742 7743 pOutputSamples[i*2+0] = left; 7744 pOutputSamples[i*2+1] = right; 7745 } 7746 } 7747 #endif 7748 7749 static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) 7750 { 7751 #if defined(DRFLAC_SUPPORT_SSE2) 7752 if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { 7753 drflac_read_pcm_frames_s32__decode_left_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 7754 } else 7755 #elif defined(DRFLAC_SUPPORT_NEON) 7756 if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { 7757 drflac_read_pcm_frames_s32__decode_left_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 7758 } else 7759 #endif 7760 { 7761 /* Scalar fallback. 
*/ 7762 #if 0 7763 drflac_read_pcm_frames_s32__decode_left_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 7764 #else 7765 drflac_read_pcm_frames_s32__decode_left_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 7766 #endif 7767 } 7768 } 7769 7770 7771 #if 0 7772 static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) 7773 { 7774 drflac_uint64 i; 7775 for (i = 0; i < frameCount; ++i) { 7776 drflac_int32 side = pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); 7777 drflac_int32 right = pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); 7778 drflac_int32 left = right + side; 7779 7780 pOutputSamples[i*2+0] = left; 7781 pOutputSamples[i*2+1] = right; 7782 } 7783 } 7784 #endif 7785 7786 static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) 7787 { 7788 drflac_uint64 i; 7789 drflac_uint64 frameCount4 = frameCount >> 2; 7790 7791 drflac_int32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 7792 drflac_int32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 7793 for (i = 0; i < frameCount4; ++i) { 7794 drflac_int32 side0 = pInputSamples0[i*4+0] << shift0; 7795 drflac_int32 side1 = pInputSamples0[i*4+1] << shift0; 7796 drflac_int32 side2 = pInputSamples0[i*4+2] << shift0; 7797 drflac_int32 side3 = pInputSamples0[i*4+3] << shift0; 7798 7799 drflac_int32 right0 = pInputSamples1[i*4+0] << 
shift1; 7800 drflac_int32 right1 = pInputSamples1[i*4+1] << shift1; 7801 drflac_int32 right2 = pInputSamples1[i*4+2] << shift1; 7802 drflac_int32 right3 = pInputSamples1[i*4+3] << shift1; 7803 7804 drflac_int32 left0 = right0 + side0; 7805 drflac_int32 left1 = right1 + side1; 7806 drflac_int32 left2 = right2 + side2; 7807 drflac_int32 left3 = right3 + side3; 7808 7809 pOutputSamples[i*8+0] = left0; 7810 pOutputSamples[i*8+1] = right0; 7811 pOutputSamples[i*8+2] = left1; 7812 pOutputSamples[i*8+3] = right1; 7813 pOutputSamples[i*8+4] = left2; 7814 pOutputSamples[i*8+5] = right2; 7815 pOutputSamples[i*8+6] = left3; 7816 pOutputSamples[i*8+7] = right3; 7817 } 7818 7819 for (i = (frameCount4 << 2); i < frameCount; ++i) { 7820 drflac_int32 side = pInputSamples0[i] << shift0; 7821 drflac_int32 right = pInputSamples1[i] << shift1; 7822 drflac_int32 left = right + side; 7823 7824 pOutputSamples[i*2+0] = left; 7825 pOutputSamples[i*2+1] = right; 7826 } 7827 } 7828 7829 #if defined(DRFLAC_SUPPORT_SSE2) 7830 static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) 7831 { 7832 drflac_uint64 frameCount4; 7833 drflac_int32 shift0; 7834 drflac_int32 shift1; 7835 drflac_uint64 i; 7836 7837 DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); 7838 7839 frameCount4 = frameCount >> 2; 7840 7841 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); 7842 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); 7843 7844 for (i = 0; i < frameCount4; ++i) { 7845 __m128i side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0); 7846 __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1); 7847 __m128i left = _mm_add_epi32(right, side); 7848 7849 
_mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right)); 7850 _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right)); 7851 } 7852 7853 for (i = (frameCount4 << 2); i < frameCount; ++i) { 7854 drflac_int32 side = pInputSamples0[i] << shift0; 7855 drflac_int32 right = pInputSamples1[i] << shift1; 7856 drflac_int32 left = right + side; 7857 7858 pOutputSamples[i*2+0] = left; 7859 pOutputSamples[i*2+1] = right; 7860 } 7861 } 7862 #endif 7863 7864 #if defined(DRFLAC_SUPPORT_NEON) 7865 static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) 7866 { 7867 drflac_uint64 frameCount4; 7868 drflac_int32 shift0; 7869 drflac_int32 shift1; 7870 drflac_uint64 i; 7871 int32x4_t shift0_4; 7872 int32x4_t shift1_4; 7873 7874 DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); 7875 7876 frameCount4 = frameCount >> 2; 7877 7878 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); 7879 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); 7880 7881 shift0_4 = vdupq_n_s32(shift0); 7882 shift1_4 = vdupq_n_s32(shift1); 7883 7884 for (i = 0; i < frameCount4; ++i) { 7885 int32x4_t side; 7886 int32x4_t right; 7887 int32x4_t left; 7888 7889 side = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), shift0_4); 7890 right = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), shift1_4); 7891 left = vaddq_s32(right, side); 7892 7893 drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right)); 7894 } 7895 7896 for (i = (frameCount4 << 2); i < frameCount; ++i) { 7897 drflac_int32 side = pInputSamples0[i] << shift0; 7898 drflac_int32 right = pInputSamples1[i] << shift1; 7899 drflac_int32 left = right + side; 7900 7901 pOutputSamples[i*2+0] = left; 7902 pOutputSamples[i*2+1] = right; 7903 } 
7904 } 7905 #endif 7906 7907 static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) 7908 { 7909 #if defined(DRFLAC_SUPPORT_SSE2) 7910 if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { 7911 drflac_read_pcm_frames_s32__decode_right_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 7912 } else 7913 #elif defined(DRFLAC_SUPPORT_NEON) 7914 if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { 7915 drflac_read_pcm_frames_s32__decode_right_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 7916 } else 7917 #endif 7918 { 7919 /* Scalar fallback. */ 7920 #if 0 7921 drflac_read_pcm_frames_s32__decode_right_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 7922 #else 7923 drflac_read_pcm_frames_s32__decode_right_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 7924 #endif 7925 } 7926 } 7927 7928 7929 #if 0 7930 static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) 7931 { 7932 for (drflac_uint64 i = 0; i < frameCount; ++i) { 7933 int mid = pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 7934 int side = pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 7935 7936 mid = (((drflac_uint32)mid) << 1) | (side & 0x01); 7937 7938 pOutputSamples[i*2+0] = ((mid + side) >> 1) << unusedBitsPerSample; 7939 pOutputSamples[i*2+1] = ((mid - side) >> 1) << unusedBitsPerSample; 7940 } 7941 } 7942 #endif 7943 7944 static 
DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) 7945 { 7946 drflac_uint64 i; 7947 drflac_uint64 frameCount4 = frameCount >> 2; 7948 7949 drflac_int32 shift = unusedBitsPerSample; 7950 if (shift > 0) { 7951 shift -= 1; 7952 for (i = 0; i < frameCount4; ++i) { 7953 drflac_int32 temp0L; 7954 drflac_int32 temp1L; 7955 drflac_int32 temp2L; 7956 drflac_int32 temp3L; 7957 drflac_int32 temp0R; 7958 drflac_int32 temp1R; 7959 drflac_int32 temp2R; 7960 drflac_int32 temp3R; 7961 7962 drflac_int32 mid0 = pInputSamples0[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 7963 drflac_int32 mid1 = pInputSamples0[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 7964 drflac_int32 mid2 = pInputSamples0[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 7965 drflac_int32 mid3 = pInputSamples0[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 7966 7967 drflac_int32 side0 = pInputSamples1[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 7968 drflac_int32 side1 = pInputSamples1[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 7969 drflac_int32 side2 = pInputSamples1[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 7970 drflac_int32 side3 = pInputSamples1[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 7971 7972 mid0 = (((drflac_uint32)mid0) << 1) | (side0 & 0x01); 7973 mid1 = (((drflac_uint32)mid1) << 1) | (side1 & 0x01); 7974 mid2 = (((drflac_uint32)mid2) << 1) | (side2 & 0x01); 7975 mid3 = (((drflac_uint32)mid3) << 1) | (side3 & 0x01); 7976 7977 temp0L = ((mid0 + side0) << shift); 7978 temp1L = ((mid1 + side1) << shift); 7979 temp2L = ((mid2 + side2) << shift); 7980 temp3L = ((mid3 + side3) << shift); 7981 7982 temp0R = ((mid0 - side0) << 
shift); 7983 temp1R = ((mid1 - side1) << shift); 7984 temp2R = ((mid2 - side2) << shift); 7985 temp3R = ((mid3 - side3) << shift); 7986 7987 pOutputSamples[i*8+0] = temp0L; 7988 pOutputSamples[i*8+1] = temp0R; 7989 pOutputSamples[i*8+2] = temp1L; 7990 pOutputSamples[i*8+3] = temp1R; 7991 pOutputSamples[i*8+4] = temp2L; 7992 pOutputSamples[i*8+5] = temp2R; 7993 pOutputSamples[i*8+6] = temp3L; 7994 pOutputSamples[i*8+7] = temp3R; 7995 } 7996 } else { 7997 for (i = 0; i < frameCount4; ++i) { 7998 drflac_int32 temp0L; 7999 drflac_int32 temp1L; 8000 drflac_int32 temp2L; 8001 drflac_int32 temp3L; 8002 drflac_int32 temp0R; 8003 drflac_int32 temp1R; 8004 drflac_int32 temp2R; 8005 drflac_int32 temp3R; 8006 8007 drflac_int32 mid0 = pInputSamples0[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 8008 drflac_int32 mid1 = pInputSamples0[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 8009 drflac_int32 mid2 = pInputSamples0[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 8010 drflac_int32 mid3 = pInputSamples0[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 8011 8012 drflac_int32 side0 = pInputSamples1[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 8013 drflac_int32 side1 = pInputSamples1[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 8014 drflac_int32 side2 = pInputSamples1[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 8015 drflac_int32 side3 = pInputSamples1[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 8016 8017 mid0 = (((drflac_uint32)mid0) << 1) | (side0 & 0x01); 8018 mid1 = (((drflac_uint32)mid1) << 1) | (side1 & 0x01); 8019 mid2 = (((drflac_uint32)mid2) << 1) | (side2 & 0x01); 8020 mid3 = (((drflac_uint32)mid3) << 1) | (side3 & 0x01); 8021 8022 temp0L = ((mid0 + side0) >> 1); 8023 temp1L = ((mid1 + side1) >> 1); 8024 temp2L = ((mid2 + side2) >> 1); 8025 temp3L = ((mid3 + side3) >> 1); 8026 8027 temp0R = ((mid0 
- side0) >> 1); 8028 temp1R = ((mid1 - side1) >> 1); 8029 temp2R = ((mid2 - side2) >> 1); 8030 temp3R = ((mid3 - side3) >> 1); 8031 8032 pOutputSamples[i*8+0] = temp0L; 8033 pOutputSamples[i*8+1] = temp0R; 8034 pOutputSamples[i*8+2] = temp1L; 8035 pOutputSamples[i*8+3] = temp1R; 8036 pOutputSamples[i*8+4] = temp2L; 8037 pOutputSamples[i*8+5] = temp2R; 8038 pOutputSamples[i*8+6] = temp3L; 8039 pOutputSamples[i*8+7] = temp3R; 8040 } 8041 } 8042 8043 for (i = (frameCount4 << 2); i < frameCount; ++i) { 8044 drflac_int32 mid = pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 8045 drflac_int32 side = pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 8046 8047 mid = (((drflac_uint32)mid) << 1) | (side & 0x01); 8048 8049 pOutputSamples[i*2+0] = ((mid + side) >> 1) << unusedBitsPerSample; 8050 pOutputSamples[i*2+1] = ((mid - side) >> 1) << unusedBitsPerSample; 8051 } 8052 } 8053 8054 #if defined(DRFLAC_SUPPORT_SSE2) 8055 static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) 8056 { 8057 drflac_uint64 i; 8058 drflac_uint64 frameCount4; 8059 int shift; 8060 8061 DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); 8062 8063 frameCount4 = frameCount >> 2; 8064 8065 shift = unusedBitsPerSample; 8066 if (shift == 0) { 8067 for (i = 0; i < frameCount4; ++i) { 8068 __m128i mid; 8069 __m128i side; 8070 __m128i left; 8071 __m128i right; 8072 8073 mid = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); 8074 side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); 8075 8076 mid = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01))); 8077 8078 left = 
_mm_srai_epi32(_mm_add_epi32(mid, side), 1); 8079 right = _mm_srai_epi32(_mm_sub_epi32(mid, side), 1); 8080 8081 _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right)); 8082 _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right)); 8083 } 8084 8085 for (i = (frameCount4 << 2); i < frameCount; ++i) { 8086 drflac_int32 mid = pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 8087 drflac_int32 side = pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 8088 8089 mid = (((drflac_uint32)mid) << 1) | (side & 0x01); 8090 8091 pOutputSamples[i*2+0] = ((mid + side) >> 1); 8092 pOutputSamples[i*2+1] = ((mid - side) >> 1); 8093 } 8094 } else { 8095 shift -= 1; 8096 for (i = 0; i < frameCount4; ++i) { 8097 __m128i mid; 8098 __m128i side; 8099 __m128i left; 8100 __m128i right; 8101 8102 mid = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); 8103 side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); 8104 8105 mid = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01))); 8106 8107 left = _mm_slli_epi32(_mm_add_epi32(mid, side), shift); 8108 right = _mm_slli_epi32(_mm_sub_epi32(mid, side), shift); 8109 8110 _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right)); 8111 _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right)); 8112 } 8113 8114 for (i = (frameCount4 << 2); i < frameCount; ++i) { 8115 drflac_int32 mid = pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 8116 drflac_int32 side = pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 8117 8118 mid = (((drflac_uint32)mid) << 1) | (side & 0x01); 8119 8120 pOutputSamples[i*2+0] = ((mid + side) << shift); 8121 
pOutputSamples[i*2+1] = ((mid - side) << shift); 8122 } 8123 } 8124 } 8125 #endif 8126 8127 #if defined(DRFLAC_SUPPORT_NEON) 8128 static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) 8129 { 8130 drflac_uint64 i; 8131 drflac_uint64 frameCount4; 8132 int shift; 8133 int32x4_t wbpsShift0_4; /* wbps = Wasted Bits Per Sample */ 8134 int32x4_t wbpsShift1_4; /* wbps = Wasted Bits Per Sample */ 8135 int32x4_t one4; 8136 8137 DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); 8138 8139 frameCount4 = frameCount >> 2; 8140 8141 wbpsShift0_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); 8142 wbpsShift1_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); 8143 one4 = vdupq_n_s32(1); 8144 8145 shift = unusedBitsPerSample; 8146 if (shift == 0) { 8147 for (i = 0; i < frameCount4; ++i) { 8148 int32x4_t mid; 8149 int32x4_t side; 8150 int32x4_t left; 8151 int32x4_t right; 8152 8153 mid = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), wbpsShift0_4); 8154 side = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), wbpsShift1_4); 8155 8156 mid = vorrq_s32(vshlq_n_s32(mid, 1), vandq_s32(side, one4)); 8157 8158 left = vshrq_n_s32(vaddq_s32(mid, side), 1); 8159 right = vshrq_n_s32(vsubq_s32(mid, side), 1); 8160 8161 drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right)); 8162 } 8163 8164 for (i = (frameCount4 << 2); i < frameCount; ++i) { 8165 drflac_int32 mid = pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 8166 drflac_int32 side = pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 8167 8168 mid = (((drflac_uint32)mid) << 1) | (side & 0x01); 8169 8170 pOutputSamples[i*2+0] = ((mid + side) >> 1); 8171 pOutputSamples[i*2+1] = ((mid - side) >> 1); 8172 } 8173 } else { 8174 int32x4_t shift4; 8175 8176 
shift -= 1; 8177 shift4 = vdupq_n_s32(shift); 8178 8179 for (i = 0; i < frameCount4; ++i) { 8180 int32x4_t mid; 8181 int32x4_t side; 8182 int32x4_t left; 8183 int32x4_t right; 8184 8185 mid = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), wbpsShift0_4); 8186 side = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), wbpsShift1_4); 8187 8188 mid = vorrq_s32(vshlq_n_s32(mid, 1), vandq_s32(side, one4)); 8189 8190 left = vshlq_s32(vaddq_s32(mid, side), shift4); 8191 right = vshlq_s32(vsubq_s32(mid, side), shift4); 8192 8193 drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right)); 8194 } 8195 8196 for (i = (frameCount4 << 2); i < frameCount; ++i) { 8197 drflac_int32 mid = pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 8198 drflac_int32 side = pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 8199 8200 mid = (((drflac_uint32)mid) << 1) | (side & 0x01); 8201 8202 pOutputSamples[i*2+0] = ((mid + side) << shift); 8203 pOutputSamples[i*2+1] = ((mid - side) << shift); 8204 } 8205 } 8206 } 8207 #endif 8208 8209 static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) 8210 { 8211 #if defined(DRFLAC_SUPPORT_SSE2) 8212 if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { 8213 drflac_read_pcm_frames_s32__decode_mid_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 8214 } else 8215 #elif defined(DRFLAC_SUPPORT_NEON) 8216 if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { 8217 drflac_read_pcm_frames_s32__decode_mid_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 8218 } else 8219 #endif 8220 { 8221 /* Scalar fallback. 
*/ 8222 #if 0 8223 drflac_read_pcm_frames_s32__decode_mid_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 8224 #else 8225 drflac_read_pcm_frames_s32__decode_mid_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 8226 #endif 8227 } 8228 } 8229 8230 8231 #if 0 8232 static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) 8233 { 8234 for (drflac_uint64 i = 0; i < frameCount; ++i) { 8235 pOutputSamples[i*2+0] = (pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample)); 8236 pOutputSamples[i*2+1] = (pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample)); 8237 } 8238 } 8239 #endif 8240 8241 static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) 8242 { 8243 drflac_uint64 i; 8244 drflac_uint64 frameCount4 = frameCount >> 2; 8245 8246 drflac_int32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); 8247 drflac_int32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); 8248 8249 for (i = 0; i < frameCount4; ++i) { 8250 drflac_int32 tempL0 = pInputSamples0[i*4+0] << shift0; 8251 drflac_int32 tempL1 = pInputSamples0[i*4+1] << shift0; 8252 drflac_int32 tempL2 = pInputSamples0[i*4+2] << shift0; 8253 drflac_int32 tempL3 = pInputSamples0[i*4+3] << shift0; 8254 8255 drflac_int32 tempR0 = pInputSamples1[i*4+0] << shift1; 8256 drflac_int32 tempR1 = pInputSamples1[i*4+1] << shift1; 8257 
drflac_int32 tempR2 = pInputSamples1[i*4+2] << shift1; 8258 drflac_int32 tempR3 = pInputSamples1[i*4+3] << shift1; 8259 8260 pOutputSamples[i*8+0] = tempL0; 8261 pOutputSamples[i*8+1] = tempR0; 8262 pOutputSamples[i*8+2] = tempL1; 8263 pOutputSamples[i*8+3] = tempR1; 8264 pOutputSamples[i*8+4] = tempL2; 8265 pOutputSamples[i*8+5] = tempR2; 8266 pOutputSamples[i*8+6] = tempL3; 8267 pOutputSamples[i*8+7] = tempR3; 8268 } 8269 8270 for (i = (frameCount4 << 2); i < frameCount; ++i) { 8271 pOutputSamples[i*2+0] = (pInputSamples0[i] << shift0); 8272 pOutputSamples[i*2+1] = (pInputSamples1[i] << shift1); 8273 } 8274 } 8275 8276 #if defined(DRFLAC_SUPPORT_SSE2) 8277 static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) 8278 { 8279 drflac_uint64 i; 8280 drflac_uint64 frameCount4 = frameCount >> 2; 8281 8282 int shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); 8283 int shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); 8284 8285 for (i = 0; i < frameCount4; ++i) { 8286 __m128i left = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0); 8287 __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1); 8288 8289 _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right)); 8290 _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right)); 8291 } 8292 8293 for (i = (frameCount4 << 2); i < frameCount; ++i) { 8294 pOutputSamples[i*2+0] = (pInputSamples0[i] << shift0); 8295 pOutputSamples[i*2+1] = (pInputSamples1[i] << shift1); 8296 } 8297 } 8298 #endif 8299 8300 #if defined(DRFLAC_SUPPORT_NEON) 8301 static DRFLAC_INLINE void 
drflac_read_pcm_frames_s32__decode_independent_stereo__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) 8302 { 8303 drflac_uint64 i; 8304 drflac_uint64 frameCount4 = frameCount >> 2; 8305 8306 int shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); 8307 int shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); 8308 8309 int32x4_t shift4_0 = vdupq_n_s32(shift0); 8310 int32x4_t shift4_1 = vdupq_n_s32(shift1); 8311 8312 for (i = 0; i < frameCount4; ++i) { 8313 int32x4_t left; 8314 int32x4_t right; 8315 8316 left = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), shift4_0); 8317 right = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), shift4_1); 8318 8319 drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right)); 8320 } 8321 8322 for (i = (frameCount4 << 2); i < frameCount; ++i) { 8323 pOutputSamples[i*2+0] = (pInputSamples0[i] << shift0); 8324 pOutputSamples[i*2+1] = (pInputSamples1[i] << shift1); 8325 } 8326 } 8327 #endif 8328 8329 static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) 8330 { 8331 #if defined(DRFLAC_SUPPORT_SSE2) 8332 if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { 8333 drflac_read_pcm_frames_s32__decode_independent_stereo__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 8334 } else 8335 #elif defined(DRFLAC_SUPPORT_NEON) 8336 if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { 8337 drflac_read_pcm_frames_s32__decode_independent_stereo__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 8338 } else 8339 #endif 8340 { 8341 /* Scalar fallback. 
*/ 8342 #if 0 8343 drflac_read_pcm_frames_s32__decode_independent_stereo__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 8344 #else 8345 drflac_read_pcm_frames_s32__decode_independent_stereo__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 8346 #endif 8347 } 8348 } 8349 8350 8351 drflac_uint64 drflac_read_pcm_frames_s32(drflac* pFlac, drflac_uint64 framesToRead, drflac_int32* pBufferOut) 8352 { 8353 drflac_uint64 framesRead; 8354 drflac_int32 unusedBitsPerSample; 8355 8356 if (pFlac == NULL || framesToRead == 0) { 8357 return 0; 8358 } 8359 8360 if (pBufferOut == NULL) { 8361 return drflac__seek_forward_by_pcm_frames(pFlac, framesToRead); 8362 } 8363 8364 unusedBitsPerSample = 32 - pFlac->bitsPerSample; 8365 8366 framesRead = 0; 8367 while (framesToRead > 0) { 8368 /* If we've run out of samples in this frame, go to the next. */ 8369 if (pFlac->currentFLACFrame.pcmFramesRemaining == 0) { 8370 if (!drflac__read_and_decode_next_flac_frame(pFlac)) { 8371 break; /* Couldn't read the next frame, so just break from the loop and return. 
*/ 8372 } 8373 } else { 8374 unsigned int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment); 8375 drflac_uint64 iFirstPCMFrame = pFlac->currentFLACFrame.header.blockSizeInPCMFrames - pFlac->currentFLACFrame.pcmFramesRemaining; 8376 drflac_uint64 frameCountThisIteration = framesToRead; 8377 8378 if (frameCountThisIteration > pFlac->currentFLACFrame.pcmFramesRemaining) { 8379 frameCountThisIteration = pFlac->currentFLACFrame.pcmFramesRemaining; 8380 } 8381 8382 if (channelCount == 2) { 8383 const drflac_int32* pDecodedSamples0 = pFlac->currentFLACFrame.subframes[0].pSamplesS32 + iFirstPCMFrame; 8384 const drflac_int32* pDecodedSamples1 = pFlac->currentFLACFrame.subframes[1].pSamplesS32 + iFirstPCMFrame; 8385 8386 switch (pFlac->currentFLACFrame.header.channelAssignment) 8387 { 8388 case DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE: 8389 { 8390 drflac_read_pcm_frames_s32__decode_left_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); 8391 } break; 8392 8393 case DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE: 8394 { 8395 drflac_read_pcm_frames_s32__decode_right_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); 8396 } break; 8397 8398 case DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE: 8399 { 8400 drflac_read_pcm_frames_s32__decode_mid_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); 8401 } break; 8402 8403 case DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT: 8404 default: 8405 { 8406 drflac_read_pcm_frames_s32__decode_independent_stereo(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); 8407 } break; 8408 } 8409 } else { 8410 /* Generic interleaving. 
*/ 8411 drflac_uint64 i; 8412 for (i = 0; i < frameCountThisIteration; ++i) { 8413 unsigned int j; 8414 for (j = 0; j < channelCount; ++j) { 8415 pBufferOut[(i*channelCount)+j] = (drflac_int32)((drflac_uint32)(pFlac->currentFLACFrame.subframes[j].pSamplesS32[iFirstPCMFrame + i]) << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[j].wastedBitsPerSample)); 8416 } 8417 } 8418 } 8419 8420 framesRead += frameCountThisIteration; 8421 pBufferOut += frameCountThisIteration * channelCount; 8422 framesToRead -= frameCountThisIteration; 8423 pFlac->currentPCMFrame += frameCountThisIteration; 8424 pFlac->currentFLACFrame.pcmFramesRemaining -= (drflac_uint32)frameCountThisIteration; 8425 } 8426 } 8427 8428 return framesRead; 8429 } 8430 8431 8432 #if 0 8433 static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) 8434 { 8435 drflac_uint64 i; 8436 for (i = 0; i < frameCount; ++i) { 8437 drflac_int32 left = pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); 8438 drflac_int32 side = pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); 8439 drflac_int32 right = left - side; 8440 8441 left >>= 16; 8442 right >>= 16; 8443 8444 pOutputSamples[i*2+0] = (drflac_int16)left; 8445 pOutputSamples[i*2+1] = (drflac_int16)right; 8446 } 8447 } 8448 #endif 8449 8450 static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) 8451 { 8452 drflac_uint64 i; 8453 drflac_uint64 frameCount4 = frameCount >> 2; 8454 8455 drflac_int32 shift0 = unusedBitsPerSample + 
pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 8456 drflac_int32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 8457 for (i = 0; i < frameCount4; ++i) { 8458 drflac_int32 left0 = pInputSamples0[i*4+0] << shift0; 8459 drflac_int32 left1 = pInputSamples0[i*4+1] << shift0; 8460 drflac_int32 left2 = pInputSamples0[i*4+2] << shift0; 8461 drflac_int32 left3 = pInputSamples0[i*4+3] << shift0; 8462 8463 drflac_int32 side0 = pInputSamples1[i*4+0] << shift1; 8464 drflac_int32 side1 = pInputSamples1[i*4+1] << shift1; 8465 drflac_int32 side2 = pInputSamples1[i*4+2] << shift1; 8466 drflac_int32 side3 = pInputSamples1[i*4+3] << shift1; 8467 8468 drflac_int32 right0 = left0 - side0; 8469 drflac_int32 right1 = left1 - side1; 8470 drflac_int32 right2 = left2 - side2; 8471 drflac_int32 right3 = left3 - side3; 8472 8473 left0 >>= 16; 8474 left1 >>= 16; 8475 left2 >>= 16; 8476 left3 >>= 16; 8477 8478 right0 >>= 16; 8479 right1 >>= 16; 8480 right2 >>= 16; 8481 right3 >>= 16; 8482 8483 pOutputSamples[i*8+0] = (drflac_int16)left0; 8484 pOutputSamples[i*8+1] = (drflac_int16)right0; 8485 pOutputSamples[i*8+2] = (drflac_int16)left1; 8486 pOutputSamples[i*8+3] = (drflac_int16)right1; 8487 pOutputSamples[i*8+4] = (drflac_int16)left2; 8488 pOutputSamples[i*8+5] = (drflac_int16)right2; 8489 pOutputSamples[i*8+6] = (drflac_int16)left3; 8490 pOutputSamples[i*8+7] = (drflac_int16)right3; 8491 } 8492 8493 for (i = (frameCount4 << 2); i < frameCount; ++i) { 8494 drflac_int32 left = pInputSamples0[i] << shift0; 8495 drflac_int32 side = pInputSamples1[i] << shift1; 8496 drflac_int32 right = left - side; 8497 8498 left >>= 16; 8499 right >>= 16; 8500 8501 pOutputSamples[i*2+0] = (drflac_int16)left; 8502 pOutputSamples[i*2+1] = (drflac_int16)right; 8503 } 8504 } 8505 8506 #if defined(DRFLAC_SUPPORT_SSE2) 8507 static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 
unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) 8508 { 8509 drflac_uint64 frameCount4; 8510 drflac_int32 shift0; 8511 drflac_int32 shift1; 8512 drflac_uint64 i; 8513 8514 DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); 8515 8516 frameCount4 = frameCount >> 2; 8517 8518 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); 8519 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); 8520 8521 for (i = 0; i < frameCount4; ++i) { 8522 __m128i left = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0); 8523 __m128i side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1); 8524 __m128i right = _mm_sub_epi32(left, side); 8525 8526 left = _mm_srai_epi32(left, 16); 8527 right = _mm_srai_epi32(right, 16); 8528 8529 _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right)); 8530 } 8531 8532 for (i = (frameCount4 << 2); i < frameCount; ++i) { 8533 drflac_int32 left = pInputSamples0[i] << shift0; 8534 drflac_int32 side = pInputSamples1[i] << shift1; 8535 drflac_int32 right = left - side; 8536 8537 left >>= 16; 8538 right >>= 16; 8539 8540 pOutputSamples[i*2+0] = (drflac_int16)left; 8541 pOutputSamples[i*2+1] = (drflac_int16)right; 8542 } 8543 } 8544 #endif 8545 8546 #if defined(DRFLAC_SUPPORT_NEON) 8547 static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) 8548 { 8549 drflac_uint64 frameCount4; 8550 drflac_int32 shift0; 8551 drflac_int32 shift1; 8552 drflac_uint64 i; 8553 int32x4_t shift0_4; 8554 int32x4_t shift1_4; 8555 8556 DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); 8557 8558 frameCount4 = frameCount >> 2; 8559 8560 shift0 = (unusedBitsPerSample + 
pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); 8561 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); 8562 8563 shift0_4 = vdupq_n_s32(shift0); 8564 shift1_4 = vdupq_n_s32(shift1); 8565 8566 for (i = 0; i < frameCount4; ++i) { 8567 int32x4_t left; 8568 int32x4_t side; 8569 int32x4_t right; 8570 8571 left = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), shift0_4); 8572 side = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), shift1_4); 8573 right = vsubq_s32(left, side); 8574 8575 left = vshrq_n_s32(left, 16); 8576 right = vshrq_n_s32(right, 16); 8577 8578 drflac__vst2q_s16(pOutputSamples + i*8, vzip_s16(vmovn_s32(left), vmovn_s32(right))); 8579 } 8580 8581 for (i = (frameCount4 << 2); i < frameCount; ++i) { 8582 drflac_int32 left = pInputSamples0[i] << shift0; 8583 drflac_int32 side = pInputSamples1[i] << shift1; 8584 drflac_int32 right = left - side; 8585 8586 left >>= 16; 8587 right >>= 16; 8588 8589 pOutputSamples[i*2+0] = (drflac_int16)left; 8590 pOutputSamples[i*2+1] = (drflac_int16)right; 8591 } 8592 } 8593 #endif 8594 8595 static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) 8596 { 8597 #if defined(DRFLAC_SUPPORT_SSE2) 8598 if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { 8599 drflac_read_pcm_frames_s16__decode_left_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 8600 } else 8601 #elif defined(DRFLAC_SUPPORT_NEON) 8602 if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { 8603 drflac_read_pcm_frames_s16__decode_left_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 8604 } else 8605 #endif 8606 { 8607 /* Scalar fallback. 
*/ 8608 #if 0 8609 drflac_read_pcm_frames_s16__decode_left_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 8610 #else 8611 drflac_read_pcm_frames_s16__decode_left_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 8612 #endif 8613 } 8614 } 8615 8616 8617 #if 0 8618 static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) 8619 { 8620 drflac_uint64 i; 8621 for (i = 0; i < frameCount; ++i) { 8622 drflac_int32 side = pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); 8623 drflac_int32 right = pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); 8624 drflac_int32 left = right + side; 8625 8626 left >>= 16; 8627 right >>= 16; 8628 8629 pOutputSamples[i*2+0] = (drflac_int16)left; 8630 pOutputSamples[i*2+1] = (drflac_int16)right; 8631 } 8632 } 8633 #endif 8634 8635 static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) 8636 { 8637 drflac_uint64 i; 8638 drflac_uint64 frameCount4 = frameCount >> 2; 8639 8640 drflac_int32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 8641 drflac_int32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 8642 for (i = 0; i < frameCount4; ++i) { 8643 drflac_int32 side0 = pInputSamples0[i*4+0] << shift0; 8644 drflac_int32 side1 = pInputSamples0[i*4+1] << shift0; 8645 drflac_int32 side2 = pInputSamples0[i*4+2] << shift0; 8646 drflac_int32 side3 = 
pInputSamples0[i*4+3] << shift0; 8647 8648 drflac_int32 right0 = pInputSamples1[i*4+0] << shift1; 8649 drflac_int32 right1 = pInputSamples1[i*4+1] << shift1; 8650 drflac_int32 right2 = pInputSamples1[i*4+2] << shift1; 8651 drflac_int32 right3 = pInputSamples1[i*4+3] << shift1; 8652 8653 drflac_int32 left0 = right0 + side0; 8654 drflac_int32 left1 = right1 + side1; 8655 drflac_int32 left2 = right2 + side2; 8656 drflac_int32 left3 = right3 + side3; 8657 8658 left0 >>= 16; 8659 left1 >>= 16; 8660 left2 >>= 16; 8661 left3 >>= 16; 8662 8663 right0 >>= 16; 8664 right1 >>= 16; 8665 right2 >>= 16; 8666 right3 >>= 16; 8667 8668 pOutputSamples[i*8+0] = (drflac_int16)left0; 8669 pOutputSamples[i*8+1] = (drflac_int16)right0; 8670 pOutputSamples[i*8+2] = (drflac_int16)left1; 8671 pOutputSamples[i*8+3] = (drflac_int16)right1; 8672 pOutputSamples[i*8+4] = (drflac_int16)left2; 8673 pOutputSamples[i*8+5] = (drflac_int16)right2; 8674 pOutputSamples[i*8+6] = (drflac_int16)left3; 8675 pOutputSamples[i*8+7] = (drflac_int16)right3; 8676 } 8677 8678 for (i = (frameCount4 << 2); i < frameCount; ++i) { 8679 drflac_int32 side = pInputSamples0[i] << shift0; 8680 drflac_int32 right = pInputSamples1[i] << shift1; 8681 drflac_int32 left = right + side; 8682 8683 left >>= 16; 8684 right >>= 16; 8685 8686 pOutputSamples[i*2+0] = (drflac_int16)left; 8687 pOutputSamples[i*2+1] = (drflac_int16)right; 8688 } 8689 } 8690 8691 #if defined(DRFLAC_SUPPORT_SSE2) 8692 static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) 8693 { 8694 drflac_uint64 frameCount4; 8695 drflac_int32 shift0; 8696 drflac_int32 shift1; 8697 drflac_uint64 i; 8698 8699 DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); 8700 8701 frameCount4 = frameCount >> 2; 8702 8703 shift0 = (unusedBitsPerSample + 
pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); 8704 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); 8705 8706 for (i = 0; i < frameCount4; ++i) { 8707 __m128i side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0); 8708 __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1); 8709 __m128i left = _mm_add_epi32(right, side); 8710 8711 left = _mm_srai_epi32(left, 16); 8712 right = _mm_srai_epi32(right, 16); 8713 8714 _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right)); 8715 } 8716 8717 for (i = (frameCount4 << 2); i < frameCount; ++i) { 8718 drflac_int32 side = pInputSamples0[i] << shift0; 8719 drflac_int32 right = pInputSamples1[i] << shift1; 8720 drflac_int32 left = right + side; 8721 8722 left >>= 16; 8723 right >>= 16; 8724 8725 pOutputSamples[i*2+0] = (drflac_int16)left; 8726 pOutputSamples[i*2+1] = (drflac_int16)right; 8727 } 8728 } 8729 #endif 8730 8731 #if defined(DRFLAC_SUPPORT_NEON) 8732 static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) 8733 { 8734 drflac_uint64 frameCount4; 8735 drflac_int32 shift0; 8736 drflac_int32 shift1; 8737 drflac_uint64 i; 8738 int32x4_t shift0_4; 8739 int32x4_t shift1_4; 8740 8741 DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); 8742 8743 frameCount4 = frameCount >> 2; 8744 8745 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); 8746 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); 8747 8748 shift0_4 = vdupq_n_s32(shift0); 8749 shift1_4 = vdupq_n_s32(shift1); 8750 8751 for (i = 0; i < frameCount4; ++i) { 8752 int32x4_t side; 8753 int32x4_t right; 8754 int32x4_t left; 8755 8756 side = 
vshlq_s32(vld1q_s32(pInputSamples0 + i*4), shift0_4); 8757 right = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), shift1_4); 8758 left = vaddq_s32(right, side); 8759 8760 left = vshrq_n_s32(left, 16); 8761 right = vshrq_n_s32(right, 16); 8762 8763 drflac__vst2q_s16(pOutputSamples + i*8, vzip_s16(vmovn_s32(left), vmovn_s32(right))); 8764 } 8765 8766 for (i = (frameCount4 << 2); i < frameCount; ++i) { 8767 drflac_int32 side = pInputSamples0[i] << shift0; 8768 drflac_int32 right = pInputSamples1[i] << shift1; 8769 drflac_int32 left = right + side; 8770 8771 left >>= 16; 8772 right >>= 16; 8773 8774 pOutputSamples[i*2+0] = (drflac_int16)left; 8775 pOutputSamples[i*2+1] = (drflac_int16)right; 8776 } 8777 } 8778 #endif 8779 8780 static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) 8781 { 8782 #if defined(DRFLAC_SUPPORT_SSE2) 8783 if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { 8784 drflac_read_pcm_frames_s16__decode_right_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 8785 } else 8786 #elif defined(DRFLAC_SUPPORT_NEON) 8787 if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { 8788 drflac_read_pcm_frames_s16__decode_right_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 8789 } else 8790 #endif 8791 { 8792 /* Scalar fallback. 
*/ 8793 #if 0 8794 drflac_read_pcm_frames_s16__decode_right_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 8795 #else 8796 drflac_read_pcm_frames_s16__decode_right_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 8797 #endif 8798 } 8799 } 8800 8801 8802 #if 0 8803 static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) 8804 { 8805 for (drflac_uint64 i = 0; i < frameCount; ++i) { 8806 drflac_int32 mid = pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 8807 drflac_int32 side = pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 8808 8809 mid = (((drflac_uint32)mid) << 1) | (side & 0x01); 8810 8811 pOutputSamples[i*2+0] = (drflac_int16)((((mid + side) >> 1) << unusedBitsPerSample) >> 16); 8812 pOutputSamples[i*2+1] = (drflac_int16)((((mid - side) >> 1) << unusedBitsPerSample) >> 16); 8813 } 8814 } 8815 #endif 8816 8817 static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) 8818 { 8819 drflac_uint64 i; 8820 drflac_uint64 frameCount4 = frameCount >> 2; 8821 8822 int shift = unusedBitsPerSample; 8823 if (shift > 0) { 8824 shift -= 1; 8825 for (i = 0; i < frameCount4; ++i) { 8826 drflac_int32 temp0L; 8827 drflac_int32 temp1L; 8828 drflac_int32 temp2L; 8829 drflac_int32 temp3L; 8830 drflac_int32 temp0R; 8831 drflac_int32 temp1R; 8832 drflac_int32 temp2R; 8833 drflac_int32 temp3R; 8834 8835 drflac_int32 mid0 = pInputSamples0[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 8836 drflac_int32 mid1 
= pInputSamples0[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 8837 drflac_int32 mid2 = pInputSamples0[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 8838 drflac_int32 mid3 = pInputSamples0[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 8839 8840 drflac_int32 side0 = pInputSamples1[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 8841 drflac_int32 side1 = pInputSamples1[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 8842 drflac_int32 side2 = pInputSamples1[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 8843 drflac_int32 side3 = pInputSamples1[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 8844 8845 mid0 = (((drflac_uint32)mid0) << 1) | (side0 & 0x01); 8846 mid1 = (((drflac_uint32)mid1) << 1) | (side1 & 0x01); 8847 mid2 = (((drflac_uint32)mid2) << 1) | (side2 & 0x01); 8848 mid3 = (((drflac_uint32)mid3) << 1) | (side3 & 0x01); 8849 8850 temp0L = ((mid0 + side0) << shift); 8851 temp1L = ((mid1 + side1) << shift); 8852 temp2L = ((mid2 + side2) << shift); 8853 temp3L = ((mid3 + side3) << shift); 8854 8855 temp0R = ((mid0 - side0) << shift); 8856 temp1R = ((mid1 - side1) << shift); 8857 temp2R = ((mid2 - side2) << shift); 8858 temp3R = ((mid3 - side3) << shift); 8859 8860 temp0L >>= 16; 8861 temp1L >>= 16; 8862 temp2L >>= 16; 8863 temp3L >>= 16; 8864 8865 temp0R >>= 16; 8866 temp1R >>= 16; 8867 temp2R >>= 16; 8868 temp3R >>= 16; 8869 8870 pOutputSamples[i*8+0] = (drflac_int16)temp0L; 8871 pOutputSamples[i*8+1] = (drflac_int16)temp0R; 8872 pOutputSamples[i*8+2] = (drflac_int16)temp1L; 8873 pOutputSamples[i*8+3] = (drflac_int16)temp1R; 8874 pOutputSamples[i*8+4] = (drflac_int16)temp2L; 8875 pOutputSamples[i*8+5] = (drflac_int16)temp2R; 8876 pOutputSamples[i*8+6] = (drflac_int16)temp3L; 8877 pOutputSamples[i*8+7] = (drflac_int16)temp3R; 8878 } 8879 } else { 8880 for (i = 0; i < frameCount4; ++i) { 8881 drflac_int32 temp0L; 8882 
drflac_int32 temp1L; 8883 drflac_int32 temp2L; 8884 drflac_int32 temp3L; 8885 drflac_int32 temp0R; 8886 drflac_int32 temp1R; 8887 drflac_int32 temp2R; 8888 drflac_int32 temp3R; 8889 8890 drflac_int32 mid0 = pInputSamples0[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 8891 drflac_int32 mid1 = pInputSamples0[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 8892 drflac_int32 mid2 = pInputSamples0[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 8893 drflac_int32 mid3 = pInputSamples0[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 8894 8895 drflac_int32 side0 = pInputSamples1[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 8896 drflac_int32 side1 = pInputSamples1[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 8897 drflac_int32 side2 = pInputSamples1[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 8898 drflac_int32 side3 = pInputSamples1[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 8899 8900 mid0 = (((drflac_uint32)mid0) << 1) | (side0 & 0x01); 8901 mid1 = (((drflac_uint32)mid1) << 1) | (side1 & 0x01); 8902 mid2 = (((drflac_uint32)mid2) << 1) | (side2 & 0x01); 8903 mid3 = (((drflac_uint32)mid3) << 1) | (side3 & 0x01); 8904 8905 temp0L = ((mid0 + side0) >> 1); 8906 temp1L = ((mid1 + side1) >> 1); 8907 temp2L = ((mid2 + side2) >> 1); 8908 temp3L = ((mid3 + side3) >> 1); 8909 8910 temp0R = ((mid0 - side0) >> 1); 8911 temp1R = ((mid1 - side1) >> 1); 8912 temp2R = ((mid2 - side2) >> 1); 8913 temp3R = ((mid3 - side3) >> 1); 8914 8915 temp0L >>= 16; 8916 temp1L >>= 16; 8917 temp2L >>= 16; 8918 temp3L >>= 16; 8919 8920 temp0R >>= 16; 8921 temp1R >>= 16; 8922 temp2R >>= 16; 8923 temp3R >>= 16; 8924 8925 pOutputSamples[i*8+0] = (drflac_int16)temp0L; 8926 pOutputSamples[i*8+1] = (drflac_int16)temp0R; 8927 pOutputSamples[i*8+2] = (drflac_int16)temp1L; 8928 pOutputSamples[i*8+3] = (drflac_int16)temp1R; 8929 
pOutputSamples[i*8+4] = (drflac_int16)temp2L; 8930 pOutputSamples[i*8+5] = (drflac_int16)temp2R; 8931 pOutputSamples[i*8+6] = (drflac_int16)temp3L; 8932 pOutputSamples[i*8+7] = (drflac_int16)temp3R; 8933 } 8934 } 8935 8936 for (i = (frameCount4 << 2); i < frameCount; ++i) { 8937 drflac_int32 mid = pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 8938 drflac_int32 side = pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 8939 8940 mid = (((drflac_uint32)mid) << 1) | (side & 0x01); 8941 8942 pOutputSamples[i*2+0] = (drflac_int16)((((mid + side) >> 1) << unusedBitsPerSample) >> 16); 8943 pOutputSamples[i*2+1] = (drflac_int16)((((mid - side) >> 1) << unusedBitsPerSample) >> 16); 8944 } 8945 } 8946 8947 #if defined(DRFLAC_SUPPORT_SSE2) 8948 static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) 8949 { 8950 drflac_uint64 i; 8951 drflac_uint64 frameCount4; 8952 drflac_int32 shift; 8953 8954 DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); 8955 8956 frameCount4 = frameCount >> 2; 8957 8958 shift = unusedBitsPerSample; 8959 if (shift == 0) { 8960 for (i = 0; i < frameCount4; ++i) { 8961 __m128i mid; 8962 __m128i side; 8963 __m128i left; 8964 __m128i right; 8965 8966 mid = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); 8967 side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); 8968 8969 mid = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01))); 8970 8971 left = _mm_srai_epi32(_mm_add_epi32(mid, side), 1); 8972 right = _mm_srai_epi32(_mm_sub_epi32(mid, side), 1); 8973 8974 left = _mm_srai_epi32(left, 16); 8975 right = _mm_srai_epi32(right, 16); 
8976 8977 _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right)); 8978 } 8979 8980 for (i = (frameCount4 << 2); i < frameCount; ++i) { 8981 drflac_int32 mid = pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 8982 drflac_int32 side = pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 8983 8984 mid = (((drflac_uint32)mid) << 1) | (side & 0x01); 8985 8986 pOutputSamples[i*2+0] = (drflac_int16)(((mid + side) >> 1) >> 16); 8987 pOutputSamples[i*2+1] = (drflac_int16)(((mid - side) >> 1) >> 16); 8988 } 8989 } else { 8990 shift -= 1; 8991 for (i = 0; i < frameCount4; ++i) { 8992 __m128i mid; 8993 __m128i side; 8994 __m128i left; 8995 __m128i right; 8996 8997 mid = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); 8998 side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); 8999 9000 mid = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01))); 9001 9002 left = _mm_slli_epi32(_mm_add_epi32(mid, side), shift); 9003 right = _mm_slli_epi32(_mm_sub_epi32(mid, side), shift); 9004 9005 left = _mm_srai_epi32(left, 16); 9006 right = _mm_srai_epi32(right, 16); 9007 9008 _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right)); 9009 } 9010 9011 for (i = (frameCount4 << 2); i < frameCount; ++i) { 9012 drflac_int32 mid = pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 9013 drflac_int32 side = pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 9014 9015 mid = (((drflac_uint32)mid) << 1) | (side & 0x01); 9016 9017 pOutputSamples[i*2+0] = (drflac_int16)(((mid + side) << shift) >> 16); 9018 pOutputSamples[i*2+1] = (drflac_int16)(((mid - side) << shift) >> 16); 9019 } 9020 } 9021 } 9022 #endif 9023 9024 #if 
defined(DRFLAC_SUPPORT_NEON) 9025 static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) 9026 { 9027 drflac_uint64 i; 9028 drflac_uint64 frameCount4; 9029 int shift; 9030 int32x4_t wbpsShift0_4; /* wbps = Wasted Bits Per Sample */ 9031 int32x4_t wbpsShift1_4; /* wbps = Wasted Bits Per Sample */ 9032 9033 DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); 9034 9035 frameCount4 = frameCount >> 2; 9036 9037 wbpsShift0_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); 9038 wbpsShift1_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); 9039 9040 shift = unusedBitsPerSample; 9041 if (shift == 0) { 9042 for (i = 0; i < frameCount4; ++i) { 9043 int32x4_t mid; 9044 int32x4_t side; 9045 int32x4_t left; 9046 int32x4_t right; 9047 9048 mid = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), wbpsShift0_4); 9049 side = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), wbpsShift1_4); 9050 9051 mid = vorrq_s32(vshlq_n_s32(mid, 1), vandq_s32(side, vdupq_n_s32(1))); 9052 9053 left = vshrq_n_s32(vaddq_s32(mid, side), 1); 9054 right = vshrq_n_s32(vsubq_s32(mid, side), 1); 9055 9056 left = vshrq_n_s32(left, 16); 9057 right = vshrq_n_s32(right, 16); 9058 9059 drflac__vst2q_s16(pOutputSamples + i*8, vzip_s16(vmovn_s32(left), vmovn_s32(right))); 9060 } 9061 9062 for (i = (frameCount4 << 2); i < frameCount; ++i) { 9063 drflac_int32 mid = pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 9064 drflac_int32 side = pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 9065 9066 mid = (((drflac_uint32)mid) << 1) | (side & 0x01); 9067 9068 pOutputSamples[i*2+0] = (drflac_int16)(((mid + side) >> 1) >> 16); 9069 pOutputSamples[i*2+1] = (drflac_int16)(((mid - side) >> 1) >> 16); 9070 } 9071 } else { 9072 int32x4_t shift4; 
9073 9074 shift -= 1; 9075 shift4 = vdupq_n_s32(shift); 9076 9077 for (i = 0; i < frameCount4; ++i) { 9078 int32x4_t mid; 9079 int32x4_t side; 9080 int32x4_t left; 9081 int32x4_t right; 9082 9083 mid = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), wbpsShift0_4); 9084 side = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), wbpsShift1_4); 9085 9086 mid = vorrq_s32(vshlq_n_s32(mid, 1), vandq_s32(side, vdupq_n_s32(1))); 9087 9088 left = vshlq_s32(vaddq_s32(mid, side), shift4); 9089 right = vshlq_s32(vsubq_s32(mid, side), shift4); 9090 9091 left = vshrq_n_s32(left, 16); 9092 right = vshrq_n_s32(right, 16); 9093 9094 drflac__vst2q_s16(pOutputSamples + i*8, vzip_s16(vmovn_s32(left), vmovn_s32(right))); 9095 } 9096 9097 for (i = (frameCount4 << 2); i < frameCount; ++i) { 9098 drflac_int32 mid = pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 9099 drflac_int32 side = pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 9100 9101 mid = (((drflac_uint32)mid) << 1) | (side & 0x01); 9102 9103 pOutputSamples[i*2+0] = (drflac_int16)(((mid + side) << shift) >> 16); 9104 pOutputSamples[i*2+1] = (drflac_int16)(((mid - side) << shift) >> 16); 9105 } 9106 } 9107 } 9108 #endif 9109 9110 static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) 9111 { 9112 #if defined(DRFLAC_SUPPORT_SSE2) 9113 if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { 9114 drflac_read_pcm_frames_s16__decode_mid_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 9115 } else 9116 #elif defined(DRFLAC_SUPPORT_NEON) 9117 if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { 9118 drflac_read_pcm_frames_s16__decode_mid_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 
9119 } else 9120 #endif 9121 { 9122 /* Scalar fallback. */ 9123 #if 0 9124 drflac_read_pcm_frames_s16__decode_mid_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 9125 #else 9126 drflac_read_pcm_frames_s16__decode_mid_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 9127 #endif 9128 } 9129 } 9130 9131 9132 #if 0 9133 static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) 9134 { 9135 for (drflac_uint64 i = 0; i < frameCount; ++i) { 9136 pOutputSamples[i*2+0] = (drflac_int16)((pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample)) >> 16); 9137 pOutputSamples[i*2+1] = (drflac_int16)((pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample)) >> 16); 9138 } 9139 } 9140 #endif 9141 9142 static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) 9143 { 9144 drflac_uint64 i; 9145 drflac_uint64 frameCount4 = frameCount >> 2; 9146 9147 int shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); 9148 int shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); 9149 9150 for (i = 0; i < frameCount4; ++i) { 9151 drflac_int32 tempL0 = pInputSamples0[i*4+0] << shift0; 9152 drflac_int32 tempL1 = pInputSamples0[i*4+1] << shift0; 9153 drflac_int32 tempL2 = pInputSamples0[i*4+2] << shift0; 9154 drflac_int32 tempL3 = pInputSamples0[i*4+3] << shift0; 9155 9156 drflac_int32 tempR0 = pInputSamples1[i*4+0] << 
shift1; 9157 drflac_int32 tempR1 = pInputSamples1[i*4+1] << shift1; 9158 drflac_int32 tempR2 = pInputSamples1[i*4+2] << shift1; 9159 drflac_int32 tempR3 = pInputSamples1[i*4+3] << shift1; 9160 9161 tempL0 >>= 16; 9162 tempL1 >>= 16; 9163 tempL2 >>= 16; 9164 tempL3 >>= 16; 9165 9166 tempR0 >>= 16; 9167 tempR1 >>= 16; 9168 tempR2 >>= 16; 9169 tempR3 >>= 16; 9170 9171 pOutputSamples[i*8+0] = (drflac_int16)tempL0; 9172 pOutputSamples[i*8+1] = (drflac_int16)tempR0; 9173 pOutputSamples[i*8+2] = (drflac_int16)tempL1; 9174 pOutputSamples[i*8+3] = (drflac_int16)tempR1; 9175 pOutputSamples[i*8+4] = (drflac_int16)tempL2; 9176 pOutputSamples[i*8+5] = (drflac_int16)tempR2; 9177 pOutputSamples[i*8+6] = (drflac_int16)tempL3; 9178 pOutputSamples[i*8+7] = (drflac_int16)tempR3; 9179 } 9180 9181 for (i = (frameCount4 << 2); i < frameCount; ++i) { 9182 pOutputSamples[i*2+0] = (drflac_int16)((pInputSamples0[i] << shift0) >> 16); 9183 pOutputSamples[i*2+1] = (drflac_int16)((pInputSamples1[i] << shift1) >> 16); 9184 } 9185 } 9186 9187 #if defined(DRFLAC_SUPPORT_SSE2) 9188 static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) 9189 { 9190 drflac_uint64 i; 9191 drflac_uint64 frameCount4 = frameCount >> 2; 9192 9193 drflac_int32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); 9194 drflac_int32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); 9195 9196 for (i = 0; i < frameCount4; ++i) { 9197 __m128i left = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0); 9198 __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1); 9199 9200 left = _mm_srai_epi32(left, 16); 9201 right = _mm_srai_epi32(right, 16); 9202 9203 /* At this point we have results. 
We can now pack and interleave these into a single __m128i object and then store the in the output buffer. */ 9204 _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right)); 9205 } 9206 9207 for (i = (frameCount4 << 2); i < frameCount; ++i) { 9208 pOutputSamples[i*2+0] = (drflac_int16)((pInputSamples0[i] << shift0) >> 16); 9209 pOutputSamples[i*2+1] = (drflac_int16)((pInputSamples1[i] << shift1) >> 16); 9210 } 9211 } 9212 #endif 9213 9214 #if defined(DRFLAC_SUPPORT_NEON) 9215 static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) 9216 { 9217 drflac_uint64 i; 9218 drflac_uint64 frameCount4 = frameCount >> 2; 9219 9220 drflac_int32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); 9221 drflac_int32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); 9222 9223 int32x4_t shift0_4 = vdupq_n_s32(shift0); 9224 int32x4_t shift1_4 = vdupq_n_s32(shift1); 9225 9226 for (i = 0; i < frameCount4; ++i) { 9227 int32x4_t left; 9228 int32x4_t right; 9229 9230 left = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), shift0_4); 9231 right = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), shift1_4); 9232 9233 left = vshrq_n_s32(left, 16); 9234 right = vshrq_n_s32(right, 16); 9235 9236 drflac__vst2q_s16(pOutputSamples + i*8, vzip_s16(vmovn_s32(left), vmovn_s32(right))); 9237 } 9238 9239 for (i = (frameCount4 << 2); i < frameCount; ++i) { 9240 pOutputSamples[i*2+0] = (drflac_int16)((pInputSamples0[i] << shift0) >> 16); 9241 pOutputSamples[i*2+1] = (drflac_int16)((pInputSamples1[i] << shift1) >> 16); 9242 } 9243 } 9244 #endif 9245 9246 static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 
unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) 9247 { 9248 #if defined(DRFLAC_SUPPORT_SSE2) 9249 if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { 9250 drflac_read_pcm_frames_s16__decode_independent_stereo__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 9251 } else 9252 #elif defined(DRFLAC_SUPPORT_NEON) 9253 if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { 9254 drflac_read_pcm_frames_s16__decode_independent_stereo__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 9255 } else 9256 #endif 9257 { 9258 /* Scalar fallback. */ 9259 #if 0 9260 drflac_read_pcm_frames_s16__decode_independent_stereo__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 9261 #else 9262 drflac_read_pcm_frames_s16__decode_independent_stereo__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 9263 #endif 9264 } 9265 } 9266 9267 drflac_uint64 drflac_read_pcm_frames_s16(drflac* pFlac, drflac_uint64 framesToRead, drflac_int16* pBufferOut) 9268 { 9269 drflac_uint64 framesRead; 9270 drflac_int32 unusedBitsPerSample; 9271 9272 if (pFlac == NULL || framesToRead == 0) { 9273 return 0; 9274 } 9275 9276 if (pBufferOut == NULL) { 9277 return drflac__seek_forward_by_pcm_frames(pFlac, framesToRead); 9278 } 9279 9280 unusedBitsPerSample = 32 - pFlac->bitsPerSample; 9281 9282 framesRead = 0; 9283 while (framesToRead > 0) { 9284 /* If we've run out of samples in this frame, go to the next. */ 9285 if (pFlac->currentFLACFrame.pcmFramesRemaining == 0) { 9286 if (!drflac__read_and_decode_next_flac_frame(pFlac)) { 9287 break; /* Couldn't read the next frame, so just break from the loop and return. 
*/ 9288 } 9289 } else { 9290 unsigned int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment); 9291 drflac_uint64 iFirstPCMFrame = pFlac->currentFLACFrame.header.blockSizeInPCMFrames - pFlac->currentFLACFrame.pcmFramesRemaining; 9292 drflac_uint64 frameCountThisIteration = framesToRead; 9293 9294 if (frameCountThisIteration > pFlac->currentFLACFrame.pcmFramesRemaining) { 9295 frameCountThisIteration = pFlac->currentFLACFrame.pcmFramesRemaining; 9296 } 9297 9298 if (channelCount == 2) { 9299 const drflac_int32* pDecodedSamples0 = pFlac->currentFLACFrame.subframes[0].pSamplesS32 + iFirstPCMFrame; 9300 const drflac_int32* pDecodedSamples1 = pFlac->currentFLACFrame.subframes[1].pSamplesS32 + iFirstPCMFrame; 9301 9302 switch (pFlac->currentFLACFrame.header.channelAssignment) 9303 { 9304 case DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE: 9305 { 9306 drflac_read_pcm_frames_s16__decode_left_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); 9307 } break; 9308 9309 case DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE: 9310 { 9311 drflac_read_pcm_frames_s16__decode_right_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); 9312 } break; 9313 9314 case DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE: 9315 { 9316 drflac_read_pcm_frames_s16__decode_mid_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); 9317 } break; 9318 9319 case DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT: 9320 default: 9321 { 9322 drflac_read_pcm_frames_s16__decode_independent_stereo(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); 9323 } break; 9324 } 9325 } else { 9326 /* Generic interleaving. 
*/ 9327 drflac_uint64 i; 9328 for (i = 0; i < frameCountThisIteration; ++i) { 9329 unsigned int j; 9330 for (j = 0; j < channelCount; ++j) { 9331 drflac_int32 sampleS32 = (drflac_int32)((drflac_uint32)(pFlac->currentFLACFrame.subframes[j].pSamplesS32[iFirstPCMFrame + i]) << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[j].wastedBitsPerSample)); 9332 pBufferOut[(i*channelCount)+j] = (drflac_int16)(sampleS32 >> 16); 9333 } 9334 } 9335 } 9336 9337 framesRead += frameCountThisIteration; 9338 pBufferOut += frameCountThisIteration * channelCount; 9339 framesToRead -= frameCountThisIteration; 9340 pFlac->currentPCMFrame += frameCountThisIteration; 9341 pFlac->currentFLACFrame.pcmFramesRemaining -= (drflac_uint32)frameCountThisIteration; 9342 } 9343 } 9344 9345 return framesRead; 9346 } 9347 9348 9349 #if 0 9350 static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) 9351 { 9352 drflac_uint64 i; 9353 for (i = 0; i < frameCount; ++i) { 9354 drflac_int32 left = pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); 9355 drflac_int32 side = pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); 9356 drflac_int32 right = left - side; 9357 9358 pOutputSamples[i*2+0] = (float)(left / 2147483648.0); 9359 pOutputSamples[i*2+1] = (float)(right / 2147483648.0); 9360 } 9361 } 9362 #endif 9363 9364 static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) 9365 { 9366 drflac_uint64 i; 9367 drflac_uint64 frameCount4 = frameCount >> 2; 9368 9369 float factor = 1 / 2147483648.0; 9370 9371 drflac_int32 
shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 9372 drflac_int32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 9373 for (i = 0; i < frameCount4; ++i) { 9374 drflac_int32 left0 = pInputSamples0[i*4+0] << shift0; 9375 drflac_int32 left1 = pInputSamples0[i*4+1] << shift0; 9376 drflac_int32 left2 = pInputSamples0[i*4+2] << shift0; 9377 drflac_int32 left3 = pInputSamples0[i*4+3] << shift0; 9378 9379 drflac_int32 side0 = pInputSamples1[i*4+0] << shift1; 9380 drflac_int32 side1 = pInputSamples1[i*4+1] << shift1; 9381 drflac_int32 side2 = pInputSamples1[i*4+2] << shift1; 9382 drflac_int32 side3 = pInputSamples1[i*4+3] << shift1; 9383 9384 drflac_int32 right0 = left0 - side0; 9385 drflac_int32 right1 = left1 - side1; 9386 drflac_int32 right2 = left2 - side2; 9387 drflac_int32 right3 = left3 - side3; 9388 9389 pOutputSamples[i*8+0] = left0 * factor; 9390 pOutputSamples[i*8+1] = right0 * factor; 9391 pOutputSamples[i*8+2] = left1 * factor; 9392 pOutputSamples[i*8+3] = right1 * factor; 9393 pOutputSamples[i*8+4] = left2 * factor; 9394 pOutputSamples[i*8+5] = right2 * factor; 9395 pOutputSamples[i*8+6] = left3 * factor; 9396 pOutputSamples[i*8+7] = right3 * factor; 9397 } 9398 9399 for (i = (frameCount4 << 2); i < frameCount; ++i) { 9400 drflac_int32 left = pInputSamples0[i] << shift0; 9401 drflac_int32 side = pInputSamples1[i] << shift1; 9402 drflac_int32 right = left - side; 9403 9404 pOutputSamples[i*2+0] = (float)(left * factor); 9405 pOutputSamples[i*2+1] = (float)(right * factor); 9406 } 9407 } 9408 9409 #if defined(DRFLAC_SUPPORT_SSE2) 9410 static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) 9411 { 9412 drflac_uint64 frameCount4; 9413 drflac_int32 shift0; 9414 drflac_int32 shift1; 9415 
drflac_uint64 i; 9416 __m128 factor; 9417 9418 DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); 9419 9420 frameCount4 = frameCount >> 2; 9421 9422 factor = _mm_set1_ps(1.0f / 8388608.0f); 9423 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8; 9424 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8; 9425 9426 for (i = 0; i < frameCount4; ++i) { 9427 __m128i left = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0); 9428 __m128i side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1); 9429 __m128i right = _mm_sub_epi32(left, side); 9430 __m128 leftf = _mm_mul_ps(_mm_cvtepi32_ps(left), factor); 9431 __m128 rightf = _mm_mul_ps(_mm_cvtepi32_ps(right), factor); 9432 9433 _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf)); 9434 _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf)); 9435 } 9436 9437 for (i = (frameCount4 << 2); i < frameCount; ++i) { 9438 drflac_int32 left = pInputSamples0[i] << shift0; 9439 drflac_int32 side = pInputSamples1[i] << shift1; 9440 drflac_int32 right = left - side; 9441 9442 pOutputSamples[i*2+0] = (float)(left / 8388608.0f); 9443 pOutputSamples[i*2+1] = (float)(right / 8388608.0f); 9444 } 9445 } 9446 #endif 9447 9448 #if defined(DRFLAC_SUPPORT_NEON) 9449 static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) 9450 { 9451 drflac_uint64 frameCount4; 9452 drflac_int32 shift0; 9453 drflac_int32 shift1; 9454 drflac_uint64 i; 9455 float32x4_t factor4; 9456 int32x4_t shift0_4; 9457 int32x4_t shift1_4; 9458 9459 DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); 9460 9461 frameCount4 = frameCount >> 2; 9462 9463 factor4 = vdupq_n_f32(1.0f / 8388608.0f); 9464 9465 shift0 = (unusedBitsPerSample + 
pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8; 9466 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8; 9467 9468 shift0_4 = vdupq_n_s32(shift0); 9469 shift1_4 = vdupq_n_s32(shift1); 9470 9471 for (i = 0; i < frameCount4; ++i) { 9472 int32x4_t left; 9473 int32x4_t side; 9474 int32x4_t right; 9475 float32x4_t leftf; 9476 float32x4_t rightf; 9477 9478 left = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), shift0_4); 9479 side = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), shift1_4); 9480 right = vsubq_s32(left, side); 9481 leftf = vmulq_f32(vcvtq_f32_s32(left), factor4); 9482 rightf = vmulq_f32(vcvtq_f32_s32(right), factor4); 9483 9484 drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf)); 9485 } 9486 9487 for (i = (frameCount4 << 2); i < frameCount; ++i) { 9488 drflac_int32 left = pInputSamples0[i] << shift0; 9489 drflac_int32 side = pInputSamples1[i] << shift1; 9490 drflac_int32 right = left - side; 9491 9492 pOutputSamples[i*2+0] = (float)(left / 8388608.0f); 9493 pOutputSamples[i*2+1] = (float)(right / 8388608.0f); 9494 } 9495 } 9496 #endif 9497 9498 static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) 9499 { 9500 #if defined(DRFLAC_SUPPORT_SSE2) 9501 if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { 9502 drflac_read_pcm_frames_f32__decode_left_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 9503 } else 9504 #elif defined(DRFLAC_SUPPORT_NEON) 9505 if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { 9506 drflac_read_pcm_frames_f32__decode_left_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 9507 } else 9508 #endif 9509 { 9510 /* Scalar fallback. 
*/ 9511 #if 0 9512 drflac_read_pcm_frames_f32__decode_left_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 9513 #else 9514 drflac_read_pcm_frames_f32__decode_left_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 9515 #endif 9516 } 9517 } 9518 9519 9520 #if 0 9521 static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) 9522 { 9523 drflac_uint64 i; 9524 for (i = 0; i < frameCount; ++i) { 9525 drflac_int32 side = pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); 9526 drflac_int32 right = pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); 9527 drflac_int32 left = right + side; 9528 9529 pOutputSamples[i*2+0] = (float)(left / 2147483648.0); 9530 pOutputSamples[i*2+1] = (float)(right / 2147483648.0); 9531 } 9532 } 9533 #endif 9534 9535 static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) 9536 { 9537 drflac_uint64 i; 9538 drflac_uint64 frameCount4 = frameCount >> 2; 9539 9540 float factor = 1 / 2147483648.0; 9541 9542 drflac_int32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 9543 drflac_int32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 9544 for (i = 0; i < frameCount4; ++i) { 9545 drflac_int32 side0 = pInputSamples0[i*4+0] << shift0; 9546 drflac_int32 side1 = pInputSamples0[i*4+1] << shift0; 9547 drflac_int32 side2 = pInputSamples0[i*4+2] << shift0; 9548 drflac_int32 side3 = 
pInputSamples0[i*4+3] << shift0; 9549 9550 drflac_int32 right0 = pInputSamples1[i*4+0] << shift1; 9551 drflac_int32 right1 = pInputSamples1[i*4+1] << shift1; 9552 drflac_int32 right2 = pInputSamples1[i*4+2] << shift1; 9553 drflac_int32 right3 = pInputSamples1[i*4+3] << shift1; 9554 9555 drflac_int32 left0 = right0 + side0; 9556 drflac_int32 left1 = right1 + side1; 9557 drflac_int32 left2 = right2 + side2; 9558 drflac_int32 left3 = right3 + side3; 9559 9560 pOutputSamples[i*8+0] = left0 * factor; 9561 pOutputSamples[i*8+1] = right0 * factor; 9562 pOutputSamples[i*8+2] = left1 * factor; 9563 pOutputSamples[i*8+3] = right1 * factor; 9564 pOutputSamples[i*8+4] = left2 * factor; 9565 pOutputSamples[i*8+5] = right2 * factor; 9566 pOutputSamples[i*8+6] = left3 * factor; 9567 pOutputSamples[i*8+7] = right3 * factor; 9568 } 9569 9570 for (i = (frameCount4 << 2); i < frameCount; ++i) { 9571 drflac_int32 side = pInputSamples0[i] << shift0; 9572 drflac_int32 right = pInputSamples1[i] << shift1; 9573 drflac_int32 left = right + side; 9574 9575 pOutputSamples[i*2+0] = (float)(left * factor); 9576 pOutputSamples[i*2+1] = (float)(right * factor); 9577 } 9578 } 9579 9580 #if defined(DRFLAC_SUPPORT_SSE2) 9581 static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) 9582 { 9583 drflac_uint64 frameCount4; 9584 drflac_int32 shift0; 9585 drflac_int32 shift1; 9586 drflac_uint64 i; 9587 __m128 factor; 9588 9589 DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); 9590 9591 frameCount4 = frameCount >> 2; 9592 9593 factor = _mm_set1_ps(1.0f / 8388608.0f); 9594 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8; 9595 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8; 9596 9597 for (i = 0; i < frameCount4; ++i) { 9598 __m128i 
side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0); 9599 __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1); 9600 __m128i left = _mm_add_epi32(right, side); 9601 __m128 leftf = _mm_mul_ps(_mm_cvtepi32_ps(left), factor); 9602 __m128 rightf = _mm_mul_ps(_mm_cvtepi32_ps(right), factor); 9603 9604 _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf)); 9605 _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf)); 9606 } 9607 9608 for (i = (frameCount4 << 2); i < frameCount; ++i) { 9609 drflac_int32 side = pInputSamples0[i] << shift0; 9610 drflac_int32 right = pInputSamples1[i] << shift1; 9611 drflac_int32 left = right + side; 9612 9613 pOutputSamples[i*2+0] = (float)(left / 8388608.0f); 9614 pOutputSamples[i*2+1] = (float)(right / 8388608.0f); 9615 } 9616 } 9617 #endif 9618 9619 #if defined(DRFLAC_SUPPORT_NEON) 9620 static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) 9621 { 9622 drflac_uint64 frameCount4; 9623 drflac_int32 shift0; 9624 drflac_int32 shift1; 9625 drflac_uint64 i; 9626 float32x4_t factor4; 9627 int32x4_t shift0_4; 9628 int32x4_t shift1_4; 9629 9630 DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); 9631 9632 frameCount4 = frameCount >> 2; 9633 9634 factor4 = vdupq_n_f32(1.0f / 8388608.0f); 9635 9636 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8; 9637 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8; 9638 9639 shift0_4 = vdupq_n_s32(shift0); 9640 shift1_4 = vdupq_n_s32(shift1); 9641 9642 for (i = 0; i < frameCount4; ++i) { 9643 int32x4_t side; 9644 int32x4_t right; 9645 int32x4_t left; 9646 float32x4_t leftf; 9647 float32x4_t rightf; 9648 9649 side = 
vshlq_s32(vld1q_s32(pInputSamples0 + i*4), shift0_4); 9650 right = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), shift1_4); 9651 left = vaddq_s32(right, side); 9652 leftf = vmulq_f32(vcvtq_f32_s32(left), factor4); 9653 rightf = vmulq_f32(vcvtq_f32_s32(right), factor4); 9654 9655 drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf)); 9656 } 9657 9658 for (i = (frameCount4 << 2); i < frameCount; ++i) { 9659 drflac_int32 side = pInputSamples0[i] << shift0; 9660 drflac_int32 right = pInputSamples1[i] << shift1; 9661 drflac_int32 left = right + side; 9662 9663 pOutputSamples[i*2+0] = (float)(left / 8388608.0f); 9664 pOutputSamples[i*2+1] = (float)(right / 8388608.0f); 9665 } 9666 } 9667 #endif 9668 9669 static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) 9670 { 9671 #if defined(DRFLAC_SUPPORT_SSE2) 9672 if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { 9673 drflac_read_pcm_frames_f32__decode_right_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 9674 } else 9675 #elif defined(DRFLAC_SUPPORT_NEON) 9676 if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { 9677 drflac_read_pcm_frames_f32__decode_right_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 9678 } else 9679 #endif 9680 { 9681 /* Scalar fallback. 
*/ 9682 #if 0 9683 drflac_read_pcm_frames_f32__decode_right_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 9684 #else 9685 drflac_read_pcm_frames_f32__decode_right_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 9686 #endif 9687 } 9688 } 9689 9690 9691 #if 0 9692 static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) 9693 { 9694 for (drflac_uint64 i = 0; i < frameCount; ++i) { 9695 drflac_int32 mid = pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 9696 drflac_int32 side = pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 9697 9698 mid = (((drflac_uint32)mid) << 1) | (side & 0x01); 9699 9700 pOutputSamples[i*2+0] = (float)((((mid + side) >> 1) << (unusedBitsPerSample)) / 2147483648.0); 9701 pOutputSamples[i*2+1] = (float)((((mid - side) >> 1) << (unusedBitsPerSample)) / 2147483648.0); 9702 } 9703 } 9704 #endif 9705 9706 static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) 9707 { 9708 drflac_uint64 i; 9709 drflac_uint64 frameCount4 = frameCount >> 2; 9710 9711 float factor = 1 / 2147483648.0; 9712 9713 int shift = unusedBitsPerSample; 9714 if (shift > 0) { 9715 shift -= 1; 9716 for (i = 0; i < frameCount4; ++i) { 9717 drflac_int32 temp0L; 9718 drflac_int32 temp1L; 9719 drflac_int32 temp2L; 9720 drflac_int32 temp3L; 9721 drflac_int32 temp0R; 9722 drflac_int32 temp1R; 9723 drflac_int32 temp2R; 9724 drflac_int32 temp3R; 9725 9726 drflac_int32 mid0 = pInputSamples0[i*4+0] << 
pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 9727 drflac_int32 mid1 = pInputSamples0[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 9728 drflac_int32 mid2 = pInputSamples0[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 9729 drflac_int32 mid3 = pInputSamples0[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 9730 9731 drflac_int32 side0 = pInputSamples1[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 9732 drflac_int32 side1 = pInputSamples1[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 9733 drflac_int32 side2 = pInputSamples1[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 9734 drflac_int32 side3 = pInputSamples1[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 9735 9736 mid0 = (((drflac_uint32)mid0) << 1) | (side0 & 0x01); 9737 mid1 = (((drflac_uint32)mid1) << 1) | (side1 & 0x01); 9738 mid2 = (((drflac_uint32)mid2) << 1) | (side2 & 0x01); 9739 mid3 = (((drflac_uint32)mid3) << 1) | (side3 & 0x01); 9740 9741 temp0L = ((mid0 + side0) << shift); 9742 temp1L = ((mid1 + side1) << shift); 9743 temp2L = ((mid2 + side2) << shift); 9744 temp3L = ((mid3 + side3) << shift); 9745 9746 temp0R = ((mid0 - side0) << shift); 9747 temp1R = ((mid1 - side1) << shift); 9748 temp2R = ((mid2 - side2) << shift); 9749 temp3R = ((mid3 - side3) << shift); 9750 9751 pOutputSamples[i*8+0] = (float)(temp0L * factor); 9752 pOutputSamples[i*8+1] = (float)(temp0R * factor); 9753 pOutputSamples[i*8+2] = (float)(temp1L * factor); 9754 pOutputSamples[i*8+3] = (float)(temp1R * factor); 9755 pOutputSamples[i*8+4] = (float)(temp2L * factor); 9756 pOutputSamples[i*8+5] = (float)(temp2R * factor); 9757 pOutputSamples[i*8+6] = (float)(temp3L * factor); 9758 pOutputSamples[i*8+7] = (float)(temp3R * factor); 9759 } 9760 } else { 9761 for (i = 0; i < frameCount4; ++i) { 9762 drflac_int32 temp0L; 9763 drflac_int32 temp1L; 9764 drflac_int32 temp2L; 9765 
drflac_int32 temp3L; 9766 drflac_int32 temp0R; 9767 drflac_int32 temp1R; 9768 drflac_int32 temp2R; 9769 drflac_int32 temp3R; 9770 9771 drflac_int32 mid0 = pInputSamples0[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 9772 drflac_int32 mid1 = pInputSamples0[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 9773 drflac_int32 mid2 = pInputSamples0[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 9774 drflac_int32 mid3 = pInputSamples0[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 9775 9776 drflac_int32 side0 = pInputSamples1[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 9777 drflac_int32 side1 = pInputSamples1[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 9778 drflac_int32 side2 = pInputSamples1[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 9779 drflac_int32 side3 = pInputSamples1[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 9780 9781 mid0 = (((drflac_uint32)mid0) << 1) | (side0 & 0x01); 9782 mid1 = (((drflac_uint32)mid1) << 1) | (side1 & 0x01); 9783 mid2 = (((drflac_uint32)mid2) << 1) | (side2 & 0x01); 9784 mid3 = (((drflac_uint32)mid3) << 1) | (side3 & 0x01); 9785 9786 temp0L = ((mid0 + side0) >> 1); 9787 temp1L = ((mid1 + side1) >> 1); 9788 temp2L = ((mid2 + side2) >> 1); 9789 temp3L = ((mid3 + side3) >> 1); 9790 9791 temp0R = ((mid0 - side0) >> 1); 9792 temp1R = ((mid1 - side1) >> 1); 9793 temp2R = ((mid2 - side2) >> 1); 9794 temp3R = ((mid3 - side3) >> 1); 9795 9796 pOutputSamples[i*8+0] = (float)(temp0L * factor); 9797 pOutputSamples[i*8+1] = (float)(temp0R * factor); 9798 pOutputSamples[i*8+2] = (float)(temp1L * factor); 9799 pOutputSamples[i*8+3] = (float)(temp1R * factor); 9800 pOutputSamples[i*8+4] = (float)(temp2L * factor); 9801 pOutputSamples[i*8+5] = (float)(temp2R * factor); 9802 pOutputSamples[i*8+6] = (float)(temp3L * factor); 9803 pOutputSamples[i*8+7] = (float)(temp3R * factor); 9804 
} 9805 } 9806 9807 for (i = (frameCount4 << 2); i < frameCount; ++i) { 9808 int mid = pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 9809 int side = pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 9810 9811 mid = (((drflac_uint32)mid) << 1) | (side & 0x01); 9812 9813 pOutputSamples[i*2+0] = (float)((((mid + side) >> 1) << unusedBitsPerSample) * factor); 9814 pOutputSamples[i*2+1] = (float)((((mid - side) >> 1) << unusedBitsPerSample) * factor); 9815 } 9816 } 9817 9818 #if defined(DRFLAC_SUPPORT_SSE2) 9819 static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) 9820 { 9821 drflac_uint64 i; 9822 drflac_uint64 frameCount4; 9823 float factor; 9824 drflac_int32 shift; 9825 __m128 factor128; 9826 9827 DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); 9828 9829 frameCount4 = frameCount >> 2; 9830 9831 factor = 1.0f / 8388608.0f; 9832 factor128 = _mm_set1_ps(1.0f / 8388608.0f); 9833 9834 shift = unusedBitsPerSample - 8; 9835 if (shift == 0) { 9836 for (i = 0; i < frameCount4; ++i) { 9837 __m128i mid; 9838 __m128i side; 9839 __m128i tempL; 9840 __m128i tempR; 9841 __m128 leftf; 9842 __m128 rightf; 9843 9844 mid = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); 9845 side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); 9846 9847 mid = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01))); 9848 9849 tempL = _mm_srai_epi32(_mm_add_epi32(mid, side), 1); 9850 tempR = _mm_srai_epi32(_mm_sub_epi32(mid, side), 1); 9851 9852 leftf = _mm_mul_ps(_mm_cvtepi32_ps(tempL), factor128); 9853 rightf = _mm_mul_ps(_mm_cvtepi32_ps(tempR), factor128); 9854 9855 
_mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf)); 9856 _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf)); 9857 } 9858 9859 for (i = (frameCount4 << 2); i < frameCount; ++i) { 9860 drflac_int32 mid = pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 9861 drflac_int32 side = pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 9862 9863 mid = (((drflac_uint32)mid) << 1) | (side & 0x01); 9864 9865 pOutputSamples[i*2+0] = (float)(((mid + side) >> 1) * factor); 9866 pOutputSamples[i*2+1] = (float)(((mid - side) >> 1) * factor); 9867 } 9868 } else { 9869 shift -= 1; 9870 for (i = 0; i < frameCount4; ++i) { 9871 __m128i mid; 9872 __m128i side; 9873 __m128i tempL; 9874 __m128i tempR; 9875 __m128 leftf; 9876 __m128 rightf; 9877 9878 mid = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); 9879 side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); 9880 9881 mid = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01))); 9882 9883 tempL = _mm_slli_epi32(_mm_add_epi32(mid, side), shift); 9884 tempR = _mm_slli_epi32(_mm_sub_epi32(mid, side), shift); 9885 9886 leftf = _mm_mul_ps(_mm_cvtepi32_ps(tempL), factor128); 9887 rightf = _mm_mul_ps(_mm_cvtepi32_ps(tempR), factor128); 9888 9889 _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf)); 9890 _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf)); 9891 } 9892 9893 for (i = (frameCount4 << 2); i < frameCount; ++i) { 9894 drflac_int32 mid = pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 9895 drflac_int32 side = pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 9896 9897 mid = (((drflac_uint32)mid) << 1) | (side & 0x01); 9898 9899 pOutputSamples[i*2+0] = 
(float)(((mid + side) << shift) * factor); 9900 pOutputSamples[i*2+1] = (float)(((mid - side) << shift) * factor); 9901 } 9902 } 9903 } 9904 #endif 9905 9906 #if defined(DRFLAC_SUPPORT_NEON) 9907 static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) 9908 { 9909 drflac_uint64 i; 9910 drflac_uint64 frameCount4; 9911 float factor; 9912 drflac_int32 shift; 9913 float32x4_t factor4; 9914 int32x4_t shift4; 9915 int32x4_t wbps0_4; /* Wasted Bits Per Sample */ 9916 int32x4_t wbps1_4; /* Wasted Bits Per Sample */ 9917 9918 DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); 9919 9920 frameCount4 = frameCount >> 2; 9921 9922 factor = 1.0f / 8388608.0f; 9923 factor4 = vdupq_n_f32(factor); 9924 9925 wbps0_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); 9926 wbps1_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); 9927 9928 shift = unusedBitsPerSample - 8; 9929 if (shift == 0) { 9930 for (i = 0; i < frameCount4; ++i) { 9931 int32x4_t lefti; 9932 int32x4_t righti; 9933 float32x4_t leftf; 9934 float32x4_t rightf; 9935 9936 int32x4_t mid = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), wbps0_4); 9937 int32x4_t side = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), wbps1_4); 9938 9939 mid = vorrq_s32(vshlq_n_s32(mid, 1), vandq_s32(side, vdupq_n_s32(1))); 9940 9941 lefti = vshrq_n_s32(vaddq_s32(mid, side), 1); 9942 righti = vshrq_n_s32(vsubq_s32(mid, side), 1); 9943 9944 leftf = vmulq_f32(vcvtq_f32_s32(lefti), factor4); 9945 rightf = vmulq_f32(vcvtq_f32_s32(righti), factor4); 9946 9947 drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf)); 9948 } 9949 9950 for (i = (frameCount4 << 2); i < frameCount; ++i) { 9951 drflac_int32 mid = pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 9952 drflac_int32 side = pInputSamples1[i] 
<< pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 9953 9954 mid = (((drflac_uint32)mid) << 1) | (side & 0x01); 9955 9956 pOutputSamples[i*2+0] = (float)(((mid + side) >> 1) * factor); 9957 pOutputSamples[i*2+1] = (float)(((mid - side) >> 1) * factor); 9958 } 9959 } else { 9960 shift -= 1; 9961 shift4 = vdupq_n_s32(shift); 9962 for (i = 0; i < frameCount4; ++i) { 9963 int32x4_t mid; 9964 int32x4_t side; 9965 int32x4_t lefti; 9966 int32x4_t righti; 9967 float32x4_t leftf; 9968 float32x4_t rightf; 9969 9970 mid = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), wbps0_4); 9971 side = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), wbps1_4); 9972 9973 mid = vorrq_s32(vshlq_n_s32(mid, 1), vandq_s32(side, vdupq_n_s32(1))); 9974 9975 lefti = vshlq_s32(vaddq_s32(mid, side), shift4); 9976 righti = vshlq_s32(vsubq_s32(mid, side), shift4); 9977 9978 leftf = vmulq_f32(vcvtq_f32_s32(lefti), factor4); 9979 rightf = vmulq_f32(vcvtq_f32_s32(righti), factor4); 9980 9981 drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf)); 9982 } 9983 9984 for (i = (frameCount4 << 2); i < frameCount; ++i) { 9985 drflac_int32 mid = pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; 9986 drflac_int32 side = pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; 9987 9988 mid = (((drflac_uint32)mid) << 1) | (side & 0x01); 9989 9990 pOutputSamples[i*2+0] = (float)(((mid + side) << shift) * factor); 9991 pOutputSamples[i*2+1] = (float)(((mid - side) << shift) * factor); 9992 } 9993 } 9994 } 9995 #endif 9996 9997 static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) 9998 { 9999 #if defined(DRFLAC_SUPPORT_SSE2) 10000 if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { 10001 drflac_read_pcm_frames_f32__decode_mid_side__sse2(pFlac, frameCount, 
unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 10002 } else 10003 #elif defined(DRFLAC_SUPPORT_NEON) 10004 if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { 10005 drflac_read_pcm_frames_f32__decode_mid_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 10006 } else 10007 #endif 10008 { 10009 /* Scalar fallback. */ 10010 #if 0 10011 drflac_read_pcm_frames_f32__decode_mid_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 10012 #else 10013 drflac_read_pcm_frames_f32__decode_mid_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 10014 #endif 10015 } 10016 } 10017 10018 #if 0 10019 static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) 10020 { 10021 for (drflac_uint64 i = 0; i < frameCount; ++i) { 10022 pOutputSamples[i*2+0] = (float)((pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample)) / 2147483648.0); 10023 pOutputSamples[i*2+1] = (float)((pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample)) / 2147483648.0); 10024 } 10025 } 10026 #endif 10027 10028 static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) 10029 { 10030 drflac_uint64 i; 10031 drflac_uint64 frameCount4 = frameCount >> 2; 10032 10033 float factor = 1 / 2147483648.0; 10034 10035 drflac_int32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); 10036 drflac_int32 shift1 = 
(unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); 10037 10038 for (i = 0; i < frameCount4; ++i) { 10039 drflac_int32 tempL0 = pInputSamples0[i*4+0] << shift0; 10040 drflac_int32 tempL1 = pInputSamples0[i*4+1] << shift0; 10041 drflac_int32 tempL2 = pInputSamples0[i*4+2] << shift0; 10042 drflac_int32 tempL3 = pInputSamples0[i*4+3] << shift0; 10043 10044 drflac_int32 tempR0 = pInputSamples1[i*4+0] << shift1; 10045 drflac_int32 tempR1 = pInputSamples1[i*4+1] << shift1; 10046 drflac_int32 tempR2 = pInputSamples1[i*4+2] << shift1; 10047 drflac_int32 tempR3 = pInputSamples1[i*4+3] << shift1; 10048 10049 pOutputSamples[i*8+0] = (float)(tempL0 * factor); 10050 pOutputSamples[i*8+1] = (float)(tempR0 * factor); 10051 pOutputSamples[i*8+2] = (float)(tempL1 * factor); 10052 pOutputSamples[i*8+3] = (float)(tempR1 * factor); 10053 pOutputSamples[i*8+4] = (float)(tempL2 * factor); 10054 pOutputSamples[i*8+5] = (float)(tempR2 * factor); 10055 pOutputSamples[i*8+6] = (float)(tempL3 * factor); 10056 pOutputSamples[i*8+7] = (float)(tempR3 * factor); 10057 } 10058 10059 for (i = (frameCount4 << 2); i < frameCount; ++i) { 10060 pOutputSamples[i*2+0] = (float)((pInputSamples0[i] << shift0) * factor); 10061 pOutputSamples[i*2+1] = (float)((pInputSamples1[i] << shift1) * factor); 10062 } 10063 } 10064 10065 #if defined(DRFLAC_SUPPORT_SSE2) 10066 static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) 10067 { 10068 drflac_uint64 i; 10069 drflac_uint64 frameCount4 = frameCount >> 2; 10070 10071 float factor = 1.0f / 8388608.0f; 10072 __m128 factor128 = _mm_set1_ps(1.0f / 8388608.0f); 10073 10074 drflac_int32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8; 10075 drflac_int32 shift1 = (unusedBitsPerSample + 
pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8; 10076 10077 for (i = 0; i < frameCount4; ++i) { 10078 __m128i lefti; 10079 __m128i righti; 10080 __m128 leftf; 10081 __m128 rightf; 10082 10083 lefti = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0); 10084 righti = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1); 10085 10086 leftf = _mm_mul_ps(_mm_cvtepi32_ps(lefti), factor128); 10087 rightf = _mm_mul_ps(_mm_cvtepi32_ps(righti), factor128); 10088 10089 _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf)); 10090 _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf)); 10091 } 10092 10093 for (i = (frameCount4 << 2); i < frameCount; ++i) { 10094 pOutputSamples[i*2+0] = (float)((pInputSamples0[i] << shift0) * factor); 10095 pOutputSamples[i*2+1] = (float)((pInputSamples1[i] << shift1) * factor); 10096 } 10097 } 10098 #endif 10099 10100 #if defined(DRFLAC_SUPPORT_NEON) 10101 static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) 10102 { 10103 drflac_uint64 i; 10104 drflac_uint64 frameCount4 = frameCount >> 2; 10105 10106 float factor = 1.0f / 8388608.0f; 10107 float32x4_t factor4 = vdupq_n_f32(factor); 10108 10109 drflac_int32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8; 10110 drflac_int32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8; 10111 10112 int32x4_t shift0_4 = vdupq_n_s32(shift0); 10113 int32x4_t shift1_4 = vdupq_n_s32(shift1); 10114 10115 for (i = 0; i < frameCount4; ++i) { 10116 int32x4_t lefti; 10117 int32x4_t righti; 10118 float32x4_t leftf; 10119 float32x4_t rightf; 10120 10121 lefti = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), shift0_4); 10122 righti = 
vshlq_s32(vld1q_s32(pInputSamples1 + i*4), shift1_4); 10123 10124 leftf = vmulq_f32(vcvtq_f32_s32(lefti), factor4); 10125 rightf = vmulq_f32(vcvtq_f32_s32(righti), factor4); 10126 10127 drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf)); 10128 } 10129 10130 for (i = (frameCount4 << 2); i < frameCount; ++i) { 10131 pOutputSamples[i*2+0] = (float)((pInputSamples0[i] << shift0) * factor); 10132 pOutputSamples[i*2+1] = (float)((pInputSamples1[i] << shift1) * factor); 10133 } 10134 } 10135 #endif 10136 10137 static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) 10138 { 10139 #if defined(DRFLAC_SUPPORT_SSE2) 10140 if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { 10141 drflac_read_pcm_frames_f32__decode_independent_stereo__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 10142 } else 10143 #elif defined(DRFLAC_SUPPORT_NEON) 10144 if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { 10145 drflac_read_pcm_frames_f32__decode_independent_stereo__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 10146 } else 10147 #endif 10148 { 10149 /* Scalar fallback. 
*/ 10150 #if 0 10151 drflac_read_pcm_frames_f32__decode_independent_stereo__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 10152 #else 10153 drflac_read_pcm_frames_f32__decode_independent_stereo__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); 10154 #endif 10155 } 10156 } 10157 10158 drflac_uint64 drflac_read_pcm_frames_f32(drflac* pFlac, drflac_uint64 framesToRead, float* pBufferOut) 10159 { 10160 drflac_uint64 framesRead; 10161 drflac_int32 unusedBitsPerSample; 10162 10163 if (pFlac == NULL || framesToRead == 0) { 10164 return 0; 10165 } 10166 10167 if (pBufferOut == NULL) { 10168 return drflac__seek_forward_by_pcm_frames(pFlac, framesToRead); 10169 } 10170 10171 unusedBitsPerSample = 32 - pFlac->bitsPerSample; 10172 10173 framesRead = 0; 10174 while (framesToRead > 0) { 10175 /* If we've run out of samples in this frame, go to the next. */ 10176 if (pFlac->currentFLACFrame.pcmFramesRemaining == 0) { 10177 if (!drflac__read_and_decode_next_flac_frame(pFlac)) { 10178 break; /* Couldn't read the next frame, so just break from the loop and return. 
*/ 10179 } 10180 } else { 10181 unsigned int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment); 10182 drflac_uint64 iFirstPCMFrame = pFlac->currentFLACFrame.header.blockSizeInPCMFrames - pFlac->currentFLACFrame.pcmFramesRemaining; 10183 drflac_uint64 frameCountThisIteration = framesToRead; 10184 10185 if (frameCountThisIteration > pFlac->currentFLACFrame.pcmFramesRemaining) { 10186 frameCountThisIteration = pFlac->currentFLACFrame.pcmFramesRemaining; 10187 } 10188 10189 if (channelCount == 2) { 10190 const drflac_int32* pDecodedSamples0 = pFlac->currentFLACFrame.subframes[0].pSamplesS32 + iFirstPCMFrame; 10191 const drflac_int32* pDecodedSamples1 = pFlac->currentFLACFrame.subframes[1].pSamplesS32 + iFirstPCMFrame; 10192 10193 switch (pFlac->currentFLACFrame.header.channelAssignment) 10194 { 10195 case DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE: 10196 { 10197 drflac_read_pcm_frames_f32__decode_left_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); 10198 } break; 10199 10200 case DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE: 10201 { 10202 drflac_read_pcm_frames_f32__decode_right_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); 10203 } break; 10204 10205 case DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE: 10206 { 10207 drflac_read_pcm_frames_f32__decode_mid_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); 10208 } break; 10209 10210 case DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT: 10211 default: 10212 { 10213 drflac_read_pcm_frames_f32__decode_independent_stereo(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); 10214 } break; 10215 } 10216 } else { 10217 /* Generic interleaving. 
*/ 10218 drflac_uint64 i; 10219 for (i = 0; i < frameCountThisIteration; ++i) { 10220 unsigned int j; 10221 for (j = 0; j < channelCount; ++j) { 10222 pBufferOut[(i*channelCount)+j] = (float)(((pFlac->currentFLACFrame.subframes[j].pSamplesS32[iFirstPCMFrame + i]) << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[j].wastedBitsPerSample)) / 2147483648.0); 10223 } 10224 } 10225 } 10226 10227 framesRead += frameCountThisIteration; 10228 pBufferOut += frameCountThisIteration * channelCount; 10229 framesToRead -= frameCountThisIteration; 10230 pFlac->currentPCMFrame += frameCountThisIteration; 10231 pFlac->currentFLACFrame.pcmFramesRemaining -= (unsigned int)frameCountThisIteration; 10232 } 10233 } 10234 10235 return framesRead; 10236 } 10237 10238 10239 drflac_bool32 drflac_seek_to_pcm_frame(drflac* pFlac, drflac_uint64 pcmFrameIndex) 10240 { 10241 if (pFlac == NULL) { 10242 return DRFLAC_FALSE; 10243 } 10244 10245 /* Don't do anything if we're already on the seek point. */ 10246 if (pFlac->currentPCMFrame == pcmFrameIndex) { 10247 return DRFLAC_TRUE; 10248 } 10249 10250 /* 10251 If we don't know where the first frame begins then we can't seek. This will happen when the STREAMINFO block was not present 10252 when the decoder was opened. 10253 */ 10254 if (pFlac->firstFLACFramePosInBytes == 0) { 10255 return DRFLAC_FALSE; 10256 } 10257 10258 if (pcmFrameIndex == 0) { 10259 pFlac->currentPCMFrame = 0; 10260 return drflac__seek_to_first_frame(pFlac); 10261 } else { 10262 drflac_bool32 wasSuccessful = DRFLAC_FALSE; 10263 10264 /* Clamp the sample to the end. */ 10265 if (pcmFrameIndex > pFlac->totalPCMFrameCount) { 10266 pcmFrameIndex = pFlac->totalPCMFrameCount; 10267 } 10268 10269 /* If the target sample and the current sample are in the same frame we just move the position forward. */ 10270 if (pcmFrameIndex > pFlac->currentPCMFrame) { 10271 /* Forward. 
*/ 10272 drflac_uint32 offset = (drflac_uint32)(pcmFrameIndex - pFlac->currentPCMFrame); 10273 if (pFlac->currentFLACFrame.pcmFramesRemaining > offset) { 10274 pFlac->currentFLACFrame.pcmFramesRemaining -= offset; 10275 pFlac->currentPCMFrame = pcmFrameIndex; 10276 return DRFLAC_TRUE; 10277 } 10278 } else { 10279 /* Backward. */ 10280 drflac_uint32 offsetAbs = (drflac_uint32)(pFlac->currentPCMFrame - pcmFrameIndex); 10281 drflac_uint32 currentFLACFramePCMFrameCount = pFlac->currentFLACFrame.header.blockSizeInPCMFrames; 10282 drflac_uint32 currentFLACFramePCMFramesConsumed = currentFLACFramePCMFrameCount - pFlac->currentFLACFrame.pcmFramesRemaining; 10283 if (currentFLACFramePCMFramesConsumed > offsetAbs) { 10284 pFlac->currentFLACFrame.pcmFramesRemaining += offsetAbs; 10285 pFlac->currentPCMFrame = pcmFrameIndex; 10286 return DRFLAC_TRUE; 10287 } 10288 } 10289 10290 /* 10291 Different techniques depending on encapsulation. Using the native FLAC seektable with Ogg encapsulation is a bit awkward so 10292 we'll instead use Ogg's natural seeking facility. 10293 */ 10294 #ifndef DR_FLAC_NO_OGG 10295 if (pFlac->container == drflac_container_ogg) 10296 { 10297 wasSuccessful = drflac_ogg__seek_to_pcm_frame(pFlac, pcmFrameIndex); 10298 } 10299 else 10300 #endif 10301 { 10302 /* First try seeking via the seek table. If this fails, fall back to a brute force seek which is much slower. */ 10303 if (!wasSuccessful && !pFlac->_noSeekTableSeek) { 10304 wasSuccessful = drflac__seek_to_pcm_frame__seek_table(pFlac, pcmFrameIndex); 10305 } 10306 10307 #if !defined(DR_FLAC_NO_CRC) 10308 /* Fall back to binary search if seek table seeking fails. This requires the length of the stream to be known. */ 10309 if (!wasSuccessful && !pFlac->_noBinarySearchSeek && pFlac->totalPCMFrameCount > 0) { 10310 wasSuccessful = drflac__seek_to_pcm_frame__binary_search(pFlac, pcmFrameIndex); 10311 } 10312 #endif 10313 10314 /* Fall back to brute force if all else fails. 
*/ 10315 if (!wasSuccessful && !pFlac->_noBruteForceSeek) { 10316 wasSuccessful = drflac__seek_to_pcm_frame__brute_force(pFlac, pcmFrameIndex); 10317 } 10318 } 10319 10320 pFlac->currentPCMFrame = pcmFrameIndex; 10321 return wasSuccessful; 10322 } 10323 } 10324 10325 10326 10327 /* High Level APIs */ 10328 10329 #if defined(SIZE_MAX) 10330 #define DRFLAC_SIZE_MAX SIZE_MAX 10331 #else 10332 #if defined(DRFLAC_64BIT) 10333 #define DRFLAC_SIZE_MAX ((drflac_uint64)0xFFFFFFFFFFFFFFFF) 10334 #else 10335 #define DRFLAC_SIZE_MAX 0xFFFFFFFF 10336 #endif 10337 #endif 10338 10339 10340 /* Using a macro as the definition of the drflac__full_decode_and_close_*() API family. Sue me. */ 10341 #define DRFLAC_DEFINE_FULL_READ_AND_CLOSE(extension, type) \ 10342 static type* drflac__full_read_and_close_ ## extension (drflac* pFlac, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut)\ 10343 { \ 10344 type* pSampleData = NULL; \ 10345 drflac_uint64 totalPCMFrameCount; \ 10346 \ 10347 DRFLAC_ASSERT(pFlac != NULL); \ 10348 \ 10349 totalPCMFrameCount = pFlac->totalPCMFrameCount; \ 10350 \ 10351 if (totalPCMFrameCount == 0) { \ 10352 type buffer[4096]; \ 10353 drflac_uint64 pcmFramesRead; \ 10354 size_t sampleDataBufferSize = sizeof(buffer); \ 10355 \ 10356 pSampleData = (type*)drflac__malloc_from_callbacks(sampleDataBufferSize, &pFlac->allocationCallbacks); \ 10357 if (pSampleData == NULL) { \ 10358 goto on_error; \ 10359 } \ 10360 \ 10361 while ((pcmFramesRead = (drflac_uint64)drflac_read_pcm_frames_##extension(pFlac, sizeof(buffer)/sizeof(buffer[0])/pFlac->channels, buffer)) > 0) { \ 10362 if (((totalPCMFrameCount + pcmFramesRead) * pFlac->channels * sizeof(type)) > sampleDataBufferSize) { \ 10363 type* pNewSampleData; \ 10364 size_t newSampleDataBufferSize; \ 10365 \ 10366 newSampleDataBufferSize = sampleDataBufferSize * 2; \ 10367 pNewSampleData = (type*)drflac__realloc_from_callbacks(pSampleData, newSampleDataBufferSize, sampleDataBufferSize, 
&pFlac->allocationCallbacks); \ 10368 if (pNewSampleData == NULL) { \ 10369 drflac__free_from_callbacks(pSampleData, &pFlac->allocationCallbacks); \ 10370 goto on_error; \ 10371 } \ 10372 \ 10373 sampleDataBufferSize = newSampleDataBufferSize; \ 10374 pSampleData = pNewSampleData; \ 10375 } \ 10376 \ 10377 DRFLAC_COPY_MEMORY(pSampleData + (totalPCMFrameCount*pFlac->channels), buffer, (size_t)(pcmFramesRead*pFlac->channels*sizeof(type))); \ 10378 totalPCMFrameCount += pcmFramesRead; \ 10379 } \ 10380 \ 10381 /* At this point everything should be decoded, but we just want to fill the unused part buffer with silence - need to \ 10382 protect those ears from random noise! */ \ 10383 DRFLAC_ZERO_MEMORY(pSampleData + (totalPCMFrameCount*pFlac->channels), (size_t)(sampleDataBufferSize - totalPCMFrameCount*pFlac->channels*sizeof(type))); \ 10384 } else { \ 10385 drflac_uint64 dataSize = totalPCMFrameCount*pFlac->channels*sizeof(type); \ 10386 if (dataSize > DRFLAC_SIZE_MAX) { \ 10387 goto on_error; /* The decoded data is too big. */ \ 10388 } \ 10389 \ 10390 pSampleData = (type*)drflac__malloc_from_callbacks((size_t)dataSize, &pFlac->allocationCallbacks); /* <-- Safe cast as per the check above. 
*/ \ 10391 if (pSampleData == NULL) { \ 10392 goto on_error; \ 10393 } \ 10394 \ 10395 totalPCMFrameCount = drflac_read_pcm_frames_##extension(pFlac, pFlac->totalPCMFrameCount, pSampleData); \ 10396 } \ 10397 \ 10398 if (sampleRateOut) *sampleRateOut = pFlac->sampleRate; \ 10399 if (channelsOut) *channelsOut = pFlac->channels; \ 10400 if (totalPCMFrameCountOut) *totalPCMFrameCountOut = totalPCMFrameCount; \ 10401 \ 10402 drflac_close(pFlac); \ 10403 return pSampleData; \ 10404 \ 10405 on_error: \ 10406 drflac_close(pFlac); \ 10407 return NULL; \ 10408 } 10409 10410 DRFLAC_DEFINE_FULL_READ_AND_CLOSE(s32, drflac_int32) 10411 DRFLAC_DEFINE_FULL_READ_AND_CLOSE(s16, drflac_int16) 10412 DRFLAC_DEFINE_FULL_READ_AND_CLOSE(f32, float) 10413 10414 drflac_int32* drflac_open_and_read_pcm_frames_s32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut, const drflac_allocation_callbacks* pAllocationCallbacks) 10415 { 10416 drflac* pFlac; 10417 10418 if (channelsOut) { 10419 *channelsOut = 0; 10420 } 10421 if (sampleRateOut) { 10422 *sampleRateOut = 0; 10423 } 10424 if (totalPCMFrameCountOut) { 10425 *totalPCMFrameCountOut = 0; 10426 } 10427 10428 pFlac = drflac_open(onRead, onSeek, pUserData, pAllocationCallbacks); 10429 if (pFlac == NULL) { 10430 return NULL; 10431 } 10432 10433 return drflac__full_read_and_close_s32(pFlac, channelsOut, sampleRateOut, totalPCMFrameCountOut); 10434 } 10435 10436 drflac_int16* drflac_open_and_read_pcm_frames_s16(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut, const drflac_allocation_callbacks* pAllocationCallbacks) 10437 { 10438 drflac* pFlac; 10439 10440 if (channelsOut) { 10441 *channelsOut = 0; 10442 } 10443 if (sampleRateOut) { 10444 *sampleRateOut = 0; 10445 } 10446 if (totalPCMFrameCountOut) { 10447 *totalPCMFrameCountOut = 
0; 10448 } 10449 10450 pFlac = drflac_open(onRead, onSeek, pUserData, pAllocationCallbacks); 10451 if (pFlac == NULL) { 10452 return NULL; 10453 } 10454 10455 return drflac__full_read_and_close_s16(pFlac, channelsOut, sampleRateOut, totalPCMFrameCountOut); 10456 } 10457 10458 float* drflac_open_and_read_pcm_frames_f32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut, const drflac_allocation_callbacks* pAllocationCallbacks) 10459 { 10460 drflac* pFlac; 10461 10462 if (channelsOut) { 10463 *channelsOut = 0; 10464 } 10465 if (sampleRateOut) { 10466 *sampleRateOut = 0; 10467 } 10468 if (totalPCMFrameCountOut) { 10469 *totalPCMFrameCountOut = 0; 10470 } 10471 10472 pFlac = drflac_open(onRead, onSeek, pUserData, pAllocationCallbacks); 10473 if (pFlac == NULL) { 10474 return NULL; 10475 } 10476 10477 return drflac__full_read_and_close_f32(pFlac, channelsOut, sampleRateOut, totalPCMFrameCountOut); 10478 } 10479 10480 #ifndef DR_FLAC_NO_STDIO 10481 drflac_int32* drflac_open_file_and_read_pcm_frames_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks) 10482 { 10483 drflac* pFlac; 10484 10485 if (sampleRate) { 10486 *sampleRate = 0; 10487 } 10488 if (channels) { 10489 *channels = 0; 10490 } 10491 if (totalPCMFrameCount) { 10492 *totalPCMFrameCount = 0; 10493 } 10494 10495 pFlac = drflac_open_file(filename, pAllocationCallbacks); 10496 if (pFlac == NULL) { 10497 return NULL; 10498 } 10499 10500 return drflac__full_read_and_close_s32(pFlac, channels, sampleRate, totalPCMFrameCount); 10501 } 10502 10503 drflac_int16* drflac_open_file_and_read_pcm_frames_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks) 10504 { 10505 drflac* pFlac; 10506 
10507 if (sampleRate) { 10508 *sampleRate = 0; 10509 } 10510 if (channels) { 10511 *channels = 0; 10512 } 10513 if (totalPCMFrameCount) { 10514 *totalPCMFrameCount = 0; 10515 } 10516 10517 pFlac = drflac_open_file(filename, pAllocationCallbacks); 10518 if (pFlac == NULL) { 10519 return NULL; 10520 } 10521 10522 return drflac__full_read_and_close_s16(pFlac, channels, sampleRate, totalPCMFrameCount); 10523 } 10524 10525 float* drflac_open_file_and_read_pcm_frames_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks) 10526 { 10527 drflac* pFlac; 10528 10529 if (sampleRate) { 10530 *sampleRate = 0; 10531 } 10532 if (channels) { 10533 *channels = 0; 10534 } 10535 if (totalPCMFrameCount) { 10536 *totalPCMFrameCount = 0; 10537 } 10538 10539 pFlac = drflac_open_file(filename, pAllocationCallbacks); 10540 if (pFlac == NULL) { 10541 return NULL; 10542 } 10543 10544 return drflac__full_read_and_close_f32(pFlac, channels, sampleRate, totalPCMFrameCount); 10545 } 10546 #endif 10547 10548 drflac_int32* drflac_open_memory_and_read_pcm_frames_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks) 10549 { 10550 drflac* pFlac; 10551 10552 if (sampleRate) { 10553 *sampleRate = 0; 10554 } 10555 if (channels) { 10556 *channels = 0; 10557 } 10558 if (totalPCMFrameCount) { 10559 *totalPCMFrameCount = 0; 10560 } 10561 10562 pFlac = drflac_open_memory(data, dataSize, pAllocationCallbacks); 10563 if (pFlac == NULL) { 10564 return NULL; 10565 } 10566 10567 return drflac__full_read_and_close_s32(pFlac, channels, sampleRate, totalPCMFrameCount); 10568 } 10569 10570 drflac_int16* drflac_open_memory_and_read_pcm_frames_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const 
drflac_allocation_callbacks* pAllocationCallbacks) 10571 { 10572 drflac* pFlac; 10573 10574 if (sampleRate) { 10575 *sampleRate = 0; 10576 } 10577 if (channels) { 10578 *channels = 0; 10579 } 10580 if (totalPCMFrameCount) { 10581 *totalPCMFrameCount = 0; 10582 } 10583 10584 pFlac = drflac_open_memory(data, dataSize, pAllocationCallbacks); 10585 if (pFlac == NULL) { 10586 return NULL; 10587 } 10588 10589 return drflac__full_read_and_close_s16(pFlac, channels, sampleRate, totalPCMFrameCount); 10590 } 10591 10592 float* drflac_open_memory_and_read_pcm_frames_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks) 10593 { 10594 drflac* pFlac; 10595 10596 if (sampleRate) { 10597 *sampleRate = 0; 10598 } 10599 if (channels) { 10600 *channels = 0; 10601 } 10602 if (totalPCMFrameCount) { 10603 *totalPCMFrameCount = 0; 10604 } 10605 10606 pFlac = drflac_open_memory(data, dataSize, pAllocationCallbacks); 10607 if (pFlac == NULL) { 10608 return NULL; 10609 } 10610 10611 return drflac__full_read_and_close_f32(pFlac, channels, sampleRate, totalPCMFrameCount); 10612 } 10613 10614 10615 void drflac_free(void* p, const drflac_allocation_callbacks* pAllocationCallbacks) 10616 { 10617 if (pAllocationCallbacks != NULL) { 10618 drflac__free_from_callbacks(p, pAllocationCallbacks); 10619 } else { 10620 drflac__free_default(p, NULL); 10621 } 10622 } 10623 10624 10625 10626 10627 void drflac_init_vorbis_comment_iterator(drflac_vorbis_comment_iterator* pIter, drflac_uint32 commentCount, const void* pComments) 10628 { 10629 if (pIter == NULL) { 10630 return; 10631 } 10632 10633 pIter->countRemaining = commentCount; 10634 pIter->pRunningData = (const char*)pComments; 10635 } 10636 10637 const char* drflac_next_vorbis_comment(drflac_vorbis_comment_iterator* pIter, drflac_uint32* pCommentLengthOut) 10638 { 10639 drflac_int32 length; 10640 const char* pComment; 10641 
10642 /* Safety. */ 10643 if (pCommentLengthOut) { 10644 *pCommentLengthOut = 0; 10645 } 10646 10647 if (pIter == NULL || pIter->countRemaining == 0 || pIter->pRunningData == NULL) { 10648 return NULL; 10649 } 10650 10651 length = drflac__le2host_32(*(const drflac_uint32*)pIter->pRunningData); 10652 pIter->pRunningData += 4; 10653 10654 pComment = pIter->pRunningData; 10655 pIter->pRunningData += length; 10656 pIter->countRemaining -= 1; 10657 10658 if (pCommentLengthOut) { 10659 *pCommentLengthOut = length; 10660 } 10661 10662 return pComment; 10663 } 10664 10665 10666 10667 10668 void drflac_init_cuesheet_track_iterator(drflac_cuesheet_track_iterator* pIter, drflac_uint32 trackCount, const void* pTrackData) 10669 { 10670 if (pIter == NULL) { 10671 return; 10672 } 10673 10674 pIter->countRemaining = trackCount; 10675 pIter->pRunningData = (const char*)pTrackData; 10676 } 10677 10678 drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterator* pIter, drflac_cuesheet_track* pCuesheetTrack) 10679 { 10680 drflac_cuesheet_track cuesheetTrack; 10681 const char* pRunningData; 10682 drflac_uint64 offsetHi; 10683 drflac_uint64 offsetLo; 10684 10685 if (pIter == NULL || pIter->countRemaining == 0 || pIter->pRunningData == NULL) { 10686 return DRFLAC_FALSE; 10687 } 10688 10689 pRunningData = pIter->pRunningData; 10690 10691 offsetHi = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; 10692 offsetLo = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; 10693 cuesheetTrack.offset = offsetLo | (offsetHi << 32); 10694 cuesheetTrack.trackNumber = pRunningData[0]; pRunningData += 1; 10695 DRFLAC_COPY_MEMORY(cuesheetTrack.ISRC, pRunningData, sizeof(cuesheetTrack.ISRC)); pRunningData += 12; 10696 cuesheetTrack.isAudio = (pRunningData[0] & 0x80) != 0; 10697 cuesheetTrack.preEmphasis = (pRunningData[0] & 0x40) != 0; pRunningData += 14; 10698 cuesheetTrack.indexCount = pRunningData[0]; pRunningData += 1; 10699 
cuesheetTrack.pIndexPoints = (const drflac_cuesheet_track_index*)pRunningData; pRunningData += cuesheetTrack.indexCount * sizeof(drflac_cuesheet_track_index); 10700 10701 pIter->pRunningData = pRunningData; 10702 pIter->countRemaining -= 1; 10703 10704 if (pCuesheetTrack) { 10705 *pCuesheetTrack = cuesheetTrack; 10706 } 10707 10708 return DRFLAC_TRUE; 10709 } 10710 10711 #if defined(__GNUC__) 10712 #pragma GCC diagnostic pop 10713 #endif 10714 #endif /* DR_FLAC_IMPLEMENTATION */ 10715 10716 10717 /* 10718 REVISION HISTORY 10719 ================ 10720 v0.12.2 - 2019-10-07 10721 - Internal code clean up. 10722 10723 v0.12.1 - 2019-09-29 10724 - Fix some Clang Static Analyzer warnings. 10725 - Fix an unused variable warning. 10726 10727 v0.12.0 - 2019-09-23 10728 - API CHANGE: Add support for user defined memory allocation routines. This system allows the program to specify their own memory allocation 10729 routines with a user data pointer for client-specific contextual data. This adds an extra parameter to the end of the following APIs: 10730 - drflac_open() 10731 - drflac_open_relaxed() 10732 - drflac_open_with_metadata() 10733 - drflac_open_with_metadata_relaxed() 10734 - drflac_open_file() 10735 - drflac_open_file_with_metadata() 10736 - drflac_open_memory() 10737 - drflac_open_memory_with_metadata() 10738 - drflac_open_and_read_pcm_frames_s32() 10739 - drflac_open_and_read_pcm_frames_s16() 10740 - drflac_open_and_read_pcm_frames_f32() 10741 - drflac_open_file_and_read_pcm_frames_s32() 10742 - drflac_open_file_and_read_pcm_frames_s16() 10743 - drflac_open_file_and_read_pcm_frames_f32() 10744 - drflac_open_memory_and_read_pcm_frames_s32() 10745 - drflac_open_memory_and_read_pcm_frames_s16() 10746 - drflac_open_memory_and_read_pcm_frames_f32() 10747 Set this extra parameter to NULL to use defaults which is the same as the previous behaviour. Setting this NULL will use 10748 DRFLAC_MALLOC, DRFLAC_REALLOC and DRFLAC_FREE. 
10749 - Remove deprecated APIs: 10750 - drflac_read_s32() 10751 - drflac_read_s16() 10752 - drflac_read_f32() 10753 - drflac_seek_to_sample() 10754 - drflac_open_and_decode_s32() 10755 - drflac_open_and_decode_s16() 10756 - drflac_open_and_decode_f32() 10757 - drflac_open_and_decode_file_s32() 10758 - drflac_open_and_decode_file_s16() 10759 - drflac_open_and_decode_file_f32() 10760 - drflac_open_and_decode_memory_s32() 10761 - drflac_open_and_decode_memory_s16() 10762 - drflac_open_and_decode_memory_f32() 10763 - Remove drflac.totalSampleCount which is now replaced with drflac.totalPCMFrameCount. You can emulate drflac.totalSampleCount 10764 by doing pFlac->totalPCMFrameCount*pFlac->channels. 10765 - Rename drflac.currentFrame to drflac.currentFLACFrame to remove ambiguity with PCM frames. 10766 - Fix errors when seeking to the end of a stream. 10767 - Optimizations to seeking. 10768 - SSE improvements and optimizations. 10769 - ARM NEON optimizations. 10770 - Optimizations to drflac_read_pcm_frames_s16(). 10771 - Optimizations to drflac_read_pcm_frames_s32(). 10772 10773 v0.11.10 - 2019-06-26 10774 - Fix a compiler error. 10775 10776 v0.11.9 - 2019-06-16 10777 - Silence some ThreadSanitizer warnings. 10778 10779 v0.11.8 - 2019-05-21 10780 - Fix warnings. 10781 10782 v0.11.7 - 2019-05-06 10783 - C89 fixes. 10784 10785 v0.11.6 - 2019-05-05 10786 - Add support for C89. 10787 - Fix a compiler warning when CRC is disabled. 10788 - Change license to choice of public domain or MIT-0. 10789 10790 v0.11.5 - 2019-04-19 10791 - Fix a compiler error with GCC. 10792 10793 v0.11.4 - 2019-04-17 10794 - Fix some warnings with GCC when compiling with -std=c99. 10795 10796 v0.11.3 - 2019-04-07 10797 - Silence warnings with GCC. 10798 10799 v0.11.2 - 2019-03-10 10800 - Fix a warning. 10801 10802 v0.11.1 - 2019-02-17 10803 - Fix a potential bug with seeking. 
10804 10805 v0.11.0 - 2018-12-16 10806 - API CHANGE: Deprecated drflac_read_s32(), drflac_read_s16() and drflac_read_f32() and replaced them with 10807 drflac_read_pcm_frames_s32(), drflac_read_pcm_frames_s16() and drflac_read_pcm_frames_f32(). The new APIs take 10808 and return PCM frame counts instead of sample counts. To upgrade you will need to change the input count by 10809 dividing it by the channel count, and then do the same with the return value. 10810 - API CHANGE: Deprecated drflac_seek_to_sample() and replaced with drflac_seek_to_pcm_frame(). Same rules as 10811 the changes to drflac_read_*() apply. 10812 - API CHANGE: Deprecated drflac_open_and_decode_*() and replaced with drflac_open_*_and_read_*(). Same rules as 10813 the changes to drflac_read_*() apply. 10814 - Optimizations. 10815 10816 v0.10.0 - 2018-09-11 10817 - Remove the DR_FLAC_NO_WIN32_IO option and the Win32 file IO functionality. If you need to use Win32 file IO you 10818 need to do it yourself via the callback API. 10819 - Fix the clang build. 10820 - Fix undefined behavior. 10821 - Fix errors with CUESHEET metadata blocks. 10822 - Add an API for iterating over each cuesheet track in the CUESHEET metadata block. This works the same way as the 10823 Vorbis comment API. 10824 - Other miscellaneous bug fixes, mostly relating to invalid FLAC streams. 10825 - Minor optimizations. 10826 10827 v0.9.11 - 2018-08-29 10828 - Fix a bug with sample reconstruction. 10829 10830 v0.9.10 - 2018-08-07 10831 - Improve 64-bit detection. 10832 10833 v0.9.9 - 2018-08-05 10834 - Fix C++ build on older versions of GCC. 10835 10836 v0.9.8 - 2018-07-24 10837 - Fix compilation errors. 10838 10839 v0.9.7 - 2018-07-05 10840 - Fix a warning. 10841 10842 v0.9.6 - 2018-06-29 10843 - Fix some typos. 10844 10845 v0.9.5 - 2018-06-23 10846 - Fix some warnings. 10847 10848 v0.9.4 - 2018-06-14 10849 - Optimizations to seeking. 10850 - Clean up. 10851 10852 v0.9.3 - 2018-05-22 10853 - Bug fix.
10854 10855 v0.9.2 - 2018-05-12 10856 - Fix a compilation error due to a missing break statement. 10857 10858 v0.9.1 - 2018-04-29 10859 - Fix compilation error with Clang. 10860 10861 v0.9 - 2018-04-24 10862 - Fix Clang build. 10863 - Start using major.minor.revision versioning. 10864 10865 v0.8g - 2018-04-19 10866 - Fix build on non-x86/x64 architectures. 10867 10868 v0.8f - 2018-02-02 10869 - Stop pretending to support changing rate/channels mid stream. 10870 10871 v0.8e - 2018-02-01 10872 - Fix a crash when the block size of a frame is larger than the maximum block size defined by the FLAC stream. 10873 - Fix a crash when the Rice partition order is invalid. 10874 10875 v0.8d - 2017-09-22 10876 - Add support for decoding streams with ID3 tags. ID3 tags are just skipped. 10877 10878 v0.8c - 2017-09-07 10879 - Fix warning on non-x86/x64 architectures. 10880 10881 v0.8b - 2017-08-19 10882 - Fix build on non-x86/x64 architectures. 10883 10884 v0.8a - 2017-08-13 10885 - A small optimization for the Clang build. 10886 10887 v0.8 - 2017-08-12 10888 - API CHANGE: Rename dr_* types to drflac_*. 10889 - Optimizations. This brings dr_flac back to about the same class of efficiency as the reference implementation. 10890 - Add support for custom implementations of malloc(), realloc(), etc. 10891 - Add CRC checking to Ogg encapsulated streams. 10892 - Fix VC++ 6 build. This is only for the C++ compiler. The C compiler is not currently supported. 10893 - Bug fixes. 10894 10895 v0.7 - 2017-07-23 10896 - Add support for opening a stream without a header block. To do this, use drflac_open_relaxed() / drflac_open_with_metadata_relaxed(). 10897 10898 v0.6 - 2017-07-22 10899 - Add support for recovering from invalid frames. With this change, dr_flac will simply skip over invalid frames as if they 10900 never existed. Frames are checked against their sync code, the CRC-8 of the frame header and the CRC-16 of the whole frame. 10901 10902 v0.5 - 2017-07-16 10903 - Fix typos.
10904 - Change drflac_bool* types to unsigned. 10905 - Add CRC checking. This makes dr_flac slower, but can be disabled with #define DR_FLAC_NO_CRC. 10906 10907 v0.4f - 2017-03-10 10908 - Fix a couple of bugs with the bitstreaming code. 10909 10910 v0.4e - 2017-02-17 10911 - Fix some warnings. 10912 10913 v0.4d - 2016-12-26 10914 - Add support for 32-bit floating-point PCM decoding. 10915 - Use drflac_int* and drflac_uint* sized types to improve compiler support. 10916 - Minor improvements to documentation. 10917 10918 v0.4c - 2016-12-26 10919 - Add support for signed 16-bit integer PCM decoding. 10920 10921 v0.4b - 2016-10-23 10922 - A minor change to drflac_bool8 and drflac_bool32 types. 10923 10924 v0.4a - 2016-10-11 10925 - Rename drBool32 to drflac_bool32 for styling consistency. 10926 10927 v0.4 - 2016-09-29 10928 - API/ABI CHANGE: Use fixed size 32-bit booleans instead of the built-in bool type. 10929 - API CHANGE: Rename drflac_open_and_decode*() to drflac_open_and_decode*_s32(). 10930 - API CHANGE: Swap the order of "channels" and "sampleRate" parameters in drflac_open_and_decode*(). Rationale for this is to 10931 keep it consistent with drflac_audio. 10932 10933 v0.3f - 2016-09-21 10934 - Fix a warning with GCC. 10935 10936 v0.3e - 2016-09-18 10937 - Fixed a bug where GCC 4.3+ was not getting properly identified. 10938 - Fixed a few typos. 10939 - Changed date formats to ISO 8601 (YYYY-MM-DD). 10940 10941 v0.3d - 2016-06-11 10942 - Minor clean up. 10943 10944 v0.3c - 2016-05-28 10945 - Fixed compilation error. 10946 10947 v0.3b - 2016-05-16 10948 - Fixed Linux/GCC build. 10949 - Updated documentation. 10950 10951 v0.3a - 2016-05-15 10952 - Minor fixes to documentation. 10953 10954 v0.3 - 2016-05-11 10955 - Optimizations. Now at about parity with the reference implementation on 32-bit builds. 10956 - Lots of clean up. 10957 10958 v0.2b - 2016-05-10 10959 - Bug fixes. 10960 10961 v0.2a - 2016-05-10 10962 - Made drflac_open_and_decode() more robust. 
10963 - Removed an unused debugging variable 10964 10965 v0.2 - 2016-05-09 10966 - Added support for Ogg encapsulation. 10967 - API CHANGE. Have the onSeek callback take a third argument which specifies whether or not the seek 10968 should be relative to the start or the current position. Also changes the seeking rules such that 10969 seeking offsets will never be negative. 10970 - Have drflac_open_and_decode() fail gracefully if the stream has an unknown total sample count. 10971 10972 v0.1b - 2016-05-07 10973 - Properly close the file handle in drflac_open_file() and family when the decoder fails to initialize. 10974 - Removed a stale comment. 10975 10976 v0.1a - 2016-05-05 10977 - Minor formatting changes. 10978 - Fixed a warning on the GCC build. 10979 10980 v0.1 - 2016-05-03 10981 - Initial versioned release. 10982 */ 10983 10984 /* 10985 This software is available as a choice of the following licenses. Choose 10986 whichever you prefer. 10987 10988 =============================================================================== 10989 ALTERNATIVE 1 - Public Domain (www.unlicense.org) 10990 =============================================================================== 10991 This is free and unencumbered software released into the public domain. 10992 10993 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this 10994 software, either in source code form or as a compiled binary, for any purpose, 10995 commercial or non-commercial, and by any means. 10996 10997 In jurisdictions that recognize copyright laws, the author or authors of this 10998 software dedicate any and all copyright interest in the software to the public 10999 domain. We make this dedication for the benefit of the public at large and to 11000 the detriment of our heirs and successors. We intend this dedication to be an 11001 overt act of relinquishment in perpetuity of all present and future rights to 11002 this software under copyright law. 
11003 11004 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 11005 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 11006 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 11007 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 11008 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 11009 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 11010 11011 For more information, please refer to <http://unlicense.org/> 11012 11013 =============================================================================== 11014 ALTERNATIVE 2 - MIT No Attribution 11015 =============================================================================== 11016 Copyright 2018 David Reid 11017 11018 Permission is hereby granted, free of charge, to any person obtaining a copy of 11019 this software and associated documentation files (the "Software"), to deal in 11020 the Software without restriction, including without limitation the rights to 11021 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11022 of the Software, and to permit persons to whom the Software is furnished to do 11023 so. 11024 11025 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 11026 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 11027 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 11028 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 11029 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 11030 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 11031 SOFTWARE. 11032 */