diff options
Diffstat (limited to 'src/external/dr_flac.h')
| -rw-r--r-- | src/external/dr_flac.h | 576 |
1 files changed, 407 insertions, 169 deletions
diff --git a/src/external/dr_flac.h b/src/external/dr_flac.h index e0b3649a..0c43eed7 100644 --- a/src/external/dr_flac.h +++ b/src/external/dr_flac.h @@ -1,6 +1,6 @@ /* FLAC audio decoder. Choice of public domain or MIT-0. See license statements at the end of this file. -dr_flac - v0.12.31 - 2021-08-16 +dr_flac - v0.12.39 - 2022-09-17 David Reid - [email protected] @@ -210,6 +210,9 @@ Build Options #define DR_FLAC_NO_SIMD Disables SIMD optimizations (SSE on x86/x64 architectures, NEON on ARM architectures). Use this if you are having compatibility issues with your compiler. +#define DR_FLAC_NO_WCHAR + Disables all functions ending with `_w`. Use this if your compiler does not provide wchar.h. Not required if DR_FLAC_NO_STDIO is also defined. + Notes @@ -232,7 +235,7 @@ extern "C" { #define DRFLAC_VERSION_MAJOR 0 #define DRFLAC_VERSION_MINOR 12 -#define DRFLAC_VERSION_REVISION 31 +#define DRFLAC_VERSION_REVISION 39 #define DRFLAC_VERSION_STRING DRFLAC_XSTRINGIFY(DRFLAC_VERSION_MAJOR) "." DRFLAC_XSTRINGIFY(DRFLAC_VERSION_MINOR) "." DRFLAC_XSTRINGIFY(DRFLAC_VERSION_REVISION) #include <stddef.h> /* For size_t. 
*/ @@ -244,7 +247,7 @@ typedef signed short drflac_int16; typedef unsigned short drflac_uint16; typedef signed int drflac_int32; typedef unsigned int drflac_uint32; -#if defined(_MSC_VER) +#if defined(_MSC_VER) && !defined(__clang__) typedef signed __int64 drflac_int64; typedef unsigned __int64 drflac_uint64; #else @@ -261,7 +264,7 @@ typedef unsigned int drflac_uint32; #pragma GCC diagnostic pop #endif #endif -#if defined(__LP64__) || defined(_WIN64) || (defined(__x86_64__) && !defined(__ILP32__)) || defined(_M_X64) || defined(__ia64) || defined (_M_IA64) || defined(__aarch64__) || defined(_M_ARM64) || defined(__powerpc64__) +#if defined(__LP64__) || defined(_WIN64) || (defined(__x86_64__) && !defined(__ILP32__)) || defined(_M_X64) || defined(__ia64) || defined(_M_IA64) || defined(__aarch64__) || defined(_M_ARM64) || defined(__powerpc64__) typedef drflac_uint64 drflac_uintptr; #else typedef drflac_uint32 drflac_uintptr; @@ -383,15 +386,13 @@ typedef enum drflac_seek_origin_current } drflac_seek_origin; -/* Packing is important on this structure because we map this directly to the raw data within the SEEKTABLE metadata block. */ -#pragma pack(2) +/* The order of members in this structure is important because we map this directly to the raw data within the SEEKTABLE metadata block. */ typedef struct { drflac_uint64 firstPCMFrame; drflac_uint64 flacFrameOffset; /* The offset from the first byte of the header of the first frame. */ drflac_uint16 pcmFrameCount; } drflac_seekpoint; -#pragma pack() typedef struct { @@ -1280,15 +1281,13 @@ typedef struct const char* pRunningData; } drflac_cuesheet_track_iterator; -/* Packing is important on this structure because we map this directly to the raw data within the CUESHEET metadata block. */ -#pragma pack(4) +/* The order of members here is important because we map this directly to the raw data within the CUESHEET metadata block. 
*/ typedef struct { drflac_uint64 offset; drflac_uint8 index; drflac_uint8 reserved[3]; } drflac_cuesheet_track_index; -#pragma pack() typedef struct { @@ -1363,9 +1362,15 @@ DRFLAC_API drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterat I am using "__inline__" only when we're compiling in strict ANSI mode. */ #if defined(__STRICT_ANSI__) - #define DRFLAC_INLINE __inline__ __attribute__((always_inline)) + #define DRFLAC_GNUC_INLINE_HINT __inline__ #else - #define DRFLAC_INLINE inline __attribute__((always_inline)) + #define DRFLAC_GNUC_INLINE_HINT inline + #endif + + #if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 2)) || defined(__clang__) + #define DRFLAC_INLINE DRFLAC_GNUC_INLINE_HINT __attribute__((always_inline)) + #else + #define DRFLAC_INLINE DRFLAC_GNUC_INLINE_HINT #endif #elif defined(__WATCOMC__) #define DRFLAC_INLINE __inline @@ -1378,7 +1383,7 @@ DRFLAC_API drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterat #define DRFLAC_X64 #elif defined(__i386) || defined(_M_IX86) #define DRFLAC_X86 -#elif defined(__arm__) || defined(_M_ARM) || defined(_M_ARM64) +#elif defined(__arm__) || defined(_M_ARM) || defined(__arm64) || defined(__arm64__) || defined(__aarch64__) || defined(_M_ARM64) #define DRFLAC_ARM #endif @@ -1431,16 +1436,6 @@ Unfortuantely dr_flac depends on this for a few things so we're just going to di #if defined(DRFLAC_ARM) #if !defined(DRFLAC_NO_NEON) && (defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM64)) #define DRFLAC_SUPPORT_NEON - #endif - - /* Fall back to looking for the #include file. 
*/ - #if !defined(__GNUC__) && !defined(__clang__) && defined(__has_include) - #if !defined(DRFLAC_SUPPORT_NEON) && !defined(DRFLAC_NO_NEON) && __has_include(<arm_neon.h>) - #define DRFLAC_SUPPORT_NEON - #endif - #endif - - #if defined(DRFLAC_SUPPORT_NEON) #include <arm_neon.h> #endif #endif @@ -1519,9 +1514,7 @@ static DRFLAC_INLINE drflac_bool32 drflac_has_sse41(void) { #if defined(DRFLAC_SUPPORT_SSE41) #if (defined(DRFLAC_X64) || defined(DRFLAC_X86)) && !defined(DRFLAC_NO_SSE41) - #if defined(DRFLAC_X64) - return DRFLAC_TRUE; /* 64-bit targets always support SSE4.1. */ - #elif (defined(_M_IX86_FP) && _M_IX86_FP == 2) || defined(__SSE4_1__) + #if defined(__SSE4_1__) || defined(__AVX__) return DRFLAC_TRUE; /* If the compiler is allowed to freely generate SSE41 code we can assume support. */ #else #if defined(DRFLAC_NO_CPUID) @@ -1586,18 +1579,21 @@ static DRFLAC_INLINE drflac_bool32 drflac_has_sse41(void) extern __inline drflac_uint64 _watcom_bswap64(drflac_uint64); #pragma aux _watcom_bswap16 = \ "xchg al, ah" \ - parm [ax] \ - modify [ax]; + parm [ax] \ + value [ax] \ + modify nomemory; #pragma aux _watcom_bswap32 = \ - "bswap eax" \ - parm [eax] \ - modify [eax]; + "bswap eax" \ + parm [eax] \ + value [eax] \ + modify nomemory; #pragma aux _watcom_bswap64 = \ "bswap eax" \ "bswap edx" \ "xchg eax,edx" \ parm [eax edx] \ - modify [eax edx]; + value [eax edx] \ + modify nomemory; #endif @@ -1698,6 +1694,10 @@ typedef drflac_int32 drflac_result; #define DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE 9 #define DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE 10 +#define DRFLAC_SEEKPOINT_SIZE_IN_BYTES 18 +#define DRFLAC_CUESHEET_TRACK_SIZE_IN_BYTES 36 +#define DRFLAC_CUESHEET_TRACK_INDEX_SIZE_IN_BYTES 12 + #define drflac_align(x, a) ((((x) + (a) - 1) / (a)) * (a)) @@ -1909,6 +1909,12 @@ static DRFLAC_INLINE drflac_uint32 drflac__be2host_32(drflac_uint32 n) return n; } +static DRFLAC_INLINE drflac_uint32 drflac__be2host_32_ptr_unaligned(const void* pData) +{ + const drflac_uint8* pNum = 
(drflac_uint8*)pData; + return *(pNum) << 24 | *(pNum+1) << 16 | *(pNum+2) << 8 | *(pNum+3); +} + static DRFLAC_INLINE drflac_uint64 drflac__be2host_64(drflac_uint64 n) { if (drflac__is_little_endian()) { @@ -1928,6 +1934,12 @@ static DRFLAC_INLINE drflac_uint32 drflac__le2host_32(drflac_uint32 n) return n; } +static DRFLAC_INLINE drflac_uint32 drflac__le2host_32_ptr_unaligned(const void* pData) +{ + const drflac_uint8* pNum = (drflac_uint8*)pData; + return *pNum | *(pNum+1) << 8 | *(pNum+2) << 16 | *(pNum+3) << 24; +} + static DRFLAC_INLINE drflac_uint32 drflac__unsynchsafe_32(drflac_uint32 n) { @@ -2429,6 +2441,10 @@ static DRFLAC_INLINE drflac_bool32 drflac__read_uint32(drflac_bs* bs, unsigned i if (!drflac__reload_cache(bs)) { return DRFLAC_FALSE; } + if (bitCountLo > DRFLAC_CACHE_L1_BITS_REMAINING(bs)) { + /* This happens when we get to end of stream */ + return DRFLAC_FALSE; + } *pResultOut = (resultHi << bitCountLo) | (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCountLo); bs->consumedBits += bitCountLo; @@ -2684,6 +2700,10 @@ static drflac_bool32 drflac__find_and_seek_to_next_sync_code(drflac_bs* bs) #if defined(__WATCOMC__) && defined(__386__) #define DRFLAC_IMPLEMENT_CLZ_WATCOM #endif +#ifdef __MRC__ +#include <intrinsics.h> +#define DRFLAC_IMPLEMENT_CLZ_MRC +#endif static DRFLAC_INLINE drflac_uint32 drflac__clz_software(drflac_cache_t x) { @@ -2724,6 +2744,8 @@ static DRFLAC_INLINE drflac_bool32 drflac__is_lzcnt_supported(void) /* Fast compile time check for ARM. */ #if defined(DRFLAC_HAS_LZCNT_INTRINSIC) && defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 5) return DRFLAC_TRUE; +#elif defined(__MRC__) + return DRFLAC_TRUE; #else /* If the compiler itself does not support the intrinsic then we'll need to return false. 
*/ #ifdef DRFLAC_HAS_LZCNT_INTRINSIC @@ -2833,6 +2855,15 @@ static DRFLAC_INLINE drflac_uint32 drflac__clz_msvc(drflac_cache_t x) #ifdef DRFLAC_IMPLEMENT_CLZ_WATCOM static __inline drflac_uint32 drflac__clz_watcom (drflac_uint32); +#ifdef DRFLAC_IMPLEMENT_CLZ_WATCOM_LZCNT +/* Use the LZCNT instruction (only available on some processors since the 2010s). */ +#pragma aux drflac__clz_watcom_lzcnt = \ + "db 0F3h, 0Fh, 0BDh, 0C0h" /* lzcnt eax, eax */ \ + parm [eax] \ + value [eax] \ + modify nomemory; +#else +/* Use the 386+-compatible implementation. */ #pragma aux drflac__clz_watcom = \ "bsr eax, eax" \ "xor eax, 31" \ @@ -2840,6 +2871,7 @@ static __inline drflac_uint32 drflac__clz_watcom (drflac_uint32); value [eax] \ modify exact [eax] nomemory; #endif +#endif static DRFLAC_INLINE drflac_uint32 drflac__clz(drflac_cache_t x) { @@ -2851,8 +2883,12 @@ static DRFLAC_INLINE drflac_uint32 drflac__clz(drflac_cache_t x) { #ifdef DRFLAC_IMPLEMENT_CLZ_MSVC return drflac__clz_msvc(x); +#elif defined(DRFLAC_IMPLEMENT_CLZ_WATCOM_LZCNT) + return drflac__clz_watcom_lzcnt(x); #elif defined(DRFLAC_IMPLEMENT_CLZ_WATCOM) return (x == 0) ? 
sizeof(x)*8 : drflac__clz_watcom(x); +#elif defined(__MRC__) + return __cntlzw(x); #else return drflac__clz_software(x); #endif @@ -2872,9 +2908,24 @@ static DRFLAC_INLINE drflac_bool32 drflac__seek_past_next_set_bit(drflac_bs* bs, } } + if (bs->cache == 1) { + /* Not catching this would lead to undefined behaviour: a shift of a 32-bit number by 32 or more is undefined */ + *pOffsetOut = zeroCounter + (drflac_uint32)DRFLAC_CACHE_L1_BITS_REMAINING(bs) - 1; + if (!drflac__reload_cache(bs)) { + return DRFLAC_FALSE; + } + + return DRFLAC_TRUE; + } + setBitOffsetPlus1 = drflac__clz(bs->cache); setBitOffsetPlus1 += 1; + if (setBitOffsetPlus1 > DRFLAC_CACHE_L1_BITS_REMAINING(bs)) { + /* This happens when we get to end of stream */ + return DRFLAC_FALSE; + } + bs->consumedBits += setBitOffsetPlus1; bs->cache <<= setBitOffsetPlus1; @@ -2989,6 +3040,25 @@ static drflac_result drflac__read_utf8_coded_number(drflac_bs* bs, drflac_uint64 } +static DRFLAC_INLINE drflac_uint32 drflac__ilog2_u32(drflac_uint32 x) +{ +#if 1 /* Needs optimizing. */ + drflac_uint32 result = 0; + while (x > 0) { + result += 1; + x >>= 1; + } + + return result; +#endif +} + +static DRFLAC_INLINE drflac_bool32 drflac__use_64_bit_prediction(drflac_uint32 bitsPerSample, drflac_uint32 order, drflac_uint32 precision) +{ + /* https://web.archive.org/web/20220205005724/https://github.com/ietf-wg-cellar/flac-specification/blob/37a49aa48ba4ba12e8757badfc59c0df35435fec/rfc_backmatter.md */ + return bitsPerSample + precision + drflac__ilog2_u32(order) > 32; +} + /* The next two functions are responsible for calculating the prediction. @@ -2996,6 +3066,9 @@ The next two functions are responsible for calculating the prediction. When the bits per sample is >16 we need to use 64-bit integer arithmetic because otherwise we'll run out of precision. It's safe to assume this will be slower on 32-bit platforms so we use a more optimal solution when the bits per sample is <=16. 
*/ +#if defined(__clang__) +__attribute__((no_sanitize("signed-integer-overflow"))) +#endif static DRFLAC_INLINE drflac_int32 drflac__calculate_prediction_32(drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples) { drflac_int32 prediction = 0; @@ -3231,7 +3304,7 @@ static DRFLAC_INLINE drflac_int32 drflac__calculate_prediction_64(drflac_uint32 Reference implementation for reading and decoding samples with residual. This is intentionally left unoptimized for the sake of readability and should only be used as a reference. */ -static drflac_bool32 drflac__decode_samples_with_residual__rice__reference(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) +static drflac_bool32 drflac__decode_samples_with_residual__rice__reference(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut) { drflac_uint32 i; @@ -3270,10 +3343,10 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__reference(drfla } - if (bitsPerSample+shift >= 32) { - pSamplesOut[i] = decodedRice + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + i); + if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) { + pSamplesOut[i] = decodedRice + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + i); } else { - pSamplesOut[i] = decodedRice + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + i); + pSamplesOut[i] = decodedRice + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + i); } } @@ -3370,6 +3443,10 @@ static DRFLAC_INLINE drflac_bool32 drflac__read_rice_parts(drflac_bs* bs, drflac if (!drflac__reload_cache(bs)) { return 
DRFLAC_FALSE; } + if (bitCountLo > DRFLAC_CACHE_L1_BITS_REMAINING(bs)) { + /* This happens when we get to end of stream */ + return DRFLAC_FALSE; + } } riceParamPart = (drflac_uint32)(resultHi | DRFLAC_CACHE_L1_SELECT_AND_SHIFT_SAFE(bs, bitCountLo)); @@ -3450,6 +3527,10 @@ static DRFLAC_INLINE drflac_bool32 drflac__read_rice_parts_x1(drflac_bs* bs, drf if (!drflac__reload_cache(bs)) { return DRFLAC_FALSE; } + if (riceParamPartLoBitCount > DRFLAC_CACHE_L1_BITS_REMAINING(bs)) { + /* This happens when we get to end of stream */ + return DRFLAC_FALSE; + } bs_cache = bs->cache; bs_consumedBits = bs->consumedBits + riceParamPartLoBitCount; @@ -3560,6 +3641,11 @@ static DRFLAC_INLINE drflac_bool32 drflac__seek_rice_parts(drflac_bs* bs, drflac return DRFLAC_FALSE; } + if (riceParamPartLoBitCount > DRFLAC_CACHE_L1_BITS_REMAINING(bs)) { + /* This happens when we get to end of stream */ + return DRFLAC_FALSE; + } + bs_cache = bs->cache; bs_consumedBits = bs->consumedBits + riceParamPartLoBitCount; } @@ -3646,7 +3732,7 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar_zeroorde return DRFLAC_TRUE; } -static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) +static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut) { drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF}; drflac_uint32 zeroCountPart0 = 0; @@ -3664,14 +3750,14 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_b DRFLAC_ASSERT(bs != NULL); DRFLAC_ASSERT(pSamplesOut != NULL); - if (order == 0) { - return 
drflac__decode_samples_with_residual__rice__scalar_zeroorder(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut); + if (lpcOrder == 0) { + return drflac__decode_samples_with_residual__rice__scalar_zeroorder(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, coefficients, pSamplesOut); } riceParamMask = (drflac_uint32)~((~0UL) << riceParam); pSamplesOutEnd = pSamplesOut + (count & ~3); - if (bitsPerSample+shift > 32) { + if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) { while (pSamplesOut < pSamplesOutEnd) { /* Rice extraction. It's faster to do this one at a time against local variables than it is to use the x4 version @@ -3699,10 +3785,10 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_b riceParamPart2 = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01]; riceParamPart3 = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01]; - pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 0); - pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 1); - pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 2); - pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 3); + pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + 0); + pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + 1); + pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + 2); + pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + 3); pSamplesOut += 4; } @@ -3730,10 +3816,10 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_b riceParamPart2 
= (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01]; riceParamPart3 = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01]; - pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 0); - pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 1); - pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 2); - pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 3); + pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + 0); + pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + 1); + pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + 2); + pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + 3); pSamplesOut += 4; } @@ -3753,10 +3839,10 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_b /*riceParamPart0 = (riceParamPart0 >> 1) ^ (~(riceParamPart0 & 0x01) + 1);*/ /* Sample reconstruction. 
*/ - if (bitsPerSample+shift > 32) { - pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 0); + if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) { + pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + 0); } else { - pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 0); + pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + 0); } i += 1; @@ -4212,20 +4298,20 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41_64(drflac return DRFLAC_TRUE; } -static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) +static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut) { DRFLAC_ASSERT(bs != NULL); DRFLAC_ASSERT(pSamplesOut != NULL); /* In my testing the order is rarely > 12, so in this case I'm going to simplify the SSE implementation by only handling order <= 12. 
*/ - if (order > 0 && order <= 12) { - if (bitsPerSample+shift > 32) { - return drflac__decode_samples_with_residual__rice__sse41_64(bs, count, riceParam, order, shift, coefficients, pSamplesOut); + if (lpcOrder > 0 && lpcOrder <= 12) { + if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) { + return drflac__decode_samples_with_residual__rice__sse41_64(bs, count, riceParam, lpcOrder, lpcShift, coefficients, pSamplesOut); } else { - return drflac__decode_samples_with_residual__rice__sse41_32(bs, count, riceParam, order, shift, coefficients, pSamplesOut); + return drflac__decode_samples_with_residual__rice__sse41_32(bs, count, riceParam, lpcOrder, lpcShift, coefficients, pSamplesOut); } } else { - return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut); + return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut); } } #endif @@ -4364,7 +4450,7 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__neon_32(drflac_ const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF}; - riceParamMask = ~((~0UL) << riceParam); + riceParamMask = (drflac_uint32)~((~0UL) << riceParam); riceParamMask128 = vdupq_n_u32(riceParamMask); riceParam128 = vdupq_n_s32(riceParam); @@ -4550,10 +4636,13 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__neon_64(drflac_ int32x4_t riceParam128; int64x1_t shift64; uint32x4_t one128; + int64x2_t prediction128 = { 0 }; + uint32x4_t zeroCountPart128; + uint32x4_t riceParamPart128; const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF}; - riceParamMask = ~((~0UL) << riceParam); + riceParamMask = (drflac_uint32)~((~0UL) << riceParam); riceParamMask128 = vdupq_n_u32(riceParamMask); riceParam128 = vdupq_n_s32(riceParam); @@ -4562,7 +4651,7 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__neon_64(drflac_ /* Pre-loading the 
coefficients and prior samples is annoying because we need to ensure we don't try reading more than - what's available in the input buffers. It would be conenient to use a fall-through switch to do this, but this results + what's available in the input buffers. It would be convenient to use a fall-through switch to do this, but this results in strict aliasing warnings with GCC. To work around this I'm just doing something hacky. This feels a bit convoluted so I think there's opportunity for this to be simplified. */ @@ -4630,10 +4719,6 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__neon_64(drflac_ /* For this version we are doing one sample at a time. */ while (pDecodedSamples < pDecodedSamplesEnd) { - int64x2_t prediction128; - uint32x4_t zeroCountPart128; - uint32x4_t riceParamPart128; - if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[0], &riceParamParts[0]) || !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[1], &riceParamParts[1]) || !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[2], &riceParamParts[2]) || @@ -4710,41 +4795,41 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__neon_64(drflac_ return DRFLAC_TRUE; } -static drflac_bool32 drflac__decode_samples_with_residual__rice__neon(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) +static drflac_bool32 drflac__decode_samples_with_residual__rice__neon(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut) { DRFLAC_ASSERT(bs != NULL); DRFLAC_ASSERT(pSamplesOut != NULL); /* In my testing the order is rarely > 12, so in this case I'm going to simplify the NEON implementation by only handling order <= 12. 
*/ - if (order > 0 && order <= 12) { - if (bitsPerSample+shift > 32) { - return drflac__decode_samples_with_residual__rice__neon_64(bs, count, riceParam, order, shift, coefficients, pSamplesOut); + if (lpcOrder > 0 && lpcOrder <= 12) { + if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) { + return drflac__decode_samples_with_residual__rice__neon_64(bs, count, riceParam, lpcOrder, lpcShift, coefficients, pSamplesOut); } else { - return drflac__decode_samples_with_residual__rice__neon_32(bs, count, riceParam, order, shift, coefficients, pSamplesOut); + return drflac__decode_samples_with_residual__rice__neon_32(bs, count, riceParam, lpcOrder, lpcShift, coefficients, pSamplesOut); } } else { - return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut); + return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut); } } #endif -static drflac_bool32 drflac__decode_samples_with_residual__rice(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) +static drflac_bool32 drflac__decode_samples_with_residual__rice(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut) { #if defined(DRFLAC_SUPPORT_SSE41) if (drflac__gIsSSE41Supported) { - return drflac__decode_samples_with_residual__rice__sse41(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut); + return drflac__decode_samples_with_residual__rice__sse41(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut); } else #elif defined(DRFLAC_SUPPORT_NEON) if 
(drflac__gIsNEONSupported) { - return drflac__decode_samples_with_residual__rice__neon(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut); + return drflac__decode_samples_with_residual__rice__neon(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut); } else #endif { /* Scalar fallback. */ #if 0 - return drflac__decode_samples_with_residual__rice__reference(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut); + return drflac__decode_samples_with_residual__rice__reference(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut); #else - return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut); + return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut); #endif } } @@ -4765,7 +4850,10 @@ static drflac_bool32 drflac__read_and_seek_residual__rice(drflac_bs* bs, drflac_ return DRFLAC_TRUE; } -static drflac_bool32 drflac__decode_samples_with_residual__unencoded(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 unencodedBitsPerSample, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) +#if defined(__clang__) +__attribute__((no_sanitize("signed-integer-overflow"))) +#endif +static drflac_bool32 drflac__decode_samples_with_residual__unencoded(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 unencodedBitsPerSample, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut) { drflac_uint32 i; @@ -4782,10 +4870,10 @@ static drflac_bool32 drflac__decode_samples_with_residual__unencoded(drflac_bs* pSamplesOut[i] = 0; } - if (bitsPerSample >= 24) { - pSamplesOut[i] += 
drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + i); + if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) { + pSamplesOut[i] += drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + i); } else { - pSamplesOut[i] += drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + i); + pSamplesOut[i] += drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + i); } } @@ -4798,7 +4886,7 @@ Reads and decodes the residual for the sub-frame the decoder is currently sittin when the decoder is sitting at the very start of the RESIDUAL block. The first <order> residuals will be ignored. The <blockSize> and <order> parameters are used to determine how many residual values need to be decoded. */ -static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 blockSize, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples) +static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 blockSize, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pDecodedSamples) { drflac_uint8 residualMethod; drflac_uint8 partitionOrder; @@ -4818,7 +4906,7 @@ static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_ } /* Ignore the first <order> values. */ - pDecodedSamples += order; + pDecodedSamples += lpcOrder; if (!drflac__read_uint8(bs, 4, &partitionOrder)) { return DRFLAC_FALSE; @@ -4833,11 +4921,11 @@ static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_ } /* Validation check. 
*/ - if ((blockSize / (1 << partitionOrder)) < order) { + if ((blockSize / (1 << partitionOrder)) < lpcOrder) { return DRFLAC_FALSE; } - samplesInPartition = (blockSize / (1 << partitionOrder)) - order; + samplesInPartition = (blockSize / (1 << partitionOrder)) - lpcOrder; partitionsRemaining = (1 << partitionOrder); for (;;) { drflac_uint8 riceParam = 0; @@ -4858,7 +4946,7 @@ static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_ } if (riceParam != 0xFF) { - if (!drflac__decode_samples_with_residual__rice(bs, bitsPerSample, samplesInPartition, riceParam, order, shift, coefficients, pDecodedSamples)) { + if (!drflac__decode_samples_with_residual__rice(bs, bitsPerSample, samplesInPartition, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pDecodedSamples)) { return DRFLAC_FALSE; } } else { @@ -4867,7 +4955,7 @@ static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_ return DRFLAC_FALSE; } - if (!drflac__decode_samples_with_residual__unencoded(bs, bitsPerSample, samplesInPartition, unencodedBitsPerSample, order, shift, coefficients, pDecodedSamples)) { + if (!drflac__decode_samples_with_residual__unencoded(bs, bitsPerSample, samplesInPartition, unencodedBitsPerSample, lpcOrder, lpcShift, lpcPrecision, coefficients, pDecodedSamples)) { return DRFLAC_FALSE; } } @@ -5036,7 +5124,7 @@ static drflac_bool32 drflac__decode_samples__fixed(drflac_bs* bs, drflac_uint32 pDecodedSamples[i] = sample; } - if (!drflac__decode_samples_with_residual(bs, subframeBitsPerSample, blockSize, lpcOrder, 0, lpcCoefficientsTable[lpcOrder], pDecodedSamples)) { + if (!drflac__decode_samples_with_residual(bs, subframeBitsPerSample, blockSize, lpcOrder, 0, 4, lpcCoefficientsTable[lpcOrder], pDecodedSamples)) { return DRFLAC_FALSE; } @@ -5091,7 +5179,7 @@ static drflac_bool32 drflac__decode_samples__lpc(drflac_bs* bs, drflac_uint32 bl } } - if (!drflac__decode_samples_with_residual(bs, bitsPerSample, blockSize, lpcOrder, lpcShift, 
coefficients, pDecodedSamples)) { + if (!drflac__decode_samples_with_residual(bs, bitsPerSample, blockSize, lpcOrder, lpcShift, lpcPrecision, coefficients, pDecodedSamples)) { return DRFLAC_FALSE; } @@ -5219,6 +5307,9 @@ static drflac_bool32 drflac__read_next_flac_frame_header(drflac_bs* bs, drflac_u return DRFLAC_FALSE; } crc8 = drflac_crc8(crc8, header->blockSizeInPCMFrames, 16); + if (header->blockSizeInPCMFrames == 0xFFFF) { + return DRFLAC_FALSE; /* Frame is too big. This is the size of the frame minus 1. The STREAMINFO block defines the max block size which is 16-bits. Adding one will make it 17 bits and therefore too big. */ + } header->blockSizeInPCMFrames += 1; } else { DRFLAC_ASSERT(blockSize >= 8); @@ -5257,6 +5348,11 @@ static drflac_bool32 drflac__read_next_flac_frame_header(drflac_bs* bs, drflac_u header->bitsPerSample = streaminfoBitsPerSample; } + if (header->bitsPerSample != streaminfoBitsPerSample) { + /* If this subframe has a different bitsPerSample than streaminfo or the first frame, reject it */ + return DRFLAC_FALSE; + } + if (!drflac__read_uint8(bs, 8, &header->crc8)) { return DRFLAC_FALSE; } @@ -5343,6 +5439,11 @@ static drflac_bool32 drflac__decode_subframe(drflac_bs* bs, drflac_frame* frame, subframeBitsPerSample += 1; } + if (subframeBitsPerSample > 32) { + /* libFLAC and ffmpeg reject 33-bit subframes as well */ + return DRFLAC_FALSE; + } + /* Need to handle wasted bits per sample. */ if (pSubframe->wastedBitsPerSample >= subframeBitsPerSample) { return DRFLAC_FALSE; @@ -6013,6 +6114,11 @@ static drflac_bool32 drflac__seek_to_pcm_frame__seek_table(drflac* pFlac, drflac return DRFLAC_FALSE; } + /* Do not use the seektable if pcmFrameIndex is not covered by it. 
*/ + if (pFlac->pSeekpoints[0].firstPCMFrame > pcmFrameIndex) { + return DRFLAC_FALSE; + } + for (iSeekpoint = 0; iSeekpoint < pFlac->seekpointCount; ++iSeekpoint) { if (pFlac->pSeekpoints[iSeekpoint].firstPCMFrame >= pcmFrameIndex) { break; @@ -6360,7 +6466,7 @@ static void drflac__free_from_callbacks(void* p, const drflac_allocation_callbac } -static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_uint64* pFirstFramePos, drflac_uint64* pSeektablePos, drflac_uint32* pSeektableSize, drflac_allocation_callbacks* pAllocationCallbacks) +static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_uint64* pFirstFramePos, drflac_uint64* pSeektablePos, drflac_uint32* pSeekpointCount, drflac_allocation_callbacks* pAllocationCallbacks) { /* We want to keep track of the byte position in the stream of the seektable. At the time of calling this function we know that @@ -6420,32 +6526,37 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d seektableSize = blockSize; if (onMeta) { + drflac_uint32 seekpointCount; drflac_uint32 iSeekpoint; void* pRawData; - pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks); + seekpointCount = blockSize/DRFLAC_SEEKPOINT_SIZE_IN_BYTES; + + pRawData = drflac__malloc_from_callbacks(seekpointCount * sizeof(drflac_seekpoint), pAllocationCallbacks); if (pRawData == NULL) { return DRFLAC_FALSE; } - if (onRead(pUserData, pRawData, blockSize) != blockSize) { - drflac__free_from_callbacks(pRawData, pAllocationCallbacks); - return DRFLAC_FALSE; - } + /* We need to read seekpoint by seekpoint and do some processing. 
*/ + for (iSeekpoint = 0; iSeekpoint < seekpointCount; ++iSeekpoint) { + drflac_seekpoint* pSeekpoint = (drflac_seekpoint*)pRawData + iSeekpoint; - metadata.pRawData = pRawData; - metadata.rawDataSize = blockSize; - metadata.data.seektable.seekpointCount = blockSize/sizeof(drflac_seekpoint); - metadata.data.seektable.pSeekpoints = (const drflac_seekpoint*)pRawData; + if (onRead(pUserData, pSeekpoint, DRFLAC_SEEKPOINT_SIZE_IN_BYTES) != DRFLAC_SEEKPOINT_SIZE_IN_BYTES) { + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + return DRFLAC_FALSE; + } - /* Endian swap. */ - for (iSeekpoint = 0; iSeekpoint < metadata.data.seektable.seekpointCount; ++iSeekpoint) { - drflac_seekpoint* pSeekpoint = (drflac_seekpoint*)pRawData + iSeekpoint; + /* Endian swap. */ pSeekpoint->firstPCMFrame = drflac__be2host_64(pSeekpoint->firstPCMFrame); pSeekpoint->flacFrameOffset = drflac__be2host_64(pSeekpoint->flacFrameOffset); pSeekpoint->pcmFrameCount = drflac__be2host_16(pSeekpoint->pcmFrameCount); } + metadata.pRawData = pRawData; + metadata.rawDataSize = blockSize; + metadata.data.seektable.seekpointCount = seekpointCount; + metadata.data.seektable.pSeekpoints = (const drflac_seekpoint*)pRawData; + onMeta(pUserDataMD, &metadata); drflac__free_from_callbacks(pRawData, pAllocationCallbacks); @@ -6480,7 +6591,7 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d pRunningData = (const char*)pRawData; pRunningDataEnd = (const char*)pRawData + blockSize; - metadata.data.vorbis_comment.vendorLength = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; + metadata.data.vorbis_comment.vendorLength = drflac__le2host_32_ptr_unaligned(pRunningData); pRunningData += 4; /* Need space for the rest of the block */ if ((pRunningDataEnd - pRunningData) - 4 < (drflac_int64)metadata.data.vorbis_comment.vendorLength) { /* <-- Note the order of operations to avoid overflow to a valid value */ @@ -6488,7 +6599,7 @@ static drflac_bool32 
drflac__read_and_decode_metadata(drflac_read_proc onRead, d return DRFLAC_FALSE; } metadata.data.vorbis_comment.vendor = pRunningData; pRunningData += metadata.data.vorbis_comment.vendorLength; - metadata.data.vorbis_comment.commentCount = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; + metadata.data.vorbis_comment.commentCount = drflac__le2host_32_ptr_unaligned(pRunningData); pRunningData += 4; /* Need space for 'commentCount' comments after the block, which at minimum is a drflac_uint32 per comment */ if ((pRunningDataEnd - pRunningData) / sizeof(drflac_uint32) < metadata.data.vorbis_comment.commentCount) { /* <-- Note the order of operations to avoid overflow to a valid value */ @@ -6506,7 +6617,7 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d return DRFLAC_FALSE; } - commentLength = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; + commentLength = drflac__le2host_32_ptr_unaligned(pRunningData); pRunningData += 4; if (pRunningDataEnd - pRunningData < (drflac_int64)commentLength) { /* <-- Note the order of operations to avoid overflow to a valid value */ drflac__free_from_callbacks(pRawData, pAllocationCallbacks); return DRFLAC_FALSE; @@ -6530,9 +6641,15 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d void* pRawData; const char* pRunningData; const char* pRunningDataEnd; + size_t bufferSize; drflac_uint8 iTrack; drflac_uint8 iIndex; + void* pTrackData; + /* + This needs to be loaded in two passes. The first pass is used to calculate the size of the memory allocation + we need for storing the necessary data. The second pass will fill that buffer with usable data. 
+ */ pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks); if (pRawData == NULL) { return DRFLAC_FALSE; @@ -6553,38 +6670,91 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d metadata.data.cuesheet.leadInSampleCount = drflac__be2host_64(*(const drflac_uint64*)pRunningData); pRunningData += 8; metadata.data.cuesheet.isCD = (pRunningData[0] & 0x80) != 0; pRunningData += 259; metadata.data.cuesheet.trackCount = pRunningData[0]; pRunningData += 1; - metadata.data.cuesheet.pTrackData = pRunningData; + metadata.data.cuesheet.pTrackData = NULL; /* Will be filled later. */ - /* Check that the cuesheet tracks are valid before passing it to the callback */ - for (iTrack = 0; iTrack < metadata.data.cuesheet.trackCount; ++iTrack) { - drflac_uint8 indexCount; - drflac_uint32 indexPointSize; + /* Pass 1: Calculate the size of the buffer for the track data. */ + { + const char* pRunningDataSaved = pRunningData; /* Will be restored at the end in preparation for the second pass. */ - if (pRunningDataEnd - pRunningData < 36) { - drflac__free_from_callbacks(pRawData, pAllocationCallbacks); - return DRFLAC_FALSE; + bufferSize = metadata.data.cuesheet.trackCount * DRFLAC_CUESHEET_TRACK_SIZE_IN_BYTES; + + for (iTrack = 0; iTrack < metadata.data.cuesheet.trackCount; ++iTrack) { + drflac_uint8 indexCount; + drflac_uint32 indexPointSize; + + if (pRunningDataEnd - pRunningData < DRFLAC_CUESHEET_TRACK_SIZE_IN_BYTES) { + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + return DRFLAC_FALSE; + } + + /* Skip to the index point count */ + pRunningData += 35; + + indexCount = pRunningData[0]; + pRunningData += 1; + + bufferSize += indexCount * sizeof(drflac_cuesheet_track_index); + + /* Quick validation check. 
*/ + indexPointSize = indexCount * DRFLAC_CUESHEET_TRACK_INDEX_SIZE_IN_BYTES; + if (pRunningDataEnd - pRunningData < (drflac_int64)indexPointSize) { + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + return DRFLAC_FALSE; + } + + pRunningData += indexPointSize; } - /* Skip to the index point count */ - pRunningData += 35; - indexCount = pRunningData[0]; pRunningData += 1; - indexPointSize = indexCount * sizeof(drflac_cuesheet_track_index); - if (pRunningDataEnd - pRunningData < (drflac_int64)indexPointSize) { + pRunningData = pRunningDataSaved; + } + + /* Pass 2: Allocate a buffer and fill the data. Validation was done in the step above so can be skipped. */ + { + char* pRunningTrackData; + + pTrackData = drflac__malloc_from_callbacks(bufferSize, pAllocationCallbacks); + if (pTrackData == NULL) { drflac__free_from_callbacks(pRawData, pAllocationCallbacks); return DRFLAC_FALSE; } - /* Endian swap. */ - for (iIndex = 0; iIndex < indexCount; ++iIndex) { - drflac_cuesheet_track_index* pTrack = (drflac_cuesheet_track_index*)pRunningData; - pRunningData += sizeof(drflac_cuesheet_track_index); - pTrack->offset = drflac__be2host_64(pTrack->offset); + pRunningTrackData = (char*)pTrackData; + + for (iTrack = 0; iTrack < metadata.data.cuesheet.trackCount; ++iTrack) { + drflac_uint8 indexCount; + + DRFLAC_COPY_MEMORY(pRunningTrackData, pRunningData, DRFLAC_CUESHEET_TRACK_SIZE_IN_BYTES); + pRunningData += DRFLAC_CUESHEET_TRACK_SIZE_IN_BYTES-1; /* Skip forward, but not beyond the last byte in the CUESHEET_TRACK block which is the index count. */ + pRunningTrackData += DRFLAC_CUESHEET_TRACK_SIZE_IN_BYTES-1; + + /* Grab the index count for the next part. */ + indexCount = pRunningData[0]; + pRunningData += 1; + pRunningTrackData += 1; + + /* Extract each track index. 
*/ + for (iIndex = 0; iIndex < indexCount; ++iIndex) { + drflac_cuesheet_track_index* pTrackIndex = (drflac_cuesheet_track_index*)pRunningTrackData; + + DRFLAC_COPY_MEMORY(pRunningTrackData, pRunningData, DRFLAC_CUESHEET_TRACK_INDEX_SIZE_IN_BYTES); + pRunningData += DRFLAC_CUESHEET_TRACK_INDEX_SIZE_IN_BYTES; + pRunningTrackData += sizeof(drflac_cuesheet_track_index); + + pTrackIndex->offset = drflac__be2host_64(pTrackIndex->offset); + } } + + metadata.data.cuesheet.pTrackData = pTrackData; } + /* The original data is no longer needed. */ + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + pRawData = NULL; + onMeta(pUserDataMD, &metadata); - drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + drflac__free_from_callbacks(pTrackData, pAllocationCallbacks); + pTrackData = NULL; } } break; @@ -6615,28 +6785,28 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d pRunningData = (const char*)pRawData; pRunningDataEnd = (const char*)pRawData + blockSize; - metadata.data.picture.type = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; - metadata.data.picture.mimeLength = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; + metadata.data.picture.type = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4; + metadata.data.picture.mimeLength = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4; /* Need space for the rest of the block */ if ((pRunningDataEnd - pRunningData) - 24 < (drflac_int64)metadata.data.picture.mimeLength) { /* <-- Note the order of operations to avoid overflow to a valid value */ drflac__free_from_callbacks(pRawData, pAllocationCallbacks); return DRFLAC_FALSE; } - metadata.data.picture.mime = pRunningData; pRunningData += metadata.data.picture.mimeLength; - metadata.data.picture.descriptionLength = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; + metadata.data.picture.mime = pRunningData; 
pRunningData += metadata.data.picture.mimeLength; + metadata.data.picture.descriptionLength = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4; /* Need space for the rest of the block */ if ((pRunningDataEnd - pRunningData) - 20 < (drflac_int64)metadata.data.picture.descriptionLength) { /* <-- Note the order of operations to avoid overflow to a valid value */ drflac__free_from_callbacks(pRawData, pAllocationCallbacks); return DRFLAC_FALSE; } - metadata.data.picture.description = pRunningData; pRunningData += metadata.data.picture.descriptionLength; - metadata.data.picture.width = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; - metadata.data.picture.height = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; - metadata.data.picture.colorDepth = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; - metadata.data.picture.indexColorCount = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; - metadata.data.picture.pictureDataSize = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; + metadata.data.picture.description = pRunningData; pRunningData += metadata.data.picture.descriptionLength; + metadata.data.picture.width = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4; + metadata.data.picture.height = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4; + metadata.data.picture.colorDepth = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4; + metadata.data.picture.indexColorCount = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4; + metadata.data.picture.pictureDataSize = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4; metadata.data.picture.pPictureData = (const drflac_uint8*)pRunningData; /* Need space for the picture after the block */ @@ -6714,9 +6884,9 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d } } - 
*pSeektablePos = seektablePos; - *pSeektableSize = seektableSize; - *pFirstFramePos = runningFilePos; + *pSeektablePos = seektablePos; + *pSeekpointCount = seektableSize / DRFLAC_SEEKPOINT_SIZE_IN_BYTES; + *pFirstFramePos = runningFilePos; return DRFLAC_TRUE; } @@ -7746,11 +7916,11 @@ static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac drflac_uint32 wholeSIMDVectorCountPerChannel; drflac_uint32 decodedSamplesAllocationSize; #ifndef DR_FLAC_NO_OGG - drflac_oggbs oggbs; + drflac_oggbs* pOggbs = NULL; #endif drflac_uint64 firstFramePos; drflac_uint64 seektablePos; - drflac_uint32 seektableSize; + drflac_uint32 seekpointCount; drflac_allocation_callbacks allocationCallbacks; drflac* pFlac; @@ -7804,18 +7974,21 @@ static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac /* There's additional data required for Ogg streams. */ if (init.container == drflac_container_ogg) { allocationSize += sizeof(drflac_oggbs); - } - DRFLAC_ZERO_MEMORY(&oggbs, sizeof(oggbs)); - if (init.container == drflac_container_ogg) { - oggbs.onRead = onRead; - oggbs.onSeek = onSeek; - oggbs.pUserData = pUserData; - oggbs.currentBytePos = init.oggFirstBytePos; - oggbs.firstBytePos = init.oggFirstBytePos; - oggbs.serialNumber = init.oggSerial; - oggbs.bosPageHeader = init.oggBosHeader; - oggbs.bytesRemainingInPage = 0; + pOggbs = (drflac_oggbs*)drflac__malloc_from_callbacks(sizeof(*pOggbs), &allocationCallbacks); + if (pOggbs == NULL) { + return NULL; /*DRFLAC_OUT_OF_MEMORY;*/ + } + + DRFLAC_ZERO_MEMORY(pOggbs, sizeof(*pOggbs)); + pOggbs->onRead = onRead; + pOggbs->onSeek = onSeek; + pOggbs->pUserData = pUserData; + pOggbs->currentBytePos = init.oggFirstBytePos; + pOggbs->firstBytePos = init.oggFirstBytePos; + pOggbs->serialNumber = init.oggSerial; + pOggbs->bosPageHeader = init.oggBosHeader; + pOggbs->bytesRemainingInPage = 0; } #endif @@ -7824,9 +7997,9 @@ static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac consist of 
only a single heap allocation. To this, the size of the seek table needs to be known, which we determine when reading and decoding the metadata. */ - firstFramePos = 42; /* <-- We know we are at byte 42 at this point. */ - seektablePos = 0; - seektableSize = 0; + firstFramePos = 42; /* <-- We know we are at byte 42 at this point. */ + seektablePos = 0; + seekpointCount = 0; if (init.hasMetadataBlocks) { drflac_read_proc onReadOverride = onRead; drflac_seek_proc onSeekOverride = onSeek; @@ -7836,20 +8009,26 @@ static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac if (init.container == drflac_container_ogg) { onReadOverride = drflac__on_read_ogg; onSeekOverride = drflac__on_seek_ogg; - pUserDataOverride = (void*)&oggbs; + pUserDataOverride = (void*)pOggbs; } #endif - if (!drflac__read_and_decode_metadata(onReadOverride, onSeekOverride, onMeta, pUserDataOverride, pUserDataMD, &firstFramePos, &seektablePos, &seektableSize, &allocationCallbacks)) { + if (!drflac__read_and_decode_metadata(onReadOverride, onSeekOverride, onMeta, pUserDataOverride, pUserDataMD, &firstFramePos, &seektablePos, &seekpointCount, &allocationCallbacks)) { + #ifndef DR_FLAC_NO_OGG + drflac__free_from_callbacks(pOggbs, &allocationCallbacks); + #endif return NULL; } - allocationSize += seektableSize; + allocationSize += seekpointCount * sizeof(drflac_seekpoint); } pFlac = (drflac*)drflac__malloc_from_callbacks(allocationSize, &allocationCallbacks); if (pFlac == NULL) { + #ifndef DR_FLAC_NO_OGG + drflac__free_from_callbacks(pOggbs, &allocationCallbacks); + #endif return NULL; } @@ -7859,8 +8038,12 @@ static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac #ifndef DR_FLAC_NO_OGG if (init.container == drflac_container_ogg) { - drflac_oggbs* pInternalOggbs = (drflac_oggbs*)((drflac_uint8*)pFlac->pDecodedSamples + decodedSamplesAllocationSize + seektableSize); - *pInternalOggbs = oggbs; + drflac_oggbs* pInternalOggbs = 
(drflac_oggbs*)((drflac_uint8*)pFlac->pDecodedSamples + decodedSamplesAllocationSize + (seekpointCount * sizeof(drflac_seekpoint))); + DRFLAC_COPY_MEMORY(pInternalOggbs, pOggbs, sizeof(*pOggbs)); + + /* At this point the pOggbs object has been handed over to pInternalOggbs and can be freed. */ + drflac__free_from_callbacks(pOggbs, &allocationCallbacks); + pOggbs = NULL; /* The Ogg bistream needs to be layered on top of the original bitstream. */ pFlac->bs.onRead = drflac__on_read_ogg; @@ -7884,7 +8067,7 @@ static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac { /* If we have a seektable we need to load it now, making sure we move back to where we were previously. */ if (seektablePos != 0) { - pFlac->seekpointCount = seektableSize / sizeof(*pFlac->pSeekpoints); + pFlac->seekpointCount = seekpointCount; pFlac->pSeekpoints = (drflac_seekpoint*)((drflac_uint8*)pFlac->pDecodedSamples + decodedSamplesAllocationSize); DRFLAC_ASSERT(pFlac->bs.onSeek != NULL); @@ -7892,18 +8075,20 @@ static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac /* Seek to the seektable, then just read directly into our seektable buffer. */ if (pFlac->bs.onSeek(pFlac->bs.pUserData, (int)seektablePos, drflac_seek_origin_start)) { - if (pFlac->bs.onRead(pFlac->bs.pUserData, pFlac->pSeekpoints, seektableSize) == seektableSize) { - /* Endian swap. */ - drflac_uint32 iSeekpoint; - for (iSeekpoint = 0; iSeekpoint < pFlac->seekpointCount; ++iSeekpoint) { + drflac_uint32 iSeekpoint; + + for (iSeekpoint = 0; iSeekpoint < seekpointCount; iSeekpoint += 1) { + if (pFlac->bs.onRead(pFlac->bs.pUserData, pFlac->pSeekpoints + iSeekpoint, DRFLAC_SEEKPOINT_SIZE_IN_BYTES) == DRFLAC_SEEKPOINT_SIZE_IN_BYTES) { + /* Endian swap. 
*/ pFlac->pSeekpoints[iSeekpoint].firstPCMFrame = drflac__be2host_64(pFlac->pSeekpoints[iSeekpoint].firstPCMFrame); pFlac->pSeekpoints[iSeekpoint].flacFrameOffset = drflac__be2host_64(pFlac->pSeekpoints[iSeekpoint].flacFrameOffset); pFlac->pSeekpoints[iSeekpoint].pcmFrameCount = drflac__be2host_16(pFlac->pSeekpoints[iSeekpoint].pcmFrameCount); + } else { + /* Failed to read the seektable. Pretend we don't have one. */ + pFlac->pSeekpoints = NULL; + pFlac->seekpointCount = 0; + break; } - } else { - /* Failed to read the seektable. Pretend we don't have one. */ - pFlac->pSeekpoints = NULL; - pFlac->seekpointCount = 0; } /* We need to seek back to where we were. If this fails it's a critical error. */ @@ -7952,7 +8137,9 @@ static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac #ifndef DR_FLAC_NO_STDIO #include <stdio.h> +#ifndef DR_FLAC_NO_WCHAR #include <wchar.h> /* For wcslen(), wcsrtombs() */ +#endif /* drflac_result_from_errno() is only used for fopen() and wfopen() so putting it inside DR_WAV_NO_STDIO for now. If something else needs this later we can move it out. */ #include <errno.h> @@ -8418,6 +8605,7 @@ fallback, so if you notice your compiler not detecting this properly I'm happy t #endif #endif +#ifndef DR_FLAC_NO_WCHAR static drflac_result drflac_wfopen(FILE** ppFile, const wchar_t* pFilePath, const wchar_t* pOpenMode, const drflac_allocation_callbacks* pAllocationCallbacks) { if (ppFile != NULL) { @@ -8446,10 +8634,23 @@ static drflac_result drflac_wfopen(FILE** ppFile, const wchar_t* pFilePath, cons } #else /* - Use fopen() on anything other than Windows. Requires a conversion. This is annoying because fopen() is locale specific. The only real way I can - think of to do this is with wcsrtombs(). Note that wcstombs() is apparently not thread-safe because it uses a static global mbstate_t object for - maintaining state. 
I've checked this with -std=c89 and it works, but if somebody get's a compiler error I'll look into improving compatibility. + Use fopen() on anything other than Windows. Requires a conversion. This is annoying because + fopen() is locale specific. The only real way I can think of to do this is with wcsrtombs(). Note + that wcstombs() is apparently not thread-safe because it uses a static global mbstate_t object for + maintaining state. I've checked this with -std=c89 and it works, but if somebody gets a compiler + error I'll look into improving compatibility. */ + + /* + Some compilers don't support wchar_t or wcsrtombs() which we're using below. In this case we just + need to abort with an error. If you encounter a compiler lacking such support, add it to this list + and submit a bug report and it'll be added to the library upstream. + */ + #if defined(__DJGPP__) + { + /* Nothing to do here. This will fall through to the error check below. */ + } + #else { mbstate_t mbs; size_t lenMB; @@ -8491,6 +8692,7 @@ static drflac_result drflac_wfopen(FILE** ppFile, const wchar_t* pFilePath, cons drflac__free_from_callbacks(pFilePathMB, pAllocationCallbacks); } + #endif if (*ppFile == NULL) { return DRFLAC_ERROR; @@ -8499,6 +8701,7 @@ static drflac_result drflac_wfopen(FILE** ppFile, const wchar_t* pFilePath, cons return DRFLAC_SUCCESS; } +#endif static size_t drflac__on_read_stdio(void* pUserData, void* bufferOut, size_t bytesToRead) { @@ -8531,6 +8734,7 @@ DRFLAC_API drflac* drflac_open_file(const char* pFileName, const drflac_allocati return pFlac; } +#ifndef DR_FLAC_NO_WCHAR DRFLAC_API drflac* drflac_open_file_w(const wchar_t* pFileName, const drflac_allocation_callbacks* pAllocationCallbacks) { drflac* pFlac; @@ -8548,6 +8752,7 @@ DRFLAC_API drflac* drflac_open_file_w(const wchar_t* pFileName, const drflac_all return pFlac; } +#endif DRFLAC_API drflac* drflac_open_file_with_metadata(const char* pFileName, drflac_meta_proc onMeta, void* pUserData, const 
drflac_allocation_callbacks* pAllocationCallbacks) { @@ -8567,6 +8772,7 @@ DRFLAC_API drflac* drflac_open_file_with_metadata(const char* pFileName, drflac_ return pFlac; } +#ifndef DR_FLAC_NO_WCHAR DRFLAC_API drflac* drflac_open_file_with_metadata_w(const wchar_t* pFileName, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks) { drflac* pFlac; @@ -8584,6 +8790,7 @@ DRFLAC_API drflac* drflac_open_file_with_metadata_w(const wchar_t* pFileName, dr return pFlac; } +#endif #endif /* DR_FLAC_NO_STDIO */ static size_t drflac__on_read_memory(void* pUserData, void* bufferOut, size_t bytesToRead) @@ -11781,7 +11988,7 @@ DRFLAC_API const char* drflac_next_vorbis_comment(drflac_vorbis_comment_iterator return NULL; } - length = drflac__le2host_32(*(const drflac_uint32*)pIter->pRunningData); + length = drflac__le2host_32_ptr_unaligned(pIter->pRunningData); pIter->pRunningData += 4; pComment = pIter->pRunningData; @@ -11851,6 +12058,37 @@ DRFLAC_API drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterat /* REVISION HISTORY ================ +v0.12.39 - 2022-09-17 + - Fix compilation with DJGPP. + - Fix compilation error with Visual Studio 2019 and the ARM build. + - Fix an error with SSE 4.1 detection. + - Add support for disabling wchar_t with DR_FLAC_NO_WCHAR. + - Improve compatibility with compilers which lack support for explicit struct packing. + - Improve compatibility with low-end and embedded hardware by reducing the amount of stack + allocation when loading an Ogg encapsulated file. + +v0.12.38 - 2022-04-10 + - Fix compilation error on older versions of GCC. + +v0.12.37 - 2022-02-12 + - Improve ARM detection. + +v0.12.36 - 2022-02-07 + - Fix a compilation error with the ARM build. + +v0.12.35 - 2022-02-06 + - Fix a bug due to underestimating the amount of precision required for the prediction stage. + - Fix some bugs found from fuzz testing. 
+ +v0.12.34 - 2022-01-07 + - Fix some misalignment bugs when reading metadata. + +v0.12.33 - 2021-12-22 + - Fix a bug with seeking when the seek table does not start at PCM frame 0. + +v0.12.32 - 2021-12-11 + - Fix a warning with Clang. + v0.12.31 - 2021-08-16 - Silence some warnings. |
