diff options
| author | raysan5 <[email protected]> | 2021-10-06 21:13:17 +0200 |
|---|---|---|
| committer | raysan5 <[email protected]> | 2021-10-06 21:13:17 +0200 |
| commit | 700d448d75debea32572dc87cf3add0f755fed43 (patch) | |
| tree | b26ac6b65012a9dad59bb35d483d2dce8027565b /src/external/sinfl.h | |
| parent | 8722ff7043a1c6844d59a9448e48aa5345c27058 (diff) | |
| download | raylib-700d448d75debea32572dc87cf3add0f755fed43.tar.gz raylib-700d448d75debea32572dc87cf3add0f755fed43.zip | |
Updated external libraries
Diffstat (limited to 'src/external/sinfl.h')
| -rw-r--r-- | src/external/sinfl.h | 284 |
1 file changed, 202 insertions, 82 deletions
diff --git a/src/external/sinfl.h b/src/external/sinfl.h index 37c1aaea..09f50d2b 100644 --- a/src/external/sinfl.h +++ b/src/external/sinfl.h @@ -33,16 +33,16 @@ this file implementation in *one* C or C++ file to prevent collisions. | Compressor name | Compression| Decompress.| Compr. size | Ratio | | ------------------------| -----------| -----------| ----------- | ----- | -| sdefl 1.0 -0 | 127 MB/s | 233 MB/s | 40004116 | 39.88 | -| sdefl 1.0 -1 | 111 MB/s | 259 MB/s | 38940674 | 38.82 | -| sdefl 1.0 -5 | 45 MB/s | 275 MB/s | 36577183 | 36.46 | -| sdefl 1.0 -7 | 38 MB/s | 276 MB/s | 36523781 | 36.41 | -| zlib 1.2.11 -1 | 72 MB/s | 307 MB/s | 42298774 | 42.30 | -| zlib 1.2.11 -6 | 24 MB/s | 313 MB/s | 36548921 | 36.55 | -| zlib 1.2.11 -9 | 20 MB/s | 314 MB/s | 36475792 | 36.48 | | miniz 1.0 -1 | 122 MB/s | 208 MB/s | 48510028 | 48.51 | | miniz 1.0 -6 | 27 MB/s | 260 MB/s | 36513697 | 36.51 | | miniz 1.0 -9 | 23 MB/s | 261 MB/s | 36460101 | 36.46 | +| zlib 1.2.11 -1 | 72 MB/s | 307 MB/s | 42298774 | 42.30 | +| zlib 1.2.11 -6 | 24 MB/s | 313 MB/s | 36548921 | 36.55 | +| zlib 1.2.11 -9 | 20 MB/s | 314 MB/s | 36475792 | 36.48 | +| sdefl 1.0 -0 | 127 MB/s | 371 MB/s | 40004116 | 39.88 | +| sdefl 1.0 -1 | 111 MB/s | 398 MB/s | 38940674 | 38.82 | +| sdefl 1.0 -5 | 45 MB/s | 420 MB/s | 36577183 | 36.46 | +| sdefl 1.0 -7 | 38 MB/s | 423 MB/s | 36523781 | 36.41 | | libdeflate 1.3 -1 | 147 MB/s | 667 MB/s | 39597378 | 39.60 | | libdeflate 1.3 -6 | 69 MB/s | 689 MB/s | 36648318 | 36.65 | | libdeflate 1.3 -9 | 13 MB/s | 672 MB/s | 35197141 | 35.20 | @@ -51,7 +51,7 @@ this file implementation in *one* C or C++ file to prevent collisions. 
### Compression Results on the [Silesia compression corpus](http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia): -| File | Original | `sdefl 0` | `sdefl 5` | `sdefl 7` | +| File | Original | `sdefl 0` | `sdefl 5` | `sdefl 7` | | :------ | ---------: | -----------------: | ---------: | ----------: | | dickens | 10.192.446 | 4,260,187| 3,845,261| 3,833,657 | | mozilla | 51.220.480 | 20,774,706 | 19,607,009 | 19,565,867 | @@ -121,12 +121,15 @@ extern "C" { #define SINFL_OFF_TBL_SIZE 402 struct sinfl { - int bits, bitcnt; + const unsigned char *bitptr; + unsigned long long bitbuf; + int bitcnt; + unsigned lits[SINFL_LIT_TBL_SIZE]; unsigned dsts[SINFL_OFF_TBL_SIZE]; }; -extern int sinflate(void *out, const void *in, int size); -extern int zsinflate(void *out, const void *in, int size); +extern int sinflate(void *out, int cap, const void *in, int size); +extern int zsinflate(void *out, int cap, const void *in, int size); #ifdef __cplusplus } @@ -137,6 +140,33 @@ extern int zsinflate(void *out, const void *in, int size); #ifdef SINFL_IMPLEMENTATION #include <string.h> /* memcpy, memset */ +#include <assert.h> /* assert */ + +#if defined(__GNUC__) || defined(__clang__) +#define sinfl_likely(x) __builtin_expect((x),1) +#define sinfl_unlikely(x) __builtin_expect((x),0) +#else +#define sinfl_likely(x) (x) +#define sinfl_unlikely(x) (x) +#endif + +#ifndef SINFL_NO_SIMD +#if __x86_64__ || defined(_WIN32) || defined(_WIN64) + #include <emmintrin.h> + #define sinfl_char16 __m128i + #define sinfl_char16_ld(p) _mm_loadu_si128((const __m128i *)(void*)(p)) + #define sinfl_char16_str(d,v) _mm_storeu_si128((__m128i*)(void*)(d), v) + #define sinfl_char16_char(c) _mm_set1_epi8(c) +#elif defined(__arm__) || defined(__aarch64__) + #include <arm_neon.h> + #define sinfl_char16 uint8x16_t + #define sinfl_char16_ld(p) vld1q_u8((const unsigned char*)(p)) + #define sinfl_char16_str(d,v) vst1q_u8((unsigned char*)(d), v) + #define sinfl_char16_char(c) vdupq_n_u8(c) +#else + #define 
SINFL_NO_SIMD +#endif +#endif static int sinfl_bsr(unsigned n) { @@ -147,20 +177,66 @@ sinfl_bsr(unsigned n) { return 31 - __builtin_clz(n); #endif } +static unsigned long long +sinfl_read64(const void *p) { + unsigned long long n; + memcpy(&n, p, 8); + return n; +} +#ifndef SINFL_NO_SIMD +static unsigned char* +sinfl_write128(unsigned char *dst, sinfl_char16 w) { + sinfl_char16_str(dst, w); + return dst + 8; +} +static void +sinfl_copy128(unsigned char **dst, unsigned char **src) { + sinfl_char16 n = sinfl_char16_ld(*src); + sinfl_char16_str(*dst, n); + *dst += 16, *src += 16; +} +#else +static unsigned char* +sinfl_write64(unsigned char *dst, unsigned long long w) { + memcpy(dst, &w, 8); + return dst + 8; +} +static void +sinfl_copy64(unsigned char **dst, unsigned char **src) { + unsigned long long n; + memcpy(&n, *src, 8); + memcpy(*dst, &n, 8); + *dst += 8, *src += 8; +} +#endif +static void +sinfl_refill(struct sinfl *s) { + s->bitbuf |= sinfl_read64(s->bitptr) << s->bitcnt; + s->bitptr += (63 - s->bitcnt) >> 3; + s->bitcnt |= 56; /* bitcount is in range [56,63] */ +} static int -sinfl_get(const unsigned char **src, const unsigned char *end, struct sinfl *s, - int n) { - const unsigned char *in = *src; - int v = s->bits & ((1 << n)-1); - s->bits >>= n; - s->bitcnt = s->bitcnt - n; - s->bitcnt = s->bitcnt < 0 ? 
0 : s->bitcnt; - while (s->bitcnt < 16 && in < end) { - s->bits |= (*in++) << s->bitcnt; - s->bitcnt += 8; - } - *src = in; - return v; +sinfl_peek(struct sinfl *s, int cnt) { + assert(cnt >= 0 && cnt <= 56); + assert(cnt <= s->bitcnt); + return s->bitbuf & ((1ull << cnt) - 1); +} +static void +sinfl_consume(struct sinfl *s, int cnt) { + assert(cnt <= s->bitcnt); + s->bitbuf >>= cnt; + s->bitcnt -= cnt; +} +static int +sinfl__get(struct sinfl *s, int cnt) { + int res = sinfl_peek(s, cnt); + sinfl_consume(s, cnt); + return res; +} +static int +sinfl_get(struct sinfl *s, int cnt) { + sinfl_refill(s); + return sinfl__get(s, cnt); } struct sinfl_gen { int len; @@ -276,22 +352,22 @@ sinfl_build(unsigned *tbl, unsigned char *lens, int tbl_bits, int maxlen, } } static int -sinfl_decode(const unsigned char **in, const unsigned char *end, - struct sinfl *s, const unsigned *tbl, int bit_len) { - int idx = s->bits & ((1 << bit_len) - 1); +sinfl_decode(struct sinfl *s, const unsigned *tbl, int bit_len) { + sinfl_refill(s); + {int idx = sinfl_peek(s, bit_len); unsigned key = tbl[idx]; if (key & 0x10) { /* sub-table lookup */ int len = key & 0x0f; - sinfl_get(in, end, s, bit_len); - idx = s->bits & ((1 << len)-1); + sinfl_consume(s, bit_len); + idx = sinfl_peek(s, len); key = tbl[((key >> 16) & 0xffff) + (unsigned)idx]; } - sinfl_get(in, end, s, key & 0x0f); - return (key >> 16) & 0x0fff; + sinfl_consume(s, key & 0x0f); + return (key >> 16) & 0x0fff;} } static int -sinfl_decompress(unsigned char *out, const unsigned char *in, int size) { +sinfl_decompress(unsigned char *out, int cap, const unsigned char *in, int size) { static const unsigned char order[] = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15}; static const short dbase[30+2] = {1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193, 257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577}; @@ -302,19 +378,22 @@ sinfl_decompress(unsigned char *out, const unsigned char *in, int size) { static const unsigned char 
lbits[29+2] = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4, 4,4,4,5,5,5,5,0,0,0}; + const unsigned char *oe = out + cap; const unsigned char *e = in + size, *o = out; enum sinfl_states {hdr,stored,fixed,dyn,blk}; enum sinfl_states state = hdr; struct sinfl s = {0}; int last = 0; - sinfl_get(&in,e,&s,0); /* buffer input */ - while (in < e || s.bitcnt) { + s.bitptr = in; + while (1) { switch (state) { case hdr: { - int type = 0; /* block header */ - last = sinfl_get(&in,e,&s,1); - type = sinfl_get(&in,e,&s,2); + /* block header */ + int type = 0; + sinfl_refill(&s); + last = sinfl__get(&s,1); + type = sinfl__get(&s,2); switch (type) {default: return (int)(out-o); case 0x00: state = stored; break; @@ -322,10 +401,12 @@ sinfl_decompress(unsigned char *out, const unsigned char *in, int size) { case 0x02: state = dyn; break;} } break; case stored: { - int len; /* uncompressed block */ - sinfl_get(&in,e,&s,s.bitcnt & 7); - len = sinfl_get(&in,e,&s,16); - //int nlen = sinfl_get(&in,e,&s,16); + /* uncompressed block */ + int len; + sinfl_refill(&s); + sinfl__get(&s,s.bitcnt & 7); + len = sinfl__get(&s,16); + //int nlen = sinfl__get(&s,16); // @raysan5: Unused variable? 
in -= 2; s.bitcnt = 0; if (len > (e-in) || !len) @@ -353,72 +434,111 @@ sinfl_decompress(unsigned char *out, const unsigned char *in, int size) { int n, i; unsigned hlens[SINFL_PRE_TBL_SIZE]; unsigned char nlens[19] = {0}, lens[288+32]; - int nlit = 257 + sinfl_get(&in,e,&s,5); - int ndist = 1 + sinfl_get(&in,e,&s,5); - int nlen = 4 + sinfl_get(&in,e,&s,4); + + sinfl_refill(&s); + {int nlit = 257 + sinfl__get(&s,5); + int ndist = 1 + sinfl__get(&s,5); + int nlen = 4 + sinfl__get(&s,4); for (n = 0; n < nlen; n++) - nlens[order[n]] = (unsigned char)sinfl_get(&in,e,&s,3); + nlens[order[n]] = (unsigned char)sinfl_get(&s,3); sinfl_build(hlens, nlens, 7, 7, 19); /* decode code lengths */ for (n = 0; n < nlit + ndist;) { - int sym = sinfl_decode(&in, e, &s, hlens, 7); + int sym = sinfl_decode(&s, hlens, 7); switch (sym) {default: lens[n++] = (unsigned char)sym; break; - case 16: for (i=3+sinfl_get(&in,e,&s,2);i;i--,n++) lens[n]=lens[n-1]; break; - case 17: for (i=3+sinfl_get(&in,e,&s,3);i;i--,n++) lens[n]=0; break; - case 18: for (i=11+sinfl_get(&in,e,&s,7);i;i--,n++) lens[n]=0; break;} + case 16: for (i=3+sinfl_get(&s,2);i;i--,n++) lens[n]=lens[n-1]; break; + case 17: for (i=3+sinfl_get(&s,3);i;i--,n++) lens[n]=0; break; + case 18: for (i=11+sinfl_get(&s,7);i;i--,n++) lens[n]=0; break;} } /* build lit/dist tables */ sinfl_build(s.lits, lens, 10, 15, nlit); sinfl_build(s.dsts, lens + nlit, 8, 15, ndist); - state = blk; + state = blk;} } break; case blk: { /* decompress block */ - int i, sym = sinfl_decode(&in, e, &s, s.lits, 10); - if (sym > 256) {sym -= 257; /* match symbol */ - {int len = sinfl_get(&in, e, &s, lbits[sym]) + lbase[sym]; - int dsym = sinfl_decode(&in, e, &s, s.dsts, 8); - int offs = sinfl_get(&in, e, &s, dbits[dsym]) + dbase[dsym]; - if (offs > (int)(out-o)) { + int sym = sinfl_decode(&s, s.lits, 10); + if (sym < 256) { + /* literal */ + *out++ = (unsigned char)sym; + } else if (sym > 256) {sym -= 257; /* match symbol */ + sinfl_refill(&s); + {int len = 
sinfl__get(&s, lbits[sym]) + lbase[sym]; + int dsym = sinfl_decode(&s, s.dsts, 8); + int offs = sinfl__get(&s, dbits[dsym]) + dbase[dsym]; + unsigned char *dst = out, *src = out - offs; + if (sinfl_unlikely(offs > (int)(out-o))) { return (int)(out-o); - } else if (offs == 1) { - /* rle match copying */ - unsigned char c = *(out - offs); - unsigned long w = (c << 24) | (c << 16) | (c << 8) | c; - for (i = 0; i < len >> 2; ++i) { - memcpy(out, &w, 4); - out += 4; + } + out = out + len; + +#ifndef SINFL_NO_SIMD + if (sinfl_likely(oe - out >= 16 * 3)) { + if (offs >= 16) { + /* copy match */ + sinfl_copy128(&dst, &src); + sinfl_copy128(&dst, &src); + do sinfl_copy128(&dst, &src); + while (dst < out); + } else if (offs == 1) { + /* rle match copying */ + sinfl_char16 w = sinfl_char16_char(src[0]); + dst = sinfl_write128(dst, w); + dst = sinfl_write128(dst, w); + do dst = sinfl_write128(dst, w); + while (dst < out); + } else { + *dst++ = *src++; + *dst++ = *src++; + do *dst++ = *src++; + while (dst < out); } - len = len & 3; - } else if (offs >= 4) { - /* copy match */ - int wcnt = len >> 2; - for (i = 0; i < wcnt; ++i) { - unsigned long w = 0; - memcpy(&w, out - offs, 4); - memcpy(out, &w, 4); - out += 4; + } +#else + if (sinfl_likely(oe - out >= 3 * 8 - 3)) { + if (offs >= 8) { + /* copy match */ + sinfl_copy64(&dst, &src); + sinfl_copy64(&dst, &src); + do sinfl_copy64(&dst, &src); + while (dst < out); + } else if (offs == 1) { + /* rle match copying */ + unsigned int c = src[0]; + unsigned int hw = (c << 24u) | (c << 16u) | (c << 8u) | (unsigned)c; + unsigned long long w = (unsigned long long)hw << 32llu | hw; + dst = sinfl_write64(dst, w); + dst = sinfl_write64(dst, w); + do dst = sinfl_write64(dst, w); + while (dst < out); + } else { + *dst++ = *src++; + *dst++ = *src++; + do *dst++ = *src++; + while (dst < out); } - len = len & 3; } - for (i = 0; i < len; ++i) - {*out = *(out-offs), out++;} +#endif + else { + *dst++ = *src++; + *dst++ = *src++; + do *dst++ = 
*src++; + while (dst < out);} } - } else if (sym == 256) { + } else { /* end of block */ if (last) return (int)(out-o); state = hdr; break; - /* literal */ - } else *out++ = (unsigned char)sym; + } } break;} } return (int)(out-o); } extern int -sinflate(void *out, const void *in, int size) { - return sinfl_decompress((unsigned char*)out, (const unsigned char*)in, size); +sinflate(void *out, int cap, const void *in, int size) { + return sinfl_decompress((unsigned char*)out, cap, (const unsigned char*)in, size); } static unsigned sinfl_adler32(unsigned adler32, const unsigned char *in, int in_len) { @@ -448,11 +568,11 @@ sinfl_adler32(unsigned adler32, const unsigned char *in, int in_len) { } return (unsigned)(s2 << 16) + (unsigned)s1; } extern int -zsinflate(void *out, const void *mem, int size) { +zsinflate(void *out, int cap, const void *mem, int size) { const unsigned char *in = (const unsigned char*)mem; if (size >= 6) { const unsigned char *eob = in + size - 4; - int n = sinfl_decompress((unsigned char*)out, in + 2u, size); + int n = sinfl_decompress((unsigned char*)out, cap, in + 2u, size); unsigned a = sinfl_adler32(1u, (unsigned char*)out, n); unsigned h = eob[0] << 24 | eob[1] << 16 | eob[2] << 8 | eob[3] << 0; return a == h ? n : -1; |
