deps: update zlib to 337322d

PR-URL: https://github.com/nodejs/node/pull/48218
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Luigi Pinca <luigipinca@gmail.com>
This commit is contained in:
Node.js GitHub Bot
2023-05-31 07:24:04 +01:00
committed by GitHub
parent 8aa02e81d0
commit 5fae8bc406
26 changed files with 1273 additions and 28 deletions

4
deps/zlib/BUILD.gn vendored
View File

@@ -515,6 +515,10 @@ if (build_with_chromium) {
data = [ "google/test/data/" ]
if (is_ios) {
bundle_deps = [ "google:zlib_pak_bundle_data" ]
}
deps = [
":zlib",
"google:compression_utils",

View File

@@ -3,7 +3,7 @@ set(CMAKE_ALLOW_LOOSE_LOOP_CONSTRUCTS ON)
project(zlib C)
set(VERSION "1.2.13")
set(VERSION "1.2.13.1")
set(INSTALL_BIN_DIR "${CMAKE_INSTALL_PREFIX}/bin" CACHE PATH "Installation directory for executables")
set(INSTALL_LIB_DIR "${CMAKE_INSTALL_PREFIX}/lib" CACHE PATH "Installation directory for libraries")
@@ -22,6 +22,7 @@ check_include_file(stdint.h HAVE_STDINT_H)
check_include_file(stddef.h HAVE_STDDEF_H)
option(ENABLE_SIMD_OPTIMIZATIONS "Enable all SIMD optimizations" OFF)
# Opt-in AVX512 CRC32 path; only consulted inside the
# ENABLE_SIMD_OPTIMIZATIONS branch below.
option(ENABLE_SIMD_AVX512 "Enable SIMD AVX512 optimizations" OFF)
# TODO(cavalcantii): add support for other OSes (e.g. Android, fuchsia, osx)
# and architectures (e.g. Arm).
@@ -30,8 +31,13 @@ if (ENABLE_SIMD_OPTIMIZATIONS)
add_definitions(-DADLER32_SIMD_SSSE3)
add_definitions(-DINFLATE_CHUNK_READ_64LE)
add_definitions(-DCRC32_SIMD_SSE42_PCLMUL)
if (ENABLE_SIMD_AVX512)
add_definitions(-DCRC32_SIMD_AVX512_PCLMUL)
add_compile_options(-mvpclmulqdq -msse2 -mavx512f -mpclmul)
else()
add_compile_options(-msse4.2 -mpclmul)
endif()
add_definitions(-DDEFLATE_SLIDE_HASH_SSE2)
add_compile_options(-msse4.2 -mpclmul)
# Required by CPU features detection code.
add_definitions(-DX86_NOT_WINDOWS)
# Apparently some environments (e.g. CentOS) require to explicitly link

View File

@@ -257,6 +257,8 @@ int value;
struct inflate_state FAR *state;
if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
if (bits == 0)
return Z_OK;
state = (struct inflate_state FAR *)strm->state;
if (bits < 0) {
state->hold = 0;

View File

@@ -12,7 +12,7 @@
#include <fuzzer/FuzzedDataProvider.h>
#include "third_party/zlib/zlib.h"
#include "zlib.h"
// Fuzzer builds often have NDEBUG set, so roll our own assert macro.
#define ASSERT(cond) \

View File

@@ -31,6 +31,7 @@ int ZLIB_INTERNAL arm_cpu_enable_pmull = 0;
int ZLIB_INTERNAL x86_cpu_enable_sse2 = 0;
int ZLIB_INTERNAL x86_cpu_enable_ssse3 = 0;
int ZLIB_INTERNAL x86_cpu_enable_simd = 0;
int ZLIB_INTERNAL x86_cpu_enable_avx512 = 0;
#ifndef CPU_NO_SIMD
@@ -138,6 +139,10 @@ static void _cpu_check_features(void)
/* On x86 we simply use an instruction to check the CPU features.
* (i.e. CPUID).
*/
#ifdef CRC32_SIMD_AVX512_PCLMUL
#include <immintrin.h>
#include <xsaveintrin.h>
#endif
static void _cpu_check_features(void)
{
int x86_cpu_has_sse2;
@@ -164,6 +169,10 @@ static void _cpu_check_features(void)
x86_cpu_enable_simd = x86_cpu_has_sse2 &&
x86_cpu_has_sse42 &&
x86_cpu_has_pclmulqdq;
#ifdef CRC32_SIMD_AVX512_PCLMUL
x86_cpu_enable_avx512 = _xgetbv(0) & 0x00000040;
#endif
}
#endif
#endif

View File

@@ -14,5 +14,6 @@ extern int arm_cpu_enable_pmull;
extern int x86_cpu_enable_sse2;
extern int x86_cpu_enable_ssse3;
extern int x86_cpu_enable_simd;
extern int x86_cpu_enable_avx512;
void cpu_check_features(void);

14
deps/zlib/crc32.c vendored
View File

@@ -773,7 +773,19 @@ unsigned long ZEXPORT crc32_z(crc, buf, len)
}
#endif
#if defined(CRC32_SIMD_SSE42_PCLMUL)
#if defined(CRC32_SIMD_AVX512_PCLMUL)
if (x86_cpu_enable_avx512 && len >= Z_CRC32_AVX512_MINIMUM_LENGTH) {
/* crc32 64-byte chunks */
z_size_t chunk_size = len & ~Z_CRC32_AVX512_CHUNKSIZE_MASK;
crc = ~crc32_avx512_simd_(buf, chunk_size, ~(uint32_t)crc);
/* check remaining data */
len -= chunk_size;
if (!len)
return crc;
/* Fall into the default crc32 for the remaining data. */
buf += chunk_size;
}
#elif defined(CRC32_SIMD_SSE42_PCLMUL)
if (x86_cpu_enable_simd && len >= Z_CRC32_SSE42_MINIMUM_LENGTH) {
/* crc32 16-byte chunks */
z_size_t chunk_size = len & ~Z_CRC32_SSE42_CHUNKSIZE_MASK;

198
deps/zlib/crc32_simd.c vendored
View File

@@ -6,17 +6,207 @@
*/
#include "crc32_simd.h"
#if defined(CRC32_SIMD_SSE42_PCLMUL)
#if defined(CRC32_SIMD_AVX512_PCLMUL)
/*
* crc32_sse42_simd_(): compute the crc32 of the buffer, where the buffer
* length must be at least 64, and a multiple of 16. Based on:
* crc32_avx512_simd_(): compute the crc32 of the buffer, where the buffer
* length must be at least 256, and a multiple of 64. Based on:
*
* "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction"
* V. Gopal, E. Ozturk, et al., 2009, http://intel.ly/2ySEwL0
*/
#include <emmintrin.h>
#include <smmintrin.h>
#include <wmmintrin.h>
#include <immintrin.h>
/*
 * crc32_avx512_simd_(): fold `len` bytes starting at `buf` into the CRC
 * state `crc` using 512-bit carry-less multiplication.
 *
 *   buf  input bytes; only unaligned loads are used on it, so no particular
 *        alignment is required.
 *   len  must be at least 256 and a multiple of 64. Nothing is validated
 *        here: the first 256 bytes are loaded unconditionally and any tail
 *        shorter than 64 bytes would be silently ignored.
 *   crc  incoming CRC state, XORed into the low 32 bits of the first block.
 *
 * Returns the reduced 32-bit CRC state. No bit inversion is applied in this
 * function; the caller in crc32.c passes ~crc in and inverts the result.
 */
uint32_t ZLIB_INTERNAL crc32_avx512_simd_(  /* AVX512+PCLMUL */
    const unsigned char *buf,
    z_size_t len,
    uint32_t crc)
{
    /*
     * Definitions of the bit-reflected domain constants k1,k2,k3,k4
     * are similar to those given at the end of the paper, and remaining
     * constants and CRC32+Barrett polynomials remain unchanged.
     *
     * Replace the index of x from 128 to 512. As follows:
     * k1 = ( x ^ ( 512 * 4 + 32 ) mod P(x) << 32 )' << 1 = 0x011542778a
     * k2 = ( x ^ ( 512 * 4 - 32 ) mod P(x) << 32 )' << 1 = 0x01322d1430
     * k3 = ( x ^ ( 512 + 32 ) mod P(x) << 32 )' << 1 = 0x0154442bd4
     * k4 = ( x ^ ( 512 - 32 ) mod P(x) << 32 )' << 1 = 0x01c6e41596
     */
    static const uint64_t zalign(64) k1k2[] = { 0x011542778a, 0x01322d1430,
                                                0x011542778a, 0x01322d1430,
                                                0x011542778a, 0x01322d1430,
                                                0x011542778a, 0x01322d1430 };
    static const uint64_t zalign(64) k3k4[] = { 0x0154442bd4, 0x01c6e41596,
                                                0x0154442bd4, 0x01c6e41596,
                                                0x0154442bd4, 0x01c6e41596,
                                                0x0154442bd4, 0x01c6e41596 };
    static const uint64_t zalign(16) k5k6[] = { 0x01751997d0, 0x00ccaa009e };
    static const uint64_t zalign(16) k7k8[] = { 0x0163cd6124, 0x0000000000 };
    static const uint64_t zalign(16) poly[] = { 0x01db710641, 0x01f7011641 };
    __m512i x0, x1, x2, x3, x4, x5, x6, x7, x8, y5, y6, y7, y8;
    __m128i a0, a1, a2, a3;

    /*
     * There's at least one block of 256.
     */
    x1 = _mm512_loadu_si512((__m512i *)(buf + 0x00));
    x2 = _mm512_loadu_si512((__m512i *)(buf + 0x40));
    x3 = _mm512_loadu_si512((__m512i *)(buf + 0x80));
    x4 = _mm512_loadu_si512((__m512i *)(buf + 0xC0));

    /*
     * Mix the incoming CRC into the low 32 bits of the first block. The
     * 128-bit value is widened by inserting it into an all-zero vector:
     * _mm512_castsi128_si512() leaves the upper 384 bits undefined, and
     * XORing undefined bits into the data would corrupt the checksum.
     */
    x1 = _mm512_xor_si512(x1,
        _mm512_inserti32x4(_mm512_setzero_si512(), _mm_cvtsi32_si128(crc), 0));

    x0 = _mm512_load_si512((__m512i *)k1k2);

    buf += 256;
    len -= 256;

    /*
     * Parallel fold blocks of 256, if any.
     */
    while (len >= 256)
    {
        x5 = _mm512_clmulepi64_epi128(x1, x0, 0x00);
        x6 = _mm512_clmulepi64_epi128(x2, x0, 0x00);
        x7 = _mm512_clmulepi64_epi128(x3, x0, 0x00);
        x8 = _mm512_clmulepi64_epi128(x4, x0, 0x00);

        x1 = _mm512_clmulepi64_epi128(x1, x0, 0x11);
        x2 = _mm512_clmulepi64_epi128(x2, x0, 0x11);
        x3 = _mm512_clmulepi64_epi128(x3, x0, 0x11);
        x4 = _mm512_clmulepi64_epi128(x4, x0, 0x11);

        y5 = _mm512_loadu_si512((__m512i *)(buf + 0x00));
        y6 = _mm512_loadu_si512((__m512i *)(buf + 0x40));
        y7 = _mm512_loadu_si512((__m512i *)(buf + 0x80));
        y8 = _mm512_loadu_si512((__m512i *)(buf + 0xC0));

        x1 = _mm512_xor_si512(x1, x5);
        x2 = _mm512_xor_si512(x2, x6);
        x3 = _mm512_xor_si512(x3, x7);
        x4 = _mm512_xor_si512(x4, x8);

        x1 = _mm512_xor_si512(x1, y5);
        x2 = _mm512_xor_si512(x2, y6);
        x3 = _mm512_xor_si512(x3, y7);
        x4 = _mm512_xor_si512(x4, y8);

        buf += 256;
        len -= 256;
    }

    /*
     * Fold into 512-bits.
     */
    x0 = _mm512_load_si512((__m512i *)k3k4);

    x5 = _mm512_clmulepi64_epi128(x1, x0, 0x00);
    x1 = _mm512_clmulepi64_epi128(x1, x0, 0x11);
    x1 = _mm512_xor_si512(x1, x2);
    x1 = _mm512_xor_si512(x1, x5);

    x5 = _mm512_clmulepi64_epi128(x1, x0, 0x00);
    x1 = _mm512_clmulepi64_epi128(x1, x0, 0x11);
    x1 = _mm512_xor_si512(x1, x3);
    x1 = _mm512_xor_si512(x1, x5);

    x5 = _mm512_clmulepi64_epi128(x1, x0, 0x00);
    x1 = _mm512_clmulepi64_epi128(x1, x0, 0x11);
    x1 = _mm512_xor_si512(x1, x4);
    x1 = _mm512_xor_si512(x1, x5);

    /*
     * Single fold blocks of 64, if any.
     */
    while (len >= 64)
    {
        x2 = _mm512_loadu_si512((__m512i *)buf);

        x5 = _mm512_clmulepi64_epi128(x1, x0, 0x00);
        x1 = _mm512_clmulepi64_epi128(x1, x0, 0x11);
        x1 = _mm512_xor_si512(x1, x2);
        x1 = _mm512_xor_si512(x1, x5);

        buf += 64;
        len -= 64;
    }

    /*
     * Fold 512-bits to 384-bits.
     */
    a0 = _mm_load_si128((__m128i *)k5k6);

    a1 = _mm512_extracti32x4_epi32(x1, 0);
    a2 = _mm512_extracti32x4_epi32(x1, 1);

    a3 = _mm_clmulepi64_si128(a1, a0, 0x00);
    a1 = _mm_clmulepi64_si128(a1, a0, 0x11);

    a1 = _mm_xor_si128(a1, a3);
    a1 = _mm_xor_si128(a1, a2);

    /*
     * Fold 384-bits to 256-bits.
     */
    a2 = _mm512_extracti32x4_epi32(x1, 2);
    a3 = _mm_clmulepi64_si128(a1, a0, 0x00);
    a1 = _mm_clmulepi64_si128(a1, a0, 0x11);
    a1 = _mm_xor_si128(a1, a3);
    a1 = _mm_xor_si128(a1, a2);

    /*
     * Fold 256-bits to 128-bits.
     */
    a2 = _mm512_extracti32x4_epi32(x1, 3);
    a3 = _mm_clmulepi64_si128(a1, a0, 0x00);
    a1 = _mm_clmulepi64_si128(a1, a0, 0x11);
    a1 = _mm_xor_si128(a1, a3);
    a1 = _mm_xor_si128(a1, a2);

    /*
     * Fold 128-bits to 64-bits.
     */
    a2 = _mm_clmulepi64_si128(a1, a0, 0x10);
    a3 = _mm_setr_epi32(~0, 0, ~0, 0);
    a1 = _mm_srli_si128(a1, 8);
    a1 = _mm_xor_si128(a1, a2);

    a0 = _mm_loadl_epi64((__m128i*)k7k8);
    a2 = _mm_srli_si128(a1, 4);
    a1 = _mm_and_si128(a1, a3);
    a1 = _mm_clmulepi64_si128(a1, a0, 0x00);
    a1 = _mm_xor_si128(a1, a2);

    /*
     * Barrett reduce to 32-bits.
     */
    a0 = _mm_load_si128((__m128i*)poly);

    a2 = _mm_and_si128(a1, a3);
    a2 = _mm_clmulepi64_si128(a2, a0, 0x10);
    a2 = _mm_and_si128(a2, a3);
    a2 = _mm_clmulepi64_si128(a2, a0, 0x00);
    a1 = _mm_xor_si128(a1, a2);

    /*
     * Return the crc32.
     */
    return _mm_extract_epi32(a1, 1);
}
#elif defined(CRC32_SIMD_SSE42_PCLMUL)
/*
* crc32_sse42_simd_(): compute the crc32 of the buffer, where the buffer
* length must be at least 64, and a multiple of 16.
*/
#include <emmintrin.h>
#include <smmintrin.h>
#include <wmmintrin.h>

View File

@@ -19,12 +19,18 @@ uint32_t ZLIB_INTERNAL crc32_sse42_simd_(const unsigned char* buf,
z_size_t len,
uint32_t crc);
/*
 * crc32_avx512_simd_(): AVX512+PCLMUL counterpart of crc32_sse42_simd_().
 * The buffer length must be at least Z_CRC32_AVX512_MINIMUM_LENGTH bytes
 * and a multiple of (Z_CRC32_AVX512_CHUNKSIZE_MASK + 1) bytes; see the use
 * in zlib/crc32.c for how an arbitrary length buffer is split up.
 */
uint32_t ZLIB_INTERNAL crc32_avx512_simd_(const unsigned char* buf,
z_size_t len,
uint32_t crc);
/*
* crc32_sse42_simd_ buffer size constraints: see the use in zlib/crc32.c
* for computing the crc32 of an arbitrary length buffer.
*/
#define Z_CRC32_SSE42_MINIMUM_LENGTH 64
#define Z_CRC32_SSE42_CHUNKSIZE_MASK 15
#define Z_CRC32_AVX512_MINIMUM_LENGTH 256
#define Z_CRC32_AVX512_CHUNKSIZE_MASK 63
/*
* CRC32 checksums using ARMv8-a crypto instructions.

View File

@@ -435,7 +435,10 @@ unsigned ZLIB_INTERNAL crc_fold_512to32(deflate_state *const s)
unsigned crc;
__m128i x_tmp0, x_tmp1, x_tmp2, crc_fold;
CRC_LOAD(s)
__m128i xmm_crc0 = _mm_loadu_si128((__m128i *)s->crc0 + 0);
__m128i xmm_crc1 = _mm_loadu_si128((__m128i *)s->crc0 + 1);
__m128i xmm_crc2 = _mm_loadu_si128((__m128i *)s->crc0 + 2);
__m128i xmm_crc3 = _mm_loadu_si128((__m128i *)s->crc0 + 3);
/*
* k1
@@ -491,7 +494,6 @@ unsigned ZLIB_INTERNAL crc_fold_512to32(deflate_state *const s)
crc = _mm_extract_epi32(xmm_crc3, 2);
return ~crc;
CRC_SAVE(s)
}
#endif /* CRC32_SIMD_SSE42_PCLMUL */

5
deps/zlib/deflate.c vendored
View File

@@ -65,7 +65,7 @@
#endif
const char deflate_copyright[] =
" deflate 1.2.13 Copyright 1995-2022 Jean-loup Gailly and Mark Adler ";
" deflate 1.2.13.1 Copyright 1995-2022 Jean-loup Gailly and Mark Adler ";
/*
If you use the zlib library in a product, an acknowledgment is welcome
in the documentation of your product. If for some reason you cannot
@@ -774,7 +774,8 @@ uLong ZEXPORT deflateBound(strm, sourceLen)
/* if not default parameters, return one of the conservative bounds */
if (s->w_bits != 15 || s->hash_bits != 8 + 7)
return (s->w_bits <= s->hash_bits ? fixedlen : storelen) + wraplen;
return (s->w_bits <= s->hash_bits && s->level ? fixedlen : storelen) +
wraplen;
/* default settings: return tight bound for that case -- ~0.03% overhead
plus a small constant */

View File

@@ -4,6 +4,10 @@
import("//build_overrides/build.gni")
if (build_with_chromium && is_ios) {
import("//build/config/ios/bundle_data_from_filelist.gni")
}
if (build_with_chromium) {
static_library("zip") {
sources = [
@@ -35,6 +39,13 @@ if (build_with_chromium) {
]
public_deps = [ ":compression_utils_portable" ]
}
if (is_ios) {
bundle_data_from_filelist("zlib_pak_bundle_data") {
testonly = true
filelist_name = "test_data.filelist"
}
}
}
# This allows other users of Chromium's zlib library, but don't use Chromium's

32
deps/zlib/google/test_data.filelist vendored Normal file
View File

@@ -0,0 +1,32 @@
# Copyright 2023 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# NOTE: this file is generated by build/ios/update_bundle_filelist.py
# If it requires updating, you should get a presubmit error with
# instructions on how to regenerate. Otherwise, do not edit.
test/data/Different Encryptions.zip
test/data/Empty Dir Same Name As File.zip
test/data/Mixed Paths.zip
test/data/Parent Dir Same Name As File.zip
test/data/README.md
test/data/Repeated Dir Name.zip
test/data/Repeated File Name With Different Cases.zip
test/data/Repeated File Name.zip
test/data/SJIS Bug 846195.zip
test/data/Windows Special Names.zip
test/data/Wrong CRC.zip
test/data/create_test_zip.sh
test/data/empty.zip
test/data/evil.zip
test/data/evil_via_absolute_file_name.zip
test/data/evil_via_invalid_utf8.zip
test/data/test.zip
test/data/test/foo.txt
test/data/test/foo/bar.txt
test/data/test/foo/bar/.hidden
test/data/test/foo/bar/baz.txt
test/data/test/foo/bar/quux.txt
test/data/test_encrypted.zip
test/data/test_mismatch_size.zip
test/data/test_nocompress.zip
test/data/test_posix_permissions.zip

8
deps/zlib/google/test_data.globlist vendored Normal file
View File

@@ -0,0 +1,8 @@
# Copyright 2023 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# See build/ios/update_bundle_filelist.py for details on how .globlist
# files are used to update their .filelist counterparts.
test/data/**
test/data/test/foo/bar/.hidden

View File

@@ -611,7 +611,7 @@ TEST_F(ZipTest, UnzipWindowsSpecialNames) {
"NUL .txt",
"NUL .txt",
"NUL ..txt",
#ifndef OS_MAC
#ifndef OS_APPLE
"Nul.txt",
#endif
"nul.very long extension",
@@ -669,7 +669,7 @@ TEST_F(ZipTest, UnzipWindowsSpecialNames) {
}
TEST_F(ZipTest, UnzipDifferentCases) {
#if defined(OS_WIN) || defined(OS_MAC)
#if defined(OS_WIN) || defined(OS_APPLE)
// Only the first file (with mixed case) is extracted.
EXPECT_FALSE(zip::Unzip(GetDataDirectory().AppendASCII(
"Repeated File Name With Different Cases.zip"),
@@ -711,7 +711,7 @@ TEST_F(ZipTest, UnzipDifferentCasesContinueOnError) {
std::string contents;
#if defined(OS_WIN) || defined(OS_MAC)
#if defined(OS_WIN) || defined(OS_APPLE)
// Only the first file (with mixed case) has been extracted.
EXPECT_THAT(
GetRelativePaths(test_dir_, base::FileEnumerator::FileType::FILES),
@@ -782,7 +782,7 @@ TEST_F(ZipTest, UnzipMixedPaths) {
"Space→ ", //
"c/NUL", // Disappears on Windows
"nul.very long extension", // Disappears on Windows
#ifndef OS_MAC
#ifndef OS_APPLE
"CASE", // Conflicts with "Case"
"case", // Conflicts with "Case"
#endif

5
deps/zlib/gzlib.c vendored
View File

@@ -7,11 +7,14 @@
#if defined(_WIN32) && !defined(__BORLANDC__)
# define LSEEK _lseeki64
# define OPEN open
#else
#if defined(_LARGEFILE64_SOURCE) && _LFS64_LARGEFILE-0
# define LSEEK lseek64
# define OPEN open64
#else
# define LSEEK lseek
# define OPEN open
#endif
#endif
@@ -244,7 +247,7 @@ local gzFile gz_open(path, fd, mode)
#ifdef WIDECHAR
fd == -2 ? _wopen(path, oflag, 0666) :
#endif
open((const char *)path, oflag, 0666));
OPEN((const char *)path, oflag, 0666));
if (state->fd == -1) {
free(state->path);
free(state);

2
deps/zlib/gzwrite.c vendored
View File

@@ -609,7 +609,7 @@ int ZEXPORT gzsetparams(file, level, strategy)
strm = &(state->strm);
/* check that we're writing and that there's no error */
if (state->mode != GZ_WRITE || state->err != Z_OK)
if (state->mode != GZ_WRITE || state->err != Z_OK || state->direct)
return Z_STREAM_ERROR;
/* if no change is requested, then do nothing */

2
deps/zlib/inflate.c vendored
View File

@@ -256,6 +256,8 @@ int value;
struct inflate_state FAR *state;
if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
if (bits == 0)
return Z_OK;
state = (struct inflate_state FAR *)strm->state;
if (bits < 0) {
state->hold = 0;

View File

@@ -9,7 +9,7 @@
#define MAXBITS 15
const char inflate_copyright[] =
" inflate 1.2.13 Copyright 1995-2022 Mark Adler ";
" inflate 1.2.13.1 Copyright 1995-2022 Mark Adler ";
/*
If you use the zlib library in a product, an acknowledgment is welcome
in the documentation of your product. If for some reason you cannot
@@ -62,7 +62,7 @@ unsigned short FAR *work;
35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
static const unsigned short lext[31] = { /* Length codes 257..285 extra */
16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18,
19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 194, 65};
19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 77, 76};
static const unsigned short dbase[32] = { /* Distance codes 0..29 base */
1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,

357
deps/zlib/patches/0011-avx512.patch vendored Normal file
View File

@@ -0,0 +1,357 @@
From 87fc8e3e38323cfdabf8da3927488e3e57073b02 Mon Sep 17 00:00:00 2001
From: Jia Liu <jia3.liu@intel.com>
Date: Thu, 30 Mar 2023 11:13:16 +0800
Subject: [PATCH] Enabled AVX512 for CRC32
Enabled AVX512 for CRC32 that provide best of known performance
beyond current SSE SIMD optimization. It enables multiple folding
operations and AVX512 new instructions, providing ~3.5X CRC32
performance and ~3.7% gain on Zlib_bench gzip performance.
---
CMakeLists.txt | 8 +-
cpu_features.c | 9 +++
cpu_features.h | 1 +
crc32.c | 14 +++-
crc32_simd.c | 198 ++++++++++++++++++++++++++++++++++++++++++++++++-
crc32_simd.h | 6 ++
6 files changed, 230 insertions(+), 6 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f06e193..d45b902 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -22,6 +22,7 @@ check_include_file(stdint.h HAVE_STDINT_H)
check_include_file(stddef.h HAVE_STDDEF_H)
option(ENABLE_SIMD_OPTIMIZATIONS "Enable all SIMD optimizations" OFF)
+option(ENABLE_SIMD_AVX512 "Enable SIMD AXV512 optimizations" OFF)
# TODO(cavalcantii): add support for other OSes (e.g. Android, fuchsia, osx)
# and architectures (e.g. Arm).
@@ -30,8 +31,13 @@ if (ENABLE_SIMD_OPTIMIZATIONS)
add_definitions(-DADLER32_SIMD_SSSE3)
add_definitions(-DINFLATE_CHUNK_READ_64LE)
add_definitions(-DCRC32_SIMD_SSE42_PCLMUL)
+ if (ENABLE_SIMD_AVX512)
+ add_definitions(-DCRC32_SIMD_AVX512_PCLMUL)
+ add_compile_options(-mvpclmulqdq -msse2 -mavx512f -mpclmul)
+ else()
+ add_compile_options(-msse4.2 -mpclmul)
+ endif()
add_definitions(-DDEFLATE_SLIDE_HASH_SSE2)
- add_compile_options(-msse4.2 -mpclmul)
# Required by CPU features detection code.
add_definitions(-DX86_NOT_WINDOWS)
# Apparently some environments (e.g. CentOS) require to explicitly link
diff --git a/cpu_features.c b/cpu_features.c
index 877d5f2..ac6ee88 100644
--- a/cpu_features.c
+++ b/cpu_features.c
@@ -31,6 +31,7 @@ int ZLIB_INTERNAL arm_cpu_enable_pmull = 0;
int ZLIB_INTERNAL x86_cpu_enable_sse2 = 0;
int ZLIB_INTERNAL x86_cpu_enable_ssse3 = 0;
int ZLIB_INTERNAL x86_cpu_enable_simd = 0;
+int ZLIB_INTERNAL x86_cpu_enable_avx512 = 0;
#ifndef CPU_NO_SIMD
@@ -138,6 +139,10 @@ static void _cpu_check_features(void)
/* On x86 we simply use a instruction to check the CPU features.
* (i.e. CPUID).
*/
+#ifdef CRC32_SIMD_AVX512_PCLMUL
+#include <immintrin.h>
+#include <xsaveintrin.h>
+#endif
static void _cpu_check_features(void)
{
int x86_cpu_has_sse2;
@@ -164,6 +169,10 @@ static void _cpu_check_features(void)
x86_cpu_enable_simd = x86_cpu_has_sse2 &&
x86_cpu_has_sse42 &&
x86_cpu_has_pclmulqdq;
+
+#ifdef CRC32_SIMD_AVX512_PCLMUL
+ x86_cpu_enable_avx512 = _xgetbv(0) & 0x00000040;
+#endif
}
#endif
#endif
diff --git a/cpu_features.h b/cpu_features.h
index 279246c..aed3e83 100644
--- a/cpu_features.h
+++ b/cpu_features.h
@@ -14,5 +14,6 @@ extern int arm_cpu_enable_pmull;
extern int x86_cpu_enable_sse2;
extern int x86_cpu_enable_ssse3;
extern int x86_cpu_enable_simd;
+extern int x86_cpu_enable_avx512;
void cpu_check_features(void);
diff --git a/crc32.c b/crc32.c
index 4486098..acb6972 100644
--- a/crc32.c
+++ b/crc32.c
@@ -773,7 +773,19 @@ unsigned long ZEXPORT crc32_z(crc, buf, len)
}
#endif
-#if defined(CRC32_SIMD_SSE42_PCLMUL)
+#if defined(CRC32_SIMD_AVX512_PCLMUL)
+ if (x86_cpu_enable_avx512 && len >= Z_CRC32_AVX512_MINIMUM_LENGTH) {
+ /* crc32 64-byte chunks */
+ z_size_t chunk_size = len & ~Z_CRC32_AVX512_CHUNKSIZE_MASK;
+ crc = ~crc32_avx512_simd_(buf, chunk_size, ~(uint32_t)crc);
+ /* check remaining data */
+ len -= chunk_size;
+ if (!len)
+ return crc;
+ /* Fall into the default crc32 for the remaining data. */
+ buf += chunk_size;
+ }
+#elif defined(CRC32_SIMD_SSE42_PCLMUL)
if (x86_cpu_enable_simd && len >= Z_CRC32_SSE42_MINIMUM_LENGTH) {
/* crc32 16-byte chunks */
z_size_t chunk_size = len & ~Z_CRC32_SSE42_CHUNKSIZE_MASK;
diff --git a/crc32_simd.c b/crc32_simd.c
index d80beba..7428270 100644
--- a/crc32_simd.c
+++ b/crc32_simd.c
@@ -6,17 +6,207 @@
*/
#include "crc32_simd.h"
-
-#if defined(CRC32_SIMD_SSE42_PCLMUL)
+#if defined(CRC32_SIMD_AVX512_PCLMUL)
/*
- * crc32_sse42_simd_(): compute the crc32 of the buffer, where the buffer
- * length must be at least 64, and a multiple of 16. Based on:
+ * crc32_avx512_simd_(): compute the crc32 of the buffer, where the buffer
+ * length must be at least 256, and a multiple of 64. Based on:
*
* "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction"
* V. Gopal, E. Ozturk, et al., 2009, http://intel.ly/2ySEwL0
*/
+#include <emmintrin.h>
+#include <smmintrin.h>
+#include <wmmintrin.h>
+#include <immintrin.h>
+
+uint32_t ZLIB_INTERNAL crc32_avx512_simd_( /* AVX512+PCLMUL */
+ const unsigned char *buf,
+ z_size_t len,
+ uint32_t crc)
+{
+ /*
+ * Definitions of the bit-reflected domain constants k1,k2,k3,k4
+ * are similar to those given at the end of the paper, and remaining
+ * constants and CRC32+Barrett polynomials remain unchanged.
+ *
+ * Replace the index of x from 128 to 512. As follows:
+ * k1 = ( x ^ ( 512 * 4 + 32 ) mod P(x) << 32 )' << 1 = 0x011542778a
+ * k2 = ( x ^ ( 512 * 4 - 32 ) mod P(x) << 32 )' << 1 = 0x01322d1430
+ * k3 = ( x ^ ( 512 + 32 ) mod P(x) << 32 )' << 1 = 0x0154442bd4
+ * k4 = ( x ^ ( 512 - 32 ) mod P(x) << 32 )' << 1 = 0x01c6e41596
+ */
+ static const uint64_t zalign(64) k1k2[] = { 0x011542778a, 0x01322d1430,
+ 0x011542778a, 0x01322d1430,
+ 0x011542778a, 0x01322d1430,
+ 0x011542778a, 0x01322d1430 };
+ static const uint64_t zalign(64) k3k4[] = { 0x0154442bd4, 0x01c6e41596,
+ 0x0154442bd4, 0x01c6e41596,
+ 0x0154442bd4, 0x01c6e41596,
+ 0x0154442bd4, 0x01c6e41596 };
+ static const uint64_t zalign(16) k5k6[] = { 0x01751997d0, 0x00ccaa009e };
+ static const uint64_t zalign(16) k7k8[] = { 0x0163cd6124, 0x0000000000 };
+ static const uint64_t zalign(16) poly[] = { 0x01db710641, 0x01f7011641 };
+ __m512i x0, x1, x2, x3, x4, x5, x6, x7, x8, y5, y6, y7, y8;
+ __m128i a0, a1, a2, a3;
+
+ /*
+ * There's at least one block of 256.
+ */
+ x1 = _mm512_loadu_si512((__m512i *)(buf + 0x00));
+ x2 = _mm512_loadu_si512((__m512i *)(buf + 0x40));
+ x3 = _mm512_loadu_si512((__m512i *)(buf + 0x80));
+ x4 = _mm512_loadu_si512((__m512i *)(buf + 0xC0));
+
+ x1 = _mm512_xor_si512(x1, _mm512_castsi128_si512(_mm_cvtsi32_si128(crc)));
+
+ x0 = _mm512_load_si512((__m512i *)k1k2);
+
+ buf += 256;
+ len -= 256;
+
+ /*
+ * Parallel fold blocks of 256, if any.
+ */
+ while (len >= 256)
+ {
+ x5 = _mm512_clmulepi64_epi128(x1, x0, 0x00);
+ x6 = _mm512_clmulepi64_epi128(x2, x0, 0x00);
+ x7 = _mm512_clmulepi64_epi128(x3, x0, 0x00);
+ x8 = _mm512_clmulepi64_epi128(x4, x0, 0x00);
+
+
+ x1 = _mm512_clmulepi64_epi128(x1, x0, 0x11);
+ x2 = _mm512_clmulepi64_epi128(x2, x0, 0x11);
+ x3 = _mm512_clmulepi64_epi128(x3, x0, 0x11);
+ x4 = _mm512_clmulepi64_epi128(x4, x0, 0x11);
+
+ y5 = _mm512_loadu_si512((__m512i *)(buf + 0x00));
+ y6 = _mm512_loadu_si512((__m512i *)(buf + 0x40));
+ y7 = _mm512_loadu_si512((__m512i *)(buf + 0x80));
+ y8 = _mm512_loadu_si512((__m512i *)(buf + 0xC0));
+
+ x1 = _mm512_xor_si512(x1, x5);
+ x2 = _mm512_xor_si512(x2, x6);
+ x3 = _mm512_xor_si512(x3, x7);
+ x4 = _mm512_xor_si512(x4, x8);
+
+ x1 = _mm512_xor_si512(x1, y5);
+ x2 = _mm512_xor_si512(x2, y6);
+ x3 = _mm512_xor_si512(x3, y7);
+ x4 = _mm512_xor_si512(x4, y8);
+
+ buf += 256;
+ len -= 256;
+ }
+
+ /*
+ * Fold into 512-bits.
+ */
+ x0 = _mm512_load_si512((__m512i *)k3k4);
+
+ x5 = _mm512_clmulepi64_epi128(x1, x0, 0x00);
+ x1 = _mm512_clmulepi64_epi128(x1, x0, 0x11);
+ x1 = _mm512_xor_si512(x1, x2);
+ x1 = _mm512_xor_si512(x1, x5);
+
+ x5 = _mm512_clmulepi64_epi128(x1, x0, 0x00);
+ x1 = _mm512_clmulepi64_epi128(x1, x0, 0x11);
+ x1 = _mm512_xor_si512(x1, x3);
+ x1 = _mm512_xor_si512(x1, x5);
+
+ x5 = _mm512_clmulepi64_epi128(x1, x0, 0x00);
+ x1 = _mm512_clmulepi64_epi128(x1, x0, 0x11);
+ x1 = _mm512_xor_si512(x1, x4);
+ x1 = _mm512_xor_si512(x1, x5);
+
+ /*
+ * Single fold blocks of 64, if any.
+ */
+ while (len >= 64)
+ {
+ x2 = _mm512_loadu_si512((__m512i *)buf);
+
+ x5 = _mm512_clmulepi64_epi128(x1, x0, 0x00);
+ x1 = _mm512_clmulepi64_epi128(x1, x0, 0x11);
+ x1 = _mm512_xor_si512(x1, x2);
+ x1 = _mm512_xor_si512(x1, x5);
+
+ buf += 64;
+ len -= 64;
+ }
+
+ /*
+ * Fold 512-bits to 384-bits.
+ */
+ a0 = _mm_load_si128((__m128i *)k5k6);
+
+ a1 = _mm512_extracti32x4_epi32(x1, 0);
+ a2 = _mm512_extracti32x4_epi32(x1, 1);
+
+ a3 = _mm_clmulepi64_si128(a1, a0, 0x00);
+ a1 = _mm_clmulepi64_si128(a1, a0, 0x11);
+
+ a1 = _mm_xor_si128(a1, a3);
+ a1 = _mm_xor_si128(a1, a2);
+
+ /*
+ * Fold 384-bits to 256-bits.
+ */
+ a2 = _mm512_extracti32x4_epi32(x1, 2);
+ a3 = _mm_clmulepi64_si128(a1, a0, 0x00);
+ a1 = _mm_clmulepi64_si128(a1, a0, 0x11);
+ a1 = _mm_xor_si128(a1, a3);
+ a1 = _mm_xor_si128(a1, a2);
+
+ /*
+ * Fold 256-bits to 128-bits.
+ */
+ a2 = _mm512_extracti32x4_epi32(x1, 3);
+ a3 = _mm_clmulepi64_si128(a1, a0, 0x00);
+ a1 = _mm_clmulepi64_si128(a1, a0, 0x11);
+ a1 = _mm_xor_si128(a1, a3);
+ a1 = _mm_xor_si128(a1, a2);
+
+ /*
+ * Fold 128-bits to 64-bits.
+ */
+ a2 = _mm_clmulepi64_si128(a1, a0, 0x10);
+ a3 = _mm_setr_epi32(~0, 0, ~0, 0);
+ a1 = _mm_srli_si128(a1, 8);
+ a1 = _mm_xor_si128(a1, a2);
+
+ a0 = _mm_loadl_epi64((__m128i*)k7k8);
+ a2 = _mm_srli_si128(a1, 4);
+ a1 = _mm_and_si128(a1, a3);
+ a1 = _mm_clmulepi64_si128(a1, a0, 0x00);
+ a1 = _mm_xor_si128(a1, a2);
+
+ /*
+ * Barret reduce to 32-bits.
+ */
+ a0 = _mm_load_si128((__m128i*)poly);
+
+ a2 = _mm_and_si128(a1, a3);
+ a2 = _mm_clmulepi64_si128(a2, a0, 0x10);
+ a2 = _mm_and_si128(a2, a3);
+ a2 = _mm_clmulepi64_si128(a2, a0, 0x00);
+ a1 = _mm_xor_si128(a1, a2);
+
+ /*
+ * Return the crc32.
+ */
+ return _mm_extract_epi32(a1, 1);
+}
+
+#elif defined(CRC32_SIMD_SSE42_PCLMUL)
+
+/*
+ * crc32_sse42_simd_(): compute the crc32 of the buffer, where the buffer
+ * length must be at least 64, and a multiple of 16.
+ */
+
#include <emmintrin.h>
#include <smmintrin.h>
#include <wmmintrin.h>
diff --git a/crc32_simd.h b/crc32_simd.h
index c0346dc..8462464 100644
--- a/crc32_simd.h
+++ b/crc32_simd.h
@@ -19,12 +19,18 @@ uint32_t ZLIB_INTERNAL crc32_sse42_simd_(const unsigned char* buf,
z_size_t len,
uint32_t crc);
+uint32_t ZLIB_INTERNAL crc32_avx512_simd_(const unsigned char* buf,
+ z_size_t len,
+ uint32_t crc);
+
/*
* crc32_sse42_simd_ buffer size constraints: see the use in zlib/crc32.c
* for computing the crc32 of an arbitrary length buffer.
*/
#define Z_CRC32_SSE42_MINIMUM_LENGTH 64
#define Z_CRC32_SSE42_CHUNKSIZE_MASK 15
+#define Z_CRC32_AVX512_MINIMUM_LENGTH 256
+#define Z_CRC32_AVX512_CHUNKSIZE_MASK 63
/*
* CRC32 checksums using ARMv8-a crypto instructions.
--
2.34.1

40
deps/zlib/patches/0012-lfs-open64.patch vendored Normal file
View File

@@ -0,0 +1,40 @@
From 6f21cb4b209d750486ede5472fdf7e35cf5ac3aa Mon Sep 17 00:00:00 2001
From: Ramin Halavati <rhalavati@chromium.org>
Date: Wed, 17 May 2023 15:21:43 +0200
Subject: [PATCH] Add open64 for Large File System support to gzlib.
---
third_party/zlib/gzlib.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/third_party/zlib/gzlib.c b/third_party/zlib/gzlib.c
index 55da46a453fd1..bbdb797e8079d 100644
--- a/third_party/zlib/gzlib.c
+++ b/third_party/zlib/gzlib.c
@@ -7,11 +7,14 @@
#if defined(_WIN32) && !defined(__BORLANDC__)
# define LSEEK _lseeki64
+# define OPEN open
#else
#if defined(_LARGEFILE64_SOURCE) && _LFS64_LARGEFILE-0
# define LSEEK lseek64
+# define OPEN open64
#else
# define LSEEK lseek
+# define OPEN open
#endif
#endif
@@ -244,7 +247,7 @@ local gzFile gz_open(path, fd, mode)
#ifdef WIDECHAR
fd == -2 ? _wopen(path, oflag, 0666) :
#endif
- open((const char *)path, oflag, 0666));
+ OPEN((const char *)path, oflag, 0666));
if (state->fd == -1) {
free(state->path);
free(state);
--
2.40.1.606.ga4b1b128d6-goog

6
deps/zlib/zconf.h vendored
View File

@@ -253,7 +253,11 @@
#endif
#ifdef Z_SOLO
typedef unsigned long z_size_t;
# ifdef _WIN64
typedef unsigned long long z_size_t;
# else
typedef unsigned long z_size_t;
# endif
#else
# define z_longlong long long
# if defined(NO_SIZE_T)

View File

@@ -243,7 +243,11 @@
#endif
#ifdef Z_SOLO
typedef unsigned long z_size_t;
# ifdef _WIN64
typedef unsigned long long z_size_t;
# else
typedef unsigned long z_size_t;
# endif
#else
# define z_longlong long long
# if defined(NO_SIZE_T)

551
deps/zlib/zconf.h.in vendored Normal file
View File

@@ -0,0 +1,551 @@
/* zconf.h -- configuration of the zlib compression library
* Copyright (C) 1995-2016 Jean-loup Gailly, Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
/* @(#) $Id$ */
#ifndef ZCONF_H
#define ZCONF_H
/*
* If you *really* need a unique prefix for all types and library functions,
* compile with -DZ_PREFIX. The "standard" zlib should be compiled without it.
* Even better than compiling with -DZ_PREFIX would be to use configure to set
* this permanently in zconf.h using "./configure --zprefix".
*/
/* Optional symbol renaming. With Z_PREFIX defined, every identifier listed
 * below is remapped by the preprocessor to a z_-prefixed name, and
 * Z_PREFIX_SET is defined as well. Groups wrapped in "ifndef Z_SOLO" are
 * remapped only when Z_SOLO is not defined.
 * NOTE: the comment on the #ifdef line states that ./configure may rewrite
 * that line, so it has to stay textually unchanged.
 */
#ifdef Z_PREFIX /* may be set to #if 1 by ./configure */
# define Z_PREFIX_SET
/* all linked symbols and init macros */
# define _dist_code z__dist_code
# define _length_code z__length_code
# define _tr_align z__tr_align
# define _tr_flush_bits z__tr_flush_bits
# define _tr_flush_block z__tr_flush_block
# define _tr_init z__tr_init
# define _tr_stored_block z__tr_stored_block
# define _tr_tally z__tr_tally
# define adler32 z_adler32
# define adler32_combine z_adler32_combine
# define adler32_combine64 z_adler32_combine64
# define adler32_z z_adler32_z
/* compress, compress2 and compressBound: skipped under Z_SOLO */
# ifndef Z_SOLO
# define compress z_compress
# define compress2 z_compress2
# define compressBound z_compressBound
# endif
# define crc32 z_crc32
# define crc32_combine z_crc32_combine
# define crc32_combine64 z_crc32_combine64
# define crc32_combine_gen z_crc32_combine_gen
# define crc32_combine_gen64 z_crc32_combine_gen64
# define crc32_combine_op z_crc32_combine_op
# define crc32_z z_crc32_z
# define deflate z_deflate
# define deflateBound z_deflateBound
# define deflateCopy z_deflateCopy
# define deflateEnd z_deflateEnd
# define deflateGetDictionary z_deflateGetDictionary
# define deflateInit z_deflateInit
# define deflateInit2 z_deflateInit2
# define deflateInit2_ z_deflateInit2_
# define deflateInit_ z_deflateInit_
# define deflateParams z_deflateParams
# define deflatePending z_deflatePending
# define deflatePrime z_deflatePrime
# define deflateReset z_deflateReset
# define deflateResetKeep z_deflateResetKeep
# define deflateSetDictionary z_deflateSetDictionary
# define deflateSetHeader z_deflateSetHeader
# define deflateTune z_deflateTune
# define deflate_copyright z_deflate_copyright
# define get_crc_table z_get_crc_table
/* the whole gz* family: skipped under Z_SOLO */
# ifndef Z_SOLO
# define gz_error z_gz_error
# define gz_intmax z_gz_intmax
# define gz_strwinerror z_gz_strwinerror
# define gzbuffer z_gzbuffer
# define gzclearerr z_gzclearerr
# define gzclose z_gzclose
# define gzclose_r z_gzclose_r
# define gzclose_w z_gzclose_w
# define gzdirect z_gzdirect
# define gzdopen z_gzdopen
# define gzeof z_gzeof
# define gzerror z_gzerror
# define gzflush z_gzflush
# define gzfread z_gzfread
# define gzfwrite z_gzfwrite
# define gzgetc z_gzgetc
# define gzgetc_ z_gzgetc_
# define gzgets z_gzgets
# define gzoffset z_gzoffset
# define gzoffset64 z_gzoffset64
# define gzopen z_gzopen
# define gzopen64 z_gzopen64
/* gzopen_w is remapped only when _WIN32 is defined */
# ifdef _WIN32
# define gzopen_w z_gzopen_w
# endif
# define gzprintf z_gzprintf
# define gzputc z_gzputc
# define gzputs z_gzputs
# define gzread z_gzread
# define gzrewind z_gzrewind
# define gzseek z_gzseek
# define gzseek64 z_gzseek64
# define gzsetparams z_gzsetparams
# define gztell z_gztell
# define gztell64 z_gztell64
# define gzungetc z_gzungetc
# define gzvprintf z_gzvprintf
# define gzwrite z_gzwrite
# endif
# define inflate z_inflate
# define inflateBack z_inflateBack
# define inflateBackEnd z_inflateBackEnd
# define inflateBackInit z_inflateBackInit
# define inflateBackInit_ z_inflateBackInit_
# define inflateCodesUsed z_inflateCodesUsed
# define inflateCopy z_inflateCopy
# define inflateEnd z_inflateEnd
# define inflateGetDictionary z_inflateGetDictionary
# define inflateGetHeader z_inflateGetHeader
# define inflateInit z_inflateInit
# define inflateInit2 z_inflateInit2
# define inflateInit2_ z_inflateInit2_
# define inflateInit_ z_inflateInit_
# define inflateMark z_inflateMark
# define inflatePrime z_inflatePrime
# define inflateReset z_inflateReset
# define inflateReset2 z_inflateReset2
# define inflateResetKeep z_inflateResetKeep
# define inflateSetDictionary z_inflateSetDictionary
# define inflateSync z_inflateSync
# define inflateSyncPoint z_inflateSyncPoint
# define inflateUndermine z_inflateUndermine
# define inflateValidate z_inflateValidate
# define inflate_copyright z_inflate_copyright
# define inflate_fast z_inflate_fast
# define inflate_table z_inflate_table
/* uncompress and uncompress2: skipped under Z_SOLO */
# ifndef Z_SOLO
# define uncompress z_uncompress
# define uncompress2 z_uncompress2
# endif
# define zError z_zError
/* zcalloc and zcfree: skipped under Z_SOLO */
# ifndef Z_SOLO
# define zcalloc z_zcalloc
# define zcfree z_zcfree
# endif
# define zlibCompileFlags z_zlibCompileFlags
# define zlibVersion z_zlibVersion
/* all zlib typedefs in zlib.h and zconf.h */
# define Byte z_Byte
# define Bytef z_Bytef
# define alloc_func z_alloc_func
# define charf z_charf
# define free_func z_free_func
/* gzFile: skipped under Z_SOLO, like the gz* functions above */
# ifndef Z_SOLO
# define gzFile z_gzFile
# endif
# define gz_header z_gz_header
# define gz_headerp z_gz_headerp
# define in_func z_in_func
# define intf z_intf
# define out_func z_out_func
# define uInt z_uInt
# define uIntf z_uIntf
# define uLong z_uLong
# define uLongf z_uLongf
# define voidp z_voidp
# define voidpc z_voidpc
# define voidpf z_voidpf
/* all zlib structs in zlib.h and zconf.h */
# define gz_header_s z_gz_header_s
# define internal_state z_internal_state
#endif
/* Normalize compiler-specific platform macros to the short names tested in
 * the rest of this header:
 *   __MSDOS__                       -> MSDOS
 *   OS_2 or __OS2__                 -> OS2
 *   _WINDOWS                        -> WINDOWS
 *   _WIN32, _WIN32_WCE or __WIN32__ -> WIN32
 * Each short name is defined only if it is not defined already.
 */
#if defined(__MSDOS__) && !defined(MSDOS)
# define MSDOS
#endif
#if (defined(OS_2) || defined(__OS2__)) && !defined(OS2)
# define OS2
#endif
#if defined(_WINDOWS) && !defined(WINDOWS)
# define WINDOWS
#endif
#if defined(_WIN32) || defined(_WIN32_WCE) || defined(__WIN32__)
# ifndef WIN32
# define WIN32
# endif
#endif
/* SYS16BIT is set for an MSDOS, OS2 or WINDOWS target that is not WIN32 and
 * whose compiler defines none of __GNUC__, __FLAT__ or __386__.
 */
#if (defined(MSDOS) || defined(OS2) || defined(WINDOWS)) && !defined(WIN32)
# if !defined(__GNUC__) && !defined(__FLAT__) && !defined(__386__)
# ifndef SYS16BIT
# define SYS16BIT
# endif
# endif
#endif
/*
* Compile with -DMAXSEG_64K if the alloc function cannot allocate more
* than 64k bytes at a time (needed on systems with 16-bit int).
*/
/* SYS16BIT builds get MAXSEG_64K automatically. */
#ifdef SYS16BIT
# define MAXSEG_64K
#endif
/* MSDOS builds also define UNALIGNED_OK. The macro is not consumed anywhere
 * in this header; presumably it tells the library that unaligned memory
 * accesses are acceptable on this target -- confirm against its users.
 */
#ifdef MSDOS
# define UNALIGNED_OK
#endif
/* Decide whether the compiler is treated as standard C (STDC) and, if so,
 * whether it is at least C99 (STDC99).
 *
 * First choice: __STDC_VERSION__ is defined. That alone sets STDC, and a
 * value of 199901L or greater additionally sets STDC99.
 */
#ifdef __STDC_VERSION__
# ifndef STDC
# define STDC
# endif
# if __STDC_VERSION__ >= 199901L
# ifndef STDC99
# define STDC99
# endif
# endif
#endif
/* Fallbacks, each applied only while STDC is still undefined: __STDC__ or
 * __cplusplus, then known compilers (__GNUC__, __BORLANDC__), then known
 * platforms (MSDOS, WINDOWS, WIN32, OS2, __HOS_AIX__, __OS400__).
 */
#if !defined(STDC) && (defined(__STDC__) || defined(__cplusplus))
# define STDC
#endif
#if !defined(STDC) && (defined(__GNUC__) || defined(__BORLANDC__))
# define STDC
#endif
#if !defined(STDC) && (defined(MSDOS) || defined(WINDOWS) || defined(WIN32))
# define STDC
#endif
#if !defined(STDC) && (defined(OS2) || defined(__HOS_AIX__))
# define STDC
#endif
#if defined(__OS400__) && !defined(STDC) /* iSeries (formerly AS/400). */
# define STDC
#endif
/* Without STDC the keyword "const" is defined away to nothing, unless the
 * build already defined a "const" macro of its own.
 */
#ifndef STDC
# ifndef const /* cannot use !defined(STDC) && !defined(const) on Mac */
# define const /* note: need a more gentle solution here */
# endif
#endif
/* z_const expands to "const" only when the build defines ZLIB_CONST and
 * z_const is not defined yet; in every other case it is defined as empty.
 * NOTE(review): a z_const macro defined before this header is included also
 * takes the #else branch and is therefore redefined as empty here.
 */
#if defined(ZLIB_CONST) && !defined(z_const)
# define z_const const
#else
# define z_const
#endif
/* z_size_t: the unsigned type this header uses for sizes.
 *
 * With Z_SOLO:
 *   - unsigned long long when _WIN64 is defined,
 *   - unsigned long otherwise.
 * Without Z_SOLO:
 *   - "unsigned NO_SIZE_T" when the build defines NO_SIZE_T,
 *   - size_t from <stddef.h> when STDC is defined,
 *   - unsigned long otherwise.
 *
 * z_longlong exists only for the span of the non-Z_SOLO branch and is
 * undefined again at its end. Presumably this lets a build pass
 * -DNO_SIZE_T=z_longlong to obtain "unsigned long long" -- TODO confirm.
 */
#ifdef Z_SOLO
# ifdef _WIN64
typedef unsigned long long z_size_t;
# else
typedef unsigned long z_size_t;
# endif
#else
# define z_longlong long long
# if defined(NO_SIZE_T)
typedef unsigned NO_SIZE_T z_size_t;
# elif defined(STDC)
# include <stddef.h>
typedef size_t z_size_t;
# else
typedef unsigned long z_size_t;
# endif
# undef z_longlong
#endif
/* Maximum value for memLevel in deflateInit2 */
/* A value supplied by the build wins; otherwise the default is 8 when
 * MAXSEG_64K is defined and 9 when it is not.
 */
#ifndef MAX_MEM_LEVEL
# ifdef MAXSEG_64K
# define MAX_MEM_LEVEL 8
# else
# define MAX_MEM_LEVEL 9
# endif
#endif
/* Maximum value for windowBits in deflateInit2 and inflateInit2.
* WARNING: reducing MAX_WBITS makes minigzip unable to extract .gz files
* created by gzip. (Files created by minigzip can still be extracted by
* gzip.)
*/
/* Like MAX_MEM_LEVEL, MAX_WBITS may be preset by the build; 15 is only the
 * default.
 */
#ifndef MAX_WBITS
# define MAX_WBITS 15 /* 32K LZ77 window */
#endif
/* The memory requirements for deflate are (in bytes):
(1 << (windowBits+2)) + (1 << (memLevel+9))
that is: 128K for windowBits=15 + 128K for memLevel = 8 (default values)
plus a few kilobytes for small objects. For example, if you want to reduce
the default memory requirements from 256K to 128K, compile with
make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7"
Of course this will generally degrade compression (there's no free lunch).
The memory requirements for inflate are (in bytes) 1 << windowBits
that is, 32K for windowBits=15 (default value) plus about 7 kilobytes
for small objects.
*/
/* Type declarations */
/* OF(args) wraps the parameter list of a function declaration: with STDC it
 * expands to the list itself (a full prototype), without STDC it expands to
 * "()" so that the declaration carries no parameter information. A build
 * may predefine OF to override this.
 */
#ifndef OF /* function prototypes */
# ifdef STDC
# define OF(args) args
# else
# define OF(args) ()
# endif
#endif
/* Z_ARG(args) is the same device for declarations that rely on <stdarg.h>;
 * it keeps the parameter list when either STDC or Z_HAVE_STDARG_H is
 * defined.
 */
#ifndef Z_ARG /* function prototypes for stdarg */
# if defined(STDC) || defined(Z_HAVE_STDARG_H)
# define Z_ARG(args) args
# else
# define Z_ARG(args) ()
# endif
#endif
/* The following definitions for FAR are needed only for MSDOS mixed
* model programming (small or medium model with some far allocations).
* This was tested only with MSC; for other MSDOS compilers you may have
* to define NO_MEMCPY in zutil.h. If you don't need the mixed model,
* just define FAR to be empty.
*/
/* Only SYS16BIT builds are affected. For either compiler family below,
 * SMALL_MEDIUM is set and FAR becomes the compiler's far-pointer keyword:
 * "_far" when the named vendor macro (_MSC_VER / __BORLANDC__) is defined,
 * plain "far" otherwise.
 */
#ifdef SYS16BIT
# if defined(M_I86SM) || defined(M_I86MM)
/* MSC small or medium model */
# define SMALL_MEDIUM
# ifdef _MSC_VER
# define FAR _far
# else
# define FAR far
# endif
# endif
# if (defined(__SMALL__) || defined(__MEDIUM__))
/* Turbo C small or medium model */
# define SMALL_MEDIUM
# ifdef __BORLANDC__
# define FAR _far
# else
# define FAR far
# endif
# endif
#endif
/* Windows linkage and calling-convention setup (WINDOWS or WIN32 only).
 * Anything left undefined here receives a default further down in this
 * header.
 */
#if defined(WINDOWS) || defined(WIN32)
/* If building or using zlib as a DLL, define ZLIB_DLL.
 * This is not mandatory, but it offers a little performance increase.
 */
/* ZEXTERN is set only for WIN32 with a non-Borland compiler or Borland
 * 0x500 and later: dllexport while building the library (ZLIB_INTERNAL),
 * dllimport for its users.
 */
# ifdef ZLIB_DLL
# if defined(WIN32) && (!defined(__BORLANDC__) || (__BORLANDC__ >= 0x500))
# ifdef ZLIB_INTERNAL
# define ZEXTERN extern __declspec(dllexport)
# else
# define ZEXTERN extern __declspec(dllimport)
# endif
# endif
# endif /* ZLIB_DLL */
/* If building or using zlib with the WINAPI/WINAPIV calling convention,
 * define ZLIB_WINAPI.
 * Caution: the standard ZLIB1.DLL is NOT compiled using ZLIB_WINAPI.
 */
# ifdef ZLIB_WINAPI
/* Any FAR defined earlier in this header is dropped before <windows.h> is
 * included; presumably <windows.h> supplies its own FAR, which the
 * non-WIN32 ZEXPORTVA definition below relies on -- TODO confirm.
 */
# ifdef FAR
# undef FAR
# endif
# ifndef WIN32_LEAN_AND_MEAN
# define WIN32_LEAN_AND_MEAN
# endif
# include <windows.h>
/* No need for _export, use ZLIB.DEF instead. */
/* For complete Windows compatibility, use WINAPI, not __stdcall. */
# define ZEXPORT WINAPI
# ifdef WIN32
# define ZEXPORTVA WINAPIV
# else
# define ZEXPORTVA FAR CDECL
# endif
# endif
#endif
/* BeOS shared-library builds (ZLIB_DLL): ZEXPORT and ZEXPORTVA both become
 * dllexport while building the library (ZLIB_INTERNAL) and dllimport for
 * its users. Without ZLIB_DLL nothing is defined here.
 */
#if defined (__BEOS__)
# ifdef ZLIB_DLL
# ifdef ZLIB_INTERNAL
# define ZEXPORT __declspec(dllexport)
# define ZEXPORTVA __declspec(dllexport)
# else
# define ZEXPORT __declspec(dllimport)
# define ZEXPORTVA __declspec(dllimport)
# endif
# endif
#endif
/* Defaults for whatever the platform sections above left undefined:
 * ZEXTERN is plain "extern"; ZEXPORT, ZEXPORTVA and FAR expand to nothing.
 */
#ifndef ZEXTERN
# define ZEXTERN extern
#endif
#ifndef ZEXPORT
# define ZEXPORT
#endif
#ifndef ZEXPORTVA
# define ZEXPORTVA
#endif
#ifndef FAR
# define FAR
#endif
/* Basic integer and pointer types used throughout the zlib interface. */
/* Byte is not declared when __MACTYPES__ is defined; presumably the header
 * that sets that macro already provides a Byte type -- TODO confirm.
 */
#if !defined(__MACTYPES__)
typedef unsigned char Byte; /* 8 bits */
#endif
typedef unsigned int uInt; /* 16 bits or more */
typedef unsigned long uLong; /* 32 bits or more */
/* The "f" variants add the FAR qualifier to the plain type. Under
 * SMALL_MEDIUM, Bytef is a macro rather than a typedef for the reason given
 * in the next comment.
 */
#ifdef SMALL_MEDIUM
/* Borland C/C++ and some old MSC versions ignore FAR inside typedef */
# define Bytef Byte FAR
#else
typedef Byte FAR Bytef;
#endif
typedef char FAR charf;
typedef int FAR intf;
typedef uInt FAR uIntf;
typedef uLong FAR uLongf;
/* Generic pointer types: void-based with STDC, Byte-based without it.
 * voidpc points to const data, voidpf is FAR-qualified, voidp is plain.
 */
#ifdef STDC
typedef void const *voidpc;
typedef void FAR *voidpf;
typedef void *voidp;
#else
typedef Byte const *voidpc;
typedef Byte FAR *voidpf;
typedef Byte *voidp;
#endif
/* z_crc_t: the integer type declared here for CRC values.
 *
 * Unless the build presets Z_U4, and only for STDC builds without Z_SOLO,
 * <limits.h> is consulted and Z_U4 becomes the first of unsigned,
 * unsigned long and unsigned short whose maximum equals 0xffffffff. If none
 * matches, Z_U4 stays undefined.
 */
#if !defined(Z_U4) && !defined(Z_SOLO) && defined(STDC)
# include <limits.h>
# if (UINT_MAX == 0xffffffffUL)
# define Z_U4 unsigned
# elif (ULONG_MAX == 0xffffffffUL)
# define Z_U4 unsigned long
# elif (USHRT_MAX == 0xffffffffUL)
# define Z_U4 unsigned short
# endif
#endif
/* z_crc_t is Z_U4 when one was found or preset, otherwise unsigned long. */
#ifdef Z_U4
typedef Z_U4 z_crc_t;
#else
typedef unsigned long z_crc_t;
#endif
/* Translate the build's HAVE_UNISTD_H / HAVE_STDARG_H into the zlib-private
 * Z_HAVE_UNISTD_H / Z_HAVE_STDARG_H tested further down.
 * NOTE: per their trailing comments, ./configure may rewrite these two
 * #ifdef lines, so they have to stay textually unchanged.
 */
#ifdef HAVE_UNISTD_H /* may be set to #if 1 by ./configure */
# define Z_HAVE_UNISTD_H
#endif
#ifdef HAVE_STDARG_H /* may be set to #if 1 by ./configure */
# define Z_HAVE_STDARG_H
#endif
/* System headers needed by the declarations that use off_t, va_list and
 * wchar_t. Every include is skipped under Z_SOLO.
 */
#ifdef STDC
# ifndef Z_SOLO
# include <sys/types.h> /* for off_t */
# endif
#endif
#if defined(STDC) || defined(Z_HAVE_STDARG_H)
# ifndef Z_SOLO
# include <stdarg.h> /* for va_list */
# endif
#endif
#ifdef _WIN32
# ifndef Z_SOLO
# include <stddef.h> /* for wchar_t */
# endif
#endif
/* a little trick to accommodate both "#define _LARGEFILE64_SOURCE" and
 * "#define _LARGEFILE64_SOURCE 1" as requesting 64-bit operations, (even
 * though the former does not conform to the LFS document), but considering
 * both "#undef _LARGEFILE64_SOURCE" and "#define _LARGEFILE64_SOURCE 0" as
 * equivalently requesting no 64-bit operations
 */
/* How the test below evaluates for each way the macro can be defined:
 *   empty : - - -1 == 1   ->  -1 == 1  -> false, macro kept
 *   1     : -1 - -1 == 1  ->   0 == 1  -> false, macro kept
 *   0     : -0 - -1 == 1  ->   1 == 1  -> true,  macro undefined
 */
#if defined(_LARGEFILE64_SOURCE) && -_LARGEFILE64_SOURCE - -1 == 1
# undef _LARGEFILE64_SOURCE
#endif
/* Z_HAVE_UNISTD_H is also assumed, when not set already, for Watcom C and
 * for any non-Windows build that requests _LARGEFILE64_SOURCE.
 */
#ifndef Z_HAVE_UNISTD_H
# ifdef __WATCOMC__
# define Z_HAVE_UNISTD_H
# endif
#endif
#ifndef Z_HAVE_UNISTD_H
# if defined(_LARGEFILE64_SOURCE) && !defined(_WIN32)
# define Z_HAVE_UNISTD_H
# endif
#endif
/* With <unistd.h> available (and no Z_SOLO), include it and make z_off_t an
 * alias for off_t unless the build defined z_off_t itself.
 */
#ifndef Z_SOLO
# if defined(Z_HAVE_UNISTD_H)
# include <unistd.h> /* for SEEK_*, off_t, and _LFS64_LARGEFILE */
# ifdef VMS
# include <unixio.h> /* for off_t */
# endif
# ifndef z_off_t
# define z_off_t off_t
# endif
# endif
#endif
#if defined(_LFS64_LARGEFILE) && _LFS64_LARGEFILE-0
# define Z_LFS64
#endif
#if defined(_LARGEFILE64_SOURCE) && defined(Z_LFS64)
# define Z_LARGE64
#endif
#if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS-0 == 64 && defined(Z_LFS64)
# define Z_WANT64
#endif
#if !defined(SEEK_SET) && !defined(Z_SOLO)
# define SEEK_SET 0 /* Seek from beginning of file. */
# define SEEK_CUR 1 /* Seek from current position. */
# define SEEK_END 2 /* Set file pointer to EOF plus "offset" */
#endif
#ifndef z_off_t
# define z_off_t long
#endif
#if !defined(_WIN32) && defined(Z_LARGE64)
# define z_off64_t off64_t
#else
# if defined(_WIN32) && !defined(__GNUC__)
# define z_off64_t __int64
# else
# define z_off64_t z_off_t
# endif
#endif
/* MVS linker does not support external names larger than 8 bytes */
/* Each #pragma map below therefore assigns a short alias to one of the
 * longer external names; the list is applied only when __MVS__ is defined.
 */
#if defined(__MVS__)
#pragma map(deflateInit_,"DEIN")
#pragma map(deflateInit2_,"DEIN2")
#pragma map(deflateEnd,"DEEND")
#pragma map(deflateBound,"DEBND")
#pragma map(inflateInit_,"ININ")
#pragma map(inflateInit2_,"ININ2")
#pragma map(inflateEnd,"INEND")
#pragma map(inflateSync,"INSY")
#pragma map(inflateSetDictionary,"INSEDI")
#pragma map(compressBound,"CMBND")
#pragma map(inflate_table,"INTABL")
#pragma map(inflate_fast,"INFA")
#pragma map(inflate_copyright,"INCOPY")
#endif
#endif /* ZCONF_H */

4
deps/zlib/zlib.3 vendored
View File

@@ -1,4 +1,4 @@
-.TH ZLIB 3 "13 Oct 2022"
+.TH ZLIB 3 "xx Oct 2022"
.SH NAME
zlib \- compression/decompression library
.SH SYNOPSIS
@@ -105,7 +105,7 @@ before asking for help.
Send questions and/or comments to zlib@gzip.org,
or (for the Windows DLL version) to Gilles Vollant (info@winimage.com).
.SH AUTHORS AND LICENSE
-Version 1.2.13
+Version 1.2.13.1
.LP
Copyright (C) 1995-2022 Jean-loup Gailly and Mark Adler
.LP

8
deps/zlib/zlib.h vendored
View File

@@ -1,5 +1,5 @@
/* zlib.h -- interface of the 'zlib' general purpose compression library
-version 1.2.13, October 13th, 2022
+version 1.2.13.1, October xxth, 2022
Copyright (C) 1995-2022 Jean-loup Gailly and Mark Adler
@@ -37,12 +37,12 @@
extern "C" {
#endif
-#define ZLIB_VERSION "1.2.13"
-#define ZLIB_VERNUM 0x12d0
+#define ZLIB_VERSION "1.2.13.1-motley"
+#define ZLIB_VERNUM 0x12d1
#define ZLIB_VER_MAJOR 1
#define ZLIB_VER_MINOR 2
#define ZLIB_VER_REVISION 13
-#define ZLIB_VER_SUBREVISION 0
+#define ZLIB_VER_SUBREVISION 1
/*
The 'zlib' compression library provides in-memory compression and