dropshell release 2025.0513.2134
Some checks failed
Dropshell Test / Build_and_Test (push) Has been cancelled

This commit is contained in:
Your Name
2025-05-13 21:34:59 +12:00
parent adcb3567d4
commit bd1ad20990
1055 changed files with 168339 additions and 0 deletions

View File

@@ -0,0 +1,9 @@
[cxx]
cppflags = -DXXH_NAMESPACE=ZSTD_ -DZSTD_LEGACY_SUPPORT=4
cflags = -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement -Wstrict-prototypes -Wundef -Wpointer-arith
cxxppflags = -DXXH_NAMESPACE=ZSTD_ -DZSTD_LEGACY_SUPPORT=4
cxxflags = -std=c++11 -Wno-deprecated-declarations
gtest_dep = //contrib/pzstd:gtest
[httpserver]
port = 0

View File

@@ -0,0 +1 @@
c8dec2e8da52d483f6dd7c6cd2ad694e8e6fed2b

View File

@@ -0,0 +1,9 @@
task:
name: FreeBSD (make check)
freebsd_instance:
matrix:
image_family: freebsd-14-2
install_script: pkg install -y gmake coreutils
script: |
MOREFLAGS="-Werror" gmake -j all
gmake check

View File

@@ -0,0 +1,21 @@
# Set the default behavior
* text eol=lf
# Explicitly declare source files
*.c text eol=lf
*.h text eol=lf
# Denote files that should not be modified.
*.odt binary
*.png binary
# Visual Studio
*.sln text eol=crlf
*.vcxproj* text eol=crlf
*.vcproj* text eol=crlf
*.suo binary
*.rc text eol=crlf
# Windows
*.bat text eol=crlf
*.cmd text eol=crlf

View File

@@ -0,0 +1,35 @@
---
name: Bug report
about: Create a report to help us improve
title: ''
labels: ''
assignees: ''
---
**Describe the bug**
A clear and concise description of what the bug is.
**To Reproduce**
Steps to reproduce the behavior:
1. Downloads data '...'
2. Run '...' with flags '...'
3. Scroll up on the log to '....'
4. See error
**Expected behavior**
A clear and concise description of what you expected to happen.
**Screenshots and charts**
If applicable, add screenshots and charts to help explain your problem.
**Desktop (please complete the following information):**
- OS: [e.g. Mac]
- Version [e.g. 22]
- Compiler [e.g. gcc]
- Flags [e.g. O2]
- Other relevant hardware specs [e.g. Dual-core]
- Build system [e.g. Makefile]
**Additional context**
Add any other context about the problem here.

View File

@@ -0,0 +1,20 @@
---
name: Feature request
about: Suggest an idea for this project
title: ''
labels: ''
assignees: ''
---
**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
**Describe the solution you'd like**
A clear and concise description of what you want to happen.
**Describe alternatives you've considered**
A clear and concise description of any alternative solutions or features you've considered.
**Additional context**
Add any other context or screenshots about the feature request here.

View File

@@ -0,0 +1,6 @@
version: 2
updates:
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "weekly"

View File

@@ -0,0 +1,39 @@
name: Android NDK Build
on:
pull_request:
branches: [ dev, release, actionsTest ]
push:
branches: [ actionsTest, '*ndk*' ]
permissions: read-all
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: Set up JDK 17
uses: actions/setup-java@3a4f6e1af504cf6a31855fa899c6aa5355ba6c12 # v4.7.0
with:
java-version: '17'
distribution: 'temurin'
- name: Setup Android SDK
uses: android-actions/setup-android@9fc6c4e9069bf8d3d10b2204b1fb8f6ef7065407 # v3.2.2
- name: Install Android NDK
run: |
sdkmanager --install "ndk;27.0.12077973"
echo "ANDROID_NDK_HOME=$ANDROID_SDK_ROOT/ndk/27.0.12077973" >> $GITHUB_ENV
- name: Build with NDK
run: |
export PATH=$ANDROID_NDK_HOME/toolchains/llvm/prebuilt/linux-x86_64/bin:$PATH
make CC=aarch64-linux-android21-clang \
AR=llvm-ar \
RANLIB=llvm-ranlib \
STRIP=llvm-strip

View File

@@ -0,0 +1,104 @@
name: facebook/zstd/commit
on:
push:
branches:
- dev
pull_request:
branches:
- dev
permissions: read-all
jobs:
short-tests-0:
runs-on: ubuntu-latest
services:
docker:
image: fbopensource/zstd-circleci-primary:0.0.1
options: --entrypoint /bin/bash
steps:
- uses: actions/checkout@v4
- name: Install Dependencies
run: |
sudo apt-get update
sudo apt-get install libcurl4-gnutls-dev
- name: Test
run: |
./tests/test-license.py
cc -v
CFLAGS="-O0 -Werror -pedantic" make allmost; make clean
make c99build; make clean
make c11build; make clean
make -j regressiontest; make clean
make check; make clean
make cxxtest; make clean
short-tests-1:
runs-on: ubuntu-latest
services:
docker:
image: fbopensource/zstd-circleci-primary:0.0.1
options: --entrypoint /bin/bash
steps:
- uses: actions/checkout@v4
- name: Install Dependencies
run: |
sudo apt-get update
sudo apt-get install gcc-powerpc-linux-gnu gcc-arm-linux-gnueabi gcc-aarch64-linux-gnu libc6-dev-ppc64-powerpc-cross libcurl4-gnutls-dev lib64gcc-13-dev-powerpc-cross
- name: gnu90 build
run: make gnu90build && make clean
- name: gnu99 build
run: make gnu99build && make clean
- name: ppc64 build
run: make ppc64build V=1 && make clean
- name: ppc build
run: make ppcbuild V=1 && make clean
- name: arm build
run: make armbuild V=1 && make clean
- name: aarch64 build
run: make aarch64build V=1 && make clean
- name: test-legacy
run: make -C tests test-legacy V=1 && make clean
- name: test-longmatch
run: make -C tests test-longmatch V=1 && make clean
- name: libzstd-nomt build
run: make -C lib libzstd-nomt V=1 && make clean
regression-test:
runs-on: ubuntu-latest
services:
docker:
image: fbopensource/zstd-circleci-primary:0.0.1
options: --entrypoint /bin/bash
env:
CIRCLE_ARTIFACTS: "/tmp/circleci-artifacts"
steps:
- uses: actions/checkout@v4
- name: restore_cache
uses: actions/cache@v4
with:
key: regression-cache-{{ checksum "tests/regression/data.c" }}-v0
path: tests/regression/cache
restore-keys: regression-cache-{{ checksum "tests/regression/data.c" }}-v0
- name: Install Dependencies
run: |
sudo apt-get update
sudo apt-get install libcurl4-gnutls-dev
- name: Regression Test
run: |
make -C programs zstd
make -C tests/regression test
mkdir -p $CIRCLE_ARTIFACTS
./tests/regression/test \
--cache tests/regression/cache \
--output $CIRCLE_ARTIFACTS/results.csv \
--zstd programs/zstd
echo "NOTE: The new results.csv is uploaded as an artifact to this job"
echo " If this fails, go to the Artifacts pane in CircleCI, "
echo " download /tmp/circleci-artifacts/results.csv, and if they "
echo " are still good, copy it into the repo and commit it."
echo "> diff tests/regression/results.csv $CIRCLE_ARTIFACTS/results.csv"
diff tests/regression/results.csv $CIRCLE_ARTIFACTS/results.csv
- uses: actions/upload-artifact@v4
with:
path: "/tmp/circleci-artifacts"

View File

@@ -0,0 +1,313 @@
name: dev-long-tests
# Tests longer than 10mn
concurrency:
group: long-${{ github.ref }}
cancel-in-progress: true
on:
pull_request:
branches: [ dev, release, actionsTest ]
permissions: read-all
jobs:
make-all:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: make all
run: make all
# lasts ~24mn
make-test:
runs-on: ubuntu-latest
env:
DEVNULLRIGHTS: 1
READFROMBLOCKDEVICE: 1
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: make test
run: make test
# lasts ~26mn
make-test-macos:
runs-on: macos-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: make test on macos
run: make test
# lasts ~24mn
make-test-32bit:
runs-on: ubuntu-latest
env:
DEVNULLRIGHTS: 1
READFROMBLOCKDEVICE: 1
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: make test # note: `make -j test success` seems to require a clean state
run: |
sudo apt-get -qqq update
make libc6install
make clean
CFLAGS="-m32 -O2" make -j test V=1
no-intrinsics-fuzztest:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: no intrinsics fuzztest
run: MOREFLAGS="-DZSTD_NO_INTRINSICS" make -C tests fuzztest
tsan-zstreamtest:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: thread sanitizer zstreamtest
run: CC=clang ZSTREAM_TESTTIME=-T3mn make tsan-test-zstream
uasan-zstreamtest:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: ub + address sanitizer on zstreamtest
run: CC=clang make uasan-test-zstream
# lasts ~15mn
tsan-fuzztest:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: thread sanitizer fuzztest
run: CC=clang make tsan-fuzztest
big-tests-zstreamtest32:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: zstream tests in 32bit mode, with big tests
run: |
sudo apt-get -qqq update
make libc6install
CC=clang make -C tests test-zstream32 FUZZER_FLAGS="--big-tests"
# lasts ~23mn
gcc-8-asan-ubsan-testzstd:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: gcc-8 + ASan + UBSan + Test Zstd
# See https://askubuntu.com/a/1428822
run: |
echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu focal main universe" | sudo tee -a /etc/apt/sources.list
sudo apt-get -qqq update
make gcc8install
CC=gcc-8 make -j uasan-test-zstd </dev/null V=1
clang-asan-ubsan-testzstd:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: clang + ASan + UBSan + Test Zstd
run: CC=clang make -j uasan-test-zstd </dev/null V=1
gcc-asan-ubsan-testzstd-32bit:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: ASan + UBSan + Test Zstd, 32bit mode
run: |
sudo apt-get -qqq update
make libc6install
make -j uasan-test-zstd32 V=1
# Note : external libraries must be turned off when using MSAN tests,
# because they are not msan-instrumented,
# so any data coming from these libraries is always considered "uninitialized"
gcc-8-asan-ubsan-fuzz:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: gcc-8 + ASan + UBSan + Fuzz Test
# See https://askubuntu.com/a/1428822
run: |
echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu focal main universe" | sudo tee -a /etc/apt/sources.list
sudo apt-get -qqq update
make gcc8install
CC=gcc-8 FUZZER_FLAGS="--long-tests" make clean uasan-fuzztest
clang-asan-ubsan-fuzz:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: clang + ASan + UBSan + Fuzz Test
run: CC=clang FUZZER_FLAGS="--long-tests" make clean uasan-fuzztest
gcc-asan-ubsan-fuzz32:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: ASan + UBSan + Fuzz Test 32bit
run: |
sudo apt-get -qqq update
make libc6install
CFLAGS="-O3 -m32" FUZZER_FLAGS="--long-tests" make uasan-fuzztest
clang-asan-fuzz32:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: clang + ASan + Fuzz Test 32bit
run: |
sudo apt-get -qqq update
make libc6install
CC=clang CFLAGS="-O3 -m32" FUZZER_FLAGS="--long-tests" make asan-fuzztest
# The following test seems to have issues on github CI specifically,
# it does not provide the `__mulodi4` instruction emulation
# required for signed 64-bit multiplication.
# Replaced by asan-only test (above)
#
# clang-asan-ubsan-fuzz32:
# runs-on: ubuntu-20.04
# steps:
# - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
# - name: clang + ASan + UBSan + Fuzz Test 32bit
# run: |
# sudo apt-get -qqq update
# make libc6install
# CC=clang CFLAGS="-O3 -m32" FUZZER_FLAGS="--long-tests" make uasan-fuzztest
asan-ubsan-regression:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: ASan + UBSan + Regression Test
run: make -j uasanregressiontest
clang-asan-ubsan-regression:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: clang + ASan + UBSan + Regression Test
run: CC=clang make -j uasanregressiontest
msan-regression:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: MSan + Regression Test
run: make -j msanregressiontest
clang-msan-fuzz-unoptimized:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: clang + MSan + Fuzz Test
run: |
sudo apt-get -qqq update
sudo apt-get install clang
CC=clang MOREFLAGS="-O0" make clean msan-fuzztest
clang-msan-fuzz:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: clang + MSan + Fuzz Test
run: |
sudo apt-get -qqq update
sudo apt-get install clang
CC=clang FUZZER_FLAGS="--long-tests" make clean msan-fuzztest
# lasts ~24mn
clang-msan-testzstd:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: clang + MSan + Test Zstd
run: |
sudo apt-get update
sudo apt-get install clang
CC=clang make msan-test-zstd HAVE_ZLIB=0 HAVE_LZ4=0 HAVE_LZMA=0 V=1
armfuzz:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: Qemu ARM emulation + Fuzz Test
run: |
sudo apt-get -qqq update
make arminstall
make armfuzz
valgrind-fuzz-test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: valgrind + fuzz test stack mode # ~ 7mn
shell: 'script -q -e -c "bash {0}"'
run: |
sudo apt-get -qqq update
make valgrindinstall
make -C tests test-valgrind
make clean
make -C tests test-fuzzer-stackmode
mingw-long-test:
runs-on: windows-latest
defaults:
run:
shell: msys2 {0}
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- uses: msys2/setup-msys2@d44ca8e88d8b43d56cf5670f91747359d5537f97 # tag=v2.26.0
with:
msystem: MINGW64
install: make
update: true
# Based on https://ariya.io/2020/07/on-github-actions-with-msys2
- name: install mingw gcc
run: pacman --noconfirm -S gcc
- name: MINGW64 gcc fuzztest
run: |
export CC="gcc"
export CXX="g++"
export FUZZERTEST="-T2mn"
export ZSTREAM_TESTTIME="-T2mn"
echo "Testing $CC $CXX MINGW64"
make -v
$CC --version
$CXX --version
make -C tests fuzztest
# lasts ~20mn
oss-fuzz:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
sanitizer: [address, undefined, memory]
steps:
- name: Build Fuzzers (${{ matrix.sanitizer }})
id: build
uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master
with:
oss-fuzz-project-name: 'zstd'
dry-run: false
sanitizer: ${{ matrix.sanitizer }}
- name: Run Fuzzers (${{ matrix.sanitizer }})
uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master
with:
oss-fuzz-project-name: 'zstd'
fuzz-seconds: 600
dry-run: false
sanitizer: ${{ matrix.sanitizer }}
- name: Upload Crash
uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # tag=v4.3.1
if: failure() && steps.build.outcome == 'success'
with:
name: ${{ matrix.sanitizer }}-artifacts
path: ./out/artifacts

View File

@@ -0,0 +1,725 @@
name: dev-short-tests
# Faster tests: mostly build tests, along with some other
# misc tests
concurrency:
group: fast-${{ github.ref }}
cancel-in-progress: true
on:
pull_request:
branches: [ dev, release, actionsTest ]
permissions: read-all
jobs:
linux-kernel:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: linux kernel, library + build + test
run: make -C contrib/linux-kernel test CFLAGS="-Werror -Wunused-const-variable -Wunused-but-set-variable"
benchmarking:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: make benchmarking
run: make benchmarking
check-32bit: # designed to catch https://github.com/facebook/zstd/issues/2428
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: make check on 32-bit
run: |
sudo apt update
APT_PACKAGES="gcc-multilib" make apt-install
CFLAGS="-m32 -O1 -fstack-protector" make check V=1
build-c89:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: ensure zstd can be build with c89/c90 compilers (+ long long support + variadic macros)
run: |
make c89build V=1
build-zstd-dll:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: build zstd bin against a dynamic lib (debuglevel for more dependencies)
run: |
make -C lib lib-mt-release
DEBUGLEVEL=2 make -C programs zstd-dll
gcc-7-libzstd:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: gcc-7 + libzstdmt compilation
# See https://askubuntu.com/a/1428822
run: |
echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu focal main universe" | sudo tee -a /etc/apt/sources.list
sudo apt-get -qqq update
make gcc7install
CC=gcc-7 CFLAGS=-Werror make -j all
make clean
LDFLAGS=-Wl,--no-undefined make -C lib libzstd-mt
# candidate test (for discussion) : underlink test
# LDFLAGS=-Wl,--no-undefined : will make the linker fail if dll is underlinked
cmake-build-and-test-check:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: cmake build and test
run: |
sudo apt install liblzma-dev
FUZZERTEST=-T1mn ZSTREAM_TESTTIME=-T1mn make cmakebuild V=1
cmake-source-directory-with-spaces:
runs-on: ${{ matrix.os }}
strategy:
matrix:
include:
- os: ubuntu-latest
generator: "Unix Makefiles"
- os: windows-latest
generator: "NMake Makefiles"
- os: macos-latest
generator: "Unix Makefiles"
env:
SRC_DIR: "source directory with spaces"
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
with:
path: "${{ env.SRC_DIR }}"
- uses: ilammy/msvc-dev-cmd@0b201ec74fa43914dc39ae48a89fd1d8cb592756 # v1.13.0
if: ${{ matrix.generator == 'NMake Makefiles' }}
- name: cmake build on a source directory with spaces
run: |
cmake -S "${{ env.SRC_DIR }}/build/cmake" -B build -DBUILD_TESTING=ON -G "${{ matrix.generator }}" -DCMAKE_BUILD_TYPE=Release --install-prefix "${{ runner.temp }}/install"
cmake --build build --config Release
cmake --install build --config Release
cpp-gnu90-c99-compatibility:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: C++, gnu90 and c99 compatibility
run: |
make cxxtest
make clean
make gnu90build
make clean
make c99build
make clean
make travis-install # just ensures `make install` works
mingw-cross-compilation:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: mingw cross-compilation
run: |
# sudo update-alternatives --set x86_64-w64-mingw32-g++ /usr/bin/x86_64-w64-mingw32-g++-posix; (doesn't work)
sudo apt-get -qqq update
sudo apt-get install gcc-mingw-w64
CC=x86_64-w64-mingw32-gcc CXX=x86_64-w64-mingw32-g++ CFLAGS="-Werror -O1" make zstd
armbuild:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: ARM Build Test
run: |
sudo apt-get -qqq update
make arminstall
make armbuild
bourne-shell:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: Bourne shell compatibility (shellcheck)
run: |
wget https://github.com/koalaman/shellcheck/releases/download/v0.7.1/shellcheck-v0.7.1.linux.x86_64.tar.xz
tar -xf shellcheck-v0.7.1.linux.x86_64.tar.xz
shellcheck-v0.7.1/shellcheck --shell=sh --severity=warning --exclude=SC2010 tests/playTests.sh
zlib-wrapper:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: install valgrind
run: |
sudo apt-get -qqq update
make valgrindinstall V=1
- name: zlib wrapper test
run: make -C zlibWrapper test V=1
- name: zlib wrapper test under valgrind
run: make -C zlibWrapper test-valgrind V=1
lz4-threadpool-libs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: LZ4, thread pool, and libs build testslib wrapper test
run: |
make lz4install
make -C tests test-lz4
make check < /dev/null | tee # mess with lz4 console detection
make clean
make -C tests test-pool
make clean
bash tests/libzstd_builds.sh
gcc-make-all-avx2:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: Make all, with AVX2
run: |
sudo apt-get -qqq update
make libc6install
CFLAGS="-Werror -mavx2" make -j all
gcc-make-all-32bit:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: Make all, 32bit mode
run: |
sudo apt-get -qqq update
make libc6install
CFLAGS="-Werror -m32" make -j all32
gcc-make-all-32bit-avx2:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: Make all, 32bit + AVX2 mode
run: |
sudo apt-get -qqq update
make libc6install
CPPFLAGS="-DSTATIC_BMI2=1" CFLAGS="-Werror -m32 -mavx2 -mbmi2" make -j all32
gcc-8-make:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: gcc-8 build
# See https://askubuntu.com/a/1428822
run: |
echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu focal main universe" | sudo tee -a /etc/apt/sources.list
sudo apt-get -qqq update
make gcc8install
CC=gcc-8 CFLAGS="-Werror" make -j all
make-external-compressors:
strategy:
matrix:
include:
- name: "no external compressors"
flags: "HAVE_ZLIB=0 HAVE_LZ4=0 HAVE_LZMA=0"
- name: "only zlib"
flags: "HAVE_ZLIB=1 HAVE_LZ4=0 HAVE_LZMA=0"
- name: "only lz4"
flags: "HAVE_ZLIB=0 HAVE_LZ4=1 HAVE_LZMA=0"
- name: "only lzma"
flags: "HAVE_ZLIB=0 HAVE_LZ4=0 HAVE_LZMA=1"
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: Build with ${{matrix.name}}
run: |
sudo apt install liblzma-dev
${{matrix.flags}} make zstd
implicit-fall-through:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: -Wimplicit-fallthrough build
run: |
make clean
CC=gcc MOREFLAGS="-Werror -Wimplicit-fallthrough=2 -O0" make -C lib -j libzstd.a ZSTD_LEGACY_SUPPORT=0
make clean
CC=clang MOREFLAGS="-Werror -Wimplicit-fallthrough -O0" make -C lib -j libzstd.a ZSTD_LEGACY_SUPPORT=0
meson-linux:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: Install packages
run: |
sudo apt-get update
sudo apt-get -y install build-essential python3-pip ninja-build liblz4-dev liblzma-dev
pip install --pre meson
- name: Build with Meson
run: |
meson setup \
--buildtype=debugoptimized \
-Db_lundef=false \
-Dauto_features=enabled \
-Dbin_programs=true \
-Dbin_tests=true \
-Dbin_contrib=true \
-Ddefault_library=both \
build/meson mesonBuild
ninja -C mesonBuild/
meson test -C mesonBuild/ --print-errorlogs
meson install -C mesonBuild --destdir staging/
meson-mingw-cross-compilation:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: Install packages
run: |
sudo apt-get -qqq update
sudo apt-get -y install build-essential python3-pip ninja-build {gcc,g++}-mingw-w64-x86-64
pip install --pre meson
- name: Build with Meson
run: |
cat > cross.ini <<EOF
[binaries]
ar = 'x86_64-w64-mingw32-ar'
c = 'x86_64-w64-mingw32-gcc'
cpp = 'x86_64-w64-mingw32-g++'
ld = 'x86_64-w64-mingw32-ld'
objcopy = 'x86_64-w64-mingw32-objcopy'
objdump = 'x86_64-w64-mingw32-objdump'
strip = 'x86_64-w64-mingw32-strip'
windres = 'x86_64-w64-mingw32-windres'
[host_machine]
system = 'windows'
endian = 'little'
cpu_family = 'x86_64'
cpu = 'x86_64'
EOF
# pzstd doesn't build; skip -Dbin_contrib=true
meson setup \
--buildtype=debugoptimized \
--cross-file=cross.ini \
-Db_lundef=false \
-Dbin_programs=true \
-Dbin_tests=true \
-Ddefault_library=both \
build/meson builddir
ninja -C builddir/
if grep -- -pthread builddir/meson-private/libzstd.pc; then
echo "Error: found stray pthread dependency"
exit 1
fi
meson-windows:
runs-on: windows-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: Install packages
run: pip install --pre meson
- name: Configure with Meson
run: |
meson setup --vsenv build/meson/ builddir -Dbin_tests=true -Dbin_programs=true -Dbin_contrib=true
- name: Build with Meson
run: |
meson compile -C builddir/
- name: Test with Meson
run: |
meson test -C builddir/ --print-errorlogs
- name: Install with Meson
run: |
meson install -C builddir --destdir staging/
cmake-visual-2022:
strategy:
matrix:
include:
- generator: "Visual Studio 17 2022"
flags: "-A x64"
- generator: "Visual Studio 17 2022"
flags: "-A Win32"
- generator: "MinGW Makefiles"
- generator: "Visual Studio 17 2022"
flags: "-T ClangCL"
- generator: "Visual Studio 17 2022"
flags: "-T ClangCL -A x64 -DCMAKE_C_FLAGS=/arch:AVX2"
runs-on: windows-2022
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: Add MSBuild to PATH
uses: microsoft/setup-msbuild@6fb02220983dee41ce7ae257b6f4d8f9bf5ed4ce # tag=v2.0.0
- name: Build & Test
working-directory: ${{env.GITHUB_WORKSPACE}}
run: |
cd build\cmake
mkdir build
cd build
cmake.exe -G "${{matrix.generator}}" ${{matrix.flags}} -DCMAKE_BUILD_TYPE=Debug -DCMAKE_COMPILE_WARNING_AS_ERROR=ON -DZSTD_BUILD_TESTS:BOOL=ON -DZSTD_ZSTREAM_FLAGS=-T30s -DZSTD_FUZZER_FLAGS=-T30s -DZSTD_FULLBENCH_FLAGS=-i0 ..
cmake.exe --build .
ctest.exe -V -C Debug
msbuild-visual-studio:
strategy:
fail-fast: false # 'false' means Don't stop matrix workflows even if some matrix failed.
matrix:
include: [
{ name: "VS 2022 x64 Debug", platform: x64, configuration: Debug, toolset: v143, runner: "windows-2022", arch: "" },
{ name: "VS 2022 Win32 Debug", platform: Win32, configuration: Debug, toolset: v143, runner: "windows-2022", arch: "" },
{ name: "VS 2022 x64 Release", platform: x64, configuration: Release, toolset: v143, runner: "windows-2022", arch: ""},
{ name: "VS 2022 Win32 Release", platform: Win32, configuration: Release, toolset: v143, runner: "windows-2022", arch: ""},
{ name: "VS 2019 x64 Release", platform: Win32, configuration: Release, toolset: v142, runner: "windows-2019", arch: ""},
{ name: "VS 2019 Win32 Release", platform: x64, configuration: Release, toolset: v142, runner: "windows-2019", arch: ""},
{ name: "VS 2022 x64 Release AVX2", platform: x64, configuration: Release, toolset: v143, runner: "windows-2022", arch: "AdvancedVectorExtensions2" },
]
runs-on: ${{matrix.runner}}
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: Add MSBuild to PATH
uses: microsoft/setup-msbuild@6fb02220983dee41ce7ae257b6f4d8f9bf5ed4ce # tag=v2.0.0
- name: Build ${{matrix.name}}
working-directory: ${{env.GITHUB_WORKSPACE}}
# See https://docs.microsoft.com/visualstudio/msbuild/msbuild-command-line-reference
if: ${{ matrix.arch == '' }}
run: >
msbuild "build\VS2010\zstd.sln" /m /verbosity:minimal /property:PlatformToolset=${{matrix.toolset}}
/t:Clean,Build /p:Platform=${{matrix.platform}} /p:Configuration=${{matrix.configuration}} /warnaserror
- name: Build ${{matrix.name}}
working-directory: ${{env.GITHUB_WORKSPACE}}
# See https://docs.microsoft.com/visualstudio/msbuild/msbuild-command-line-reference
if: ${{ matrix.arch != '' }}
run: >
msbuild "build\VS2010\zstd.sln" /m /verbosity:minimal /property:PlatformToolset=${{matrix.toolset}}
/t:Clean,Build /p:Platform=${{matrix.platform}} /p:Configuration=${{matrix.configuration}} /warnaserror
/p:InstructionSet=${{matrix.arch}}
# This tests that we don't accidentally grow the size too much.
# If the size grows intentionally, you can raise these numbers.
# But we do need to think about binary size, since it is a concern.
libzstd-size:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: libzstd size test
run: |
make clean && make -j -C lib libzstd && ./tests/check_size.py lib/libzstd.so 1100000
make clean && make -j -C lib libzstd ZSTD_LIB_COMPRESSION=0 ZSTD_LIB_DICTBUILDER=0 && ./tests/check_size.py lib/libzstd.so 400000
make clean && make -j -C lib libzstd ZSTD_LIB_MINIFY=1 && ./tests/check_size.py lib/libzstd.so 300000
make clean && make -j -C lib libzstd ZSTD_LIB_MINIFY=1 ZSTD_LIB_COMPRESSION=0 ZSTD_LIB_DICTBUILDER=0 && ./tests/check_size.py lib/libzstd.so 80000
minimal-decompressor-macros:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: minimal decompressor macros
run: |
make clean && make -j all ZSTD_LIB_MINIFY=1 MOREFLAGS="-Werror"
make clean && make check ZSTD_LIB_MINIFY=1 MOREFLAGS="-Werror"
make clean && make -j all MOREFLAGS="-Werror -DHUF_FORCE_DECOMPRESS_X1 -DZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT"
make clean && make check MOREFLAGS="-Werror -DHUF_FORCE_DECOMPRESS_X1 -DZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT"
make clean && make -j all MOREFLAGS="-Werror -DHUF_FORCE_DECOMPRESS_X2 -DZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG"
make clean && make check MOREFLAGS="-Werror -DHUF_FORCE_DECOMPRESS_X2 -DZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG"
make clean && make -j all MOREFLAGS="-Werror -DZSTD_NO_INLINE -DZSTD_STRIP_ERROR_STRINGS"
make clean && make check MOREFLAGS="-Werror -DZSTD_NO_INLINE -DZSTD_STRIP_ERROR_STRINGS"
make clean && make check ZSTD_LIB_EXCLUDE_COMPRESSORS_DFAST_AND_UP=1 MOREFLAGS="-Werror"
make clean && make check ZSTD_LIB_EXCLUDE_COMPRESSORS_GREEDY_AND_UP=1 MOREFLAGS="-Werror"
dynamic-bmi2:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: dynamic bmi2 tests
run: |
make clean && make -j check MOREFLAGS="-O0 -Werror -mbmi2"
make clean && make -j check MOREFLAGS="-O0 -Werror -DDYNAMIC_BMI2=1"
make clean && make -j check MOREFLAGS="-O0 -Werror -DDYNAMIC_BMI2=1 -mbmi2"
make clean && make -j check MOREFLAGS="-O0 -Werror -DDYNAMIC_BMI2=0"
make clean && make -j check MOREFLAGS="-O0 -Werror -DDYNAMIC_BMI2=0 -mbmi2"
test-variants:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: make all variants & validate
run: |
make -j -C programs allVariants MOREFLAGS=-O0
./tests/test-variants.sh
qemu-consistency:
name: QEMU ${{ matrix.name }}
runs-on: ubuntu-24.04
strategy:
fail-fast: false # 'false' means Don't stop matrix workflows even if some matrix failed.
matrix:
include: [
{ name: ARM, xcc_pkg: gcc-arm-linux-gnueabi, xcc: arm-linux-gnueabi-gcc, xemu_pkg: qemu-system-arm, xemu: qemu-arm-static },
{ name: ARM64, xcc_pkg: gcc-aarch64-linux-gnu, xcc: aarch64-linux-gnu-gcc, xemu_pkg: qemu-system-aarch64,xemu: qemu-aarch64-static },
{ name: PPC, xcc_pkg: gcc-powerpc-linux-gnu, xcc: powerpc-linux-gnu-gcc, xemu_pkg: qemu-system-ppc, xemu: qemu-ppc-static },
{ name: PPC64LE, xcc_pkg: gcc-powerpc64le-linux-gnu, xcc: powerpc64le-linux-gnu-gcc, xemu_pkg: qemu-system-ppc, xemu: qemu-ppc64le-static },
{ name: S390X, xcc_pkg: gcc-s390x-linux-gnu, xcc: s390x-linux-gnu-gcc, xemu_pkg: qemu-system-s390x, xemu: qemu-s390x-static },
{ name: MIPS, xcc_pkg: gcc-mips-linux-gnu, xcc: mips-linux-gnu-gcc, xemu_pkg: qemu-system-mips, xemu: qemu-mips-static },
{ name: RISC-V, xcc_pkg: gcc-riscv64-linux-gnu, xcc: riscv64-linux-gnu-gcc, xemu_pkg: qemu-system-riscv64,xemu: qemu-riscv64-static },
{ name: M68K, xcc_pkg: gcc-m68k-linux-gnu, xcc: m68k-linux-gnu-gcc, xemu_pkg: qemu-system-m68k, xemu: qemu-m68k-static },
{ name: SPARC, xcc_pkg: gcc-sparc64-linux-gnu, xcc: sparc64-linux-gnu-gcc, xemu_pkg: qemu-system-sparc, xemu: qemu-sparc64-static },
]
env: # Set environment variables
XCC: ${{ matrix.xcc }}
XEMU: ${{ matrix.xemu }}
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: apt update & install
run: |
sudo apt-get update
sudo apt-get install gcc-multilib g++-multilib qemu-utils qemu-user-static
sudo apt-get install ${{ matrix.xcc_pkg }} ${{ matrix.xemu_pkg }}
- name: Environment info
run: |
echo && which $XCC
echo && $XCC --version
echo && $XCC -v # Show built-in specs
echo && which $XEMU
echo && $XEMU --version
- name: ARM
if: ${{ matrix.name == 'ARM' }}
run: |
LDFLAGS="-static" CC=$XCC QEMU_SYS=$XEMU make clean check
- name: ARM64
if: ${{ matrix.name == 'ARM64' }}
run: |
make clean
LDFLAGS="-static" CC=$XCC QEMU_SYS=$XEMU make -j check
# This test is only compatible with standard libraries that support BTI (Branch Target Identification).
# Unfortunately, the standard library provided on Ubuntu 24.04 does not have this feature enabled.
# make clean
# LDFLAGS="-static -z force-bti" MOREFLAGS="-mbranch-protection=standard" CC=$XCC QEMU_SYS=$XEMU make check V=1
- name: PPC
if: ${{ matrix.name == 'PPC' }}
run: |
LDFLAGS="-static" CC=$XCC QEMU_SYS=$XEMU make clean check
- name: PPC64LE
if: ${{ matrix.name == 'PPC64LE' }}
run: |
LDFLAGS="-static" CC=$XCC QEMU_SYS=$XEMU make clean check
- name: S390X
if: ${{ matrix.name == 'S390X' }}
run: |
LDFLAGS="-static" CC=$XCC QEMU_SYS=$XEMU make clean check
- name: MIPS
if: ${{ matrix.name == 'MIPS' }}
run: |
LDFLAGS="-static" CC=$XCC QEMU_SYS=$XEMU make clean check
- name: RISC-V
if: ${{ matrix.name == 'RISC-V' }}
run: |
LDFLAGS="-static" CC=$XCC QEMU_SYS=$XEMU make clean check
- name: M68K
if: ${{ matrix.name == 'M68K' }}
run: |
LDFLAGS="-static" CC=$XCC QEMU_SYS=$XEMU make clean check
- name: SPARC
if: ${{ matrix.name == 'SPARC' }}
run: |
LDFLAGS="-static" CC=$XCC QEMU_SYS=$XEMU make clean check
mingw-short-test:
runs-on: windows-latest
strategy:
fail-fast: false # 'false' means Don't stop matrix workflows even if some matrix failed.
matrix:
include: [
{ compiler: gcc, msystem: MINGW32, cflags: "-Werror"},
{ compiler: gcc, msystem: MINGW64, cflags: "-Werror"},
{ compiler: clang, msystem: MINGW64, cflags: "--target=x86_64-w64-mingw32 -Werror -Wconversion -Wno-sign-conversion -Wno-unused-command-line-argument"},
]
defaults:
run:
shell: msys2 {0}
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- uses: msys2/setup-msys2@d44ca8e88d8b43d56cf5670f91747359d5537f97 # tag=v2.26.0
with:
msystem: ${{ matrix.msystem }}
install: make diffutils
update: true
# Based on https://ariya.io/2020/07/on-github-actions-with-msys2
- name: install mingw gcc i686
if: ${{ (matrix.msystem == 'MINGW32') && (matrix.compiler == 'gcc') }}
run: pacman --noconfirm -S mingw-w64-i686-gcc
- name: install mingw gcc x86_64
if: ${{ (matrix.msystem == 'MINGW64') && (matrix.compiler == 'gcc') }}
run: pacman --noconfirm -S mingw-w64-x86_64-gcc
- name: install mingw clang i686
if: ${{ (matrix.msystem == 'MINGW32') && (matrix.compiler == 'clang') }}
run: pacman --noconfirm -S mingw-w64-i686-clang
- name: install mingw clang x86_64
if: ${{ (matrix.msystem == 'MINGW64') && (matrix.compiler == 'clang') }}
run: pacman --noconfirm -S mingw-w64-x86_64-clang
- name: run mingw tests
run: |
make -v
export CC=${{ matrix.compiler }}
$CC --version
CFLAGS="${{ matrix.cflags }}" make -j allzstd
echo "Testing $CC ${{ matrix.msystem }}"
make clean
MSYS="" make check
visual-runtime-tests:
runs-on: windows-latest
strategy:
matrix:
platform: [x64, Win32]
configuration: [Release]
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: Add MSBuild to PATH
uses: microsoft/setup-msbuild@6fb02220983dee41ce7ae257b6f4d8f9bf5ed4ce # tag=v2.0.0
- name: Build and run tests
working-directory: ${{env.GITHUB_WORKSPACE}}
env:
ZSTD_BIN: ./zstd.exe
DATAGEN_BIN: ./datagen.exe
# See https://docs.microsoft.com/visualstudio/msbuild/msbuild-command-line-reference
run: |
msbuild "build\VS2010\zstd.sln" /m /verbosity:minimal /property:PlatformToolset=v142 /t:Clean,Build /p:Platform=${{matrix.platform}} /p:Configuration=${{matrix.configuration}}
COPY build\VS2010\bin\${{matrix.platform}}_${{matrix.configuration}}\*.exe tests\
CD tests
sh -e playTests.sh
.\fuzzer.exe -T2m
# Following instructions at: https://github.com/marketplace/actions/install-cygwin-action
cygwin-tests:
runs-on: windows-latest
steps:
- run: git config --global core.autocrlf input
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- uses: cygwin/cygwin-install-action@f61179d72284ceddc397ed07ddb444d82bf9e559 # tag=v5
with:
platform: x86_64
packages: >-
autoconf,
automake,
gcc-g++,
make,
mingw64-x86_64-gcc-g++,
patch,
perl
- name: cygwin tests
shell: C:\cygwin\bin\bash.exe --noprofile --norc -eo pipefail '{0}'
run: >-
export PATH=/usr/bin:$(cygpath ${SYSTEMROOT})/system32 &&
export CFLAGS="-Werror -O1" &&
ls &&
make -j allzstd &&
make -C tests fuzzer &&
./tests/fuzzer.exe -v -T1m
- name: cygwin install test
shell: C:\cygwin\bin\bash.exe --noprofile --norc -eo pipefail '{0}'
run: >-
make -j &&
make install
pkg-config:
runs-on: ubuntu-latest
container:
image: debian:testing
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: Install dependencies
run: |
apt -y update
apt -y install --no-install-recommends gcc libc6-dev make pkg-config
- name: Build and install
run: make -C lib install
- name: Test pkg-config
run: |
cc -Wall -Wextra -Wpedantic -Werror -o simple examples/simple_compression.c $(pkg-config --cflags --libs libzstd)
./simple LICENSE
versions-compatibility:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: Versions Compatibility Test
run: |
make -C tests versionsTest
clangbuild:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: make clangbuild
run: |
make clangbuild
gcc-pgo:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: Build PGO Zstd with GCC
env:
CC: gcc
run: |
make -C programs zstd-pgo
./programs/zstd -b
clang-pgo:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: Build PGO Zstd with Clang
env:
CC: clang
run: |
sudo apt install -y llvm
llvm-profdata --version
make -C programs zstd-pgo
./programs/zstd -b
intel-cet-compatibility:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: Build Zstd
run: |
make -j zstd V=1
readelf -n zstd
- name: Get Intel SDE
run: |
curl -LO https://downloadmirror.intel.com/813591/sde-external-9.33.0-2024-01-07-lin.tar.xz
tar xJvf sde-external-9.33.0-2024-01-07-lin.tar.xz
- name: Configure Permissions
run: |
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
- name: Run Under SDE
run: |
sde-external-9.33.0-2024-01-07-lin/sde -cet -cet-raise 0 -cet-endbr-exe -cet-stderr -cet-abort -- ./zstd -b3
icx:
# install instructions: https://www.intel.com/content/www/us/en/docs/oneapi/installation-guide-linux/2025-0/apt-005.html
name: icx-check
runs-on: ubuntu-latest
steps:
- name: install icx
run: |
# download the key to system keyring
wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
| gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
# add signed entry to apt sources and configure the APT client to use Intel repository:
echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list
sudo apt-get update
sudo apt-get install -y intel-basekit intel-hpckit
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
- name: make check
run: |
source /opt/intel/oneapi/setvars.sh
make CC=icx check

View File

@@ -0,0 +1,38 @@
name: facebook/zstd/nightly
on:
schedule:
- cron: '0 0 * * *'
push:
branches:
- release
- dev
- '*nightly*'
permissions: read-all
jobs:
regression-test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Install dependencies
run: sudo apt-get update && sudo apt-get install -y libcurl4-openssl-dev
- name: Regression Test
run: |
make -C programs zstd
make -C tests/regression test
# Longer tests
#- make -C tests test-zstd-nolegacy && make clean
#- pyenv global 3.4.4; make -C tests versionsTest && make clean
#- make zlibwrapper && make clean
#- gcc -v; make -C tests test32 MOREFLAGS="-I/usr/include/x86_64-linux-gnu" && make clean
#- make uasan && make clean
#- make asan32 && make clean
#- make -C tests test32 CC=clang MOREFLAGS="-g -fsanitize=address -I/usr/include/x86_64-linux-gnu"
# Valgrind tests
#- CFLAGS="-O1 -g" make -C zlibWrapper valgrindTest && make clean
#- make -C tests valgrindTest && make clean
# ARM, AArch64, PowerPC, PowerPC64 tests
#- make ppctest && make clean
#- make ppc64test && make clean
#- make armtest && make clean
#- make aarch64test && make clean

View File

@@ -0,0 +1,73 @@
name: publish-release-artifacts
on:
release:
types:
- published
permissions: read-all
jobs:
publish-release-artifacts:
permissions:
contents: write # to fetch code and upload artifacts
runs-on: ubuntu-latest
if: startsWith(github.ref, 'refs/tags/')
steps:
- name: Checkout
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v3
- name: Archive
env:
RELEASE_SIGNING_KEY: ${{ secrets.RELEASE_SIGNING_KEY }}
RELEASE_SIGNING_KEY_PASSPHRASE: ${{ secrets.RELEASE_SIGNING_KEY_PASSPHRASE }}
run: |
# compute file name
export TAG="$(echo "$GITHUB_REF" | sed -n 's_^refs/tags/__p')"
if [ -z "$TAG" ]; then
echo "action must be run on a tag. GITHUB_REF is not a tag: $GITHUB_REF"
exit 1
fi
# Attempt to extract "1.2.3" from "v1.2.3" to maintain artifact name backwards compat.
# Otherwise, degrade to using full tag.
export VERSION="$(echo "$TAG" | sed 's_^v\([0-9]\+\.[0-9]\+\.[0-9]\+\)$_\1_')"
export ZSTD_VERSION="zstd-$VERSION"
# archive
git archive $TAG \
--prefix $ZSTD_VERSION/ \
--format tar \
-o $ZSTD_VERSION.tar
# Do the rest of the work in a sub-dir so we can glob everything we want to publish.
mkdir artifacts/
mv $ZSTD_VERSION.tar artifacts/
cd artifacts/
# compress
zstd -k -19 $ZSTD_VERSION.tar
gzip -k -9 $ZSTD_VERSION.tar
# we only publish the compressed tarballs
rm $ZSTD_VERSION.tar
# hash
sha256sum $ZSTD_VERSION.tar.zst > $ZSTD_VERSION.tar.zst.sha256
sha256sum $ZSTD_VERSION.tar.gz > $ZSTD_VERSION.tar.gz.sha256
# sign
if [ -n "$RELEASE_SIGNING_KEY" ]; then
export GPG_BATCH_OPTS="--batch --no-use-agent --pinentry-mode loopback --no-tty --yes"
echo "$RELEASE_SIGNING_KEY" | gpg $GPG_BATCH_OPTS --import
gpg $GPG_BATCH_OPTS --armor --sign --sign-with signing@zstd.net --detach-sig --passphrase "$RELEASE_SIGNING_KEY_PASSPHRASE" --output $ZSTD_VERSION.tar.zst.sig $ZSTD_VERSION.tar.zst
gpg $GPG_BATCH_OPTS --armor --sign --sign-with signing@zstd.net --detach-sig --passphrase "$RELEASE_SIGNING_KEY_PASSPHRASE" --output $ZSTD_VERSION.tar.gz.sig $ZSTD_VERSION.tar.gz
fi
- name: Publish
uses: skx/github-action-publish-binaries@b9ca5643b2f1d7371a6cba7f35333f1461bbc703 # tag=release-2.0
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
args: artifacts/*

View File

@@ -0,0 +1,64 @@
name: Scorecards supply-chain security
on:
# Only the default branch is supported.
branch_protection_rule:
schedule:
- cron: '22 21 * * 2'
push:
# TODO: Add release branch when supported?
branches: [ "dev" ]
# Declare default permissions as read only.
permissions: read-all
jobs:
analysis:
name: Scorecards analysis
if: github.repository == 'facebook/zstd'
runs-on: ubuntu-latest
permissions:
# Needed to upload the results to code-scanning dashboard.
security-events: write
# Used to receive a badge.
id-token: write
# Needs for private repositories.
contents: read
actions: read
steps:
- name: "Checkout code"
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v3
with:
persist-credentials: false
- name: "Run analysis"
uses: ossf/scorecard-action@62b2cac7ed8198b15735ed49ab1e5cf35480ba46 # tag=v2.4.0
with:
results_file: results.sarif
results_format: sarif
# (Optional) Read-only PAT token. Uncomment the `repo_token` line below if:
# - you want to enable the Branch-Protection check on a *public* repository, or
# - you are installing Scorecards on a *private* repository
# To create the PAT, follow the steps in https://github.com/ossf/scorecard-action#authentication-with-pat.
# repo_token: ${{ secrets.SCORECARD_READ_TOKEN }}
# Publish the results for public repositories to enable scorecard badges. For more details, see
# https://github.com/ossf/scorecard-action#publishing-results.
# For private repositories, `publish_results` will automatically be set to `false`, regardless
# of the value entered here.
publish_results: true
# Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF
# format to the repository Actions tab.
- name: "Upload artifact"
uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # tag=v4.3.1
with:
name: SARIF file
path: results.sarif
retention-days: 5
# Upload the results to GitHub's code scanning dashboard.
- name: "Upload to code-scanning"
uses: github/codeql-action/upload-sarif@9e8d0789d4a0fa9ceb6b1738f7e269594bdd67f0 # tag=v3.28.9
with:
sarif_file: results.sarif

View File

@@ -0,0 +1,58 @@
name: windows-artifacts
on:
push:
branches: [ test_artifacts, win_artifacts ]
release:
types:
- published
permissions: read-all
jobs:
windows-artifacts:
# see https://ariya.io/2020/07/on-github-actions-with-msys2
runs-on: windows-latest
# see https://github.com/msys2/setup-msys2
strategy:
matrix:
include:
- { msystem: mingw64, env: x86_64, ziparch: win64 }
- { msystem: mingw32, env: i686, ziparch: win32 }
defaults:
run:
shell: msys2 {0}
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v3
- uses: msys2/setup-msys2@d44ca8e88d8b43d56cf5670f91747359d5537f97 # tag=v2.26.0
with:
msystem: ${{ matrix.msystem }}
install: make zlib git p7zip mingw-w64-${{matrix.env}}-gcc
update: true
- name: display versions
run: |
make -v
cc -v
- name: Building zlib to static link
run: |
git clone --depth 1 --branch v1.2.11 https://github.com/madler/zlib
make -C zlib -f win32/Makefile.gcc libz.a
- name: Building zstd programs
run: |
CPPFLAGS=-I../zlib LDFLAGS=../zlib/libz.a make -j allzstd MOREFLAGS=-static V=1
- name: Create artifacts
run: |
./lib/dll/example/build_package.bat
mv bin/ zstd-${{ github.ref_name }}-${{matrix.ziparch}}/
7z a -tzip -mx9 zstd-${{ github.ref_name }}-${{matrix.ziparch}}.zip zstd-${{ github.ref_name }}-${{matrix.ziparch}}/
cd ..
- name: Publish zstd-$VERSION-${{matrix.ziparch}}.zip
uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # tag=v4.3.1
with:
path: ${{ github.workspace }}/zstd-${{ github.ref_name }}-${{matrix.ziparch}}.zip
name: zstd-${{ github.ref_name }}-${{matrix.ziparch}}.zip

63
build_arm64/_deps/zstd-src/.gitignore vendored Normal file
View File

@@ -0,0 +1,63 @@
# Object files
*.o
*.ko
*.dSYM
# Libraries
*.lib
*.a
# Shared objects (inc. Windows DLLs)
*.dll
*.so
*.so.*
*.dylib
*.framework
*.xcframework
# Executables
/zstd
zstdmt
*.exe
*.out
*.app
# Test artefacts
tmp*
*.zst
*.zstd
dictionary.
dictionary
NUL
cmakebuild/
install/
# Build artefacts
contrib/linux-kernel/linux/
projects/
bin/
.buckd/
buck-out/
build-*
*.gcda
# IDE
.clang_complete
compile_flags.txt
.clang-format
# Other files
.directory
_codelite/
_zstdbench/
*.idea
*.swp
.DS_Store
googletest/
*.d
*.vscode
*.code-workspace
compile_commands.json
.clangd
perf.data
perf.data.old

View File

@@ -0,0 +1,863 @@
V1.5.7 (Feb 2025)
fix: compression bug in 32-bit mode associated with long-lasting sessions
api: new method `ZSTD_compressSequencesAndLiterals()` (#4217, #4232)
api: `ZSTD_getFrameHeader()` works on skippable frames (#4228)
perf: substantial compression speed improvements (up to +30%) on small data, by @TocarIP (#4144) and @cyan4973 (#4165)
perf: improved compression speed (~+5%) for dictionary compression at low levels (#4170)
perf: much faster speed for `--patch-from` at high compression levels (#4276)
perf: higher `--patch-from` compression ratios, notably at high levels (#4288)
perf: better speed for binaries on Windows (@pps83) and when compiled with Visual Studio (@MessyHack)
perf: slight compression ratio improvement thanks to better block boundaries (#4136, #4176, #4178)
perf: slight compression ratio improvement for `dfast`, aka levels 3 and 4 (#4171)
perf: runtime bmi2 detection enabled on x86 32-bit mode (#4251)
cli: multi-threading as default CLI setting, by @daniellerozenblit
cli: new `--max` command (#4290)
build: improve `msbuild` version autodetection, support VS2022, by @ManuelBlanc
build: fix `meson` build by @artem and @Victor-C-Zhang, and on Windows by @bgilbert
build: compatibility with Apple Framework, by @Treata11
build: improve icc/icx compatibility, by @josepho0918 and @luau-project
build: improve compatibility with Android NDK, by Adenilson Cavalcanti
portability: linux kernel branch, with improved support for Sequence producers (@embg, @gcabiddu, @cyan4973)
portability: improved qnx compatibility, suggested by @rainbowball
portability: improved install script for FreeBSD, by @sunpoet
portability: fixed test suite compatibility with gnu hurd, by @diegonc
doc: clarify specification, by @elasota
misc: improved tests/decodecorpus validation tool (#4102), by antmicro
V1.5.6 (Mar 2024)
api: Promote `ZSTD_c_targetCBlockSize` to Stable API by @felixhandte
api: new `ZSTD_d_maxBlockSize` experimental parameter, to reduce streaming decompression memory, by @terrelln
perf: improve performance of param `ZSTD_c_targetCBlockSize`, by @Cyan4973
perf: improved compression of arrays of integers at high compression, by @Cyan4973
lib: reduce binary size with selective build-time exclusion, by @felixhandte
lib: improved huffman speed on small data and linux kernel, by @terrelln
lib: accept dictionaries with partial literal tables, by @terrelln
lib: fix CCtx size estimation with external sequence producer, by @embg
lib: fix corner case decoder behaviors, by @Cyan4973 and @aimuz
lib: fix zdict prototype mismatch in static_only mode, by @ldv-alt
lib: fix several bugs in magicless-format decoding, by @embg
cli: add common compressed file types to `--exclude-compressed`` by @daniellerozenblit
cli: fix mixing `-c` and `-o` commands with `--rm`, by @Cyan4973
cli: fix erroneous exclusion of hidden files with `--output-dir-mirror` by @felixhandte
cli: improved time accuracy on BSD, by @felixhandte
cli: better errors on argument parsing, by @KapJI
tests: better compatibility with older versions of `grep`, by @Cyan4973
tests: lorem ipsum generator as default backup content, by @Cyan4973
build: cmake improvements by @terrelln, @sighingnow, @gjasny, @JohanMabille, @Saverio976, @gruenich, @teo-tsirpanis
build: bazel support, by @jondo2010
build: fix cross-compiling for AArch64 with lld by @jcelerier
build: fix Apple platform compatibility, by @nidhijaju
build: fix Visual 2012 and lower compatibility, by @Cyan4973
build: improve win32 support, by @DimitriPapadopoulos
build: better C90 compliance for zlibWrapper, by @emaste
port: make: fat binaries on macos, by @mredig
port: ARM64EC compatibility for Windows, by @dunhor
port: QNX support by @klausholstjacobsen
port: MSYS2 and Cygwin makefile installation and test support, by @QBos07
port: risc-v support validation in CI, by @Cyan4973
port: sparc64 support validation in CI, by @Cyan4973
port: AIX compatibility, by @likema
port: HP-UX compatibility, by @likema
doc: Improved specification accuracy, by @elasota
bug: Fix and deprecate ZSTD_generateSequences (#3981)
v1.5.5 (Apr 2023)
fix: fix rare corruption bug affecting the high compression mode, reported by @danlark1 (#3517, @terrelln)
perf: improve mid-level compression speed (#3529, #3533, #3543, @yoniko and #3552, @terrelln)
lib: deprecated bufferless block-level API (#3534) by @terrelln
cli: mmap large dictionaries to save memory, by @daniellerozenblit
cli: improve speed of --patch-from mode (~+50%) (#3545) by @daniellerozenblit
cli: improve i/o speed (~+10%) when processing lots of small files (#3479) by @felixhandte
cli: zstd no longer crashes when requested to write into write-protected directory (#3541) by @felixhandte
cli: fix decompression into block device using -o, reported by @georgmu (#3583)
build: fix zstd CLI compiled with lzma support but not zlib support (#3494) by @Hello71
build: fix cmake does no longer require 3.18 as minimum version (#3510) by @kou
build: fix MSVC+ClangCL linking issue (#3569) by @tru
build: fix zstd-dll, version of zstd CLI that links to the dynamic library (#3496) by @yoniko
build: fix MSVC warnings (#3495) by @embg
doc: updated zstd specification to clarify corner cases, by @Cyan4973
doc: document how to create fat binaries for macos (#3568) by @rickmark
misc: improve seekable format ingestion speed (~+100%) for very small chunk sizes (#3544) by @Cyan4973
misc: tests/fullbench can benchmark multiple files (#3516) by @dloidolt
v1.5.4 (Feb 2023)
perf: +20% faster huffman decompression for targets that can't compile x64 assembly (#3449, @terrelln)
perf: up to +10% faster streaming compression at levels 1-2 (#3114, @embg)
perf: +4-13% for levels 5-12 by optimizing function generation (#3295, @terrelln)
pref: +3-11% compression speed for `arm` target (#3199, #3164, #3145, #3141, #3138, @JunHe77 and #3139, #3160, @danlark1)
perf: +5-30% faster dictionary compression at levels 1-4 (#3086, #3114, #3152, @embg)
perf: +10-20% cold dict compression speed by prefetching CDict tables (#3177, @embg)
perf: +1% faster compression by removing a branch in ZSTD_fast_noDict (#3129, @felixhandte)
perf: Small compression ratio improvements in high compression mode (#2983, #3391, @Cyan4973 and #3285, #3302, @daniellerozenblit)
perf: small speed improvement by better detecting `STATIC_BMI2` for `clang` (#3080, @TocarIP)
perf: Improved streaming performance when `ZSTD_c_stableInBuffer` is set (#2974, @Cyan4973)
cli: Asynchronous I/O for improved cli speed (#2975, #2985, #3021, #3022, @yoniko)
cli: Change `zstdless` behavior to align with `zless` (#2909, @binhdvo)
cli: Keep original file if `-c` or `--stdout` is given (#3052, @dirkmueller)
cli: Keep original files when result is concatenated into a single output with `-o` (#3450, @Cyan4973)
cli: Preserve Permissions and Ownership of regular files (#3432, @felixhandte)
cli: Print zlib/lz4/lzma library versions with `-vv` (#3030, @terrelln)
cli: Print checksum value for single frame files with `-lv` (#3332, @Cyan4973)
cli: Print `dictID` when present with `-lv` (#3184, @htnhan)
cli: when `stderr` is *not* the console, disable status updates, but preserve final summary (#3458, @Cyan4973)
cli: support `--best` and `--no-name` in `gzip` compatibility mode (#3059, @dirkmueller)
cli: support for `posix` high resolution timer `clock_gettime()`, for improved benchmark accuracy (#3423, @Cyan4973)
cli: improved help/usage (`-h`, `-H`) formatting (#3094, @dirkmueller and #3385, @jonpalmisc)
cli: Fix better handling of bogus numeric values (#3268, @ctkhanhly)
cli: Fix input consists of multiple files _and_ `stdin` (#3222, @yoniko)
cli: Fix tiny files passthrough (#3215, @cgbur)
cli: Fix for `-r` on empty directory (#3027, @brailovich)
cli: Fix empty string as argument for `--output-dir-*` (#3220, @embg)
cli: Fix decompression memory usage reported by `-vv --long` (#3042, @u1f35c, and #3232, @zengyijing)
cli: Fix infinite loop when empty input is passed to trainer (#3081, @terrelln)
cli: Fix `--adapt` doesn't work when `--no-progress` is also set (#3354, @terrelln)
api: Support for Block-Level Sequence Producer (#3333, @embg)
api: Support for in-place decompression (#3432, @terrelln)
api: New `ZSTD_CCtx_setCParams()` function, set all parameters defined in a `ZSTD_compressionParameters` structure (#3403, @Cyan4973)
api: Streaming decompression detects incorrect header ID sooner (#3175, @Cyan4973)
api: Window size resizing optimization for edge case (#3345, @daniellerozenblit)
api: More accurate error codes for busy-loop scenarios (#3413, #3455, @Cyan4973)
api: Fix limit overflow in `compressBound` and `decompressBound` (#3362, #3373, Cyan4973) reported by @nigeltao
api: Deprecate several advanced experimental functions: streaming (#3408, @embg), copy (#3196, @mileshu)
bug: Fix corruption that rarely occurs in 32-bit mode with wlog=25 (#3361, @terrelln)
bug: Fix for block-splitter (#3033, @Cyan4973)
bug: Fixes for Sequence Compression API (#3023, #3040, @Cyan4973)
bug: Fix leaking thread handles on Windows (#3147, @animalize)
bug: Fix timing issues with cmake/meson builds (#3166, #3167, #3170, @Cyan4973)
build: Allow user to select legacy level for cmake (#3050, @shadchin)
build: Enable legacy support by default in cmake (#3079, @niamster)
build: Meson build script improvements (#3039, #3120, #3122, #3327, #3357, @eli-schwartz and #3276, @neheb)
build: Add aarch64 to supported architectures for zstd_trace (#3054, @ooosssososos)
build: support AIX architecture (#3219, @qiongsiwu)
build: Fix `ZSTD_LIB_MINIFY` build macro, which now reduces static library size by half (#3366, @terrelln)
build: Fix Windows issues with Multithreading translation layer (#3364, #3380, @yoniko) and ARM64 target (#3320, @cwoffenden)
build: Fix `cmake` script (#3382, #3392, @terrelln and #3252 @Tachi107 and #3167 @Cyan4973)
doc: Updated man page, providing more details for `--train` mode (#3112, @Cyan4973)
doc: Add decompressor errata document (#3092, @terrelln)
misc: Enable Intel CET (#2992, #2994, @hjl-tools)
misc: Fix `contrib/` seekable format (#3058, @yhoogstrate and #3346, @daniellerozenblit)
misc: Improve speed of the one-file library generator (#3241, @wahern and #3005, @cwoffenden)
v1.5.3 (dev version, unpublished)
v1.5.2 (Jan, 2022)
perf: Regain Minimal memset()-ing During Reuse of Compression Contexts (@Cyan4973, #2969)
build: Build Zstd with `noexecstack` on All Architectures (@felixhandte, #2964)
doc: Clarify Licensing (@terrelln, #2981)
v1.5.1 (Dec, 2021)
perf: rebalanced compression levels, to better match the intended speed/level curve, by @senhuang42
perf: faster huffman decoder, using x64 assembly, by @terrelln
perf: slightly faster high speed modes (strategies fast & dfast), by @felixhandte
perf: improved binary size and faster compilation times, by @terrelln
perf: new row64 mode, used notably in level 12, by @senhuang42
perf: faster mid-level compression speed in presence of highly repetitive patterns, by @senhuang42
perf: minor compression ratio improvements for small data at high levels, by @cyan4973
perf: reduced stack usage (mostly useful for Linux Kernel), by @terrelln
perf: faster compression speed on incompressible data, by @bindhvo
perf: on-demand reduced ZSTD_DCtx state size, using build macro ZSTD_DECODER_INTERNAL_BUFFER, at a small cost of performance, by @bindhvo
build: allows hiding static symbols in the dynamic library, using build macro, by @skitt
build: support for m68k (Motorola 68000's), by @cyan4973
build: improved AIX support, by @Helflym
build: improved meson unofficial build, by @eli-schwartz
cli : custom memory limit when training dictionary (#2925), by @embg
cli : report advanced parameters information when compressing in very verbose mode (`-vv`), by @Svetlitski-FB
v1.5.0 (May 11, 2021)
api: Various functions promoted from experimental to stable API: (#2579-2581, @senhuang42)
`ZSTD_defaultCLevel()`
`ZSTD_getDictID_fromCDict()`
api: Several experimental functions have been deprecated and will emit a compiler warning (#2582, @senhuang42)
`ZSTD_compress_advanced()`
`ZSTD_compress_usingCDict_advanced()`
`ZSTD_compressBegin_advanced()`
`ZSTD_compressBegin_usingCDict_advanced()`
`ZSTD_initCStream_srcSize()`
`ZSTD_initCStream_usingDict()`
`ZSTD_initCStream_usingCDict()`
`ZSTD_initCStream_advanced()`
`ZSTD_initCStream_usingCDict_advanced()`
`ZSTD_resetCStream()`
api: ZSTDMT_NBWORKERS_MAX reduced to 64 for 32-bit environments (@Cyan4973)
perf: Significant speed improvements for middle compression levels (#2494, @senhuang42 @terrelln)
perf: Block splitter to improve compression ratio, enabled by default for high compression levels (#2447, @senhuang42)
perf: Decompression loop refactor, speed improvements on `clang` and for `--long` modes (#2614 #2630, @Cyan4973)
perf: Reduced stack usage during compression and decompression entropy stage (#2522 #2524, @terrelln)
bug: Improve setting permissions of created files (#2525, @felixhandte)
bug: Fix large dictionary non-determinism (#2607, @terrelln)
bug: Fix non-determinism test failures on Linux i686 (#2606, @terrelln)
bug: Fix various dedicated dictionary search bugs (#2540 #2586, @senhuang42 @felixhandte)
bug: Ensure `ZSTD_estimateCCtxSize*() `monotonically increases with compression level (#2538, @senhuang42)
bug: Fix --patch-from mode parameter bound bug with small files (#2637, @occivink)
bug: Fix UBSAN error in decompression (#2625, @terrelln)
bug: Fix superblock compression divide by zero bug (#2592, @senhuang42)
bug: Make the number of physical CPU cores detection more robust (#2517, @PaulBone)
doc: Improve `zdict.h` dictionary training API documentation (#2622, @terrelln)
doc: Note that public `ZSTD_free*()` functions accept NULL pointers (#2521, @animalize)
doc: Add style guide docs for open source contributors (#2626, @Cyan4973)
tests: Better regression test coverage for different dictionary modes (#2559, @senhuang42)
tests: Better test coverage of index reduction (#2603, @terrelln)
tests: OSS-Fuzz coverage for seekable format (#2617, @senhuang42)
tests: Test coverage for ZSTD threadpool API (#2604, @senhuang42)
build: Dynamic library built multithreaded by default (#2584, @senhuang42)
build: Move `zstd_errors.h` and `zdict.h` to `lib/` root (#2597, @terrelln)
build: Allow `ZSTDMT_JOBSIZE_MIN` to be configured at compile-time, reduce default to 512KB (#2611, @Cyan4973)
build: Single file library build script moved to `build/` directory (#2618, @felixhandte)
build: `ZBUFF_*()` is no longer built by default (#2583, @senhuang42)
build: Fixed Meson build (#2548, @SupervisedThinking @kloczek)
build: Fix excessive compiler warnings with clang-cl and CMake (#2600, @nickhutchinson)
build: Detect presence of `md5` on Darwin (#2609, @felixhandte)
build: Avoid SIGBUS on armv6 (#2633, @bmwiedmann)
cli: `--progress` flag added to always display progress bar (#2595, @senhuang42)
cli: Allow reading from block devices with `--force` (#2613, @felixhandte)
cli: Fix CLI filesize display bug (#2550, @Cyan4973)
cli: Fix windows CLI `--filelist` end-of-line bug (#2620, @Cyan4973)
contrib: Various fixes for linux kernel patch (#2539, @terrelln)
contrib: Seekable format - Decompression hanging edge case fix (#2516, @senhuang42)
contrib: Seekable format - New seek table-only API (#2113 #2518, @mdittmer @Cyan4973)
contrib: Seekable format - Fix seek table descriptor check when loading (#2534, @foxeng)
contrib: Seekable format - Decompression fix for large offsets, (#2594, @azat)
misc: Automatically published release tarballs available on Github (#2535, @felixhandte)
v1.4.9 (Mar 1, 2021)
bug: Use `umask()` to Constrain Created File Permissions (#2495, @felixhandte)
bug: Make Simple Single-Pass Functions Ignore Advanced Parameters (#2498, @terrelln)
api: Add (De)Compression Tracing Functionality (#2482, @terrelln)
api: Support References to Multiple DDicts (#2446, @senhuang42)
api: Add Function to Generate Skippable Frame (#2439, @senhuang42)
perf: New Algorithms for the Long Distance Matcher (#2483, @mpu)
perf: Performance Improvements for Long Distance Matcher (#2464, @mpu)
perf: Don't Shrink Window Log when Streaming with a Dictionary (#2451, @terrelln)
cli: Fix `--output-dir-mirror` rejection of `..` -containing paths (#2512, @felixhandte)
cli: Allow Input From Console When `-f`/`--force` is Passed (#2466, @felixhandte)
cli: Improve Help Message (#2500, @senhuang42)
tests: Remove Flaky Tests (#2455, #2486, #2445, @Cyan4973)
tests: Correctly Invoke md5 Utility on NetBSD (#2492, @niacat)
tests: Avoid Using `stat -c` on NetBSD (#2513, @felixhandte)
build: Zstd CLI Can Now be Linked to Dynamic `libzstd` (#2457, #2454 @Cyan4973)
build: Hide and Avoid Using Static-Only Symbols (#2501, #2504, @skitt)
build: CMake: Enable Only C for lib/ and programs/ Projects (#2498, @concatime)
build: CMake: Use `configure_file()` to Create the `.pc` File (#2462, @lazka)
build: Fix Fuzzer Compiler Detection & Update UBSAN Flags (#2503, @terrelln)
build: Add Guards for `_LARGEFILE_SOURCE` and `_LARGEFILE64_SOURCE` (#2444, @indygreg)
build: Improve `zlibwrapper` Makefile (#2437, @Cyan4973)
contrib: Add `recover_directory` Program (#2473, @terrelln)
doc: Change License Year to 2021 (#2452 & #2465, @terrelln & @senhuang42)
doc: Fix Typos (#2459, @ThomasWaldmann)
v1.4.8 (Dec 18, 2020)
hotfix: wrong alignment of an internal buffer
v1.4.7 (Dec 16, 2020)
perf: stronger --long mode at high compression levels, by @senhuang42
perf: stronger --patch-from at high compression levels, thanks to --long improvements
perf: faster dictionary compression at medium compression levels, by @felixhandte
perf: small speed & memory usage improvements for ZSTD_compress2(), by @terrelln
perf: improved fast compression speeds with Visual Studio, by @animalize
cli : Set nb of threads with environment variable ZSTD_NBTHREADS, by @senhuang42
cli : accept decompressing files with *.zstd suffix
cli : provide a condensed summary by default when processing multiple files
cli : fix : stdin input no longer confused as user prompt
cli : improve accuracy of several error messages
api : new sequence ingestion API, by @senhuang42
api : shared thread pool: control total nb of threads used by multiple compression jobs, by @marxin
api : new ZSTD_getDictID_fromCDict(), by @LuAPi
api : zlibWrapper only uses public API, and is compatible with dynamic library, by @terrelln
api : fix : multithreaded compression has predictable output even in special cases (see #2327) (issue not accessible from cli)
api : fix : dictionary compression correctly respects dictionary compression level (see #2303) (issue not accessible from cli)
build: fix cmake script when using path with spaces, by @terrelln
build: improved compile-time detection of aarch64/neon platforms, by @bsdimp
build: Fix building on AIX 5.1, by @likema
build: compile paramgrill with cmake on Windows, requested by @mirh
doc : clarify repcode updates in format specification, by @felixhandte
v1.4.6
fix : Always return dstSize_tooSmall when that is the case
fix : Fix ZSTD_initCStream_advanced() with static allocation and no dictionary
perf: Improve small block decompression speed by 20%+, by @terrelln
perf: Reduce compression stack usage by 1 KB, by @terrelln
perf: Improve decompression speed by improving ZSTD_wildcopy, by @helloguo (#2252, #2256)
perf: Improve histogram construction, by @cyan4973 (#2253)
cli : Add --output-dir-mirror option, by @xxie24 (#2219)
cli : Warn when (de)compressing multiple files into a single output, by @senhuang42 (#2279)
cli : Improved progress bar and status summary when (de)compressing multiple files, by @senhuang42 (#2283)
cli : Call stat less often, by @felixhandte (#2262)
cli : Allow --patch-from XXX and --filelist XXX in addition to --patch-from=XXX and --filelist=XXX, by @cyan4973 (#2250)
cli : Allow --patch-from to compress stdin with --stream-size, by @bimbashrestha (#2206)
api : Do not install zbuff.h, since it has long been deprecated, by @cyan4973 (#2166).
api : Fix ZSTD_CCtx_setParameter() with ZSTD_c_compressionLevel to make 0 mean default level, by @i-do-cpp (#2291)
api : Rename ZSTDMT_NBTHREADS_MAX to ZSTDMT_NBWORKERS_MAX, by @marxin (#2228).
build: Install pkg-config file with CMake and MinGW, by @tonytheodore (#2183)
build: Install DLL with CMake on Windows, by @BioDataAnalysis (#2221)
build: Fix DLL install location with CMake, by @xantares and @bimbashrestha (#2186)
build: Add ZSTD_NO_UNUSED_FUNCTIONS macro to hide unused functions
build: Add ZSTD_NO_INTRINSICS macro to avoid explicit intrinsics
build: Add STATIC_BMI2 macro for compile time detection of BMI2 on MSVC, by @Niadb (#2258)
build: Fix -Wcomma warnings, by @cwoffenden
build: Remove distutils requirement for meson build, by @neheb (#2197)
build: Fix cli compilation with uclibc
build: Fix cli compilation without st_mtime, by @ffontaine (#2246)
build: Fix shadowing warnings in library
build: Fix single file library compilation with Enscripten, by @yoshihitoh (#2227)
misc: Improve single file library and include dictBuilder, by @cwoffenden
misc: Allow compression dictionaries with missing symbols
misc: Add freestanding translation script in contrib/freestanding_lib
misc: Collect all of zstd's libc dependencies into zstd_deps.h
doc : Add ZSTD_versionString() to manual, by @animalize
doc : Fix documentation for ZSTD_CCtxParams_setParameter(), by @felixhandte (#2270)
v1.4.5 (May 22, 2020)
fix : Compression ratio regression on huge files (> 3 GB) using high levels (--ultra) and multithreading, by @terrelln
perf: Improved decompression speed: x64 : +10% (clang) / +5% (gcc); ARM : from +15% to +50%, depending on SoC, by @terrelln
perf: Automatically downsizes ZSTD_DCtx when too large for too long (#2069, by @bimbashreshta)
perf: Improved fast compression speed on aarch64 (#2040, ~+3%, by @caoyzh)
perf: Small level 1 compression speed gains (depending on compiler)
cli : New --patch-from command, create and apply patches from files, by @bimbashreshta
cli : New --filelist= : Provide a list of files to operate upon from a file
cli : -b -d command can now benchmark decompression on multiple files
cli : New --no-content-size command
cli : New --show-default-cparams information command
api : ZDICT_finalizeDictionary() is promoted to stable (#2111)
api : new experimental parameter ZSTD_d_stableOutBuffer (#2094)
build: Generate a single-file libzstd library (#2065, by @cwoffenden)
build: Relative includes no longer require -I compiler flags for zstd lib subdirs (#2103, by @felixhandte)
build: zstd now compiles cleanly under -pedantic (#2099)
build: zstd now compiles with make-4.3
build: Support mingw cross-compilation from Linux, by @Ericson2314
build: Meson multi-thread build fix on windows
build: Some misc icc fixes backed by new ci test on travis
misc: bitflip analyzer tool, by @felixhandte
misc: Extend largeNbDicts benchmark to compression
misc: Edit-distance match finder in contrib/
doc : Improved beginner CONTRIBUTING.md docs
doc : New issue templates for zstd
v1.4.4 (Nov 6, 2019)
perf: Improved decompression speed, by > 10%, by @terrelln
perf: Better compression speed when re-using a context, by @felixhandte
perf: Fix compression ratio when compressing large files with small dictionary, by @senhuang42
perf: zstd reference encoder can generate RLE blocks, by @bimbashrestha
perf: minor generic speed optimization, by @davidbolvansky
api: new ability to extract sequences from the parser for analysis, by @bimbashrestha
api: fixed decoding of magic-less frames, by @terrelln
api: fixed ZSTD_initCStream_advanced() performance with fast modes, reported by @QrczakMK
cli: Named pipes support, by @bimbashrestha
cli: short tar's extension support, by @stokito
cli: command --output-dir-flat= , generates target files into requested directory, by @senhuang42
cli: commands --stream-size=# and --size-hint=#, by @nmagerko
cli: command --exclude-compressed, by @shashank0791
cli: faster `-t` test mode
cli: improved some error messages, by @vangyzen
cli: fix command `-D dictionary` on Windows, reported by @artyompetrov
cli: fix rare deadlock condition within dictionary builder, by @terrelln
build: single-file decoder with emscripten compilation script, by @cwoffenden
build: fixed zlibWrapper compilation on Visual Studio, reported by @bluenlive
build: fixed deprecation warning for certain gcc version, reported by @jasonma163
build: fix compilation on old gcc versions, by @cemeyer
build: improved installation directories for cmake script, by Dmitri Shubin
pack: modified pkgconfig, for better integration into openwrt, requested by @neheb
misc: Improved documentation : ZSTD_CLEVEL, DYNAMIC_BMI2, ZSTD_CDict, function deprecation, zstd format
misc: fixed educational decoder : accept larger literals section, and removed UNALIGNED() macro
v1.4.3 (Aug 20, 2019)
bug: Fix Dictionary Compression Ratio Regression by @cyan4973 (#1709)
bug: Fix Buffer Overflow in legacy v0.3 decompression by @felixhandte (#1722)
build: Add support for IAR C/C++ Compiler for Arm by @joseph0918 (#1705)
v1.4.2 (Jul 26, 2019)
bug: Fix bug in zstd-0.5 decoder by @terrelln (#1696)
bug: Fix seekable decompression in-memory API by @iburinoc (#1695)
misc: Validate blocks are smaller than size limit by @vivekmg (#1685)
misc: Restructure source files by @ephiepark (#1679)
v1.4.1 (Jul 20, 2019)
bug: Fix data corruption in niche use cases by @terrelln (#1659)
bug: Fuzz legacy modes, fix uncovered bugs by @terrelln (#1593, #1594, #1595)
bug: Fix out of bounds read by @terrelln (#1590)
perf: Improve decode speed by ~7% @mgrice (#1668)
perf: Slightly improved compression ratio of level 3 and 4 (ZSTD_dfast) by @cyan4973 (#1681)
perf: Slightly faster compression speed when re-using a context by @cyan4973 (#1658)
perf: Improve compression ratio for small windowLog by @cyan4973 (#1624)
perf: Faster compression speed in high compression mode for repetitive data by @terrelln (#1635)
api: Add parameter to generate smaller dictionaries by @tyler-tran (#1656)
cli: Recognize symlinks when built in C99 mode by @felixhandte (#1640)
cli: Expose cpu load indicator for each file on -vv mode by @ephiepark (#1631)
cli: Restrict read permissions on destination files by @chungy (#1644)
cli: zstdgrep: handle -f flag by @felixhandte (#1618)
cli: zstdcat: follow symlinks by @vejnar (#1604)
doc: Remove extra size limit on compressed blocks by @felixhandte (#1689)
doc: Fix typo by @yk-tanigawa (#1633)
doc: Improve documentation on streaming buffer sizes by @cyan4973 (#1629)
build: CMake: support building with LZ4 @leeyoung624 (#1626)
build: CMake: install zstdless and zstdgrep by @leeyoung624 (#1647)
build: CMake: respect existing uninstall target by @j301scott (#1619)
build: Make: skip multithread tests when built without support by @michaelforney (#1620)
build: Make: Fix examples/ test target by @sjnam (#1603)
build: Meson: rename options out of deprecated namespace by @lzutao (#1665)
build: Meson: fix build by @lzutao (#1602)
build: Visual Studio: don't export symbols in static lib by @scharan (#1650)
build: Visual Studio: fix linking by @absotively (#1639)
build: Fix MinGW-W64 build by @myzhang1029 (#1600)
misc: Expand decodecorpus coverage by @ephiepark (#1664)
v1.4.0 (Apr 17, 2019)
perf: Improve level 1 compression speed in most scenarios by 6% by @gbtucker and @terrelln
api: Move the advanced API, including all functions in the staging section, to the stable section
api: Make ZSTD_e_flush and ZSTD_e_end block for maximum forward progress
api: Rename ZSTD_CCtxParam_getParameter to ZSTD_CCtxParams_getParameter
api: Rename ZSTD_CCtxParam_setParameter to ZSTD_CCtxParams_setParameter
api: Don't export ZSTDMT functions from the shared library by default
api: Require ZSTD_MULTITHREAD to be defined to use ZSTDMT
api: Add ZSTD_decompressBound() to provide an upper bound on decompressed size by @shakeelrao
api: Fix ZSTD_decompressDCtx() corner cases with a dictionary
api: Move ZSTD_getDictID_*() functions to the stable section
api: Add ZSTD_c_literalCompressionMode flag to enable or disable literal compression by @terrelln
api: Allow compression parameters to be set when a dictionary is used
api: Allow setting parameters before or after ZSTD_CCtx_loadDictionary() is called
api: Fix ZSTD_estimateCStreamSize_usingCCtxParams()
api: Setting ZSTD_d_maxWindowLog to 0 means use the default
cli: Ensure that a dictionary is not used to compress itself by @shakeelrao
cli: Add --[no-]compress-literals flag to enable or disable literal compression
doc: Update the examples to use the advanced API
doc: Explain how to transition from old streaming functions to the advanced API in the header
build: Improve the Windows release packages
build: Improve CMake build by @hjmjohnson
build: Build fixes for FreeBSD by @lwhsu
build: Remove redundant warnings by @thatsafunnyname
build: Fix tests on OpenBSD by @bket
build: Extend fuzzer build system to work with the new clang engine
build: CMake now creates the libzstd.so.1 symlink
build: Improve Menson build by @lzutao
misc: Fix symbolic link detection on FreeBSD
misc: Use physical core count for -T0 on FreeBSD by @cemeyer
misc: Fix zstd --list on truncated files by @kostmo
misc: Improve logging in debug mode by @felixhandte
misc: Add CirrusCI tests by @lwhsu
misc: Optimize dictionary memory usage in corner cases
misc: Improve the dictionary builder on small or homogeneous data
misc: Fix spelling across the repo by @jsoref
v1.3.8 (Dec 28, 2018)
perf: better decompression speed on large files (+7%) and cold dictionaries (+15%)
perf: slightly better compression ratio at high compression modes
api : finalized advanced API, last stage before "stable" status
api : new --rsyncable mode, by @terrelln
api : support decompression of empty frames into NULL (used to be an error) (#1385)
build: new set of macros to build a minimal size decoder, by @felixhandte
build: fix compilation on MIPS32, reported by @clbr (#1441)
build: fix compilation with multiple -arch flags, by @ryandesign
build: highly upgraded meson build, by @lzutao
build: improved buck support, by @obelisk
build: fix cmake script : can create debug build, by @pitrou
build: Makefile : grep works on both colored consoles and systems without color support
build: fixed zstd-pgo, by @bmwiedemann
cli : support ZSTD_CLEVEL environment variable, by @yijinfb (#1423)
cli : --no-progress flag, preserving final summary (#1371), by @terrelln
cli : ensure destination file is not source file (#1422)
cli : clearer error messages, especially when input file not present
doc : clarified zstd_compression_format.md, by @ulikunitz
misc: fixed zstdgrep, returns 1 on failure, by @lzutao
misc: NEWS renamed as CHANGELOG, in accordance with fboss
v1.3.7 (Oct 20, 2018)
perf: slightly better decompression speed on clang (depending on hardware target)
fix : performance of dictionary compression for small input < 4 KB at levels 9 and 10
build: no longer build backtrace by default in release mode; restrict further automatic mode
build: control backtrace support through build macro BACKTRACE
misc: added man pages for zstdless and zstdgrep, by @samrussell
v1.3.6 (Oct 6, 2018)
perf: much faster dictionary builder, by @jenniferliu
perf: faster dictionary compression on small data when using multiple contexts, by @felixhandte
perf: faster dictionary decompression when using a very large number of dictionaries simultaneously
cli : fix : does no longer overwrite destination when source does not exist (#1082)
cli : new command --adapt, for automatic compression level adaptation
api : fix : block api can be streamed with > 4 GB, reported by @catid
api : reduced ZSTD_DDict size by 2 KB
api : minimum negative compression level is defined, and can be queried using ZSTD_minCLevel().
build: support Haiku target, by @korli
build: Read Legacy format is limited to v0.5+ by default. Can be changed at compile time with macro ZSTD_LEGACY_SUPPORT.
doc : zstd_compression_format.md updated to match wording in IETF RFC 8478
misc: tests/paramgrill, a parameter optimizer, by @GeorgeLu97
v1.3.5 (Jun 29, 2018)
perf: much faster dictionary compression, by @felixhandte
perf: small quality improvement for dictionary generation, by @terrelln
perf: slightly improved high compression levels (notably level 19)
mem : automatic memory release for long duration contexts
cli : fix : overlapLog can be manually set
cli : fix : decoding invalid lz4 frames
api : fix : performance degradation for dictionary compression when using advanced API, by @terrelln
api : change : clarify ZSTD_CCtx_reset() vs ZSTD_CCtx_resetParameters(), by @terrelln
build: select custom libzstd scope through control macros, by @GeorgeLu97
build: OpenBSD patch, by @bket
build: make and make all are compatible with -j
doc : clarify zstd_compression_format.md, updated for IETF RFC process
misc: pzstd compatible with reproducible compilation, by @lamby
v1.3.4 (Mar 27, 2018)
perf: faster speed (especially decoding speed) on recent cpus (haswell+)
perf: much better performance associating --long with multi-threading, by @terrelln
perf: better compression at levels 13-15
cli : asynchronous compression by default, for faster experience (use --single-thread for former behavior)
cli : smoother status report in multi-threading mode
cli : added command --fast=#, for faster compression modes
cli : fix crash when not overwriting existing files, by Pádraig Brady (@pixelb)
api : `nbThreads` becomes `nbWorkers` : 1 triggers asynchronous mode
api : compression levels can be negative, for even more speed
api : ZSTD_getFrameProgression() : get precise progress status of ZSTDMT anytime
api : ZSTDMT can accept new compression parameters during compression
api : implemented all advanced dictionary decompression prototypes
build: improved meson recipe, by Shawn Landden (@shawnl)
build: VS2017 scripts, by @HaydnTrigg
misc: all /contrib projects fixed
misc: added /contrib/docker script by @gyscos
v1.3.3 (Dec 21, 2017)
perf: faster zstd_opt strategy (levels 16-19)
fix : bug #944 : multithreading with shared dictionary and large data, reported by @gsliepen
cli : fix : content size written in header by default
cli : fix : improved LZ4 format support, by @felixhandte
cli : new : hidden command `-S`, to benchmark multiple files while generating one result per file
api : fix : support large skippable frames, by @terrelln
api : fix : streaming interface was adding a useless 3-bytes null block to small frames
api : change : when setting `pledgedSrcSize`, use `ZSTD_CONTENTSIZE_UNKNOWN` macro value to mean "unknown"
build: fix : compilation under rhel6 and centos6, reported by @pixelb
build: added `check` target
v1.3.2 (Oct 10, 2017)
new : long range mode, using --long command, by Stella Lau (@stellamplau)
new : ability to generate and decode magicless frames (#591)
changed : maximum nb of threads reduced to 200, to avoid address space exhaustion in 32-bits mode
fix : multi-threading compression works with custom allocators
fix : ZSTD_sizeof_CStream() was over-evaluating memory usage
fix : a rare compression bug when compression generates very large distances and bunch of other conditions (only possible at --ultra -22)
fix : 32-bits build can now decode large offsets (levels 21+)
cli : added LZ4 frame support by default, by Felix Handte (@felixhandte)
cli : improved --list output
cli : new : can split input file for dictionary training, using command -B#
cli : new : clean operation artefact on Ctrl-C interruption
cli : fix : do not change /dev/null permissions when using command -t with root access, reported by @mike155 (#851)
cli : fix : write file size in header in multiple-files mode
api : added macro ZSTD_COMPRESSBOUND() for static allocation
api : experimental : new advanced decompression API
api : fix : sizeof_CCtx() used to over-estimate
build: fix : no-multithread variant compiles without pool.c dependency, reported by Mitchell Blank Jr (@mitchblank) (#819)
build: better compatibility with reproducible builds, by Bernhard M. Wiedemann (@bmwiedemann) (#818)
example : added streaming_memory_usage
license : changed /examples license to BSD + GPLv2
license : fix a few header files to reflect new license (#825)
v1.3.1 (Aug 21, 2017)
New license : BSD + GPLv2
perf: substantially decreased memory usage in Multi-threading mode, thanks to reports by Tino Reichardt (@mcmilk)
perf: Multi-threading supports up to 256 threads. Cap at 256 when more are requested (#760)
cli : improved and fixed --list command, by @ib (#772)
cli : command -vV to list supported formats, by @ib (#771)
build : fixed binary variants, reported by @svenha (#788)
build : fix Visual compilation for non x86/x64 targets, reported by Greg Slazinski (@GregSlazinski) (#718)
API exp : breaking change : ZSTD_getframeHeader() provides more information
API exp : breaking change : pinned down values of error codes
doc : fixed huffman example, by Ulrich Kunitz (@ulikunitz)
new : contrib/adaptive-compression, I/O driven compression strength, by Paul Cruz (@paulcruz74)
new : contrib/long_distance_matching, statistics by Stella Lau (@stellamplau)
updated : contrib/linux-kernel, by Nick Terrell (@terrelln)
v1.3.0 (Jul 6, 2017)
cli : new : `--list` command, by Paul Cruz
cli : changed : xz/lzma support enabled by default
cli : changed : `-t *` continue processing list after a decompression error
API : added : ZSTD_versionString()
API : promoted to stable status : ZSTD_getFrameContentSize(), by Sean Purcell
API exp : new advanced API : ZSTD_compress_generic(), ZSTD_CCtx_setParameter()
API exp : new : API for static or external allocation : ZSTD_initStatic?Ctx()
API exp : added : ZSTD_decompressBegin_usingDDict(), requested by Guy Riddle (#700)
API exp : clarified memory estimation / measurement functions.
API exp : changed : strongest strategy renamed ZSTD_btultra, fastest strategy ZSTD_fast set to 1
tools : decodecorpus can generate random dictionary-compressed samples, by Paul Cruz
new : contrib/seekable_format, demo and API, by Sean Purcell
changed : contrib/linux-kernel, updated version and license, by Nick Terrell
v1.2.0 (May 5, 2017)
cli : changed : Multithreading enabled by default (use target zstd-nomt or HAVE_THREAD=0 to disable)
cli : new : command -T0 means "detect and use nb of cores", by Sean Purcell
cli : new : zstdmt symlink hardwired to `zstd -T0`
cli : new : command --threads=# (#671)
cli : changed : cover dictionary builder by default, for improved quality, by Nick Terrell
cli : new : commands --train-cover and --train-legacy, to select dictionary algorithm and parameters
cli : experimental targets `zstd4` and `xzstd4`, with support for lz4 format, by Sean Purcell
cli : fix : does not output compressed data on console
cli : fix : ignore symbolic links unless --force specified,
API : breaking change : ZSTD_createCDict_advanced(), only use compressionParameters as argument
API : added : prototypes ZSTD_*_usingCDict_advanced(), for direct control over frameParameters.
API : improved: ZSTDMT_compressCCtx() reduced memory usage
API : fix : ZSTDMT_compressCCtx() now provides srcSize in header (#634)
API : fix : src size stored in frame header is controlled at end of frame
API : fix : enforced consistent rules for pledgedSrcSize==0 (#641)
API : fix : error code "GENERIC" replaced by "dstSizeTooSmall" when appropriate
build: improved cmake script, by @Majlen
build: enabled Multi-threading support for *BSD, by Baptiste Daroussin
tools: updated Paramgrill. Command -O# provides best parameters for sample and speed target.
new : contrib/linux-kernel version, by Nick Terrell
v1.1.4 (Mar 18, 2017)
cli : new : can compress in *.gz format, using --format=gzip command, by Przemyslaw Skibinski
cli : new : advanced benchmark command --priority=rt
cli : fix : write on sparse-enabled file systems in 32-bits mode, by @ds77
cli : fix : --rm remains silent when input is stdin
cli : experimental : xzstd, with support for xz/lzma decoding, by Przemyslaw Skibinski
speed : improved decompression speed in streaming mode for single shot scenarios (+5%)
memory: DDict (decompression dictionary) memory usage down from 150 KB to 20 KB
arch: 32-bits variant able to generate and decode very long matches (>32 MB), by Sean Purcell
API : new : ZSTD_findFrameCompressedSize(), ZSTD_getFrameContentSize(), ZSTD_findDecompressedSize()
API : changed : dropped support of legacy versions <= v0.3 (can be changed by modifying ZSTD_LEGACY_SUPPORT value)
build : new: meson build system in contrib/meson, by Dima Krasner
build : improved cmake script, by @Majlen
build : added -Wformat-security flag, as recommended by Padraig Brady
doc : new : educational decoder, by Sean Purcell
v1.1.3 (Feb 7, 2017)
cli : zstd can decompress .gz files (can be disabled with `make zstd-nogz` or `make HAVE_ZLIB=0`)
cli : new : experimental target `make zstdmt`, with multi-threading support
cli : new : improved dictionary builder "cover" (experimental), by Nick Terrell, based on prior work by Giuseppe Ottaviano.
cli : new : advanced commands for detailed parameters, by Przemyslaw Skibinski
cli : fix zstdless on Mac OS-X, by Andrew Janke
cli : fix #232 "compress non-files"
dictBuilder : improved dictionary generation quality, thanks to Nick Terrell
API : new : lib/compress/ZSTDMT_compress.h multithreading API (experimental)
API : new : ZSTD_create?Dict_byReference(), requested by Bartosz Taudul
API : new : ZDICT_finalizeDictionary()
API : fix : ZSTD_initCStream_usingCDict() properly writes dictID into frame header, by Gregory Szorc (#511)
API : fix : all symbols properly exposed in libzstd, by Nick Terrell
build : support for Solaris target, by Przemyslaw Skibinski
doc : clarified specification, by Sean Purcell
v1.1.2 (Dec 15, 2016)
API : streaming : decompression : changed : automatic implicit reset when chain-decoding new frames without init
API : experimental : added : dictID retrieval functions, and ZSTD_initCStream_srcSize()
API : zbuff : changed : prototypes now generate deprecation warnings
lib : improved : faster decompression speed at ultra compression settings and 32-bits mode
lib : changed : only public ZSTD_ symbols are now exposed
lib : changed : reduced usage of stack memory
lib : fixed : several corner case bugs, by Nick Terrell
cli : new : gzstd, experimental version able to decode .gz files, by Przemyslaw Skibinski
cli : new : preserve file attributes
cli : new : added zstdless and zstdgrep tools
cli : fixed : status displays total amount decoded, even for file consisting of multiple frames (like pzstd)
cli : fixed : zstdcat
zlib_wrapper : added support for gz* functions, by Przemyslaw Skibinski
install : better compatibility with FreeBSD, by Dimitry Andric
source tree : changed : zbuff source files moved to lib/deprecated
v1.1.1 (Nov 2, 2016)
New : command -M#, --memory=, --memlimit=, --memlimit-decompress= to limit allowed memory consumption
New : doc/zstd_manual.html, by Przemyslaw Skibinski
Improved : slightly better compression ratio at --ultra levels (>= 20)
Improved : better memory usage when using streaming compression API, thanks to @Rogier-5 report
Added : API : ZSTD_initCStream_usingCDict(), ZSTD_initDStream_usingDDict() (experimental section)
Added : example/multiple_streaming_compression.c
Changed : zstd_errors.h is now installed within /include (and replaces errors_public.h)
Updated man page
Fixed : zstd-small, zstd-compress and zstd-decompress compilation targets
v1.1.0 (Sep 28, 2016)
New : contrib/pzstd, parallel version of zstd, by Nick Terrell
added : NetBSD install target (#338)
Improved : speed for batches of small files
Improved : speed of zlib wrapper, by Przemyslaw Skibinski
Changed : libzstd on Windows supports legacy formats, by Christophe Chevalier
Fixed : CLI -d output to stdout by default when input is stdin (#322)
Fixed : CLI correctly detects console on Mac OS-X
Fixed : CLI supports recursive mode `-r` on Mac OS-X
Fixed : Legacy decoders use unified error codes, reported by benrg (#341), fixed by Przemyslaw Skibinski
Fixed : compatibility with OpenBSD, reported by Juan Francisco Cantero Hurtado (#319)
Fixed : compatibility with Hurd, by Przemyslaw Skibinski (#365)
Fixed : zstd-pgo, reported by octoploid (#329)
v1.0.0 (Sep 1, 2016)
Change Licensing, all project is now BSD, Copyright Facebook
Small decompression speed improvement
API : Streaming API supports legacy format
API : ZDICT_getDictID(), ZSTD_sizeof_{CCtx, DCtx, CStream, DStream}(), ZSTD_setDStreamParameter()
CLI supports legacy formats v0.4+
Fixed : compression fails on certain huge files, reported by Jesse McGrew
Enhanced documentation, by Przemyslaw Skibinski
v0.8.1 (Aug 18, 2016)
New streaming API
Changed : --ultra now enables levels beyond 19
Changed : -i# now selects benchmark time in second
Fixed : ZSTD_compress* can now compress > 4 GB in a single pass, reported by Nick Terrell
Fixed : speed regression on specific patterns (#272)
Fixed : support for Z_SYNC_FLUSH, by Dmitry Krot (#291)
Fixed : ICC compilation, by Przemyslaw Skibinski
v0.8.0 (Aug 2, 2016)
Improved : better speed on clang and gcc -O2, thanks to Eric Biggers
New : Build on FreeBSD and DragonFly, thanks to JrMarino
Changed : modified API : ZSTD_compressEnd()
Fixed : legacy mode with ZSTD_HEAPMODE=0, by Christopher Bergqvist
Fixed : premature end of frame when zero-sized raw block, reported by Eric Biggers
Fixed : large dictionaries (> 384 KB), reported by Ilona Papava
Fixed : checksum correctly checked in single-pass mode
Fixed : combined --test amd --rm, reported by Andreas M. Nilsson
Modified : minor compression level adaptations
Updated : compression format specification to v0.2.0
changed : zstd.h moved to /lib directory
v0.7.5 (Aug 1, 2016)
Transition version, supporting decoding of v0.8.x
v0.7.4 (Jul 17, 2016)
Added : homebrew for Mac, by Daniel Cade
Added : more examples
Fixed : segfault when using small dictionaries, reported by Felix Handte
Modified : default compression level for CLI is now 3
Updated : specification, to v0.1.1
v0.7.3 (Jul 9, 2016)
New : compression format specification
New : `--` separator, stating that all following arguments are file names. Suggested by Chip Turner.
New : `ZSTD_getDecompressedSize()`
New : OpenBSD target, by Juan Francisco Cantero Hurtado
New : `examples` directory
fixed : dictBuilder using HC levels, reported by Bartosz Taudul
fixed : legacy support from ZSTD_decompress_usingDDict(), reported by Felix Handte
fixed : multi-blocks decoding with intermediate uncompressed blocks, reported by Greg Slazinski
modified : removed "mem.h" and "error_public.h" dependencies from "zstd.h" (experimental section)
modified : legacy functions no longer need magic number
v0.7.2 (Jul 4, 2016)
fixed : ZSTD_decompressBlock() using multiple consecutive blocks. Reported by Greg Slazinski.
fixed : potential segfault on very large files (many gigabytes). Reported by Chip Turner.
fixed : CLI displays system error message when destination file cannot be created (#231). Reported by Chip Turner.
v0.7.1 (Jun 23, 2016)
fixed : ZBUFF_compressEnd() called multiple times with too small `dst` buffer, reported by Christophe Chevalier
fixed : dictBuilder fails if first sample is too small, reported by Руслан Ковалёв
fixed : corruption issue, reported by cj
modified : checksum enabled by default in command line mode
v0.7.0 (Jun 17, 2016)
New : Support for directory compression, using `-r`, thanks to Przemyslaw Skibinski
New : Command `--rm`, to remove source file after successful de/compression
New : Visual build scripts, by Christophe Chevalier
New : Support for Sparse File-systems (do not use space for zero-filled sectors)
New : Frame checksum support
New : Support pass-through mode (when using `-df`)
API : more efficient Dictionary API : `ZSTD_compress_usingCDict()`, `ZSTD_decompress_usingDDict()`
API : create dictionary files from custom content, by Giuseppe Ottaviano
API : support for custom malloc/free functions
New : controllable Dictionary ID
New : Support for skippable frames
v0.6.1 (May 13, 2016)
New : zlib wrapper API, thanks to Przemyslaw Skibinski
New : Ability to compile compressor / decompressor separately
Changed : new lib directory structure
Fixed : Legacy codec v0.5 compatible with dictionary decompression
Fixed : Decoder corruption error (#173)
Fixed : null-string roundtrip (#176)
New : benchmark mode can select directory as input
Experimental : midipix support, VMS support
v0.6.0 (Apr 13, 2016)
Stronger high compression modes, thanks to Przemyslaw Skibinski
API : ZSTD_getFrameParams() provides size of decompressed content
New : highest compression modes require `--ultra` command to fully unleash their capacity
Fixed : zstd cli return error code > 0 and removes dst file artifact when decompression fails, thanks to Chip Turner
v0.5.1 (Feb 18, 2016)
New : Optimal parsing => Very high compression modes, thanks to Przemyslaw Skibinski
Changed : Dictionary builder integrated into libzstd and zstd cli
Changed (!) : zstd cli now uses "multiple input files" as default mode. See `zstd -h`.
Fix : high compression modes for big-endian platforms
New : zstd cli : `-t` | `--test` command
v0.5.0 (Feb 5, 2016)
New : dictionary builder utility
Changed : streaming & dictionary API
Improved : better compression of small data
v0.4.7 (Jan 22, 2016)
Improved : small compression speed improvement in HC mode
Changed : `zstd_decompress.c` has ZSTD_LEGACY_SUPPORT to 0 by default
fix : bt search bug
v0.4.6 (Jan 13, 2016)
fix : fast compression mode on Windows
New : cmake configuration file, thanks to Artyom Dymchenko
Improved : high compression mode on repetitive data
New : block-level API
New : ZSTD_duplicateCCtx()
v0.4.5 (Dec 18, 2015)
new : -m/--multiple : compress/decompress multiple files
v0.4.4 (Dec 14, 2015)
Fixed : high compression modes for Windows 32 bits
new : external dictionary API extended to buffered mode and accessible through command line
new : windows DLL project, thanks to Christophe Chevalier
v0.4.3 (Dec 7, 2015)
new : external dictionary API
new : zstd-frugal
v0.4.2 (Dec 2, 2015)
Generic minor improvements for small blocks
Fixed : big-endian compatibility, by Peter Harris (#85)
v0.4.1 (Dec 1, 2015)
Fixed : ZSTD_LEGACY_SUPPORT=0 build mode (reported by Luben)
removed `zstd.c`
v0.4.0 (Nov 29, 2015)
Command line utility compatible with high compression levels
Removed zstdhc => merged into zstd
Added : ZBUFF API (see zstd_buffered.h)
Rolling buffer support
v0.3.6 (Nov 10, 2015)
small blocks params
v0.3.5 (Nov 9, 2015)
minor generic compression improvements
v0.3.4 (Nov 6, 2015)
Faster fast cLevels
v0.3.3 (Nov 5, 2015)
Small compression ratio improvement
v0.3.2 (Nov 2, 2015)
Fixed Visual Studio
v0.3.1 (Nov 2, 2015)
Small compression ratio improvement
v0.3 (Oct 30, 2015)
HC mode : compression levels 2-26
v0.2.2 (Oct 28, 2015)
Fix : Visual Studio 2013 & 2015 release compilation, by Christophe Chevalier
v0.2.1 (Oct 24, 2015)
Fix : Read errors, advanced fuzzer tests, by Hanno Böck
v0.2.0 (Oct 22, 2015)
**Breaking format change**
Faster decompression speed
Can still decode v0.1 format
v0.1.3 (Oct 15, 2015)
fix uninitialization warning, reported by Evan Nemerson
v0.1.2 (Sep 11, 2015)
frame concatenation support
v0.1.1 (Aug 27, 2015)
fix compression bug
detects write-flush errors
v0.1.0 (Aug 25, 2015)
first release

View File

@@ -0,0 +1,5 @@
# Code of Conduct
Facebook has adopted a Code of Conduct that we expect project participants to adhere to.
Please read the [full text](https://code.fb.com/codeofconduct/)
so that you can understand what actions will and will not be tolerated.

View File

@@ -0,0 +1,489 @@
# Contributing to Zstandard
We want to make contributing to this project as easy and transparent as
possible.
## Our Development Process
New versions are being developed in the "dev" branch,
or in their own feature branch.
When they are deemed ready for a release, they are merged into "release".
As a consequence, all contributions must stage first through "dev"
or their own feature branch.
## Pull Requests
We actively welcome your pull requests.
1. Fork the repo and create your branch from `dev`.
2. If you've added code that should be tested, add tests.
3. If you've changed APIs, update the documentation.
4. Ensure the test suite passes.
5. Make sure your code lints.
6. If you haven't already, complete the Contributor License Agreement ("CLA").
## Contributor License Agreement ("CLA")
In order to accept your pull request, we need you to submit a CLA. You only need
to do this once to work on any of Facebook's open source projects.
Complete your CLA here: <https://code.facebook.com/cla>
## Workflow
Zstd uses a branch-based workflow for making changes to the codebase. Typically, zstd
will use a new branch per sizable topic. For smaller changes, it is okay to lump multiple
related changes into a branch.
Our contribution process works in three main stages:
1. Local development
* Update:
* Checkout your fork of zstd if you have not already
```
git checkout https://github.com/<username>/zstd
cd zstd
```
* Update your local dev branch
```
git pull https://github.com/facebook/zstd dev
git push origin dev
```
* Topic and development:
* Make a new branch on your fork about the topic you're developing for
```
# branch names should be concise but sufficiently informative
git checkout -b <branch-name>
git push origin <branch-name>
```
* Make commits and push
```
# make some changes =
git add -u && git commit -m <message>
git push origin <branch-name>
```
* Note: run local tests to ensure that your changes didn't break existing functionality
* Quick check
```
make check
```
* Longer check
```
make test
```
2. Code Review and CI tests
* Ensure CI tests pass:
* Before sharing anything to the community, create a pull request in your own fork against the dev branch
and make sure that all GitHub Actions CI tests pass. See the Continuous Integration section below for more information.
* Ensure that static analysis passes on your development machine. See the Static Analysis section
below to see how to do this.
* Create a pull request:
* When you are ready to share you changes to the community, create a pull request from your branch
to facebook:dev. You can do this very easily by clicking 'Create Pull Request' on your fork's home
page.
* From there, select the branch where you made changes as your source branch and facebook:dev
as the destination.
* Examine the diff presented between the two branches to make sure there is nothing unexpected.
* Write a good pull request description:
* While there is no strict template that our contributors follow, we would like them to
sufficiently summarize and motivate the changes they are proposing. We recommend all pull requests,
at least indirectly, address the following points.
* Is this pull request important and why?
* Is it addressing an issue? If so, what issue? (provide links for convenience please)
* Is this a new feature? If so, why is it useful and/or necessary?
* Are there background references and documents that reviewers should be aware of to properly assess this change?
* Note: make sure to point out any design and architectural decisions that you made and the rationale behind them.
* Note: if you have been working with a specific user and would like them to review your work, make sure you mention them using (@<username>)
* Submit the pull request and iterate with feedback.
3. Merge and Release
* Getting approval:
* You will have to iterate on your changes with feedback from other collaborators to reach a point
where your pull request can be safely merged.
* To avoid too many comments on style and convention, make sure that you have a
look at our style section below before creating a pull request.
* Eventually, someone from the zstd team will approve your pull request and not long after merge it into
the dev branch.
* Housekeeping:
* Most PRs are linked with one or more Github issues. If this is the case for your PR, make sure
the corresponding issue is mentioned. If your change 'fixes' or completely addresses the
issue at hand, then please indicate this by requesting that an issue be closed by commenting.
* Just because your changes have been merged does not mean the topic or larger issue is complete. Remember
that the change must make it to an official zstd release for it to be meaningful. We recommend
that contributors track the activity on their pull request and corresponding issue(s) page(s) until
their change makes it to the next release of zstd. Users will often discover bugs in your code or
suggest ways to refine and improve your initial changes even after the pull request is merged.
## Static Analysis
Static analysis is a process for examining the correctness or validity of a program without actually
executing it. It usually helps us find many simple bugs. Zstd uses clang's `scan-build` tool for
static analysis. You can install it by following the instructions for your OS on https://clang-analyzer.llvm.org/scan-build.
Once installed, you can ensure that our static analysis tests pass on your local development machine
by running:
```
make staticAnalyze
```
In general, you can use `scan-build` to static analyze any build script. For example, to static analyze
just `contrib/largeNbDicts` and nothing else, you can run:
```
scan-build make -C contrib/largeNbDicts largeNbDicts
```
### Pitfalls of static analysis
`scan-build` is part of our regular CI suite. Other static analyzers are not.
It can be useful to look at additional static analyzers once in a while (and we do), but it's not a good idea to multiply the nb of analyzers run continuously at each commit and PR. The reasons are :
- Static analyzers are full of false positive. The signal to noise ratio is actually pretty low.
- A good CI policy is "zero-warning tolerance". That means that all issues must be solved, including false positives. This quickly becomes a tedious workload.
- Multiple static analyzers will feature multiple kind of false positives, sometimes applying to the same code but in different ways leading to :
+ tortuous code, trying to please multiple constraints, hurting readability and therefore maintenance. Sometimes, such complexity introduce other more subtle bugs, that are just out of scope of the analyzers.
+ sometimes, these constraints are mutually exclusive : if one try to solve one, the other static analyzer will complain, they can't be both happy at the same time.
- As if that was not enough, the list of false positives change with each version. It's hard enough to follow one static analyzer, but multiple ones with their own update agenda, this quickly becomes a massive velocity reducer.
This is different from running a static analyzer once in a while, looking at the output, and __cherry picking__ a few warnings that seem helpful, either because they detected a genuine risk of bug, or because it helps expressing the code in a way which is more readable or more difficult to misuse. These kinds of reports can be useful, and are accepted.
## Continuous Integration
CI tests run every time a pull request (PR) is created or updated. The exact tests
that get run will depend on the destination branch you specify. Some tests take
longer to run than others. Currently, our CI is set up to run a short
series of tests when creating a PR to the dev branch and a longer series of tests
when creating a PR to the release branch. You can look in the configuration files
of the respective CI platform for more information on what gets run when.
Most people will just want to create a PR with the destination set to their local dev
branch of zstd. You can then find the status of the tests on the PR's page. You can also
re-run tests and cancel running tests from the PR page or from the respective CI's dashboard.
Almost all of zstd's CI runs on GitHub Actions (configured at `.github/workflows`), which will automatically run on PRs to your
own fork. A small number of tests run on other services (e.g. Travis CI, Circle CI, Appveyor).
These require work to set up on your local fork, and (at least for Travis CI) cost money.
Therefore, if the PR on your local fork passes GitHub Actions, feel free to submit a PR
against the main repo.
### Third-party CI
A small number of tests cannot run on GitHub Actions, or have yet to be migrated.
For these, we use a variety of third-party services (listed below). It is not necessary to set
these up on your fork in order to contribute to zstd; however, we do link to instructions for those
who want earlier signal.
| Service | Purpose | Setup Links | Config Path |
|-----------|------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------|
| Travis CI | Used for testing on non-x86 architectures such as PowerPC | https://docs.travis-ci.com/user/tutorial/#to-get-started-with-travis-ci-using-github <br> https://github.com/marketplace/travis-ci | `.travis.yml` |
| AppVeyor | Used for some Windows testing (e.g. cygwin, mingw) | https://www.appveyor.com/blog/2018/10/02/github-apps-integration/ <br> https://github.com/marketplace/appveyor | `appveyor.yml` |
| Cirrus CI | Used for testing on FreeBSD | https://github.com/marketplace/cirrus-ci/ | `.cirrus.yml` |
| Circle CI | Historically was used to provide faster signal,<br/> but we may be able to migrate these to Github Actions | https://circleci.com/docs/2.0/getting-started/#setting-up-circleci <br> https://youtu.be/Js3hMUsSZ2c <br> https://circleci.com/docs/2.0/enable-checks/ | `.circleci/config.yml` |
Note: the instructions linked above mostly cover how to set up a repository with CI from scratch.
The general idea should be the same for setting up CI on your fork of zstd, but you may have to
follow slightly different steps. In particular, please ignore any instructions related to setting up
config files (since zstd already has configs for each of these services).
## Performance
Performance is extremely important for zstd and we only merge pull requests whose performance
landscape and corresponding trade-offs have been adequately analyzed, reproduced, and presented.
This high bar for performance means that every PR which has the potential to
impact performance takes a very long time for us to properly review. That being said, we
always welcome contributions to improve performance (or worsen performance for the trade-off of
something else). Please keep the following in mind before submitting a performance related PR:
1. Zstd isn't as old as gzip but it has been around for time now and its evolution is
very well documented via past Github issues and pull requests. It may be the case that your
particular performance optimization has already been considered in the past. Please take some
time to search through old issues and pull requests using keywords specific to your
would-be PR. Of course, just because a topic has already been discussed (and perhaps rejected
on some grounds) in the past, doesn't mean it isn't worth bringing up again. But even in that case,
it will be helpful for you to have context from that topic's history before contributing.
2. The distinction between noise and actual performance gains can unfortunately be very subtle
especially when microbenchmarking extremely small wins or losses. The only remedy to getting
something subtle merged is extensive benchmarking. You will be doing us a great favor if you
take the time to run extensive, long-duration, and potentially cross-(os, platform, process, etc)
benchmarks on your end before submitting a PR. Of course, you will not be able to benchmark
your changes on every single processor and os out there (and neither will we) but do that best
you can:) We've added some things to think about when benchmarking below in the Benchmarking
Performance section which might be helpful for you.
3. Optimizing performance for a certain OS, processor vendor, compiler, or network system is a perfectly
legitimate thing to do as long as it does not harm the overall performance health of Zstd.
This is a hard balance to strike but please keep in mind other aspects of Zstd when
submitting changes that are clang-specific, windows-specific, etc.
## Benchmarking Performance
Performance microbenchmarking is a tricky subject but also essential for Zstd. We value empirical
testing over theoretical speculation. This guide it not perfect but for most scenarios, it
is a good place to start.
### Stability
Unfortunately, the most important aspect in being able to benchmark reliably is to have a stable
benchmarking machine. A virtual machine, a machine with shared resources, or your laptop
will typically not be stable enough to obtain reliable benchmark results. If you can get your
hands on a desktop, this is usually a better scenario.
Of course, benchmarking can be done on non-hyper-stable machines as well. You will just have to
do a little more work to ensure that you are in fact measuring the changes you've made and not
noise. Here are some things you can do to make your benchmarks more stable:
1. The most simple thing you can do to drastically improve the stability of your benchmark is
to run it multiple times and then aggregate the results of those runs. As a general rule of
thumb, the smaller the change you are trying to measure, the more samples of benchmark runs
you will have to aggregate over to get reliable results. Here are some additional things to keep in
mind when running multiple trials:
* How you aggregate your samples are important. You might be tempted to use the mean of your
results. While this is certainly going to be a more stable number than a raw single sample
benchmark number, you might have more luck by taking the median. The mean is not robust to
outliers whereas the median is. Better still, you could simply take the fastest speed your
benchmark achieved on each run since that is likely the fastest your process will be
capable of running your code. In our experience, this (aggregating by just taking the sample
with the fastest running time) has been the most stable approach.
* The more samples you have, the more stable your benchmarks should be. You can verify
your improved stability by looking at the size of your confidence intervals as you
increase your sample count. These should get smaller and smaller. Eventually hopefully
smaller than the performance win you are expecting.
* Most processors will take some time to get `hot` when running anything. The observations
you collect during that time period will very different from the true performance number. Having
a very large number of sample will help alleviate this problem slightly but you can also
address is directly by simply not including the first `n` iterations of your benchmark in
your aggregations. You can determine `n` by simply looking at the results from each iteration
and then hand picking a good threshold after which the variance in results seems to stabilize.
2. You cannot really get reliable benchmarks if your host machine is simultaneously running
another cpu/memory-intensive application in the background. If you are running benchmarks on your
personal laptop for instance, you should close all applications (including your code editor and
browser) before running your benchmarks. You might also have invisible background applications
running. You can see what these are by looking at either Activity Monitor on Mac or Task Manager
on Windows. You will get more stable benchmark results of you end those processes as well.
* If you have multiple cores, you can even run your benchmark on a reserved core to prevent
pollution from other OS and user processes. There are a number of ways to do this depending
on your OS:
* On linux boxes, you have use https://github.com/lpechacek/cpuset.
* On Windows, you can "Set Processor Affinity" using https://www.thewindowsclub.com/processor-affinity-windows
* On Mac, you can try to use their dedicated affinity API https://developer.apple.com/library/archive/releasenotes/Performance/RN-AffinityAPI/#//apple_ref/doc/uid/TP40006635-CH1-DontLinkElementID_2
3. To benchmark, you will likely end up writing a separate c/c++ program that will link libzstd.
Dynamically linking your library will introduce some added variation (not a large amount but
definitely some). Statically linking libzstd will be more stable. Static libraries should
be enabled by default when building zstd.
4. Use a profiler with a good high resolution timer. See the section below on profiling for
details on this.
5. Disable frequency scaling, turbo boost and address space randomization (this will vary by OS)
6. Try to avoid storage. On some systems you can use tmpfs. Putting the program, inputs and outputs on
tmpfs avoids touching a real storage system, which can have a pretty big variability.
Also check our LLVM's guide on benchmarking here: https://llvm.org/docs/Benchmarking.html
### Zstd benchmark
The fastest signal you can get regarding your performance changes is via the in-build zstd cli
bench option. You can run Zstd as you typically would for your scenario using some set of options
and then additionally also specify the `-b#` option. Doing this will run our benchmarking pipeline
for that options you have just provided. If you want to look at the internals of how this
benchmarking script works, you can check out programs/benchzstd.c
For example: say you have made a change that you believe improves the speed of zstd level 1. The
very first thing you should use to assess whether you actually achieved any sort of improvement
is `zstd -b`. You might try to do something like this. Note: you can use the `-i` option to
specify a running time for your benchmark in seconds (default is 3 seconds).
Usually, the longer the running time, the more stable your results will be.
```
$ git checkout <commit-before-your-change>
$ make && cp zstd zstd-old
$ git checkout <commit-after-your-change>
$ make && cp zstd zstd-new
$ zstd-old -i5 -b1 <your-test-data>
1<your-test-data> : 8990 -> 3992 (2.252), 302.6 MB/s , 626.4 MB/s
$ zstd-new -i5 -b1 <your-test-data>
1<your-test-data> : 8990 -> 3992 (2.252), 302.8 MB/s , 628.4 MB/s
```
Unless your performance win is large enough to be visible despite the intrinsic noise
on your computer, benchzstd alone will likely not be enough to validate the impact of your
changes. For example, the results of the example above indicate that effectively nothing
changed but there could be a small <3% improvement that the noise on the host machine
obscured. So unless you see a large performance win (10-15% consistently) using just
this method of evaluation will not be sufficient.
### Profiling
There are a number of great profilers out there. We're going to briefly mention how you can
profile your code using `instruments` on mac, `perf` on linux and `visual studio profiler`
on Windows.
Say you have an idea for a change that you think will provide some good performance gains
for level 1 compression on Zstd. Typically this means, you have identified a section of
code that you think can be made to run faster.
The first thing you will want to do is make sure that the piece of code is actually taking up
a notable amount of time to run. It is usually not worth optimizing something which accounts for less than
0.0001% of the total running time. Luckily, there are tools to help with this.
Profilers will let you see how much time your code spends inside a particular function.
If your target code snippet is only part of a function, it might be worth trying to
isolate that snippet by moving it to its own function (this is usually not necessary but
might be).
Most profilers (including the profilers discussed below) will generate a call graph of
functions for you. Your goal will be to find your function of interest in this call graph
and then inspect the time spent inside of it. You might also want to look at the annotated
assembly which most profilers will provide you with.
#### Instruments
We will once again consider the scenario where you think you've identified a piece of code
whose performance can be improved upon. Follow these steps to profile your code using
Instruments.
1. Open Instruments
2. Select `Time Profiler` from the list of standard templates
3. Close all other applications except for your instruments window and your terminal
4. Run your benchmarking script from your terminal window
* You will want a benchmark that runs for at least a few seconds (5 seconds will
usually be long enough). This way the profiler will have something to work with
and you will have ample time to attach your profiler to this process:)
* I will just use benchzstd as my benchmarmking script for this example:
```
$ zstd -b1 -i5 <my-data> # this will run for 5 seconds
```
5. Once you run your benchmarking script, switch back over to instruments and attach your
process to the time profiler. You can do this by:
* Clicking on the `All Processes` drop down in the top left of the toolbar.
* Selecting your process from the dropdown. In my case, it is just going to be labeled
`zstd`
* Hitting the bright red record circle button on the top left of the toolbar
6. You profiler will now start collecting metrics from your benchmarking script. Once
you think you have collected enough samples (usually this is the case after 3 seconds of
recording), stop your profiler.
7. Make sure that in toolbar of the bottom window, `profile` is selected.
8. You should be able to see your call graph.
* If you don't see the call graph or an incomplete call graph, make sure you have compiled
zstd and your benchmarking script using debug flags. On mac and linux, this just means
you will have to supply the `-g` flag alone with your build script. You might also
have to provide the `-fno-omit-frame-pointer` flag
9. Dig down the graph to find your function call and then inspect it by double clicking
the list item. You will be able to see the annotated source code and the assembly side by
side.
#### Perf
This wiki has a pretty detailed tutorial on getting started working with perf so we'll
leave you to check that out of you're getting started:
https://perf.wiki.kernel.org/index.php/Tutorial
Some general notes on perf:
* Use `perf stat -r # <bench-program>` to quickly get some relevant timing and
counter statistics. Perf uses a high resolution timer and this is likely one
of the first things your team will run when assessing your PR.
* Perf has a long list of hardware counters that can be viewed with `perf --list`.
When measuring optimizations, something worth trying is to make sure the hardware
counters you expect to be impacted by your change are in fact being so. For example,
if you expect the L1 cache misses to decrease with your change, you can look at the
counter `L1-dcache-load-misses`
* Perf hardware counters will not work on a virtual machine.
#### Visual Studio
TODO
## Issues
We use GitHub issues to track public bugs. Please ensure your description is
clear and has sufficient instructions to be able to reproduce the issue.
Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe
disclosure of security bugs. In those cases, please go through the process
outlined on that page and do not file a public issue.
## Coding Style
It's a pretty long topic, which is difficult to summarize in a single paragraph.
As a rule of thumbs, try to imitate the coding style of
similar lines of codes around your contribution.
The following is a non-exhaustive list of rules employed in zstd code base:
### C90
This code base is following strict C90 standard,
with 2 extensions : 64-bit `long long` types, and variadic macros.
This rule is applied strictly to code within `lib/` and `programs/`.
Sub-project in `contrib/` are allowed to use other conventions.
### C++ direct compatibility : symbol mangling
All public symbol declarations must be wrapped in `extern “C” { … }`,
so that this project can be compiled as C++98 code,
and linked into C++ applications.
### Minimal Frugal
This design requirement is fundamental to preserve the portability of the code base.
#### Dependencies
- Reduce dependencies to the minimum possible level.
Any dependency should be considered “bad” by default,
and only tolerated because it provides a service in a better way than can be achieved locally.
The only external dependencies this repository tolerates are
standard C libraries, and in rare cases, system level headers.
- Within `lib/`, this policy is even more drastic.
The only external dependencies allowed are `<assert.h>`, `<stdlib.h>`, `<string.h>`,
and even then, not directly.
In particular, no function shall ever allocate on heap directly,
and must use instead `ZSTD_malloc()` and equivalent.
Other accepted non-symbol headers are `<stddef.h>` and `<limits.h>`.
- Within the project, there is a strict hierarchy of dependencies that must be respected.
`programs/` is allowed to depend on `lib/`, but only its public API.
Within `lib/`, `lib/common` doesn't depend on any other directory.
`lib/compress` and `lib/decompress` shall not depend on each other.
`lib/dictBuilder` can depend on `lib/common` and `lib/compress`, but not `lib/decompress`.
#### Resources
- Functions in `lib/` must use very little stack space,
several dozens of bytes max.
Everything larger must use the heap allocator,
or require a scratch buffer to be emplaced manually.
### Naming
* All public symbols are prefixed with `ZSTD_`
+ private symbols, with a scope limited to their own unit, are free of this restriction.
However, since `libzstd` source code can be amalgamated,
each symbol name must attempt to be (and remain) unique.
Avoid too generic names that could become ground for future collisions.
This generally implies usage of some form of prefix.
* For symbols (functions and variables), naming convention is `PREFIX_camelCase`.
+ In some advanced cases, one can also find :
- `PREFIX_prefix2_camelCase`
- `PREFIX_camelCase_extendedQualifier`
* Multi-words names generally consist of an action followed by object:
- for example : `ZSTD_createCCtx()`
* Prefer positive actions
- `goBackward` rather than `notGoForward`
* Type names (`struct`, etc.) follow similar convention,
except that they are allowed and even invited to start by an Uppercase letter.
Example : `ZSTD_CCtx`, `ZSTD_CDict`
* Macro names are all Capital letters.
The same composition rules (`PREFIX_NAME_QUALIFIER`) apply.
* File names are all lowercase letters.
The convention is `snake_case`.
File names **must** be unique across the entire code base,
even when they stand in clearly separated directories.
### Qualifiers
* This code base is `const` friendly, if not `const` fanatical.
Any variable that can be `const` (aka. read-only) **must** be `const`.
Any pointer which content will not be modified must be `const`.
This property is then controlled at compiler level.
`const` variables are an important signal to readers that this variable isn't modified.
Conversely, non-const variables are a signal to readers to watch out for modifications later on in the function.
* If a function must be inlined, mention it explicitly,
using project's own portable macros, such as `FORCE_INLINE_ATTR`,
defined in `lib/common/compiler.h`.
### Debugging
* **Assertions** are welcome, and should be used very liberally,
to control any condition the code expects for its correct execution.
These assertion checks will be run in debug builds, and disabled in production.
* For traces, this project provides its own debug macros,
in particular `DEBUGLOG(level, ...)`, defined in `lib/common/debug.h`.
### Code documentation
* Avoid code documentation that merely repeats what the code is already stating.
Whenever applicable, prefer employing the code as the primary way to convey explanations.
Example 1 : `int nbTokens = n;` instead of `int i = n; /* i is a nb of tokens *./`.
Example 2 : `assert(size > 0);` instead of `/* here, size should be positive */`.
* At declaration level, the documentation explains how to use the function or variable
and when applicable why it's needed, of the scenarios where it can be useful.
* At implementation level, the documentation explains the general outline of the algorithm employed,
and when applicable why this specific choice was preferred.
### General layout
* 4 spaces for indentation rather than tabs
* Code documentation shall directly precede function declaration or implementation
* Function implementations and its code documentation should be preceded and followed by an empty line
## License
By contributing to Zstandard, you agree that your contributions will be licensed
under both the [LICENSE](LICENSE) file and the [COPYING](COPYING) file in the root directory of this source tree.

View File

@@ -0,0 +1,339 @@
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
the GNU Lesser General Public License instead.) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have. You must make sure that they, too, receive or can get the
source code. And you must show them these terms so they know their
rights.
We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.
Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.
Finally, any free program is threatened constantly by software
patents. We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary. To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains
a notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License. The "Program", below,
refers to any such program or work, and a "work based on the Program"
means either the Program or any derivative work under copyright law:
that is to say, a work containing the Program or a portion of it,
either verbatim or with modifications and/or translated into another
language. (Hereinafter, translation is included without limitation in
the term "modification".) Each licensee is addressed as "you".
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running the Program is not restricted, and the output from the Program
is covered only if its contents constitute a work based on the
Program (independent of having been made by running the Program).
Whether that is true depends on what the Program does.
1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any warranty;
and give any other recipients of the Program a copy of this License
along with the Program.
You may charge a fee for the physical act of transferring a copy, and
you may at your option offer warranty protection in exchange for a fee.
2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) You must cause the modified files to carry prominent notices
stating that you changed the files and the date of any change.
b) You must cause any work that you distribute or publish, that in
whole or in part contains or is derived from the Program or any
part thereof, to be licensed as a whole at no charge to all third
parties under the terms of this License.
c) If the modified program normally reads commands interactively
when run, you must cause it, when started running for such
interactive use in the most ordinary way, to print or display an
announcement including an appropriate copyright notice and a
notice that there is no warranty (or else, saying that you provide
a warranty) and that users may redistribute the program under
these conditions, and telling the user how to view a copy of this
License. (Exception: if the Program itself is interactive but
does not normally print such an announcement, your work based on
the Program is not required to print an announcement.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Program, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Program.
In addition, mere aggregation of another work not based on the Program
with the Program (or with a work based on the Program) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:
a) Accompany it with the complete corresponding machine-readable
source code, which must be distributed under the terms of Sections
1 and 2 above on a medium customarily used for software interchange; or,
b) Accompany it with a written offer, valid for at least three
years, to give any third party, for a charge no more than your
cost of physically performing source distribution, a complete
machine-readable copy of the corresponding source code, to be
distributed under the terms of Sections 1 and 2 above on a medium
customarily used for software interchange; or,
c) Accompany it with the information you received as to the offer
to distribute corresponding source code. (This alternative is
allowed only for noncommercial distribution and only if you
received the program in object code or executable form with such
an offer, in accord with Subsection b above.)
The source code for a work means the preferred form of the work for
making modifications to it. For an executable work, complete source
code means all the source code for all modules it contains, plus any
associated interface definition files, plus the scripts used to
control compilation and installation of the executable. However, as a
special exception, the source code distributed need not include
anything that is normally distributed (in either source or binary
form) with the major components (compiler, kernel, and so on) of the
operating system on which the executable runs, unless that component
itself accompanies the executable.
If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.
4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this License.
However, parties who have received copies, or rights, from you under
this License will not have their licenses terminated so long as such
parties remain in full compliance.
5. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Program or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Program (or any work based on the
Program), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Program or works based on it.
6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program subject to
these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.
7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Program at all. For example, if a patent
license would not permit royalty-free redistribution of the Program by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Program.
If any portion of this section is held invalid or unenforceable under
any particular circumstance, the balance of the section is intended to
apply and the section as a whole is intended to apply in other
circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system, which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
may add an explicit geographical distribution limitation excluding
those countries, so that distribution is permitted only in or among
countries not thus excluded. In such case, this License incorporates
the limitation as if written in the body of this License.
9. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the Program
specifies a version number of this License which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation. If the Program does not specify a version number of
this License, you may choose any version ever published by the Free Software
Foundation.
10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission. For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.
NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:
Gnomovision version 69, Copyright (C) year name of author
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, the commands you use may
be called something other than `show w' and `show c'; they could even be
mouse-clicks or menu items--whatever suits your program.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the program, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
`Gnomovision' (which makes passes at compilers) written by James Hacker.
<signature of Ty Coon>, 1 April 1989
Ty Coon, President of Vice
This General Public License does not permit incorporating your program into
proprietary programs. If your program is a subroutine library, you may
consider it more useful to permit linking proprietary applications with the
library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License.

View File

@@ -0,0 +1,30 @@
BSD License
For Zstandard software
Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name Facebook, nor Meta, nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@@ -0,0 +1,36 @@
// swift-tools-version:5.0
// The swift-tools-version declares the minimum version of Swift required to build this package.
import PackageDescription
let package = Package(
name: "zstd",
platforms: [
.macOS(.v10_10), .iOS(.v9), .tvOS(.v9)
],
products: [
// Products define the executables and libraries a package produces, and make them visible to other packages.
.library(
name: "libzstd",
targets: [ "libzstd" ])
],
dependencies: [
// Dependencies declare other packages that this package depends on.
// .package(url: /* package url */, from: "1.0.0"),
],
targets: [
// Targets are the basic building blocks of a package. A target can define a module or a test suite.
// Targets can depend on other targets in this package, and on products in packages this package depends on.
.target(
name: "libzstd",
path: "lib",
sources: [ "common", "compress", "decompress", "dictBuilder" ],
publicHeadersPath: ".",
cSettings: [
.headerSearchPath(".")
])
],
swiftLanguageVersions: [.v5],
cLanguageStandard: .gnu11,
cxxLanguageStandard: .gnucxx14
)

View File

@@ -0,0 +1,237 @@
<p align="center"><img src="https://raw.githubusercontent.com/facebook/zstd/dev/doc/images/zstd_logo86.png" alt="Zstandard"></p>
__Zstandard__, or `zstd` as short version, is a fast lossless compression algorithm,
targeting real-time compression scenarios at zlib-level and better compression ratios.
It's backed by a very fast entropy stage, provided by [Huff0 and FSE library](https://github.com/Cyan4973/FiniteStateEntropy).
Zstandard's format is stable and documented in [RFC8878](https://datatracker.ietf.org/doc/html/rfc8878). Multiple independent implementations are already available.
This repository represents the reference implementation, provided as an open-source dual [BSD](LICENSE) OR [GPLv2](COPYING) licensed **C** library,
and a command line utility producing and decoding `.zst`, `.gz`, `.xz` and `.lz4` files.
Should your project require another programming language,
a list of known ports and bindings is provided on [Zstandard homepage](https://facebook.github.io/zstd/#other-languages).
**Development branch status:**
[![Build Status][travisDevBadge]][travisLink]
[![Build status][CircleDevBadge]][CircleLink]
[![Build status][CirrusDevBadge]][CirrusLink]
[![Fuzzing Status][OSSFuzzBadge]][OSSFuzzLink]
[travisDevBadge]: https://api.travis-ci.com/facebook/zstd.svg?branch=dev "Continuous Integration test suite"
[travisLink]: https://travis-ci.com/facebook/zstd
[CircleDevBadge]: https://circleci.com/gh/facebook/zstd/tree/dev.svg?style=shield "Short test suite"
[CircleLink]: https://circleci.com/gh/facebook/zstd
[CirrusDevBadge]: https://api.cirrus-ci.com/github/facebook/zstd.svg?branch=dev
[CirrusLink]: https://cirrus-ci.com/github/facebook/zstd
[OSSFuzzBadge]: https://oss-fuzz-build-logs.storage.googleapis.com/badges/zstd.svg
[OSSFuzzLink]: https://bugs.chromium.org/p/oss-fuzz/issues/list?sort=-opened&can=1&q=proj:zstd
## Benchmarks
For reference, several fast compression algorithms were tested and compared
on a desktop featuring a Core i7-9700K CPU @ 4.9GHz
and running Ubuntu 20.04 (`Linux ubu20 5.15.0-101-generic`),
using [lzbench], an open-source in-memory benchmark by @inikep
compiled with [gcc] 9.4.0,
on the [Silesia compression corpus].
[lzbench]: https://github.com/inikep/lzbench
[Silesia compression corpus]: https://sun.aei.polsl.pl//~sdeor/index.php?page=silesia
[gcc]: https://gcc.gnu.org/
| Compressor name | Ratio | Compression| Decompress.|
| --------------- | ------| -----------| ---------- |
| **zstd 1.5.6 -1** | 2.887 | 510 MB/s | 1580 MB/s |
| [zlib] 1.2.11 -1 | 2.743 | 95 MB/s | 400 MB/s |
| brotli 1.0.9 -0 | 2.702 | 395 MB/s | 430 MB/s |
| **zstd 1.5.6 --fast=1** | 2.437 | 545 MB/s | 1890 MB/s |
| **zstd 1.5.6 --fast=3** | 2.239 | 650 MB/s | 2000 MB/s |
| quicklz 1.5.0 -1 | 2.238 | 525 MB/s | 750 MB/s |
| lzo1x 2.10 -1 | 2.106 | 650 MB/s | 825 MB/s |
| [lz4] 1.9.4 | 2.101 | 700 MB/s | 4000 MB/s |
| lzf 3.6 -1 | 2.077 | 420 MB/s | 830 MB/s |
| snappy 1.1.9 | 2.073 | 530 MB/s | 1660 MB/s |
[zlib]: https://www.zlib.net/
[lz4]: https://lz4.github.io/lz4/
The negative compression levels, specified with `--fast=#`,
offer faster compression and decompression speed
at the cost of compression ratio.
Zstd can also offer stronger compression ratios at the cost of compression speed.
Speed vs Compression trade-off is configurable by small increments.
Decompression speed is preserved and remains roughly the same at all settings,
a property shared by most LZ compression algorithms, such as [zlib] or lzma.
The following tests were run
on a server running Linux Debian (`Linux version 4.14.0-3-amd64`)
with a Core i7-6700K CPU @ 4.0GHz,
using [lzbench], an open-source in-memory benchmark by @inikep
compiled with [gcc] 7.3.0,
on the [Silesia compression corpus].
Compression Speed vs Ratio | Decompression Speed
---------------------------|--------------------
![Compression Speed vs Ratio](doc/images/CSpeed2.png "Compression Speed vs Ratio") | ![Decompression Speed](doc/images/DSpeed3.png "Decompression Speed")
A few other algorithms can produce higher compression ratios at slower speeds, falling outside of the graph.
For a larger picture including slow modes, [click on this link](doc/images/DCspeed5.png).
## The case for Small Data compression
Previous charts provide results applicable to typical file and stream scenarios (several MB). Small data comes with different perspectives.
The smaller the amount of data to compress, the more difficult it is to compress. This problem is common to all compression algorithms, and reason is, compression algorithms learn from past data how to compress future data. But at the beginning of a new data set, there is no "past" to build upon.
To solve this situation, Zstd offers a __training mode__, which can be used to tune the algorithm for a selected type of data.
Training Zstandard is achieved by providing it with a few samples (one file per sample). The result of this training is stored in a file called "dictionary", which must be loaded before compression and decompression.
Using this dictionary, the compression ratio achievable on small data improves dramatically.
The following example uses the `github-users` [sample set](https://github.com/facebook/zstd/releases/tag/v1.1.3), created from [github public API](https://developer.github.com/v3/users/#get-all-users).
It consists of roughly 10K records weighing about 1KB each.
Compression Ratio | Compression Speed | Decompression Speed
------------------|-------------------|--------------------
![Compression Ratio](doc/images/dict-cr.png "Compression Ratio") | ![Compression Speed](doc/images/dict-cs.png "Compression Speed") | ![Decompression Speed](doc/images/dict-ds.png "Decompression Speed")
These compression gains are achieved while simultaneously providing _faster_ compression and decompression speeds.
Training works if there is some correlation in a family of small data samples. The more data-specific a dictionary is, the more efficient it is (there is no _universal dictionary_).
Hence, deploying one dictionary per type of data will provide the greatest benefits.
Dictionary gains are mostly effective in the first few KB. Then, the compression algorithm will gradually use previously decoded content to better compress the rest of the file.
### Dictionary compression How To:
1. Create the dictionary
`zstd --train FullPathToTrainingSet/* -o dictionaryName`
2. Compress with dictionary
`zstd -D dictionaryName FILE`
3. Decompress with dictionary
`zstd -D dictionaryName --decompress FILE.zst`
## Build instructions
`make` is the officially maintained build system of this project.
All other build systems are "compatible" and 3rd-party maintained,
they may feature small differences in advanced options.
When your system allows it, prefer using `make` to build `zstd` and `libzstd`.
### Makefile
If your system is compatible with standard `make` (or `gmake`),
invoking `make` in root directory will generate `zstd` cli in root directory.
It will also create `libzstd` into `lib/`.
Other available options include:
- `make install` : create and install zstd cli, library and man pages
- `make check` : create and run `zstd`, test its behavior on local platform
The `Makefile` follows the [GNU Standard Makefile conventions](https://www.gnu.org/prep/standards/html_node/Makefile-Conventions.html),
allowing staged install, standard flags, directory variables and command variables.
For advanced use cases, specialized compilation flags which control binary generation
are documented in [`lib/README.md`](lib/README.md#modular-build) for the `libzstd` library
and in [`programs/README.md`](programs/README.md#compilation-variables) for the `zstd` CLI.
### cmake
A `cmake` project generator is provided within `build/cmake`.
It can generate Makefiles or other build scripts
to create `zstd` binary, and `libzstd` dynamic and static libraries.
By default, `CMAKE_BUILD_TYPE` is set to `Release`.
#### Support for Fat (Universal2) Output
`zstd` can be built and installed with support for both Apple Silicon (M1/M2) as well as Intel by using CMake's Universal2 support.
To perform a Fat/Universal2 build and install use the following commands:
```bash
cmake -B build-cmake-debug -S build/cmake -G Ninja -DCMAKE_OSX_ARCHITECTURES="x86_64;x86_64h;arm64"
cd build-cmake-debug
ninja
sudo ninja install
```
### Meson
A Meson project is provided within [`build/meson`](build/meson). Follow
build instructions in that directory.
You can also take a look at [`.travis.yml`](.travis.yml) file for an
example about how Meson is used to build this project.
Note that default build type is **release**.
### VCPKG
You can build and install zstd [vcpkg](https://github.com/Microsoft/vcpkg/) dependency manager:
git clone https://github.com/Microsoft/vcpkg.git
cd vcpkg
./bootstrap-vcpkg.sh
./vcpkg integrate install
./vcpkg install zstd
The zstd port in vcpkg is kept up to date by Microsoft team members and community contributors.
If the version is out of date, please [create an issue or pull request](https://github.com/Microsoft/vcpkg) on the vcpkg repository.
### Conan
You can install pre-built binaries for zstd or build it from source using [Conan](https://conan.io/). Use the following command:
```bash
conan install --requires="zstd/[*]" --build=missing
```
The zstd Conan recipe is kept up to date by Conan maintainers and community contributors.
If the version is out of date, please [create an issue or pull request](https://github.com/conan-io/conan-center-index) on the ConanCenterIndex repository.
### Visual Studio (Windows)
Going into `build` directory, you will find additional possibilities:
- Projects for Visual Studio 2005, 2008 and 2010.
+ VS2010 project is compatible with VS2012, VS2013, VS2015 and VS2017.
- Automated build scripts for Visual compiler by [@KrzysFR](https://github.com/KrzysFR), in `build/VS_scripts`,
which will build `zstd` cli and `libzstd` library without any need to open Visual Studio solution.
### Buck
You can build the zstd binary via buck by executing: `buck build programs:zstd` from the root of the repo.
The output binary will be in `buck-out/gen/programs/`.
### Bazel
You easily can integrate zstd into your Bazel project by using the module hosted on the [Bazel Central Repository](https://registry.bazel.build/modules/zstd).
## Testing
You can run quick local smoke tests by running `make check`.
If you can't use `make`, execute the `playTest.sh` script from the `src/tests` directory.
Two env variables `$ZSTD_BIN` and `$DATAGEN_BIN` are needed for the test script to locate the `zstd` and `datagen` binary.
For information on CI testing, please refer to `TESTING.md`.
## Status
Zstandard is currently deployed within Facebook and many other large cloud infrastructures.
It is run continuously to compress large amounts of data in multiple formats and use cases.
Zstandard is considered safe for production environments.
## License
Zstandard is dual-licensed under [BSD](LICENSE) OR [GPLv2](COPYING).
## Contributing
The `dev` branch is the one where all contributions are merged before reaching `release`.
If you plan to propose a patch, please commit into the `dev` branch, or its own feature branch.
Direct commit to `release` are not permitted.
For more information, please read [CONTRIBUTING](CONTRIBUTING.md).

View File

@@ -0,0 +1,15 @@
# Reporting and Fixing Security Issues
Please do not open GitHub issues or pull requests - this makes the problem immediately visible to everyone, including malicious actors. Security issues in this open source project can be safely reported via the Meta Bug Bounty program:
https://www.facebook.com/whitehat
Meta's security team will triage your report and determine whether or not is it eligible for a bounty under our program.
# Receiving Vulnerability Notifications
In the case that a significant security vulnerability is reported to us or discovered by us---without being publicly known---we will, at our discretion, notify high-profile, high-exposure users of Zstandard ahead of our public disclosure of the issue and associated fix.
If you believe your project would benefit from inclusion in this list, please reach out to one of the maintainers.
<!-- Note to maintainers: this list is kept [here](https://fburl.com/wiki/cgc1l62x). -->

View File

@@ -0,0 +1,43 @@
Testing
=======
Zstandard CI testing is split up into three sections:
short, medium, and long tests.
Short Tests
-----------
Short tests run on CircleCI for new commits on every branch and pull request.
They consist of the following tests:
- Compilation on all supported targets (x86, x86_64, ARM, AArch64, PowerPC, and PowerPC64)
- Compilation on various versions of gcc, clang, and g++
- `tests/playTests.sh` on x86_64, without the tests on long data (CLI tests)
- Small tests (`tests/legacy.c`, `tests/longmatch.c`) on x64_64
Medium Tests
------------
Medium tests run on every commit and pull request to `dev` branch, on TravisCI.
They consist of the following tests:
- The following tests run with UBsan and Asan on x86_64 and x86, as well as with
Msan on x86_64
- `tests/playTests.sh --test-large-data`
- Fuzzer tests: `tests/fuzzer.c`, `tests/zstreamtest.c`, and `tests/decodecorpus.c`
- `tests/zstreamtest.c` under Tsan (streaming mode, including multithreaded mode)
- Valgrind Test (`make -C tests test-valgrind`) (testing CLI and fuzzer under `valgrind`)
- Fuzzer tests (see above) on ARM, AArch64, PowerPC, and PowerPC64
Long Tests
----------
Long tests run on all commits to `release` branch,
and once a day on the current version of `dev` branch,
on TravisCI.
They consist of the following tests:
- Entire test suite (including fuzzers and some other specialized tests) on:
- x86_64 and x86 with UBsan and Asan
- x86_64 with Msan
- ARM, AArch64, PowerPC, and PowerPC64
- Streaming mode fuzzer with Tsan (for the `zstdmt` testing)
- ZlibWrapper tests, including under valgrind
- Versions test (ensuring `zstd` can decode files from all previous versions)
- `pzstd` with asan and tsan, as well as in 32-bits mode
- Testing `zstd` with legacy mode off
- Entire test suite and make install on macOS

View File

@@ -0,0 +1,3 @@
## Project Support Notice
The VS2005 Project directory has been moved to the contrib directory in order to indicate that it will no longer be supported.

View File

@@ -0,0 +1,440 @@
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="8.00"
Name="fullbench"
ProjectGUID="{CC8F1D1B-BA2F-43E3-A71F-FA415D81AAD3}"
RootNamespace="fullbench"
Keyword="Win32Proj"
>
<Platforms>
<Platform
Name="Win32"
/>
<Platform
Name="x64"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
UsePrecompiledHeader="0"
WarningLevel="4"
WarnAsError="true"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="2"
GenerateDebugInformation="true"
SubSystem="1"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="2"
WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
EnableIntrinsicFunctions="true"
OmitFramePointers="true"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
UsePrecompiledHeader="0"
WarningLevel="4"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="1"
GenerateDebugInformation="true"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Debug|x64"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
UsePrecompiledHeader="0"
WarningLevel="4"
WarnAsError="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="2"
GenerateDebugInformation="true"
SubSystem="1"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|x64"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="2"
WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
EnableIntrinsicFunctions="true"
OmitFramePointers="true"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
UsePrecompiledHeader="0"
WarningLevel="4"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="1"
GenerateDebugInformation="true"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<Filter
Name="Source Files"
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
>
<File
RelativePath="..\..\..\programs\datagen.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\entropy_common.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\error_private.c"
>
</File>
<File
RelativePath="..\..\..\lib\compress\fse_compress.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\fse_decompress.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\xxhash.c"
>
</File>
<File
RelativePath="..\..\..\tests\fullbench.c"
>
</File>
<File
RelativePath="..\..\..\lib\compress\huf_compress.c"
>
</File>
<File
RelativePath="..\..\..\lib\decompress\huf_decompress.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\zstd_common.c"
>
</File>
<File
RelativePath="..\..\..\lib\compress\zstd_compress.c"
>
</File>
<File
RelativePath="..\..\..\lib\decompress\zstd_decompress.c"
>
</File>
</Filter>
<Filter
Name="Header Files"
Filter="h;hpp;hxx;hm;inl;inc;xsd"
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
>
<File
RelativePath="..\..\..\lib\common\bitstream.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\error_private.h"
>
</File>
<File
RelativePath="..\..\..\lib\zstd_errors.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\fse.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\fse_static.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\huf.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\huf_static.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\xxhash.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\mem.h"
>
</File>
<File
RelativePath="..\..\..\lib\zstd.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\zstd_internal.h"
>
</File>
<File
RelativePath="..\..\..\lib\compress\zstd_opt.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\zstd_static.h"
>
</File>
</Filter>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

View File

@@ -0,0 +1,488 @@
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="8.00"
Name="fuzzer"
ProjectGUID="{A62E89D2-9DDE-42BA-8F9B-9DA74889A6B0}"
RootNamespace="fuzzer"
Keyword="Win32Proj"
>
<Platforms>
<Platform
Name="Win32"
/>
<Platform
Name="x64"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\programs"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
UsePrecompiledHeader="0"
WarningLevel="4"
WarnAsError="true"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="2"
GenerateDebugInformation="true"
SubSystem="1"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="2"
WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
EnableIntrinsicFunctions="true"
OmitFramePointers="true"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\programs"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
UsePrecompiledHeader="0"
WarningLevel="4"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="1"
GenerateDebugInformation="true"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Debug|x64"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\programs"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
UsePrecompiledHeader="0"
WarningLevel="4"
WarnAsError="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="2"
GenerateDebugInformation="true"
SubSystem="1"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|x64"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="2"
WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
EnableIntrinsicFunctions="true"
OmitFramePointers="true"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\programs"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
UsePrecompiledHeader="0"
WarningLevel="4"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="1"
GenerateDebugInformation="true"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<Filter
Name="Source Files"
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
>
<File
RelativePath="..\..\..\programs\datagen.c"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\cover.c"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\divsufsort.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\entropy_common.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\pool.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\threading.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\error_private.c"
>
</File>
<File
RelativePath="..\..\..\lib\compress\zstdmt_compress.c"
>
</File>
<File
RelativePath="..\..\..\lib\compress\fse_compress.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\fse_decompress.c"
>
</File>
<File
RelativePath="..\..\..\tests\fuzzer.c"
>
</File>
<File
RelativePath="..\..\..\lib\compress\huf_compress.c"
>
</File>
<File
RelativePath="..\..\..\lib\decompress\huf_decompress.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\xxhash.c"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\zdict.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\zstd_common.c"
>
</File>
<File
RelativePath="..\..\..\lib\compress\zstd_compress.c"
>
</File>
<File
RelativePath="..\..\..\lib\decompress\zstd_decompress.c"
>
</File>
</Filter>
<Filter
Name="Header Files"
Filter="h;hpp;hxx;hm;inl;inc;xsd"
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
>
<File
RelativePath="..\..\..\lib\common\pool.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\threading.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\bitstream.h"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\divsufsort.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\error_private.h"
>
</File>
<File
RelativePath="..\..\..\lib\zstd_errors.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\fse.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\fse_static.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\huf.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\huf_static.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\mem.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\xxhash.h"
>
</File>
<File
RelativePath="..\..\..\lib\zdict.h"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\zdict_static.h"
>
</File>
<File
RelativePath="..\..\..\lib\zstd.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\zstd_internal.h"
>
</File>
<File
RelativePath="..\..\..\lib\compress\zstdmt_compress.h"
>
</File>
<File
RelativePath="..\..\..\lib\compress\zstd_opt.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\zstd_static.h"
>
</File>
</Filter>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

View File

@@ -0,0 +1,55 @@
Microsoft Visual Studio Solution File, Format Version 9.00
# Visual C++ Express 2005
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "zstd", "zstd\zstd.vcproj", "{1A2AB08E-5CE7-4C5B-BE55-458157C14051}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "fuzzer", "fuzzer\fuzzer.vcproj", "{A62E89D2-9DDE-42BA-8F9B-9DA74889A6B0}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "fullbench", "fullbench\fullbench.vcproj", "{CC8F1D1B-BA2F-43E3-A71F-FA415D81AAD3}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "zstdlib", "zstdlib\zstdlib.vcproj", "{99DE2A79-7298-4004-A0ED-030D7A3796CA}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Win32 = Debug|Win32
Debug|x64 = Debug|x64
Release|Win32 = Release|Win32
Release|x64 = Release|x64
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{1A2AB08E-5CE7-4C5B-BE55-458157C14051}.Debug|Win32.ActiveCfg = Debug|Win32
{1A2AB08E-5CE7-4C5B-BE55-458157C14051}.Debug|Win32.Build.0 = Debug|Win32
{1A2AB08E-5CE7-4C5B-BE55-458157C14051}.Debug|x64.ActiveCfg = Debug|x64
{1A2AB08E-5CE7-4C5B-BE55-458157C14051}.Debug|x64.Build.0 = Debug|x64
{1A2AB08E-5CE7-4C5B-BE55-458157C14051}.Release|Win32.ActiveCfg = Release|Win32
{1A2AB08E-5CE7-4C5B-BE55-458157C14051}.Release|Win32.Build.0 = Release|Win32
{1A2AB08E-5CE7-4C5B-BE55-458157C14051}.Release|x64.ActiveCfg = Release|x64
{1A2AB08E-5CE7-4C5B-BE55-458157C14051}.Release|x64.Build.0 = Release|x64
{A62E89D2-9DDE-42BA-8F9B-9DA74889A6B0}.Debug|Win32.ActiveCfg = Debug|Win32
{A62E89D2-9DDE-42BA-8F9B-9DA74889A6B0}.Debug|Win32.Build.0 = Debug|Win32
{A62E89D2-9DDE-42BA-8F9B-9DA74889A6B0}.Debug|x64.ActiveCfg = Debug|x64
{A62E89D2-9DDE-42BA-8F9B-9DA74889A6B0}.Debug|x64.Build.0 = Debug|x64
{A62E89D2-9DDE-42BA-8F9B-9DA74889A6B0}.Release|Win32.ActiveCfg = Release|Win32
{A62E89D2-9DDE-42BA-8F9B-9DA74889A6B0}.Release|Win32.Build.0 = Release|Win32
{A62E89D2-9DDE-42BA-8F9B-9DA74889A6B0}.Release|x64.ActiveCfg = Release|x64
{A62E89D2-9DDE-42BA-8F9B-9DA74889A6B0}.Release|x64.Build.0 = Release|x64
{CC8F1D1B-BA2F-43E3-A71F-FA415D81AAD3}.Debug|Win32.ActiveCfg = Debug|Win32
{CC8F1D1B-BA2F-43E3-A71F-FA415D81AAD3}.Debug|Win32.Build.0 = Debug|Win32
{CC8F1D1B-BA2F-43E3-A71F-FA415D81AAD3}.Debug|x64.ActiveCfg = Debug|x64
{CC8F1D1B-BA2F-43E3-A71F-FA415D81AAD3}.Debug|x64.Build.0 = Debug|x64
{CC8F1D1B-BA2F-43E3-A71F-FA415D81AAD3}.Release|Win32.ActiveCfg = Release|Win32
{CC8F1D1B-BA2F-43E3-A71F-FA415D81AAD3}.Release|Win32.Build.0 = Release|Win32
{CC8F1D1B-BA2F-43E3-A71F-FA415D81AAD3}.Release|x64.ActiveCfg = Release|x64
{CC8F1D1B-BA2F-43E3-A71F-FA415D81AAD3}.Release|x64.Build.0 = Release|x64
{99DE2A79-7298-4004-A0ED-030D7A3796CA}.Debug|Win32.ActiveCfg = Debug|Win32
{99DE2A79-7298-4004-A0ED-030D7A3796CA}.Debug|Win32.Build.0 = Debug|Win32
{99DE2A79-7298-4004-A0ED-030D7A3796CA}.Debug|x64.ActiveCfg = Debug|x64
{99DE2A79-7298-4004-A0ED-030D7A3796CA}.Debug|x64.Build.0 = Debug|x64
{99DE2A79-7298-4004-A0ED-030D7A3796CA}.Release|Win32.ActiveCfg = Release|Win32
{99DE2A79-7298-4004-A0ED-030D7A3796CA}.Release|Win32.Build.0 = Release|Win32
{99DE2A79-7298-4004-A0ED-030D7A3796CA}.Release|x64.ActiveCfg = Release|x64
{99DE2A79-7298-4004-A0ED-030D7A3796CA}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal

View File

@@ -0,0 +1,552 @@
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="8.00"
Name="zstd"
ProjectGUID="{1A2AB08E-5CE7-4C5B-BE55-458157C14051}"
RootNamespace="zstd"
Keyword="Win32Proj"
>
<Platforms>
<Platform
Name="Win32"
/>
<Platform
Name="x64"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\lib\compress"
PreprocessorDefinitions="ZSTD_MULTITHREAD=1;ZSTD_LEGACY_SUPPORT=4;WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
UsePrecompiledHeader="0"
WarningLevel="4"
WarnAsError="true"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
AdditionalDependencies="setargv.obj"
LinkIncremental="2"
GenerateDebugInformation="true"
SubSystem="1"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="2"
WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
EnableIntrinsicFunctions="true"
OmitFramePointers="true"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\lib\compress"
PreprocessorDefinitions="ZSTD_MULTITHREAD=1;ZSTD_LEGACY_SUPPORT=4;WIN32;NDEBUG;_CONSOLE"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
UsePrecompiledHeader="0"
WarningLevel="4"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
AdditionalDependencies="setargv.obj"
LinkIncremental="1"
GenerateDebugInformation="true"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Debug|x64"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\lib\compress"
PreprocessorDefinitions="ZSTD_MULTITHREAD=1;ZSTD_LEGACY_SUPPORT=4;WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
UsePrecompiledHeader="0"
WarningLevel="4"
WarnAsError="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
AdditionalDependencies="setargv.obj"
LinkIncremental="2"
GenerateDebugInformation="true"
SubSystem="1"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|x64"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="2"
WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
EnableIntrinsicFunctions="true"
OmitFramePointers="true"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\lib\compress"
PreprocessorDefinitions="ZSTD_MULTITHREAD=1;ZSTD_LEGACY_SUPPORT=4;WIN32;NDEBUG;_CONSOLE"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
UsePrecompiledHeader="0"
WarningLevel="4"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
AdditionalDependencies="setargv.obj"
LinkIncremental="1"
GenerateDebugInformation="true"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<Filter
Name="Source Files"
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
>
<File
RelativePath="..\..\..\programs\bench.c"
>
</File>
<File
RelativePath="..\..\..\programs\datagen.c"
>
</File>
<File
RelativePath="..\..\..\programs\dibio.c"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\cover.c"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\divsufsort.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\entropy_common.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\error_private.c"
>
</File>
<File
RelativePath="..\..\..\programs\fileio.c"
>
</File>
<File
RelativePath="..\..\..\programs\fileio_asyncio.c"
>
</File>
<File
RelativePath="..\..\..\lib\compress\fse_compress.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\fse_decompress.c"
>
</File>
<File
RelativePath="..\..\..\lib\compress\huf_compress.c"
>
</File>
<File
RelativePath="..\..\..\lib\decompress\huf_decompress.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\xxhash.c"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\zdict.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\zstd_common.c"
>
</File>
<File
RelativePath="..\..\..\lib\compress\zstd_compress.c"
>
</File>
<File
RelativePath="..\..\..\lib\compress\zstdmt_compress.c"
>
</File>
<File
RelativePath="..\..\..\lib\decompress\zstd_decompress.c"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v01.c"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v02.c"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v03.c"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v04.c"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v05.c"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v06.c"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v07.c"
>
</File>
<File
RelativePath="..\..\..\programs\zstdcli.c"
>
</File>
</Filter>
<Filter
Name="Header Files"
Filter="h;hpp;hxx;hm;inl;inc;xsd"
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
>
<File
RelativePath="..\..\..\lib\common\bitstream.h"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\divsufsort.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\error_private.h"
>
</File>
<File
RelativePath="..\..\..\lib\zstd_errors.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\fse.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\fse_static.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\huf.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\huf_static.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\mem.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\xxhash.h"
>
</File>
<File
RelativePath="..\..\..\lib\zdict.h"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\zdict_static.h"
>
</File>
<File
RelativePath="..\..\..\lib\zstd.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\zstd_internal.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_legacy.h"
>
</File>
<File
RelativePath="..\..\..\lib\compress\zstd_opt.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\zstd_static.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v01.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v02.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v03.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v04.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v05.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v06.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v07.h"
>
</File>
<File
RelativePath="..\..\..\lib\compress\zstdmt_compress.h"
>
</File>
</Filter>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

View File

@@ -0,0 +1,546 @@
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="8.00"
Name="zstdlib"
ProjectGUID="{99DE2A79-7298-4004-A0ED-030D7A3796CA}"
RootNamespace="zstdlib"
Keyword="Win32Proj"
>
<Platforms>
<Platform
Name="Win32"
/>
<Platform
Name="x64"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="2"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\dictBuilder"
PreprocessorDefinitions="ZSTD_DLL_EXPORT=1;ZSTD_MULTITHREAD=1;ZSTD_LEGACY_SUPPORT=4;WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
UsePrecompiledHeader="0"
WarningLevel="4"
WarnAsError="true"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="2"
GenerateDebugInformation="true"
SubSystem="1"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="2"
CharacterSet="2"
WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
EnableIntrinsicFunctions="true"
OmitFramePointers="true"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\dictBuilder"
PreprocessorDefinitions="ZSTD_DLL_EXPORT=1;ZSTD_MULTITHREAD=1;ZSTD_LEGACY_SUPPORT=4;WIN32;NDEBUG;_CONSOLE"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
UsePrecompiledHeader="0"
WarningLevel="4"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="1"
GenerateDebugInformation="true"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Debug|x64"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="2"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\dictBuilder"
PreprocessorDefinitions="ZSTD_DLL_EXPORT=1;ZSTD_MULTITHREAD=1;ZSTD_LEGACY_SUPPORT=4;WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
UsePrecompiledHeader="0"
WarningLevel="4"
WarnAsError="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="2"
GenerateDebugInformation="true"
SubSystem="1"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|x64"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="2"
CharacterSet="2"
WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
EnableIntrinsicFunctions="true"
OmitFramePointers="true"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\dictBuilder"
PreprocessorDefinitions="ZSTD_DLL_EXPORT=1;ZSTD_MULTITHREAD=1;ZSTD_LEGACY_SUPPORT=4;WIN32;NDEBUG;_CONSOLE"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
UsePrecompiledHeader="0"
WarningLevel="4"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="1"
GenerateDebugInformation="true"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<Filter
Name="Source Files"
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
>
<File
RelativePath="..\..\..\lib\dictBuilder\cover.c"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\divsufsort.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\pool.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\threading.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\entropy_common.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\error_private.c"
>
</File>
<File
RelativePath="..\..\..\lib\compress\fse_compress.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\fse_decompress.c"
>
</File>
<File
RelativePath="..\..\..\lib\compress\huf_compress.c"
>
</File>
<File
RelativePath="..\..\..\lib\decompress\huf_decompress.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\xxhash.c"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\zdict.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\zstd_common.c"
>
</File>
<File
RelativePath="..\..\..\lib\compress\zstdmt_compress.c"
>
</File>
<File
RelativePath="..\..\..\lib\compress\zstd_compress.c"
>
</File>
<File
RelativePath="..\..\..\lib\decompress\zstd_decompress.c"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v01.c"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v02.c"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v03.c"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v04.c"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v05.c"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v06.c"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v07.c"
>
</File>
</Filter>
<Filter
Name="Header Files"
Filter="h;hpp;hxx;hm;inl;inc;xsd"
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
>
<File
RelativePath="..\..\..\lib\common\bitstream.h"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\divsufsort.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\pool.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\threading.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\error_private.h"
>
</File>
<File
RelativePath="..\..\..\lib\zstd_errors.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\fse.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\fse_static.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\huf.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\huf_static.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\mem.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\xxhash.h"
>
</File>
<File
RelativePath="..\..\..\lib\zdict.h"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\zdict_static.h"
>
</File>
<File
RelativePath="..\..\..\lib\zstd.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\zstd_internal.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_legacy.h"
>
</File>
<File
RelativePath="..\..\..\lib\compress\zstd_opt.h"
>
</File>
<File
RelativePath="..\..\..\lib\compress\zstdmt_compress.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\zstd_static.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v01.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v02.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v03.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v04.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v05.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v06.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v07.h"
>
</File>
</Filter>
<Filter
Name="Resource Files"
Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
>
</Filter>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

View File

@@ -0,0 +1,2 @@
#!/bin/sh
sed -i '' $'s/\t/ /g' ../lib/**/*.{h,c} ../programs/*.{h,c} ../tests/*.c ./**/*.{h,cpp} ../examples/*.c ../zlibWrapper/*.{h,c}

View File

@@ -0,0 +1 @@
check_flipped_bits

View File

@@ -0,0 +1,400 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"
#include "zstd_errors.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
typedef struct {
char *input;
size_t input_size;
char *perturbed; /* same size as input */
char *output;
size_t output_size;
const char *dict_file_name;
const char *dict_file_dir_name;
int32_t dict_id;
char *dict;
size_t dict_size;
ZSTD_DDict* ddict;
ZSTD_DCtx* dctx;
int success_count;
int error_counts[ZSTD_error_maxCode];
} stuff_t;
static void free_stuff(stuff_t* stuff) {
free(stuff->input);
free(stuff->output);
ZSTD_freeDDict(stuff->ddict);
free(stuff->dict);
ZSTD_freeDCtx(stuff->dctx);
}
static void usage(void) {
fprintf(stderr, "check_flipped_bits input_filename [-d dict] [-D dict_dir]\n");
fprintf(stderr, "\n");
fprintf(stderr, "Arguments:\n");
fprintf(stderr, " -d file: path to a dictionary file to use.\n");
fprintf(stderr, " -D dir : path to a directory, with files containing dictionaries, of the\n"
" form DICTID.zstd-dict, e.g., 12345.zstd-dict.\n");
exit(1);
}
static void print_summary(stuff_t* stuff) {
int error_code;
fprintf(stderr, "%9d successful decompressions\n", stuff->success_count);
for (error_code = 0; error_code < ZSTD_error_maxCode; error_code++) {
int count = stuff->error_counts[error_code];
if (count) {
fprintf(
stderr, "%9d failed decompressions with message: %s\n",
count, ZSTD_getErrorString(error_code));
}
}
}
static char* readFile(const char* filename, size_t* size) {
struct stat statbuf;
int ret;
FILE* f;
char *buf;
size_t bytes_read;
ret = stat(filename, &statbuf);
if (ret != 0) {
fprintf(stderr, "stat failed: %m\n");
return NULL;
}
if ((statbuf.st_mode & S_IFREG) != S_IFREG) {
fprintf(stderr, "Input must be regular file\n");
return NULL;
}
*size = statbuf.st_size;
f = fopen(filename, "r");
if (f == NULL) {
fprintf(stderr, "fopen failed: %m\n");
return NULL;
}
buf = malloc(*size);
if (buf == NULL) {
fprintf(stderr, "malloc failed\n");
fclose(f);
return NULL;
}
bytes_read = fread(buf, 1, *size, f);
if (bytes_read != *size) {
fprintf(stderr, "failed to read whole file\n");
fclose(f);
free(buf);
return NULL;
}
ret = fclose(f);
if (ret != 0) {
fprintf(stderr, "fclose failed: %m\n");
free(buf);
return NULL;
}
return buf;
}
static ZSTD_DDict* readDict(const char* filename, char **buf, size_t* size, int32_t* dict_id) {
ZSTD_DDict* ddict;
*buf = readFile(filename, size);
if (*buf == NULL) {
fprintf(stderr, "Opening dictionary file '%s' failed\n", filename);
return NULL;
}
ddict = ZSTD_createDDict_advanced(*buf, *size, ZSTD_dlm_byRef, ZSTD_dct_auto, ZSTD_defaultCMem);
if (ddict == NULL) {
fprintf(stderr, "Failed to create ddict.\n");
return NULL;
}
if (dict_id != NULL) {
*dict_id = ZSTD_getDictID_fromDDict(ddict);
}
return ddict;
}
static ZSTD_DDict* readDictByID(stuff_t *stuff, int32_t dict_id, char **buf, size_t* size) {
if (stuff->dict_file_dir_name == NULL) {
return NULL;
} else {
size_t dir_name_len = strlen(stuff->dict_file_dir_name);
int dir_needs_separator = 0;
size_t dict_file_name_alloc_size = dir_name_len + 1 /* '/' */ + 10 /* max int32_t len */ + strlen(".zstd-dict") + 1 /* '\0' */;
char *dict_file_name = malloc(dict_file_name_alloc_size);
ZSTD_DDict* ddict;
int32_t read_dict_id;
if (dict_file_name == NULL) {
fprintf(stderr, "malloc failed.\n");
return 0;
}
if (dir_name_len > 0 && stuff->dict_file_dir_name[dir_name_len - 1] != '/') {
dir_needs_separator = 1;
}
snprintf(
dict_file_name,
dict_file_name_alloc_size,
"%s%s%u.zstd-dict",
stuff->dict_file_dir_name,
dir_needs_separator ? "/" : "",
dict_id);
/* fprintf(stderr, "Loading dict %u from '%s'.\n", dict_id, dict_file_name); */
ddict = readDict(dict_file_name, buf, size, &read_dict_id);
if (ddict == NULL) {
fprintf(stderr, "Failed to create ddict from '%s'.\n", dict_file_name);
free(dict_file_name);
return 0;
}
if (read_dict_id != dict_id) {
fprintf(stderr, "Read dictID (%u) does not match expected (%u).\n", read_dict_id, dict_id);
free(dict_file_name);
ZSTD_freeDDict(ddict);
return 0;
}
free(dict_file_name);
return ddict;
}
}
static int init_stuff(stuff_t* stuff, int argc, char *argv[]) {
const char* input_filename;
if (argc < 2) {
usage();
}
input_filename = argv[1];
stuff->input_size = 0;
stuff->input = readFile(input_filename, &stuff->input_size);
if (stuff->input == NULL) {
fprintf(stderr, "Failed to read input file.\n");
return 0;
}
stuff->perturbed = malloc(stuff->input_size);
if (stuff->perturbed == NULL) {
fprintf(stderr, "malloc failed.\n");
return 0;
}
memcpy(stuff->perturbed, stuff->input, stuff->input_size);
stuff->output_size = ZSTD_DStreamOutSize();
stuff->output = malloc(stuff->output_size);
if (stuff->output == NULL) {
fprintf(stderr, "malloc failed.\n");
return 0;
}
stuff->dict_file_name = NULL;
stuff->dict_file_dir_name = NULL;
stuff->dict_id = 0;
stuff->dict = NULL;
stuff->dict_size = 0;
stuff->ddict = NULL;
if (argc > 2) {
if (!strcmp(argv[2], "-d")) {
if (argc > 3) {
stuff->dict_file_name = argv[3];
} else {
usage();
}
} else
if (!strcmp(argv[2], "-D")) {
if (argc > 3) {
stuff->dict_file_dir_name = argv[3];
} else {
usage();
}
} else {
usage();
}
}
if (stuff->dict_file_dir_name) {
int32_t dict_id = ZSTD_getDictID_fromFrame(stuff->input, stuff->input_size);
if (dict_id != 0) {
stuff->ddict = readDictByID(stuff, dict_id, &stuff->dict, &stuff->dict_size);
if (stuff->ddict == NULL) {
fprintf(stderr, "Failed to create cached ddict.\n");
return 0;
}
stuff->dict_id = dict_id;
}
} else
if (stuff->dict_file_name) {
stuff->ddict = readDict(stuff->dict_file_name, &stuff->dict, &stuff->dict_size, &stuff->dict_id);
if (stuff->ddict == NULL) {
fprintf(stderr, "Failed to create ddict from '%s'.\n", stuff->dict_file_name);
return 0;
}
}
stuff->dctx = ZSTD_createDCtx();
if (stuff->dctx == NULL) {
return 0;
}
stuff->success_count = 0;
memset(stuff->error_counts, 0, sizeof(stuff->error_counts));
return 1;
}
static int test_decompress(stuff_t* stuff) {
size_t ret;
ZSTD_inBuffer in = {stuff->perturbed, stuff->input_size, 0};
ZSTD_outBuffer out = {stuff->output, stuff->output_size, 0};
ZSTD_DCtx* dctx = stuff->dctx;
int32_t custom_dict_id = ZSTD_getDictID_fromFrame(in.src, in.size);
char *custom_dict = NULL;
size_t custom_dict_size = 0;
ZSTD_DDict* custom_ddict = NULL;
if (custom_dict_id != 0 && custom_dict_id != stuff->dict_id) {
/* fprintf(stderr, "Instead of dict %u, this perturbed blob wants dict %u.\n", stuff->dict_id, custom_dict_id); */
custom_ddict = readDictByID(stuff, custom_dict_id, &custom_dict, &custom_dict_size);
}
ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only);
if (custom_ddict != NULL) {
ZSTD_DCtx_refDDict(dctx, custom_ddict);
} else {
ZSTD_DCtx_refDDict(dctx, stuff->ddict);
}
while (in.pos != in.size) {
out.pos = 0;
ret = ZSTD_decompressStream(dctx, &out, &in);
if (ZSTD_isError(ret)) {
unsigned int code = ZSTD_getErrorCode(ret);
if (code >= ZSTD_error_maxCode) {
fprintf(stderr, "Received unexpected error code!\n");
exit(1);
}
stuff->error_counts[code]++;
/*
fprintf(
stderr, "Decompression failed: %s\n", ZSTD_getErrorName(ret));
*/
if (custom_ddict != NULL) {
ZSTD_freeDDict(custom_ddict);
free(custom_dict);
}
return 0;
}
}
stuff->success_count++;
if (custom_ddict != NULL) {
ZSTD_freeDDict(custom_ddict);
free(custom_dict);
}
return 1;
}
static int perturb_bits(stuff_t* stuff) {
size_t pos;
size_t bit;
for (pos = 0; pos < stuff->input_size; pos++) {
unsigned char old_val = stuff->input[pos];
if (pos % 1000 == 0) {
fprintf(stderr, "Perturbing byte %zu / %zu\n", pos, stuff->input_size);
}
for (bit = 0; bit < 8; bit++) {
unsigned char new_val = old_val ^ (1 << bit);
stuff->perturbed[pos] = new_val;
if (test_decompress(stuff)) {
fprintf(
stderr,
"Flipping byte %zu bit %zu (0x%02x -> 0x%02x) "
"produced a successful decompression!\n",
pos, bit, old_val, new_val);
}
}
stuff->perturbed[pos] = old_val;
}
return 1;
}
static int perturb_bytes(stuff_t* stuff) {
size_t pos;
size_t new_val;
for (pos = 0; pos < stuff->input_size; pos++) {
unsigned char old_val = stuff->input[pos];
if (pos % 1000 == 0) {
fprintf(stderr, "Perturbing byte %zu / %zu\n", pos, stuff->input_size);
}
for (new_val = 0; new_val < 256; new_val++) {
stuff->perturbed[pos] = new_val;
if (test_decompress(stuff)) {
fprintf(
stderr,
"Changing byte %zu (0x%02x -> 0x%02x) "
"produced a successful decompression!\n",
pos, old_val, (unsigned char)new_val);
}
}
stuff->perturbed[pos] = old_val;
}
return 1;
}
int main(int argc, char* argv[]) {
stuff_t stuff;
if(!init_stuff(&stuff, argc, argv)) {
fprintf(stderr, "Failed to init.\n");
return 1;
}
if (test_decompress(&stuff)) {
fprintf(stderr, "Blob already decompresses successfully!\n");
return 1;
}
perturb_bits(&stuff);
perturb_bytes(&stuff);
print_summary(&stuff);
free_stuff(&stuff);
return 0;
}

View File

@@ -0,0 +1,20 @@
# Dockerfile
# First image to build the binary
FROM alpine@sha256:69665d02cb32192e52e07644d76bc6f25abeb5410edc1c7a81a10ba3f0efb90a as builder
RUN apk --no-cache add make gcc libc-dev
COPY . /src
RUN mkdir /pkg && cd /src && make && make DESTDIR=/pkg install
# Second minimal image to only keep the built binary
FROM alpine@sha256:69665d02cb32192e52e07644d76bc6f25abeb5410edc1c7a81a10ba3f0efb90a
# Copy the built files
COPY --from=builder /pkg /
# Copy the license as well
RUN mkdir -p /usr/local/share/licenses/zstd
COPY --from=builder /src/LICENSE /usr/local/share/licences/zstd/
# Just run `zstd` if no other command is given
CMD ["/usr/local/bin/zstd"]

View File

@@ -0,0 +1,20 @@
## Requirement
The `Dockerfile` script requires a version of `docker` >= 17.05
## Installing docker
The official docker install docs use a ppa with a modern version available:
https://docs.docker.com/install/linux/docker-ce/ubuntu/
## How to run
`docker build -t zstd .`
## test
```
echo foo | docker run -i --rm zstd | docker run -i --rm zstd zstdcat
foo
```

View File

@@ -0,0 +1,2 @@
# build artifacts
externalSequenceProducer

View File

@@ -0,0 +1,14 @@
externalSequenceProducer
=====================
`externalSequenceProducer` is a test tool for the Block-Level Sequence Producer API.
It demonstrates how to use the API to perform a simple round-trip test.
A sample sequence producer is provided in sequence_producer.c, but the user can swap
this out with a different one if desired. The sample sequence producer implements
LZ parsing with a 1KB hashtable. Dictionary-based parsing is not currently supported.
Command line :
```
externalSequenceProducer filename
```

View File

@@ -0,0 +1,107 @@
/*
* Copyright (c) Yann Collet, Meta Platforms, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"
#include "zstd_errors.h"
#include "sequence_producer.h" // simpleSequenceProducer
#define CHECK(res) \
do { \
if (ZSTD_isError(res)) { \
printf("ERROR: %s\n", ZSTD_getErrorName(res)); \
return 1; \
} \
} while (0) \
int main(int argc, char *argv[]) {
if (argc != 2) {
printf("Usage: externalSequenceProducer <file>\n");
return 1;
}
ZSTD_CCtx* const zc = ZSTD_createCCtx();
int simpleSequenceProducerState = 0xdeadbeef;
// Here is the crucial bit of code!
ZSTD_registerSequenceProducer(
zc,
&simpleSequenceProducerState,
simpleSequenceProducer
);
{
size_t const res = ZSTD_CCtx_setParameter(zc, ZSTD_c_enableSeqProducerFallback, 1);
CHECK(res);
}
FILE *f = fopen(argv[1], "rb");
assert(f);
{
int const ret = fseek(f, 0, SEEK_END);
assert(ret == 0);
}
size_t const srcSize = ftell(f);
{
int const ret = fseek(f, 0, SEEK_SET);
assert(ret == 0);
}
char* const src = malloc(srcSize + 1);
assert(src);
{
size_t const ret = fread(src, srcSize, 1, f);
assert(ret == 1);
int const ret2 = fclose(f);
assert(ret2 == 0);
}
size_t const dstSize = ZSTD_compressBound(srcSize);
char* const dst = malloc(dstSize);
assert(dst);
size_t const cSize = ZSTD_compress2(zc, dst, dstSize, src, srcSize);
CHECK(cSize);
char* const val = malloc(srcSize);
assert(val);
{
size_t const res = ZSTD_decompress(val, srcSize, dst, cSize);
CHECK(res);
}
if (memcmp(src, val, srcSize) == 0) {
printf("Compression and decompression were successful!\n");
printf("Original size: %lu\n", srcSize);
printf("Compressed size: %lu\n", cSize);
} else {
printf("ERROR: input and validation buffers don't match!\n");
for (size_t i = 0; i < srcSize; i++) {
if (src[i] != val[i]) {
printf("First bad index: %zu\n", i);
break;
}
}
return 1;
}
ZSTD_freeCCtx(zc);
free(src);
free(dst);
free(val);
return 0;
}

View File

@@ -0,0 +1,80 @@
/*
* Copyright (c) Yann Collet, Meta Platforms, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#include "zstd_compress_internal.h"
#include "sequence_producer.h"
#define HSIZE 1024
static U32 const HLOG = 10;
static U32 const MLS = 4;
static U32 const BADIDX = 0xffffffff;
size_t simpleSequenceProducer(
void* sequenceProducerState,
ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
const void* src, size_t srcSize,
const void* dict, size_t dictSize,
int compressionLevel,
size_t windowSize
) {
const BYTE* const istart = (const BYTE*)src;
const BYTE* const iend = istart + srcSize;
const BYTE* ip = istart;
const BYTE* anchor = istart;
size_t seqCount = 0;
U32 hashTable[HSIZE];
(void)sequenceProducerState;
(void)dict;
(void)dictSize;
(void)outSeqsCapacity;
(void)compressionLevel;
{ int i;
for (i=0; i < HSIZE; i++) {
hashTable[i] = BADIDX;
} }
while (ip + MLS < iend) {
size_t const hash = ZSTD_hashPtr(ip, HLOG, MLS);
U32 const matchIndex = hashTable[hash];
hashTable[hash] = (U32)(ip - istart);
if (matchIndex != BADIDX) {
const BYTE* const match = istart + matchIndex;
U32 const matchLen = (U32)ZSTD_count(ip, match, iend);
if (matchLen >= ZSTD_MINMATCH_MIN) {
U32 const litLen = (U32)(ip - anchor);
U32 const offset = (U32)(ip - match);
ZSTD_Sequence const seq = {
offset, litLen, matchLen, 0
};
/* Note: it's crucial to stay within the window size! */
if (offset <= windowSize) {
outSeqs[seqCount++] = seq;
ip += matchLen;
anchor = ip;
continue;
}
}
}
ip++;
}
{ ZSTD_Sequence const finalSeq = {
0, (U32)(iend - anchor), 0, 0
};
outSeqs[seqCount++] = finalSeq;
}
return seqCount;
}

View File

@@ -0,0 +1,26 @@
/*
* Copyright (c) Yann Collet, Meta Platforms, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef MATCHFINDER_H
#define MATCHFINDER_H
#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"
size_t simpleSequenceProducer(
void* sequenceProducerState,
ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
const void* src, size_t srcSize,
const void* dict, size_t dictSize,
int compressionLevel,
size_t windowSize
);
#endif

View File

@@ -0,0 +1,774 @@
#!/usr/bin/env python3
# ################################################################
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# LICENSE file in the root directory of this source tree) and the GPLv2 (found
# in the COPYING file in the root directory of this source tree).
# You may select, at your option, one of the above-listed licenses.
# ##########################################################################
import argparse
import contextlib
import os
import re
import shutil
import sys
from typing import Optional
INCLUDED_SUBDIRS = ["common", "compress", "decompress"]
SKIPPED_FILES = [
"common/mem.h",
"common/zstd_deps.h",
"common/pool.c",
"common/pool.h",
"common/threading.c",
"common/threading.h",
"common/zstd_trace.h",
"compress/zstdmt_compress.h",
"compress/zstdmt_compress.c",
]
XXHASH_FILES = [
"common/xxhash.c",
"common/xxhash.h",
]
class FileLines(object):
def __init__(self, filename):
self.filename = filename
with open(self.filename, "r") as f:
self.lines = f.readlines()
def write(self):
with open(self.filename, "w") as f:
f.write("".join(self.lines))
class PartialPreprocessor(object):
"""
Looks for simple ifdefs and ifndefs and replaces them.
Handles && and ||.
Has fancy logic to handle translating elifs to ifs.
Only looks for macros in the first part of the expression with no
parens.
Does not handle multi-line macros (only looks in first line).
"""
def __init__(self, defs: [(str, Optional[str])], replaces: [(str, str)], undefs: [str]):
MACRO_GROUP = r"(?P<macro>[a-zA-Z_][a-zA-Z_0-9]*)"
ELIF_GROUP = r"(?P<elif>el)?"
OP_GROUP = r"(?P<op>&&|\|\|)?"
self._defs = {macro:value for macro, value in defs}
self._replaces = {macro:value for macro, value in replaces}
self._defs.update(self._replaces)
self._undefs = set(undefs)
self._define = re.compile(r"\s*#\s*define")
self._if = re.compile(r"\s*#\s*if")
self._elif = re.compile(r"\s*#\s*(?P<elif>el)if")
self._else = re.compile(r"\s*#\s*(?P<else>else)")
self._endif = re.compile(r"\s*#\s*endif")
self._ifdef = re.compile(fr"\s*#\s*if(?P<not>n)?def {MACRO_GROUP}\s*")
self._if_defined = re.compile(
fr"\s*#\s*{ELIF_GROUP}if\s+(?P<not>!)?\s*defined\s*\(\s*{MACRO_GROUP}\s*\)\s*{OP_GROUP}"
)
self._if_defined_value = re.compile(
fr"\s*#\s*{ELIF_GROUP}if\s+defined\s*\(\s*{MACRO_GROUP}\s*\)\s*"
fr"(?P<op>&&)\s*"
fr"(?P<openp>\()?\s*"
fr"(?P<macro2>[a-zA-Z_][a-zA-Z_0-9]*)\s*"
fr"(?P<cmp>[=><!]+)\s*"
fr"(?P<value>[0-9]*)\s*"
fr"(?P<closep>\))?\s*"
)
self._if_true = re.compile(
fr"\s*#\s*{ELIF_GROUP}if\s+{MACRO_GROUP}\s*{OP_GROUP}"
)
self._c_comment = re.compile(r"/\*.*?\*/")
self._cpp_comment = re.compile(r"//")
def _log(self, *args, **kwargs):
print(*args, **kwargs)
def _strip_comments(self, line):
# First strip c-style comments (may include //)
while True:
m = self._c_comment.search(line)
if m is None:
break
line = line[:m.start()] + line[m.end():]
# Then strip cpp-style comments
m = self._cpp_comment.search(line)
if m is not None:
line = line[:m.start()]
return line
def _fixup_indentation(self, macro, replace: [str]):
if len(replace) == 0:
return replace
if len(replace) == 1 and self._define.match(replace[0]) is None:
# If there is only one line, only replace defines
return replace
all_pound = True
for line in replace:
if not line.startswith('#'):
all_pound = False
if all_pound:
replace = [line[1:] for line in replace]
min_spaces = len(replace[0])
for line in replace:
spaces = 0
for i, c in enumerate(line):
if c != ' ':
# Non-preprocessor line ==> skip the fixup
if not all_pound and c != '#':
return replace
spaces = i
break
min_spaces = min(min_spaces, spaces)
replace = [line[min_spaces:] for line in replace]
if all_pound:
replace = ["#" + line for line in replace]
return replace
def _handle_if_block(self, macro, idx, is_true, prepend):
"""
Remove the #if or #elif block starting on this line.
"""
REMOVE_ONE = 0
KEEP_ONE = 1
REMOVE_REST = 2
if is_true:
state = KEEP_ONE
else:
state = REMOVE_ONE
line = self._inlines[idx]
is_if = self._if.match(line) is not None
assert is_if or self._elif.match(line) is not None
depth = 0
start_idx = idx
idx += 1
replace = prepend
finished = False
while idx < len(self._inlines):
line = self._inlines[idx]
# Nested if statement
if self._if.match(line):
depth += 1
idx += 1
continue
# We're inside a nested statement
if depth > 0:
if self._endif.match(line):
depth -= 1
idx += 1
continue
# We're at the original depth
# Looking only for an endif.
# We've found a true statement, but haven't
# completely elided the if block, so we just
# remove the remainder.
if state == REMOVE_REST:
if self._endif.match(line):
if is_if:
# Remove the endif because we took the first if
idx += 1
finished = True
break
idx += 1
continue
if state == KEEP_ONE:
m = self._elif.match(line)
if self._endif.match(line):
replace += self._inlines[start_idx + 1:idx]
idx += 1
finished = True
break
if self._elif.match(line) or self._else.match(line):
replace += self._inlines[start_idx + 1:idx]
state = REMOVE_REST
idx += 1
continue
if state == REMOVE_ONE:
m = self._elif.match(line)
if m is not None:
if is_if:
idx += 1
b = m.start('elif')
e = m.end('elif')
assert e - b == 2
replace.append(line[:b] + line[e:])
finished = True
break
m = self._else.match(line)
if m is not None:
if is_if:
idx += 1
while self._endif.match(self._inlines[idx]) is None:
replace.append(self._inlines[idx])
idx += 1
idx += 1
finished = True
break
if self._endif.match(line):
if is_if:
# Remove the endif because no other elifs
idx += 1
finished = True
break
idx += 1
continue
if not finished:
raise RuntimeError("Unterminated if block!")
replace = self._fixup_indentation(macro, replace)
self._log(f"\tHardwiring {macro}")
if start_idx > 0:
self._log(f"\t\t {self._inlines[start_idx - 1][:-1]}")
for x in range(start_idx, idx):
self._log(f"\t\t- {self._inlines[x][:-1]}")
for line in replace:
self._log(f"\t\t+ {line[:-1]}")
if idx < len(self._inlines):
self._log(f"\t\t {self._inlines[idx][:-1]}")
return idx, replace
def _preprocess_once(self):
outlines = []
idx = 0
changed = False
while idx < len(self._inlines):
line = self._inlines[idx]
sline = self._strip_comments(line)
m = self._ifdef.fullmatch(sline)
if_true = False
if m is None:
m = self._if_defined_value.fullmatch(sline)
if m is None:
m = self._if_defined.match(sline)
if m is None:
m = self._if_true.match(sline)
if_true = (m is not None)
if m is None:
outlines.append(line)
idx += 1
continue
groups = m.groupdict()
macro = groups['macro']
op = groups.get('op')
if not (macro in self._defs or macro in self._undefs):
outlines.append(line)
idx += 1
continue
defined = macro in self._defs
# Needed variables set:
# resolved: Is the statement fully resolved?
# is_true: If resolved, is the statement true?
ifdef = False
if if_true:
if not defined:
outlines.append(line)
idx += 1
continue
defined_value = self._defs[macro]
is_int = True
try:
defined_value = int(defined_value)
except TypeError:
is_int = False
except ValueError:
is_int = False
resolved = is_int
is_true = (defined_value != 0)
if resolved and op is not None:
if op == '&&':
resolved = not is_true
else:
assert op == '||'
resolved = is_true
else:
ifdef = groups.get('not') is None
elseif = groups.get('elif') is not None
macro2 = groups.get('macro2')
cmp = groups.get('cmp')
value = groups.get('value')
openp = groups.get('openp')
closep = groups.get('closep')
is_true = (ifdef == defined)
resolved = True
if op is not None:
if op == '&&':
resolved = not is_true
else:
assert op == '||'
resolved = is_true
if macro2 is not None and not resolved:
assert ifdef and defined and op == '&&' and cmp is not None
# If the statement is true, but we have a single value check, then
# check the value.
defined_value = self._defs[macro]
are_ints = True
try:
defined_value = int(defined_value)
value = int(value)
except TypeError:
are_ints = False
except ValueError:
are_ints = False
if (
macro == macro2 and
((openp is None) == (closep is None)) and
are_ints
):
resolved = True
if cmp == '<':
is_true = defined_value < value
elif cmp == '<=':
is_true = defined_value <= value
elif cmp == '==':
is_true = defined_value == value
elif cmp == '!=':
is_true = defined_value != value
elif cmp == '>=':
is_true = defined_value >= value
elif cmp == '>':
is_true = defined_value > value
else:
resolved = False
if op is not None and not resolved:
# Remove the first op in the line + spaces
if op == '&&':
opre = op
else:
assert op == '||'
opre = r'\|\|'
needle = re.compile(fr"(?P<if>\s*#\s*(el)?if\s+).*?(?P<op>{opre}\s*)")
match = needle.match(line)
assert match is not None
newline = line[:match.end('if')] + line[match.end('op'):]
self._log(f"\tHardwiring partially resolved {macro}")
self._log(f"\t\t- {line[:-1]}")
self._log(f"\t\t+ {newline[:-1]}")
outlines.append(newline)
idx += 1
continue
# Skip any statements we cannot fully compute
if not resolved:
outlines.append(line)
idx += 1
continue
prepend = []
if macro in self._replaces:
assert not ifdef
assert op is None
value = self._replaces.pop(macro)
prepend = [f"#define {macro} {value}\n"]
idx, replace = self._handle_if_block(macro, idx, is_true, prepend)
outlines += replace
changed = True
return changed, outlines
def preprocess(self, filename):
with open(filename, 'r') as f:
self._inlines = f.readlines()
changed = True
iters = 0
while changed:
iters += 1
changed, outlines = self._preprocess_once()
self._inlines = outlines
with open(filename, 'w') as f:
f.write(''.join(self._inlines))
class Freestanding(object):
def __init__(
self, zstd_deps: str, mem: str, source_lib: str, output_lib: str,
external_xxhash: bool, xxh64_state: Optional[str],
xxh64_prefix: Optional[str], rewritten_includes: [(str, str)],
defs: [(str, Optional[str])], replaces: [(str, str)],
undefs: [str], excludes: [str], seds: [str], spdx: bool,
):
self._zstd_deps = zstd_deps
self._mem = mem
self._src_lib = source_lib
self._dst_lib = output_lib
self._external_xxhash = external_xxhash
self._xxh64_state = xxh64_state
self._xxh64_prefix = xxh64_prefix
self._rewritten_includes = rewritten_includes
self._defs = defs
self._replaces = replaces
self._undefs = undefs
self._excludes = excludes
self._seds = seds
self._spdx = spdx
def _dst_lib_file_paths(self):
"""
Yields all the file paths in the dst_lib.
"""
for root, dirname, filenames in os.walk(self._dst_lib):
for filename in filenames:
filepath = os.path.join(root, filename)
yield filepath
def _log(self, *args, **kwargs):
print(*args, **kwargs)
def _copy_file(self, lib_path):
suffixes = [".c", ".h", ".S"]
if not any((lib_path.endswith(suffix) for suffix in suffixes)):
return
if lib_path in SKIPPED_FILES:
self._log(f"\tSkipping file: {lib_path}")
return
if self._external_xxhash and lib_path in XXHASH_FILES:
self._log(f"\tSkipping xxhash file: {lib_path}")
return
src_path = os.path.join(self._src_lib, lib_path)
dst_path = os.path.join(self._dst_lib, lib_path)
self._log(f"\tCopying: {src_path} -> {dst_path}")
shutil.copyfile(src_path, dst_path)
def _copy_source_lib(self):
self._log("Copying source library into output library")
assert os.path.exists(self._src_lib)
os.makedirs(self._dst_lib, exist_ok=True)
self._copy_file("zstd.h")
self._copy_file("zstd_errors.h")
for subdir in INCLUDED_SUBDIRS:
src_dir = os.path.join(self._src_lib, subdir)
dst_dir = os.path.join(self._dst_lib, subdir)
assert os.path.exists(src_dir)
os.makedirs(dst_dir, exist_ok=True)
for filename in os.listdir(src_dir):
lib_path = os.path.join(subdir, filename)
self._copy_file(lib_path)
def _copy_zstd_deps(self):
dst_zstd_deps = os.path.join(self._dst_lib, "common", "zstd_deps.h")
self._log(f"Copying zstd_deps: {self._zstd_deps} -> {dst_zstd_deps}")
shutil.copyfile(self._zstd_deps, dst_zstd_deps)
def _copy_mem(self):
dst_mem = os.path.join(self._dst_lib, "common", "mem.h")
self._log(f"Copying mem: {self._mem} -> {dst_mem}")
shutil.copyfile(self._mem, dst_mem)
def _hardwire_preprocessor(self, name: str, value: Optional[str] = None, undef=False):
"""
If value=None then hardwire that it is defined, but not what the value is.
If undef=True then value must be None.
If value='' then the macro is defined to '' exactly.
"""
assert not (undef and value is not None)
for filepath in self._dst_lib_file_paths():
file = FileLines(filepath)
def _hardwire_defines(self):
self._log("Hardwiring macros")
partial_preprocessor = PartialPreprocessor(self._defs, self._replaces, self._undefs)
for filepath in self._dst_lib_file_paths():
partial_preprocessor.preprocess(filepath)
def _remove_excludes(self):
self._log("Removing excluded sections")
for exclude in self._excludes:
self._log(f"\tRemoving excluded sections for: {exclude}")
begin_re = re.compile(f"BEGIN {exclude}")
end_re = re.compile(f"END {exclude}")
for filepath in self._dst_lib_file_paths():
file = FileLines(filepath)
outlines = []
skipped = []
emit = True
for line in file.lines:
if emit and begin_re.search(line) is not None:
assert end_re.search(line) is None
emit = False
if emit:
outlines.append(line)
else:
skipped.append(line)
if end_re.search(line) is not None:
assert begin_re.search(line) is None
self._log(f"\t\tRemoving excluded section: {exclude}")
for s in skipped:
self._log(f"\t\t\t- {s}")
emit = True
skipped = []
if not emit:
raise RuntimeError("Excluded section unfinished!")
file.lines = outlines
file.write()
def _rewrite_include(self, original, rewritten):
self._log(f"\tRewriting include: {original} -> {rewritten}")
regex = re.compile(f"\\s*#\\s*include\\s*(?P<include>{original})")
for filepath in self._dst_lib_file_paths():
file = FileLines(filepath)
for i, line in enumerate(file.lines):
match = regex.match(line)
if match is None:
continue
s = match.start('include')
e = match.end('include')
file.lines[i] = line[:s] + rewritten + line[e:]
file.write()
def _rewrite_includes(self):
self._log("Rewriting includes")
for original, rewritten in self._rewritten_includes:
self._rewrite_include(original, rewritten)
def _replace_xxh64_prefix(self):
if self._xxh64_prefix is None:
return
self._log(f"Replacing XXH64 prefix with {self._xxh64_prefix}")
replacements = []
if self._xxh64_state is not None:
replacements.append(
(re.compile(r"([^\w]|^)(?P<orig>XXH64_state_t)([^\w]|$)"), self._xxh64_state)
)
if self._xxh64_prefix is not None:
replacements.append(
(re.compile(r"([^\w]|^)(?P<orig>XXH64)[\(_]"), self._xxh64_prefix)
)
for filepath in self._dst_lib_file_paths():
file = FileLines(filepath)
for i, line in enumerate(file.lines):
modified = False
for regex, replacement in replacements:
match = regex.search(line)
while match is not None:
modified = True
b = match.start('orig')
e = match.end('orig')
line = line[:b] + replacement + line[e:]
match = regex.search(line)
if modified:
self._log(f"\t- {file.lines[i][:-1]}")
self._log(f"\t+ {line[:-1]}")
file.lines[i] = line
file.write()
def _parse_sed(self, sed):
assert sed[0] == 's'
delim = sed[1]
match = re.fullmatch(f's{delim}(.+){delim}(.*){delim}(.*)', sed)
assert match is not None
regex = re.compile(match.group(1))
format_str = match.group(2)
is_global = match.group(3) == 'g'
return regex, format_str, is_global
def _process_sed(self, sed):
self._log(f"Processing sed: {sed}")
regex, format_str, is_global = self._parse_sed(sed)
for filepath in self._dst_lib_file_paths():
file = FileLines(filepath)
for i, line in enumerate(file.lines):
modified = False
while True:
match = regex.search(line)
if match is None:
break
replacement = format_str.format(match.groups(''), match.groupdict(''))
b = match.start()
e = match.end()
line = line[:b] + replacement + line[e:]
modified = True
if not is_global:
break
if modified:
self._log(f"\t- {file.lines[i][:-1]}")
self._log(f"\t+ {line[:-1]}")
file.lines[i] = line
file.write()
def _process_seds(self):
self._log("Processing seds")
for sed in self._seds:
self._process_sed(sed)
def _process_spdx(self):
if not self._spdx:
return
self._log("Processing spdx")
SPDX_C = "// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause\n"
SPDX_H_S = "/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */\n"
for filepath in self._dst_lib_file_paths():
file = FileLines(filepath)
if file.lines[0] == SPDX_C or file.lines[0] == SPDX_H_S:
continue
for line in file.lines:
if "SPDX-License-Identifier" in line:
raise RuntimeError(f"Unexpected SPDX license identifier: {file.filename} {repr(line)}")
if file.filename.endswith(".c"):
file.lines.insert(0, SPDX_C)
elif file.filename.endswith(".h") or file.filename.endswith(".S"):
file.lines.insert(0, SPDX_H_S)
else:
raise RuntimeError(f"Unexpected file extension: {file.filename}")
file.write()
def go(self):
self._copy_source_lib()
self._copy_zstd_deps()
self._copy_mem()
self._hardwire_defines()
self._remove_excludes()
self._rewrite_includes()
self._replace_xxh64_prefix()
self._process_seds()
self._process_spdx()
def parse_optional_pair(defines: [str]) -> [(str, Optional[str])]:
output = []
for define in defines:
parsed = define.split('=')
if len(parsed) == 1:
output.append((parsed[0], None))
elif len(parsed) == 2:
output.append((parsed[0], parsed[1]))
else:
raise RuntimeError(f"Bad define: {define}")
return output
def parse_pair(rewritten_includes: [str]) -> [(str, str)]:
output = []
for rewritten_include in rewritten_includes:
parsed = rewritten_include.split('=')
if len(parsed) == 2:
output.append((parsed[0], parsed[1]))
else:
raise RuntimeError(f"Bad rewritten include: {rewritten_include}")
return output
def main(name, args):
parser = argparse.ArgumentParser(prog=name)
parser.add_argument("--zstd-deps", default="zstd_deps.h", help="Zstd dependencies file")
parser.add_argument("--mem", default="mem.h", help="Memory module")
parser.add_argument("--source-lib", default="../../lib", help="Location of the zstd library")
parser.add_argument("--output-lib", default="./freestanding_lib", help="Where to output the freestanding zstd library")
parser.add_argument("--xxhash", default=None, help="Alternate external xxhash include e.g. --xxhash='<xxhash.h>'. If set xxhash is not included.")
parser.add_argument("--xxh64-state", default=None, help="Alternate XXH64 state type (excluding _) e.g. --xxh64-state='struct xxh64_state'")
parser.add_argument("--xxh64-prefix", default=None, help="Alternate XXH64 function prefix (excluding _) e.g. --xxh64-prefix=xxh64")
parser.add_argument("--rewrite-include", default=[], dest="rewritten_includes", action="append", help="Rewrite an include REGEX=NEW (e.g. '<stddef\\.h>=<linux/types.h>')")
parser.add_argument("--sed", default=[], dest="seds", action="append", help="Apply a sed replacement. Format: `s/REGEX/FORMAT/[g]`. REGEX is a Python regex. FORMAT is a Python format string formatted by the regex dict.")
parser.add_argument("--spdx", action="store_true", help="Add SPDX License Identifiers")
parser.add_argument("-D", "--define", default=[], dest="defs", action="append", help="Pre-define this macro (can be passed multiple times)")
parser.add_argument("-U", "--undefine", default=[], dest="undefs", action="append", help="Pre-undefine this macro (can be passed multiple times)")
parser.add_argument("-R", "--replace", default=[], dest="replaces", action="append", help="Pre-define this macro and replace the first ifndef block with its definition")
parser.add_argument("-E", "--exclude", default=[], dest="excludes", action="append", help="Exclude all lines between 'BEGIN <EXCLUDE>' and 'END <EXCLUDE>'")
args = parser.parse_args(args)
# Always remove threading
if "ZSTD_MULTITHREAD" not in args.undefs:
args.undefs.append("ZSTD_MULTITHREAD")
args.defs = parse_optional_pair(args.defs)
for name, _ in args.defs:
if name in args.undefs:
raise RuntimeError(f"{name} is both defined and undefined!")
# Always set tracing to 0
if "ZSTD_NO_TRACE" not in (arg[0] for arg in args.defs):
args.defs.append(("ZSTD_NO_TRACE", None))
args.defs.append(("ZSTD_TRACE", "0"))
args.replaces = parse_pair(args.replaces)
for name, _ in args.replaces:
if name in args.undefs or name in args.defs:
raise RuntimeError(f"{name} is both replaced and (un)defined!")
args.rewritten_includes = parse_pair(args.rewritten_includes)
external_xxhash = False
if args.xxhash is not None:
external_xxhash = True
args.rewritten_includes.append(('"(\\.\\./common/)?xxhash.h"', args.xxhash))
if args.xxh64_prefix is not None:
if not external_xxhash:
raise RuntimeError("--xxh64-prefix may only be used with --xxhash provided")
if args.xxh64_state is not None:
if not external_xxhash:
raise RuntimeError("--xxh64-state may only be used with --xxhash provided")
Freestanding(
args.zstd_deps,
args.mem,
args.source_lib,
args.output_lib,
external_xxhash,
args.xxh64_state,
args.xxh64_prefix,
args.rewritten_includes,
args.defs,
args.replaces,
args.undefs,
args.excludes,
args.seds,
args.spdx,
).go()
if __name__ == "__main__":
main(sys.argv[0], sys.argv[1:])

View File

@@ -0,0 +1,3 @@
# make artefact
gen_html
zstd_manual.html

View File

@@ -0,0 +1,31 @@
gen_html - a program for automatic generation of zstd manual
============================================================
#### Introduction
This simple C++ program generates a single-page HTML manual from `zstd.h`.
The format of recognized comment blocks is following:
- comments of type `/*!` mean: this is a function declaration; switch comments with declarations
- comments of type `/**` and `/*-` mean: this is a comment; use a `<H2>` header for the first line
- comments of type `/*=` and `/**=` mean: use a `<H3>` header and show also all functions until first empty line
- comments of type `/*X` where `X` is different from above-mentioned are ignored
Moreover:
- `ZSTDLIB_API` is removed to improve readability
- `typedef` are detected and included even if uncommented
- comments of type `/**<` and `/*!<` are detected and only function declaration is highlighted (bold)
#### Usage
The program requires 3 parameters:
```
gen_html [zstd_version] [input_file] [output_html]
```
To compile program and generate zstd manual we have used:
```
make
./gen_html.exe 1.1.1 ../../lib/zstd.h zstd_manual.html
```

View File

@@ -0,0 +1,9 @@
#!/bin/sh
LIBVER_MAJOR_SCRIPT=`sed -n '/define ZSTD_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ../../lib/zstd.h`
LIBVER_MINOR_SCRIPT=`sed -n '/define ZSTD_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ../../lib/zstd.h`
LIBVER_PATCH_SCRIPT=`sed -n '/define ZSTD_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ../../lib/zstd.h`
LIBVER_SCRIPT=$LIBVER_MAJOR_SCRIPT.$LIBVER_MINOR_SCRIPT.$LIBVER_PATCH_SCRIPT
echo ZSTD_VERSION=$LIBVER_SCRIPT
./gen_html $LIBVER_SCRIPT ../../lib/zstd.h ./zstd_manual.html

View File

@@ -0,0 +1,225 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>
using namespace std;
/* trim string at the beginning and at the end */
void trim(string& s, string characters)
{
size_t p = s.find_first_not_of(characters);
s.erase(0, p);
p = s.find_last_not_of(characters);
if (string::npos != p)
s.erase(p+1);
}
/* trim C++ style comments */
void trim_comments(string &s)
{
size_t spos, epos;
spos = s.find("/*");
epos = s.find("*/");
s = s.substr(spos+3, epos-(spos+3));
}
/* get lines until a given terminator */
vector<string> get_lines(vector<string>& input, int& linenum, string terminator)
{
vector<string> out;
string line;
size_t epos;
while ((size_t)linenum < input.size()) {
line = input[linenum];
if (terminator.empty() && line.empty()) { linenum--; break; }
epos = line.find(terminator);
if (!terminator.empty() && epos!=string::npos) {
out.push_back(line);
break;
}
out.push_back(line);
linenum++;
}
return out;
}
/* print line with ZSTDLIB_API removed and C++ comments not bold */
void print_line(stringstream &sout, string line)
{
size_t spos;
if (line.substr(0,12) == "ZSTDLIB_API ") line = line.substr(12);
spos = line.find("/*");
if (spos!=string::npos) {
sout << line.substr(0, spos);
sout << "</b>" << line.substr(spos) << "<b>" << endl;
} else {
// fprintf(stderr, "lines=%s\n", line.c_str());
sout << line << endl;
}
}
int main(int argc, char *argv[]) {
char exclam;
int linenum, chapter = 1;
vector<string> input, lines, comments, chapters;
string line, version;
size_t spos, l;
stringstream sout;
ifstream istream;
ofstream ostream;
if (argc < 4) {
cout << "usage: " << argv[0] << " [zstd_version] [input_file] [output_html]" << endl;
return 1;
}
version = "zstd " + string(argv[1]) + " Manual";
istream.open(argv[2], ifstream::in);
if (!istream.is_open()) {
cout << "Error opening file " << argv[2] << endl;
return 1;
}
ostream.open(argv[3], ifstream::out);
if (!ostream.is_open()) {
cout << "Error opening file " << argv[3] << endl;
return 1;
}
while (getline(istream, line)) {
input.push_back(line);
}
for (linenum=0; (size_t)linenum < input.size(); linenum++) {
line = input[linenum];
/* typedefs are detected and included even if uncommented */
if (line.substr(0,7) == "typedef" && line.find("{")!=string::npos) {
lines = get_lines(input, linenum, "}");
sout << "<pre><b>";
for (l=0; l<lines.size(); l++) {
print_line(sout, lines[l]);
}
sout << "</b></pre><BR>" << endl;
continue;
}
/* comments of type /**< and /*!< are detected and only function declaration is highlighted (bold) */
if ((line.find("/**<")!=string::npos || line.find("/*!<")!=string::npos) && line.find("*/")!=string::npos) {
sout << "<pre><b>";
print_line(sout, line);
sout << "</b></pre><BR>" << endl;
continue;
}
spos = line.find("/**=");
if (spos==string::npos) {
spos = line.find("/*!");
if (spos==string::npos)
spos = line.find("/**");
if (spos==string::npos)
spos = line.find("/*-");
if (spos==string::npos)
spos = line.find("/*=");
if (spos==string::npos)
continue;
exclam = line[spos+2];
}
else exclam = '=';
comments = get_lines(input, linenum, "*/");
if (!comments.empty()) comments[0] = line.substr(spos+3);
if (!comments.empty()) comments[comments.size()-1] = comments[comments.size()-1].substr(0, comments[comments.size()-1].find("*/"));
for (l=0; l<comments.size(); l++) {
if (comments[l].find(" *")==0) comments[l] = comments[l].substr(2);
else if (comments[l].find(" *")==0) comments[l] = comments[l].substr(3);
trim(comments[l], "*-=");
}
while (!comments.empty() && comments[comments.size()-1].empty()) comments.pop_back(); // remove empty line at the end
while (!comments.empty() && comments[0].empty()) comments.erase(comments.begin()); // remove empty line at the start
/* comments of type /*! mean: this is a function declaration; switch comments with declarations */
if (exclam == '!') {
if (!comments.empty()) comments.erase(comments.begin()); /* remove first line like "ZSTD_XXX() :" */
linenum++;
lines = get_lines(input, linenum, "");
sout << "<pre><b>";
for (l=0; l<lines.size(); l++) {
// fprintf(stderr, "line[%d]=%s\n", l, lines[l].c_str());
string fline = lines[l];
if (fline.substr(0, 12) == "ZSTDLIB_API " ||
fline.substr(0, 12) == string(12, ' '))
fline = fline.substr(12);
print_line(sout, fline);
}
sout << "</b><p>";
for (l=0; l<comments.size(); l++) {
print_line(sout, comments[l]);
}
sout << "</p></pre><BR>" << endl << endl;
} else if (exclam == '=') { /* comments of type /*= and /**= mean: use a <H3> header and show also all functions until first empty line */
trim(comments[0], " ");
sout << "<h3>" << comments[0] << "</h3><pre>";
for (l=1; l<comments.size(); l++) {
print_line(sout, comments[l]);
}
sout << "</pre><b><pre>";
lines = get_lines(input, ++linenum, "");
for (l=0; l<lines.size(); l++) {
print_line(sout, lines[l]);
}
sout << "</pre></b><BR>" << endl;
} else { /* comments of type /** and /*- mean: this is a comment; use a <H2> header for the first line */
if (comments.empty()) continue;
trim(comments[0], " ");
sout << "<a name=\"Chapter" << chapter << "\"></a><h2>" << comments[0] << "</h2><pre>";
chapters.push_back(comments[0]);
chapter++;
for (l=1; l<comments.size(); l++) {
print_line(sout, comments[l]);
}
if (comments.size() > 1)
sout << "<BR></pre>" << endl << endl;
else
sout << "</pre>" << endl << endl;
}
}
ostream << "<html>\n<head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-1\">\n<title>" << version << "</title>\n</head>\n<body>" << endl;
ostream << "<h1>" << version << "</h1>\n";
ostream << "Note: the content of this file has been automatically generated by parsing \"zstd.h\" \n";
ostream << "<hr>\n<a name=\"Contents\"></a><h2>Contents</h2>\n<ol>\n";
for (size_t i=0; i<chapters.size(); i++)
ostream << "<li><a href=\"#Chapter" << i+1 << "\">" << chapters[i].c_str() << "</a></li>\n";
ostream << "</ol>\n<hr>\n";
ostream << sout.str();
ostream << "</html>" << endl << "</body>" << endl;
return 0;
}

View File

@@ -0,0 +1,2 @@
# build artifacts
largeNbDicts

View File

@@ -0,0 +1,33 @@
largeNbDicts
=====================
`largeNbDicts` is a benchmark test tool
dedicated to the specific scenario of
dictionary decompression using a very large number of dictionaries.
When dictionaries are constantly changing, they are always "cold",
suffering from increased latency due to cache misses.
The tool is created in a bid to investigate performance for this scenario,
and experiment mitigation techniques.
Command line :
```
largeNbDicts [Options] filename(s)
Options :
-z : benchmark compression (default)
-d : benchmark decompression
-r : recursively load all files in subdirectories (default: off)
-B# : split input into blocks of size # (default: no split)
-# : use compression level # (default: 3)
-D # : use # as a dictionary (default: create one)
-i# : nb benchmark rounds (default: 6)
--nbBlocks=#: use # blocks for bench (default: one per file)
--nbDicts=# : create # dictionaries for bench (default: one per block)
-h : help (this text)
Advanced Options (see zstd.h for documentation) :
--dedicated-dict-search
--dict-content-type=#
--dict-attach-pref=#
```

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,4 @@
!lib/zstd
!lib/zstd/*
*.o
*.a

View File

@@ -0,0 +1,14 @@
# Zstd in the Linux Kernel
This directory contains the scripts needed to transform upstream zstd into the version imported into the kernel. All the transforms are automated and tested by our continuous integration.
## Upgrading Zstd in the Linux Kernel
1. `cd` into this directory.
2. Run `make libzstd` and read the output. Make sure that all the diffs printed and changes made by the script are correct.
3. Run `make test` and ensure that it passes.
4. Import zstd into the Linux Kernel `make import LINUX=/path/to/linux/repo`
5. Inspect the diff for sanity.
6. Check the Linux Kernel history for zstd. If any patches were made to the kernel version of zstd, but not to upstream zstd, then port them upstream if necessary.
7. Test the diff. Benchmark if necessary. Make sure to test multiple architectures: At least x86, i386, and arm.
8. Submit the patch to the LKML.

View File

@@ -0,0 +1,104 @@
# !/bin/sh
set -e
# Benchmarks run on a Ubuntu 14.04 VM with 2 cores and 4 GiB of RAM.
# The VM is running on a Macbook Pro with a 3.1 GHz Intel Core i7 processor and
# 16 GB of RAM and an SSD.
# silesia is a directory that can be downloaded from
# http://mattmahoney.net/dc/silesia.html
# ls -l silesia/
# total 203M
# -rwxr-xr-x 1 terrelln 9.8M Apr 12 2002 dickens
# -rwxr-xr-x 1 terrelln 49M May 31 2002 mozilla
# -rwxr-xr-x 1 terrelln 9.6M Mar 20 2003 mr
# -rwxr-xr-x 1 terrelln 32M Apr 2 2002 nci
# -rwxr-xr-x 1 terrelln 5.9M Jul 4 2002 ooffice
# -rwxr-xr-x 1 terrelln 9.7M Apr 11 2002 osdb
# -rwxr-xr-x 1 terrelln 6.4M Apr 2 2002 reymont
# -rwxr-xr-x 1 terrelln 21M Mar 25 2002 samba
# -rwxr-xr-x 1 terrelln 7.0M Mar 24 2002 sao
# -rwxr-xr-x 1 terrelln 40M Mar 25 2002 webster
# -rwxr-xr-x 1 terrelln 8.1M Apr 4 2002 x-ray
# -rwxr-xr-x 1 terrelln 5.1M Nov 30 2000 xml
# $HOME is on a ext4 filesystem
BENCHMARK_DIR="$HOME/silesia/"
N=10
# Normalize the environment
sudo umount /mnt/btrfs 2> /dev/null > /dev/null || true
sudo mount -t btrfs $@ /dev/sda3 /mnt/btrfs
sudo rm -rf /mnt/btrfs/*
sync
sudo umount /mnt/btrfs
sudo mount -t btrfs $@ /dev/sda3 /mnt/btrfs
# Run the benchmark
echo "Compression"
time sh -c "for i in \$(seq $N); do sudo cp -r $BENCHMARK_DIR /mnt/btrfs/\$i; done; sync"
echo "Approximate compression ratio"
printf "%d / %d\n" \
$(df /mnt/btrfs --output=used -B 1 | tail -n 1) \
$(sudo du /mnt/btrfs -b -d 0 | tr '\t' '\n' | head -n 1);
# Unmount and remount to avoid any caching
sudo umount /mnt/btrfs
sudo mount -t btrfs $@ /dev/sda3 /mnt/btrfs
echo "Decompression"
time sudo tar -c /mnt/btrfs 2> /dev/null | wc -c > /dev/null
sudo rm -rf /mnt/btrfs/*
sudo umount /mnt/btrfs
# Run for each of -o compress-force={none, lzo, zlib, zstd} 5 times and take the
# min time and ratio.
# Ran zstd with compression levels {1, 3, 6, 9, 12, 15}.
# Original size: 2119415342 B (using du /mnt/btrfs)
# none
# compress: 4.205 s
# decompress: 3.090 s
# ratio: 0.99
# lzo
# compress: 5.328 s
# decompress: 4.793 s
# ratio: 1.66
# zlib
# compress: 32.588 s
# decompress: 8.791 s
# ratio : 2.58
# zstd 1
# compress: 8.147 s
# decompress: 5.527 s
# ratio : 2.57
# zstd 3
# compress: 12.207 s
# decompress: 5.195 s
# ratio : 2.71
# zstd 6
# compress: 30.253 s
# decompress: 5.324 s
# ratio : 2.87
# zstd 9
# compress: 49.659 s
# decompress: 5.220 s
# ratio : 2.92
# zstd 12
# compress: 99.245 s
# decompress: 5.193 s
# ratio : 2.93
# zstd 15
# compress: 196.997 s
# decompress: 5.992 s
# ratio : 3.01

View File

@@ -0,0 +1,99 @@
# !/bin/sh
set -e
# Benchmarks run on a Ubuntu 14.04 VM with 2 cores and 4 GiB of RAM.
# The VM is running on a Macbook Pro with a 3.1 GHz Intel Core i7 processor and
# 16 GB of RAM and an SSD.
# silesia is a directory that can be downloaded from
# http://mattmahoney.net/dc/silesia.html
# ls -l silesia/
# total 203M
# -rwxr-xr-x 1 terrelln 9.8M Apr 12 2002 dickens
# -rwxr-xr-x 1 terrelln 49M May 31 2002 mozilla
# -rwxr-xr-x 1 terrelln 9.6M Mar 20 2003 mr
# -rwxr-xr-x 1 terrelln 32M Apr 2 2002 nci
# -rwxr-xr-x 1 terrelln 5.9M Jul 4 2002 ooffice
# -rwxr-xr-x 1 terrelln 9.7M Apr 11 2002 osdb
# -rwxr-xr-x 1 terrelln 6.4M Apr 2 2002 reymont
# -rwxr-xr-x 1 terrelln 21M Mar 25 2002 samba
# -rwxr-xr-x 1 terrelln 7.0M Mar 24 2002 sao
# -rwxr-xr-x 1 terrelln 40M Mar 25 2002 webster
# -rwxr-xr-x 1 terrelln 8.1M Apr 4 2002 x-ray
# -rwxr-xr-x 1 terrelln 5.1M Nov 30 2000 xml
# $HOME is on a ext4 filesystem
BENCHMARK_FILE="linux-4.11.6.tar"
BENCHMARK_DIR="$HOME/$BENCHMARK_FILE"
# Normalize the environment
sudo umount /mnt/btrfs 2> /dev/null > /dev/null || true
sudo mount -t btrfs $@ /dev/sda3 /mnt/btrfs
sudo rm -rf /mnt/btrfs/*
sync
sudo umount /mnt/btrfs
sudo mount -t btrfs $@ /dev/sda3 /mnt/btrfs
# Run the benchmark
echo "Copy"
time sh -c "sudo cp -r $BENCHMARK_DIR /mnt/btrfs/$BENCHMARK_FILE && sync"
echo "Approximate tarred compression ratio"
printf "%d / %d\n" \
$(df /mnt/btrfs --output=used -B 1 | tail -n 1) \
$(sudo du /mnt/btrfs -b -d 0 | tr '\t' '\n' | head -n 1);
# Unmount and remount to avoid any caching
sudo umount /mnt/btrfs
sudo mount -t btrfs $@ /dev/sda3 /mnt/btrfs
echo "Extract"
time sh -c "sudo tar -C /mnt/btrfs -xf /mnt/btrfs/$BENCHMARK_FILE && sync"
# Remove the tarball, leaving only the extracted data
sudo rm /mnt/btrfs/$BENCHMARK_FILE
# Unmount and remount to avoid any caching
sudo umount /mnt/btrfs
sudo mount -t btrfs $@ /dev/sda3 /mnt/btrfs
echo "Approximate extracted compression ratio"
printf "%d / %d\n" \
$(df /mnt/btrfs --output=used -B 1 | tail -n 1) \
$(sudo du /mnt/btrfs -b -d 0 | tr '\t' '\n' | head -n 1);
echo "Read"
time sudo tar -c /mnt/btrfs 2> /dev/null | wc -c > /dev/null
sudo rm -rf /mnt/btrfs/*
sudo umount /mnt/btrfs
# Run for each of -o compress-force={none, lzo, zlib, zstd} 5 times and take the
# min time and ratio.
# none
# copy: 0.981 s
# extract: 5.501 s
# read: 8.807 s
# tarball ratio: 0.97
# extracted ratio: 0.78
# lzo
# copy: 1.631 s
# extract: 8.458 s
# read: 8.585 s
# tarball ratio: 2.06
# extracted ratio: 1.38
# zlib
# copy: 7.750 s
# extract: 21.544 s
# read: 11.744 s
# tarball ratio : 3.40
# extracted ratio: 1.86
# zstd 1
# copy: 2.579 s
# extract: 11.479 s
# read: 9.389 s
# tarball ratio : 3.57
# extracted ratio: 1.85

View File

@@ -0,0 +1,34 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/*
* This file includes every .c file needed for decompression.
* It is used by lib/decompress_unzstd.c to include the decompression
* source into the translation-unit, so it can be used for kernel
* decompression.
*/
/*
* Disable the ASM Huffman implementation because we need to
* include all the sources.
*/
#define ZSTD_DISABLE_ASM 1
#include "common/debug.c"
#include "common/entropy_common.c"
#include "common/error_private.c"
#include "common/fse_decompress.c"
#include "common/zstd_common.c"
#include "decompress/huf_decompress.c"
#include "decompress/zstd_ddict.c"
#include "decompress/zstd_decompress.c"
#include "decompress/zstd_decompress_block.c"
#include "zstd_decompress_module.c"

View File

@@ -0,0 +1,44 @@
# SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
# ################################################################
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# LICENSE file in the root directory of this source tree) and the GPLv2 (found
# in the COPYING file in the root directory of this source tree).
# You may select, at your option, one of the above-listed licenses.
# ################################################################
obj-$(CONFIG_ZSTD_COMPRESS) += zstd_compress.o
obj-$(CONFIG_ZSTD_DECOMPRESS) += zstd_decompress.o
obj-$(CONFIG_ZSTD_COMMON) += zstd_common.o
zstd_compress-y := \
zstd_compress_module.o \
compress/fse_compress.o \
compress/hist.o \
compress/huf_compress.o \
compress/zstd_compress.o \
compress/zstd_compress_literals.o \
compress/zstd_compress_sequences.o \
compress/zstd_compress_superblock.o \
compress/zstd_double_fast.o \
compress/zstd_fast.o \
compress/zstd_lazy.o \
compress/zstd_ldm.o \
compress/zstd_opt.o \
compress/zstd_preSplit.o \
zstd_decompress-y := \
zstd_decompress_module.o \
decompress/huf_decompress.o \
decompress/zstd_ddict.o \
decompress/zstd_decompress.o \
decompress/zstd_decompress_block.o \
zstd_common-y := \
zstd_common_module.o \
common/debug.o \
common/entropy_common.o \
common/error_private.o \
common/fse_decompress.o \
common/zstd_common.o \

View File

@@ -0,0 +1,525 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of https://github.com/facebook/zstd) and
* the GPLv2 (found in the COPYING file in the root directory of
* https://github.com/facebook/zstd). You may select, at your option, one of the
* above-listed licenses.
*/
#ifndef LINUX_ZSTD_H
#define LINUX_ZSTD_H
/**
* This is a kernel-style API that wraps the upstream zstd API, which cannot be
* used directly because the symbols aren't exported. It exposes the minimal
* functionality which is currently required by users of zstd in the kernel.
* Expose extra functions from lib/zstd/zstd.h as needed.
*/
/* ====== Dependency ====== */
#include <linux/types.h>
#include <linux/zstd_errors.h>
#include <linux/zstd_lib.h>
/* ====== Helper Functions ====== */
/**
* zstd_compress_bound() - maximum compressed size in worst case scenario
* @src_size: The size of the data to compress.
*
* Return: The maximum compressed size in the worst case scenario.
*/
size_t zstd_compress_bound(size_t src_size);
/**
* zstd_is_error() - tells if a size_t function result is an error code
* @code: The function result to check for error.
*
* Return: Non-zero iff the code is an error.
*/
unsigned int zstd_is_error(size_t code);
/**
* enum zstd_error_code - zstd error codes
*/
typedef ZSTD_ErrorCode zstd_error_code;
/**
* zstd_get_error_code() - translates an error function result to an error code
* @code: The function result for which zstd_is_error(code) is true.
*
* Return: A unique error code for this error.
*/
zstd_error_code zstd_get_error_code(size_t code);
/**
* zstd_get_error_name() - translates an error function result to a string
* @code: The function result for which zstd_is_error(code) is true.
*
* Return: An error string corresponding to the error code.
*/
const char *zstd_get_error_name(size_t code);
/**
* zstd_min_clevel() - minimum allowed compression level
*
* Return: The minimum allowed compression level.
*/
int zstd_min_clevel(void);
/**
* zstd_max_clevel() - maximum allowed compression level
*
* Return: The maximum allowed compression level.
*/
int zstd_max_clevel(void);
/* ====== Parameter Selection ====== */
/**
* enum zstd_strategy - zstd compression search strategy
*
* From faster to stronger. See zstd_lib.h.
*/
typedef ZSTD_strategy zstd_strategy;
/**
* struct zstd_compression_parameters - zstd compression parameters
* @windowLog: Log of the largest match distance. Larger means more
* compression, and more memory needed during decompression.
* @chainLog: Fully searched segment. Larger means more compression,
* slower, and more memory (useless for fast).
* @hashLog: Dispatch table. Larger means more compression,
* slower, and more memory.
* @searchLog: Number of searches. Larger means more compression and slower.
* @searchLength: Match length searched. Larger means faster decompression,
* sometimes less compression.
* @targetLength: Acceptable match size for optimal parser (only). Larger means
* more compression, and slower.
* @strategy: The zstd compression strategy.
*
* See zstd_lib.h.
*/
typedef ZSTD_compressionParameters zstd_compression_parameters;
/**
* struct zstd_frame_parameters - zstd frame parameters
* @contentSizeFlag: Controls whether content size will be present in the
* frame header (when known).
* @checksumFlag: Controls whether a 32-bit checksum is generated at the
* end of the frame for error detection.
* @noDictIDFlag: Controls whether dictID will be saved into the frame
* header when using dictionary compression.
*
* The default value is all fields set to 0. See zstd_lib.h.
*/
typedef ZSTD_frameParameters zstd_frame_parameters;
/**
* struct zstd_parameters - zstd parameters
* @cParams: The compression parameters.
* @fParams: The frame parameters.
*/
typedef ZSTD_parameters zstd_parameters;
/**
* zstd_get_params() - returns zstd_parameters for selected level
* @level: The compression level
* @estimated_src_size: The estimated source size to compress or 0
* if unknown.
*
* Return: The selected zstd_parameters.
*/
zstd_parameters zstd_get_params(int level,
unsigned long long estimated_src_size);
typedef ZSTD_CCtx zstd_cctx;
typedef ZSTD_cParameter zstd_cparameter;
/**
* zstd_cctx_set_param() - sets a compression parameter
* @cctx: The context. Must have been initialized with zstd_init_cctx().
* @param: The parameter to set.
* @value: The value to set the parameter to.
*
* Return: Zero or an error, which can be checked using zstd_is_error().
*/
size_t zstd_cctx_set_param(zstd_cctx *cctx, zstd_cparameter param, int value);
/* ====== Single-pass Compression ====== */
/**
* zstd_cctx_workspace_bound() - max memory needed to initialize a zstd_cctx
* @parameters: The compression parameters to be used.
*
* If multiple compression parameters might be used, the caller must call
* zstd_cctx_workspace_bound() for each set of parameters and use the maximum
* size.
*
* Return: A lower bound on the size of the workspace that is passed to
* zstd_init_cctx().
*/
size_t zstd_cctx_workspace_bound(const zstd_compression_parameters *parameters);
/**
* zstd_cctx_workspace_bound_with_ext_seq_prod() - max memory needed to
* initialize a zstd_cctx when using the block-level external sequence
* producer API.
* @parameters: The compression parameters to be used.
*
* If multiple compression parameters might be used, the caller must call
* this function for each set of parameters and use the maximum size.
*
* Return: A lower bound on the size of the workspace that is passed to
* zstd_init_cctx().
*/
size_t zstd_cctx_workspace_bound_with_ext_seq_prod(const zstd_compression_parameters *parameters);
/**
* zstd_init_cctx() - initialize a zstd compression context
* @workspace: The workspace to emplace the context into. It must outlive
* the returned context.
* @workspace_size: The size of workspace. Use zstd_cctx_workspace_bound() to
* determine how large the workspace must be.
*
* Return: A zstd compression context or NULL on error.
*/
zstd_cctx *zstd_init_cctx(void *workspace, size_t workspace_size);
/**
* zstd_compress_cctx() - compress src into dst with the initialized parameters
* @cctx: The context. Must have been initialized with zstd_init_cctx().
* @dst: The buffer to compress src into.
* @dst_capacity: The size of the destination buffer. May be any size, but
* ZSTD_compressBound(srcSize) is guaranteed to be large enough.
* @src: The data to compress.
* @src_size: The size of the data to compress.
* @parameters: The compression parameters to be used.
*
* Return: The compressed size or an error, which can be checked using
* zstd_is_error().
*/
size_t zstd_compress_cctx(zstd_cctx *cctx, void *dst, size_t dst_capacity,
const void *src, size_t src_size, const zstd_parameters *parameters);
/* ====== Single-pass Decompression ====== */
typedef ZSTD_DCtx zstd_dctx;
/**
* zstd_dctx_workspace_bound() - max memory needed to initialize a zstd_dctx
*
* Return: A lower bound on the size of the workspace that is passed to
* zstd_init_dctx().
*/
size_t zstd_dctx_workspace_bound(void);
/**
* zstd_init_dctx() - initialize a zstd decompression context
* @workspace: The workspace to emplace the context into. It must outlive
* the returned context.
* @workspace_size: The size of workspace. Use zstd_dctx_workspace_bound() to
* determine how large the workspace must be.
*
* Return: A zstd decompression context or NULL on error.
*/
zstd_dctx *zstd_init_dctx(void *workspace, size_t workspace_size);
/**
* zstd_decompress_dctx() - decompress zstd compressed src into dst
* @dctx: The decompression context.
* @dst: The buffer to decompress src into.
* @dst_capacity: The size of the destination buffer. Must be at least as large
* as the decompressed size. If the caller cannot upper bound the
* decompressed size, then it's better to use the streaming API.
* @src: The zstd compressed data to decompress. Multiple concatenated
* frames and skippable frames are allowed.
* @src_size: The exact size of the data to decompress.
*
* Return: The decompressed size or an error, which can be checked using
* zstd_is_error().
*/
size_t zstd_decompress_dctx(zstd_dctx *dctx, void *dst, size_t dst_capacity,
const void *src, size_t src_size);
/* ====== Streaming Buffers ====== */
/**
* struct zstd_in_buffer - input buffer for streaming
* @src: Start of the input buffer.
* @size: Size of the input buffer.
* @pos: Position where reading stopped. Will be updated.
* Necessarily 0 <= pos <= size.
*
* See zstd_lib.h.
*/
typedef ZSTD_inBuffer zstd_in_buffer;
/**
* struct zstd_out_buffer - output buffer for streaming
* @dst: Start of the output buffer.
* @size: Size of the output buffer.
* @pos: Position where writing stopped. Will be updated.
* Necessarily 0 <= pos <= size.
*
* See zstd_lib.h.
*/
typedef ZSTD_outBuffer zstd_out_buffer;
/* ====== Streaming Compression ====== */
typedef ZSTD_CStream zstd_cstream;
/**
* zstd_cstream_workspace_bound() - memory needed to initialize a zstd_cstream
* @cparams: The compression parameters to be used for compression.
*
* Return: A lower bound on the size of the workspace that is passed to
* zstd_init_cstream().
*/
size_t zstd_cstream_workspace_bound(const zstd_compression_parameters *cparams);
/**
* zstd_cstream_workspace_bound_with_ext_seq_prod() - memory needed to initialize
* a zstd_cstream when using the block-level external sequence producer API.
* @cparams: The compression parameters to be used for compression.
*
* Return: A lower bound on the size of the workspace that is passed to
* zstd_init_cstream().
*/
size_t zstd_cstream_workspace_bound_with_ext_seq_prod(const zstd_compression_parameters *cparams);
/**
* zstd_init_cstream() - initialize a zstd streaming compression context
* @parameters The zstd parameters to use for compression.
* @pledged_src_size: If params.fParams.contentSizeFlag == 1 then the caller
* must pass the source size (zero means empty source).
* Otherwise, the caller may optionally pass the source
* size, or zero if unknown.
* @workspace: The workspace to emplace the context into. It must outlive
* the returned context.
* @workspace_size: The size of workspace.
* Use zstd_cstream_workspace_bound(params->cparams) to
* determine how large the workspace must be.
*
* Return: The zstd streaming compression context or NULL on error.
*/
zstd_cstream *zstd_init_cstream(const zstd_parameters *parameters,
unsigned long long pledged_src_size, void *workspace, size_t workspace_size);
/**
* zstd_reset_cstream() - reset the context using parameters from creation
* @cstream: The zstd streaming compression context to reset.
* @pledged_src_size: Optionally the source size, or zero if unknown.
*
* Resets the context using the parameters from creation. Skips dictionary
* loading, since it can be reused. If `pledged_src_size` is non-zero the frame
* content size is always written into the frame header.
*
* Return: Zero or an error, which can be checked using
* zstd_is_error().
*/
size_t zstd_reset_cstream(zstd_cstream *cstream,
unsigned long long pledged_src_size);
/**
* zstd_compress_stream() - streaming compress some of input into output
* @cstream: The zstd streaming compression context.
* @output: Destination buffer. `output->pos` is updated to indicate how much
* compressed data was written.
* @input: Source buffer. `input->pos` is updated to indicate how much data
* was read. Note that it may not consume the entire input, in which
* case `input->pos < input->size`, and it's up to the caller to
* present remaining data again.
*
* The `input` and `output` buffers may be any size. Guaranteed to make some
* forward progress if `input` and `output` are not empty.
*
* Return: A hint for the number of bytes to use as the input for the next
* function call or an error, which can be checked using
* zstd_is_error().
*/
size_t zstd_compress_stream(zstd_cstream *cstream, zstd_out_buffer *output,
zstd_in_buffer *input);
/**
* zstd_flush_stream() - flush internal buffers into output
* @cstream: The zstd streaming compression context.
* @output: Destination buffer. `output->pos` is updated to indicate how much
* compressed data was written.
*
* zstd_flush_stream() must be called until it returns 0, meaning all the data
* has been flushed. Since zstd_flush_stream() causes a block to be ended,
* calling it too often will degrade the compression ratio.
*
* Return: The number of bytes still present within internal buffers or an
* error, which can be checked using zstd_is_error().
*/
size_t zstd_flush_stream(zstd_cstream *cstream, zstd_out_buffer *output);
/**
* zstd_end_stream() - flush internal buffers into output and end the frame
* @cstream: The zstd streaming compression context.
* @output: Destination buffer. `output->pos` is updated to indicate how much
* compressed data was written.
*
* zstd_end_stream() must be called until it returns 0, meaning all the data has
* been flushed and the frame epilogue has been written.
*
* Return: The number of bytes still present within internal buffers or an
* error, which can be checked using zstd_is_error().
*/
size_t zstd_end_stream(zstd_cstream *cstream, zstd_out_buffer *output);
/* ====== Streaming Decompression ====== */
typedef ZSTD_DStream zstd_dstream;
/**
* zstd_dstream_workspace_bound() - memory needed to initialize a zstd_dstream
* @max_window_size: The maximum window size allowed for compressed frames.
*
* Return: A lower bound on the size of the workspace that is passed
* to zstd_init_dstream().
*/
size_t zstd_dstream_workspace_bound(size_t max_window_size);
/**
* zstd_init_dstream() - initialize a zstd streaming decompression context
* @max_window_size: The maximum window size allowed for compressed frames.
* @workspace: The workspace to emplace the context into. It must outlive
* the returned context.
* @workspaceSize: The size of workspace.
* Use zstd_dstream_workspace_bound(max_window_size) to
* determine how large the workspace must be.
*
* Return: The zstd streaming decompression context.
*/
zstd_dstream *zstd_init_dstream(size_t max_window_size, void *workspace,
size_t workspace_size);
/**
* zstd_reset_dstream() - reset the context using parameters from creation
* @dstream: The zstd streaming decompression context to reset.
*
* Resets the context using the parameters from creation. Skips dictionary
* loading, since it can be reused.
*
* Return: Zero or an error, which can be checked using zstd_is_error().
*/
size_t zstd_reset_dstream(zstd_dstream *dstream);
/**
* zstd_decompress_stream() - streaming decompress some of input into output
* @dstream: The zstd streaming decompression context.
* @output: Destination buffer. `output.pos` is updated to indicate how much
* decompressed data was written.
* @input: Source buffer. `input.pos` is updated to indicate how much data was
* read. Note that it may not consume the entire input, in which case
* `input.pos < input.size`, and it's up to the caller to present
* remaining data again.
*
* The `input` and `output` buffers may be any size. Guaranteed to make some
* forward progress if `input` and `output` are not empty.
* zstd_decompress_stream() will not consume the last byte of the frame until
* the entire frame is flushed.
*
* Return: Returns 0 iff a frame is completely decoded and fully flushed.
* Otherwise returns a hint for the number of bytes to use as the
* input for the next function call or an error, which can be checked
* using zstd_is_error(). The size hint will never load more than the
* frame.
*/
size_t zstd_decompress_stream(zstd_dstream *dstream, zstd_out_buffer *output,
zstd_in_buffer *input);
/* ====== Frame Inspection Functions ====== */
/**
* zstd_find_frame_compressed_size() - returns the size of a compressed frame
* @src: Source buffer. It should point to the start of a zstd encoded
* frame or a skippable frame.
* @src_size: The size of the source buffer. It must be at least as large as the
* size of the frame.
*
* Return: The compressed size of the frame pointed to by `src` or an error,
* which can be check with zstd_is_error().
* Suitable to pass to ZSTD_decompress() or similar functions.
*/
size_t zstd_find_frame_compressed_size(const void *src, size_t src_size);
/**
* zstd_register_sequence_producer() - exposes the zstd library function
* ZSTD_registerSequenceProducer(). This is used for the block-level external
* sequence producer API. See upstream zstd.h for detailed documentation.
*/
typedef ZSTD_sequenceProducer_F zstd_sequence_producer_f;
void zstd_register_sequence_producer(
zstd_cctx *cctx,
void* sequence_producer_state,
zstd_sequence_producer_f sequence_producer
);
/**
* struct zstd_frame_params - zstd frame parameters stored in the frame header
* @frameContentSize: The frame content size, or ZSTD_CONTENTSIZE_UNKNOWN if not
* present.
* @windowSize: The window size, or 0 if the frame is a skippable frame.
* @blockSizeMax: The maximum block size.
* @frameType: The frame type (zstd or skippable)
* @headerSize: The size of the frame header.
* @dictID: The dictionary id, or 0 if not present.
* @checksumFlag: Whether a checksum was used.
*
* See zstd_lib.h.
*/
typedef ZSTD_FrameHeader zstd_frame_header;
/**
* zstd_get_frame_header() - extracts parameters from a zstd or skippable frame
* @params: On success the frame parameters are written here.
* @src: The source buffer. It must point to a zstd or skippable frame.
* @src_size: The size of the source buffer.
*
* Return: 0 on success. If more data is required it returns how many bytes
* must be provided to make forward progress. Otherwise it returns
* an error, which can be checked using zstd_is_error().
*/
size_t zstd_get_frame_header(zstd_frame_header *params, const void *src,
size_t src_size);
/**
* struct zstd_sequence - a sequence of literals or a match
*
* @offset: The offset of the match
* @litLength: The literal length of the sequence
* @matchLength: The match length of the sequence
* @rep: Represents which repeat offset is used
*/
typedef ZSTD_Sequence zstd_sequence;
/**
* zstd_compress_sequences_and_literals() - compress an array of zstd_sequence and literals
*
* @cctx: The zstd compression context.
* @dst: The buffer to compress the data into.
* @dst_capacity: The size of the destination buffer.
* @in_seqs: The array of zstd_sequence to compress.
* @in_seqs_size: The number of sequences in in_seqs.
* @literals: The literals associated to the sequences to be compressed.
* @lit_size: The size of the literals in the literals buffer.
* @lit_capacity: The size of the literals buffer.
* @decompressed_size: The size of the input data
*
* Return: The compressed size or an error, which can be checked using
* zstd_is_error().
*/
size_t zstd_compress_sequences_and_literals(zstd_cctx *cctx, void* dst, size_t dst_capacity,
const zstd_sequence *in_seqs, size_t in_seqs_size,
const void* literals, size_t lit_size, size_t lit_capacity,
size_t decompressed_size);
#endif /* LINUX_ZSTD_H */

View File

@@ -0,0 +1,262 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef MEM_H_MODULE
#define MEM_H_MODULE
/*-****************************************
* Dependencies
******************************************/
#include <linux/unaligned.h> /* get_unaligned, put_unaligned* */
#include <linux/compiler.h> /* inline */
#include <linux/swab.h> /* swab32, swab64 */
#include <linux/types.h> /* size_t, ptrdiff_t */
#include "debug.h" /* DEBUG_STATIC_ASSERT */
/*-****************************************
* Compiler specifics
******************************************/
#undef MEM_STATIC /* may be already defined from common/compiler.h */
#define MEM_STATIC static inline
/*-**************************************************************
* Basic Types
*****************************************************************/
typedef uint8_t BYTE;
typedef uint8_t U8;
typedef int8_t S8;
typedef uint16_t U16;
typedef int16_t S16;
typedef uint32_t U32;
typedef int32_t S32;
typedef uint64_t U64;
typedef int64_t S64;
/*-**************************************************************
* Memory I/O API
*****************************************************************/
/*=== Static platform detection ===*/
MEM_STATIC unsigned MEM_32bits(void);
MEM_STATIC unsigned MEM_64bits(void);
MEM_STATIC unsigned MEM_isLittleEndian(void);
/*=== Native unaligned read/write ===*/
MEM_STATIC U16 MEM_read16(const void* memPtr);
MEM_STATIC U32 MEM_read32(const void* memPtr);
MEM_STATIC U64 MEM_read64(const void* memPtr);
MEM_STATIC size_t MEM_readST(const void* memPtr);
MEM_STATIC void MEM_write16(void* memPtr, U16 value);
MEM_STATIC void MEM_write32(void* memPtr, U32 value);
MEM_STATIC void MEM_write64(void* memPtr, U64 value);
/*=== Little endian unaligned read/write ===*/
MEM_STATIC U16 MEM_readLE16(const void* memPtr);
MEM_STATIC U32 MEM_readLE24(const void* memPtr);
MEM_STATIC U32 MEM_readLE32(const void* memPtr);
MEM_STATIC U64 MEM_readLE64(const void* memPtr);
MEM_STATIC size_t MEM_readLEST(const void* memPtr);
MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val);
MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val);
MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32);
MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64);
MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val);
/*=== Big endian unaligned read/write ===*/
MEM_STATIC U32 MEM_readBE32(const void* memPtr);
MEM_STATIC U64 MEM_readBE64(const void* memPtr);
MEM_STATIC size_t MEM_readBEST(const void* memPtr);
MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32);
MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64);
MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val);
/*=== Byteswap ===*/
MEM_STATIC U32 MEM_swap32(U32 in);
MEM_STATIC U64 MEM_swap64(U64 in);
MEM_STATIC size_t MEM_swapST(size_t in);
/*-**************************************************************
* Memory I/O Implementation
*****************************************************************/
MEM_STATIC unsigned MEM_32bits(void)
{
return sizeof(size_t) == 4;
}
MEM_STATIC unsigned MEM_64bits(void)
{
return sizeof(size_t) == 8;
}
#if defined(__LITTLE_ENDIAN)
#define MEM_LITTLE_ENDIAN 1
#else
#define MEM_LITTLE_ENDIAN 0
#endif
MEM_STATIC unsigned MEM_isLittleEndian(void)
{
return MEM_LITTLE_ENDIAN;
}
MEM_STATIC U16 MEM_read16(const void *memPtr)
{
return get_unaligned((const U16 *)memPtr);
}
MEM_STATIC U32 MEM_read32(const void *memPtr)
{
return get_unaligned((const U32 *)memPtr);
}
MEM_STATIC U64 MEM_read64(const void *memPtr)
{
return get_unaligned((const U64 *)memPtr);
}
MEM_STATIC size_t MEM_readST(const void *memPtr)
{
return get_unaligned((const size_t *)memPtr);
}
MEM_STATIC void MEM_write16(void *memPtr, U16 value)
{
put_unaligned(value, (U16 *)memPtr);
}
MEM_STATIC void MEM_write32(void *memPtr, U32 value)
{
put_unaligned(value, (U32 *)memPtr);
}
MEM_STATIC void MEM_write64(void *memPtr, U64 value)
{
put_unaligned(value, (U64 *)memPtr);
}
/*=== Little endian r/w ===*/
MEM_STATIC U16 MEM_readLE16(const void *memPtr)
{
return get_unaligned_le16(memPtr);
}
MEM_STATIC void MEM_writeLE16(void *memPtr, U16 val)
{
put_unaligned_le16(val, memPtr);
}
MEM_STATIC U32 MEM_readLE24(const void *memPtr)
{
return MEM_readLE16(memPtr) + (((const BYTE *)memPtr)[2] << 16);
}
MEM_STATIC void MEM_writeLE24(void *memPtr, U32 val)
{
MEM_writeLE16(memPtr, (U16)val);
((BYTE *)memPtr)[2] = (BYTE)(val >> 16);
}
MEM_STATIC U32 MEM_readLE32(const void *memPtr)
{
return get_unaligned_le32(memPtr);
}
MEM_STATIC void MEM_writeLE32(void *memPtr, U32 val32)
{
put_unaligned_le32(val32, memPtr);
}
MEM_STATIC U64 MEM_readLE64(const void *memPtr)
{
return get_unaligned_le64(memPtr);
}
MEM_STATIC void MEM_writeLE64(void *memPtr, U64 val64)
{
put_unaligned_le64(val64, memPtr);
}
MEM_STATIC size_t MEM_readLEST(const void *memPtr)
{
if (MEM_32bits())
return (size_t)MEM_readLE32(memPtr);
else
return (size_t)MEM_readLE64(memPtr);
}
MEM_STATIC void MEM_writeLEST(void *memPtr, size_t val)
{
if (MEM_32bits())
MEM_writeLE32(memPtr, (U32)val);
else
MEM_writeLE64(memPtr, (U64)val);
}
/*=== Big endian r/w ===*/
MEM_STATIC U32 MEM_readBE32(const void *memPtr)
{
return get_unaligned_be32(memPtr);
}
MEM_STATIC void MEM_writeBE32(void *memPtr, U32 val32)
{
put_unaligned_be32(val32, memPtr);
}
MEM_STATIC U64 MEM_readBE64(const void *memPtr)
{
return get_unaligned_be64(memPtr);
}
MEM_STATIC void MEM_writeBE64(void *memPtr, U64 val64)
{
put_unaligned_be64(val64, memPtr);
}
MEM_STATIC size_t MEM_readBEST(const void *memPtr)
{
if (MEM_32bits())
return (size_t)MEM_readBE32(memPtr);
else
return (size_t)MEM_readBE64(memPtr);
}
MEM_STATIC void MEM_writeBEST(void *memPtr, size_t val)
{
if (MEM_32bits())
MEM_writeBE32(memPtr, (U32)val);
else
MEM_writeBE64(memPtr, (U64)val);
}
MEM_STATIC U32 MEM_swap32(U32 in)
{
return swab32(in);
}
MEM_STATIC U64 MEM_swap64(U64 in)
{
return swab64(in);
}
MEM_STATIC size_t MEM_swapST(size_t in)
{
if (MEM_32bits())
return (size_t)MEM_swap32((U32)in);
else
return (size_t)MEM_swap64((U64)in);
}
#endif /* MEM_H_MODULE */

View File

@@ -0,0 +1,39 @@
# !/bin/sh
set -e
# Benchmarks run on a Ubuntu 14.04 VM with 2 cores and 4 GiB of RAM.
# The VM is running on a Macbook Pro with a 3.1 GHz Intel Core i7 processor and
# 16 GB of RAM and an SSD.
# $BENCHMARK_DIR is generated with the following commands, from the Ubuntu image
# ubuntu-16.10-desktop-amd64.iso.
# > mkdir mnt
# > sudo mount -o loop ubuntu-16.10-desktop-amd64.iso mnt
# > cp mnt/casper/filesystem.squashfs .
# > sudo unsquashfs filesystem.squashfs
# $HOME is on a ext4 filesystem
BENCHMARK_DIR="$HOME/squashfs-root/"
BENCHMARK_FS="$HOME/filesystem.squashfs"
# Normalize the environment
sudo rm -f $BENCHMARK_FS 2> /dev/null > /dev/null || true
sudo umount /mnt/squashfs 2> /dev/null > /dev/null || true
# Run the benchmark
echo "Compression"
echo "sudo mksquashfs $BENCHMARK_DIR $BENCHMARK_FS $@"
time sudo mksquashfs $BENCHMARK_DIR $BENCHMARK_FS $@ 2> /dev/null > /dev/null
echo "Approximate compression ratio"
printf "%d / %d\n" \
$(sudo du -sx --block-size=1 $BENCHMARK_DIR | cut -f1) \
$(sudo du -sx --block-size=1 $BENCHMARK_FS | cut -f1);
# Mount the filesystem
sudo mount -t squashfs $BENCHMARK_FS /mnt/squashfs
echo "Decompression"
time sudo tar -c /mnt/squashfs 2> /dev/null | wc -c > /dev/null
sudo umount /mnt/squashfs

View File

@@ -0,0 +1,23 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef LINUX_COMPILER_H
#define LINUX_COMPILER_H
#ifndef inline
#define inline __inline __attribute__((unused))
#endif
#ifndef noinline
#define noinline __attribute__((noinline))
#endif
#define fallthrough __attribute__((__fallthrough__))
#endif

View File

@@ -0,0 +1,15 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef LINUX_ERRNO_H
#define LINUX_ERRNO_H
#define EINVAL 22
#endif

View File

@@ -0,0 +1,19 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef LINUX_KERNEL_H
#define LINUX_KERNEL_H
#define WARN_ON(x)
#define PTR_ALIGN(p, a) (typeof(p))ALIGN((unsigned long long)(p), (a))
#define ALIGN(x, a) ALIGN_MASK((x), (a) - 1)
#define ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
#endif

View File

@@ -0,0 +1,15 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef LINUX_LIMITS_H
#define LINUX_LIMITS_H
#include <limits.h>
#endif

View File

@@ -0,0 +1,15 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef LINUX_MATH64_H
#define LINUX_MATH64_H
#define div_u64(dividend, divisor) ((dividend) / (divisor))
#endif

View File

@@ -0,0 +1,20 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef LINUX_MODULE_H
#define LINUX_MODULE_H
#define EXPORT_SYMBOL(symbol) \
void* __##symbol = symbol
#define EXPORT_SYMBOL_GPL(symbol) \
void* __##symbol = symbol
#define MODULE_LICENSE(license)
#define MODULE_DESCRIPTION(description)
#endif

View File

@@ -0,0 +1,15 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef LINUX_PRINTK_H
#define LINUX_PRINTK_H
#define pr_debug(...)
#endif

View File

@@ -0,0 +1,15 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef LINUX_STDDEF_H
#define LINUX_STDDEF_H
#include <stddef.h>
#endif

View File

@@ -0,0 +1,16 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef LINUX_SWAB_H
#define LINUX_SWAB_H
#define swab32(x) __builtin_bswap32((x))
#define swab64(x) __builtin_bswap64((x))
#endif

View File

@@ -0,0 +1,16 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef LINUX_TYPES_H
#define LINUX_TYPES_H
#include <stddef.h>
#include <stdint.h>
#endif

View File

@@ -0,0 +1,187 @@
#ifndef ASM_UNALIGNED_H
#define ASM_UNALIGNED_H
#include <assert.h>
#include <linux/types.h>
#ifndef __LITTLE_ENDIAN
# if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN__)
# define __LITTLE_ENDIAN 1
# endif
#endif
#ifdef __LITTLE_ENDIAN
# define _IS_LITTLE_ENDIAN 1
#else
# define _IS_LITTLE_ENDIAN 0
#endif
static unsigned _isLittleEndian(void)
{
const union { uint32_t u; uint8_t c[4]; } one = { 1 };
assert(_IS_LITTLE_ENDIAN == one.c[0]);
(void)one;
return _IS_LITTLE_ENDIAN;
}
static uint16_t _swap16(uint16_t in)
{
return ((in & 0xF) << 8) + ((in & 0xF0) >> 8);
}
static uint32_t _swap32(uint32_t in)
{
return __builtin_bswap32(in);
}
static uint64_t _swap64(uint64_t in)
{
return __builtin_bswap64(in);
}
/* Little endian */
static uint16_t get_unaligned_le16(const void* memPtr)
{
uint16_t val;
__builtin_memcpy(&val, memPtr, sizeof(val));
if (!_isLittleEndian()) _swap16(val);
return val;
}
static uint32_t get_unaligned_le32(const void* memPtr)
{
uint32_t val;
__builtin_memcpy(&val, memPtr, sizeof(val));
if (!_isLittleEndian()) _swap32(val);
return val;
}
static uint64_t get_unaligned_le64(const void* memPtr)
{
uint64_t val;
__builtin_memcpy(&val, memPtr, sizeof(val));
if (!_isLittleEndian()) _swap64(val);
return val;
}
static void put_unaligned_le16(uint16_t value, void* memPtr)
{
if (!_isLittleEndian()) value = _swap16(value);
__builtin_memcpy(memPtr, &value, sizeof(value));
}
static void put_unaligned_le32(uint32_t value, void* memPtr)
{
if (!_isLittleEndian()) value = _swap32(value);
__builtin_memcpy(memPtr, &value, sizeof(value));
}
static void put_unaligned_le64(uint64_t value, void* memPtr)
{
if (!_isLittleEndian()) value = _swap64(value);
__builtin_memcpy(memPtr, &value, sizeof(value));
}
/* big endian */
static uint32_t get_unaligned_be32(const void* memPtr)
{
uint32_t val;
__builtin_memcpy(&val, memPtr, sizeof(val));
if (_isLittleEndian()) _swap32(val);
return val;
}
static uint64_t get_unaligned_be64(const void* memPtr)
{
uint64_t val;
__builtin_memcpy(&val, memPtr, sizeof(val));
if (_isLittleEndian()) _swap64(val);
return val;
}
static void put_unaligned_be32(uint32_t value, void* memPtr)
{
if (_isLittleEndian()) value = _swap32(value);
__builtin_memcpy(memPtr, &value, sizeof(value));
}
static void put_unaligned_be64(uint64_t value, void* memPtr)
{
if (_isLittleEndian()) value = _swap64(value);
__builtin_memcpy(memPtr, &value, sizeof(value));
}
/* generic */
extern void __bad_unaligned_access_size(void);
#define __get_unaligned_le(ptr) ((typeof(*(ptr)))({ \
__builtin_choose_expr(sizeof(*(ptr)) == 1, *(ptr), \
__builtin_choose_expr(sizeof(*(ptr)) == 2, get_unaligned_le16((ptr)), \
__builtin_choose_expr(sizeof(*(ptr)) == 4, get_unaligned_le32((ptr)), \
__builtin_choose_expr(sizeof(*(ptr)) == 8, get_unaligned_le64((ptr)), \
__bad_unaligned_access_size())))); \
}))
#define __get_unaligned_be(ptr) ((typeof(*(ptr)))({ \
__builtin_choose_expr(sizeof(*(ptr)) == 1, *(ptr), \
__builtin_choose_expr(sizeof(*(ptr)) == 2, get_unaligned_be16((ptr)), \
__builtin_choose_expr(sizeof(*(ptr)) == 4, get_unaligned_be32((ptr)), \
__builtin_choose_expr(sizeof(*(ptr)) == 8, get_unaligned_be64((ptr)), \
__bad_unaligned_access_size())))); \
}))
#define __put_unaligned_le(val, ptr) \
({ \
void *__gu_p = (ptr); \
switch (sizeof(*(ptr))) { \
case 1: \
*(uint8_t *)__gu_p = (uint8_t)(val); \
break; \
case 2: \
put_unaligned_le16((uint16_t)(val), __gu_p); \
break; \
case 4: \
put_unaligned_le32((uint32_t)(val), __gu_p); \
break; \
case 8: \
put_unaligned_le64((uint64_t)(val), __gu_p); \
break; \
default: \
__bad_unaligned_access_size(); \
break; \
} \
(void)0; \
})
#define __put_unaligned_be(val, ptr) \
({ \
void *__gu_p = (ptr); \
switch (sizeof(*(ptr))) { \
case 1: \
*(uint8_t *)__gu_p = (uint8_t)(val); \
break; \
case 2: \
put_unaligned_be16((uint16_t)(val), __gu_p); \
break; \
case 4: \
put_unaligned_be32((uint32_t)(val), __gu_p); \
break; \
case 8: \
put_unaligned_be64((uint64_t)(val), __gu_p); \
break; \
default: \
__bad_unaligned_access_size(); \
break; \
} \
(void)0; \
})
#if _IS_LITTLE_ENDIAN
# define get_unaligned __get_unaligned_le
# define put_unaligned __put_unaligned_le
#else
# define get_unaligned __get_unaligned_be
# define put_unaligned __put_unaligned_be
#endif
#endif // ASM_UNALIGNED_H

View File

@@ -0,0 +1,745 @@
/*
* xxHash - Extremely Fast Hash algorithm
* Copyright (C) 2012-2016, Yann Collet.
*
* BSD 2-Clause License (https://opensource.org/licenses/bsd-license.php)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* This program is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License version 2 as published by the
* Free Software Foundation. This program is dual-licensed; you may select
* either version 2 of the GNU General Public License ("GPL") or BSD license
* ("BSD").
*
* You can contact the author at:
* - xxHash homepage: https://cyan4973.github.io/xxHash/
* - xxHash source repository: https://github.com/Cyan4973/xxHash
*/
/*
* Notice extracted from xxHash homepage:
*
* xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
* It also successfully passes all tests from the SMHasher suite.
*
* Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2
* Duo @3GHz)
*
* Name Speed Q.Score Author
* xxHash 5.4 GB/s 10
* CrapWow 3.2 GB/s 2 Andrew
* MumurHash 3a 2.7 GB/s 10 Austin Appleby
* SpookyHash 2.0 GB/s 10 Bob Jenkins
* SBox 1.4 GB/s 9 Bret Mulvey
* Lookup3 1.2 GB/s 9 Bob Jenkins
* SuperFastHash 1.2 GB/s 1 Paul Hsieh
* CityHash64 1.05 GB/s 10 Pike & Alakuijala
* FNV 0.55 GB/s 5 Fowler, Noll, Vo
* CRC32 0.43 GB/s 9
* MD5-32 0.33 GB/s 10 Ronald L. Rivest
* SHA1-32 0.28 GB/s 10
*
* Q.Score is a measure of quality of the hash function.
* It depends on successfully passing SMHasher test set.
* 10 is a perfect score.
*
* A 64-bits version, named xxh64 offers much better speed,
* but for 64-bits applications only.
* Name Speed on 64 bits Speed on 32 bits
* xxh64 13.8 GB/s 1.9 GB/s
* xxh32 6.8 GB/s 6.0 GB/s
*/
#ifndef XXHASH_H
#define XXHASH_H
#include <linux/types.h>
#define XXH_API static inline __attribute__((unused))
/*-****************************
* Simple Hash Functions
*****************************/
/**
* xxh32() - calculate the 32-bit hash of the input with a given seed.
*
* @input: The data to hash.
* @length: The length of the data to hash.
* @seed: The seed can be used to alter the result predictably.
*
* Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s
*
* Return: The 32-bit hash of the data.
*/
XXH_API uint32_t xxh32(const void *input, size_t length, uint32_t seed);
/**
* xxh64() - calculate the 64-bit hash of the input with a given seed.
*
* @input: The data to hash.
* @length: The length of the data to hash.
* @seed: The seed can be used to alter the result predictably.
*
* This function runs 2x faster on 64-bit systems, but slower on 32-bit systems.
*
* Return: The 64-bit hash of the data.
*/
XXH_API uint64_t xxh64(const void *input, size_t length, uint64_t seed);
/**
* xxhash() - calculate wordsize hash of the input with a given seed
* @input: The data to hash.
* @length: The length of the data to hash.
* @seed: The seed can be used to alter the result predictably.
*
* If the hash does not need to be comparable between machines with
* different word sizes, this function will call whichever of xxh32()
* or xxh64() is faster.
*
* Return: wordsize hash of the data.
*/
static inline unsigned long xxhash(const void *input, size_t length,
uint64_t seed)
{
if (sizeof(size_t) == 8)
return xxh64(input, length, seed);
else
return xxh32(input, length, seed);
}
/*-****************************
* Streaming Hash Functions
*****************************/
/*
* These definitions are only meant to allow allocation of XXH state
* statically, on stack, or in a struct for example.
* Do not use members directly.
*/
/**
* struct xxh32_state - private xxh32 state, do not use members directly
*/
struct xxh32_state {
uint32_t total_len_32;
uint32_t large_len;
uint32_t v1;
uint32_t v2;
uint32_t v3;
uint32_t v4;
uint32_t mem32[4];
uint32_t memsize;
};
/**
* struct xxh32_state - private xxh64 state, do not use members directly
*/
struct xxh64_state {
uint64_t total_len;
uint64_t v1;
uint64_t v2;
uint64_t v3;
uint64_t v4;
uint64_t mem64[4];
uint32_t memsize;
};
/**
* xxh32_reset() - reset the xxh32 state to start a new hashing operation
*
* @state: The xxh32 state to reset.
* @seed: Initialize the hash state with this seed.
*
* Call this function on any xxh32_state to prepare for a new hashing operation.
*/
XXH_API void xxh32_reset(struct xxh32_state *state, uint32_t seed);
/**
* xxh32_update() - hash the data given and update the xxh32 state
*
* @state: The xxh32 state to update.
* @input: The data to hash.
* @length: The length of the data to hash.
*
* After calling xxh32_reset() call xxh32_update() as many times as necessary.
*
* Return: Zero on success, otherwise an error code.
*/
XXH_API int xxh32_update(struct xxh32_state *state, const void *input, size_t length);
/**
* xxh32_digest() - produce the current xxh32 hash
*
* @state: Produce the current xxh32 hash of this state.
*
* A hash value can be produced at any time. It is still possible to continue
* inserting input into the hash state after a call to xxh32_digest(), and
* generate new hashes later on, by calling xxh32_digest() again.
*
* Return: The xxh32 hash stored in the state.
*/
XXH_API uint32_t xxh32_digest(const struct xxh32_state *state);
/**
* xxh64_reset() - reset the xxh64 state to start a new hashing operation
*
* @state: The xxh64 state to reset.
* @seed: Initialize the hash state with this seed.
*/
XXH_API void xxh64_reset(struct xxh64_state *state, uint64_t seed);
/**
* xxh64_update() - hash the data given and update the xxh64 state
* @state: The xxh64 state to update.
* @input: The data to hash.
* @length: The length of the data to hash.
*
* After calling xxh64_reset() call xxh64_update() as many times as necessary.
*
* Return: Zero on success, otherwise an error code.
*/
XXH_API int xxh64_update(struct xxh64_state *state, const void *input, size_t length);
/**
* xxh64_digest() - produce the current xxh64 hash
*
* @state: Produce the current xxh64 hash of this state.
*
* A hash value can be produced at any time. It is still possible to continue
* inserting input into the hash state after a call to xxh64_digest(), and
* generate new hashes later on, by calling xxh64_digest() again.
*
* Return: The xxh64 hash stored in the state.
*/
XXH_API uint64_t xxh64_digest(const struct xxh64_state *state);
/*-**************************
* Utils
***************************/
/**
* xxh32_copy_state() - copy the source state into the destination state
*
* @src: The source xxh32 state.
* @dst: The destination xxh32 state.
*/
XXH_API void xxh32_copy_state(struct xxh32_state *dst, const struct xxh32_state *src);
/**
* xxh64_copy_state() - copy the source state into the destination state
*
* @src: The source xxh64 state.
* @dst: The destination xxh64 state.
*/
XXH_API void xxh64_copy_state(struct xxh64_state *dst, const struct xxh64_state *src);
/*
* xxHash - Extremely Fast Hash algorithm
* Copyright (C) 2012-2016, Yann Collet.
*
* BSD 2-Clause License (https://opensource.org/licenses/bsd-license.php)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* This program is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License version 2 as published by the
* Free Software Foundation. This program is dual-licensed; you may select
* either version 2 of the GNU General Public License ("GPL") or BSD license
* ("BSD").
*
* You can contact the author at:
* - xxHash homepage: https://cyan4973.github.io/xxHash/
* - xxHash source repository: https://github.com/Cyan4973/xxHash
*/
#include <linux/unaligned.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/xxhash.h>
/*-*************************************
* Macros
**************************************/
#define xxh_rotl32(x, r) ((x << r) | (x >> (32 - r)))
#define xxh_rotl64(x, r) ((x << r) | (x >> (64 - r)))
#ifdef __LITTLE_ENDIAN
# define XXH_CPU_LITTLE_ENDIAN 1
#else
# define XXH_CPU_LITTLE_ENDIAN 0
#endif
/*-*************************************
* Constants
**************************************/
static const uint32_t PRIME32_1 = 2654435761U;
static const uint32_t PRIME32_2 = 2246822519U;
static const uint32_t PRIME32_3 = 3266489917U;
static const uint32_t PRIME32_4 = 668265263U;
static const uint32_t PRIME32_5 = 374761393U;
static const uint64_t PRIME64_1 = 11400714785074694791ULL;
static const uint64_t PRIME64_2 = 14029467366897019727ULL;
static const uint64_t PRIME64_3 = 1609587929392839161ULL;
static const uint64_t PRIME64_4 = 9650029242287828579ULL;
static const uint64_t PRIME64_5 = 2870177450012600261ULL;
/*-**************************
* Utils
***************************/
XXH_API void xxh32_copy_state(struct xxh32_state *dst, const struct xxh32_state *src)
{
__builtin_memcpy(dst, src, sizeof(*dst));
}
XXH_API void xxh64_copy_state(struct xxh64_state *dst, const struct xxh64_state *src)
{
__builtin_memcpy(dst, src, sizeof(*dst));
}
/*-***************************
* Simple Hash Functions
****************************/
static uint32_t xxh32_round(uint32_t seed, const uint32_t input)
{
seed += input * PRIME32_2;
seed = xxh_rotl32(seed, 13);
seed *= PRIME32_1;
return seed;
}
XXH_API uint32_t xxh32(const void *input, const size_t len, const uint32_t seed)
{
const uint8_t *p = (const uint8_t *)input;
const uint8_t *b_end = p + len;
uint32_t h32;
if (len >= 16) {
const uint8_t *const limit = b_end - 16;
uint32_t v1 = seed + PRIME32_1 + PRIME32_2;
uint32_t v2 = seed + PRIME32_2;
uint32_t v3 = seed + 0;
uint32_t v4 = seed - PRIME32_1;
do {
v1 = xxh32_round(v1, get_unaligned_le32(p));
p += 4;
v2 = xxh32_round(v2, get_unaligned_le32(p));
p += 4;
v3 = xxh32_round(v3, get_unaligned_le32(p));
p += 4;
v4 = xxh32_round(v4, get_unaligned_le32(p));
p += 4;
} while (p <= limit);
h32 = xxh_rotl32(v1, 1) + xxh_rotl32(v2, 7) +
xxh_rotl32(v3, 12) + xxh_rotl32(v4, 18);
} else {
h32 = seed + PRIME32_5;
}
h32 += (uint32_t)len;
while (p + 4 <= b_end) {
h32 += get_unaligned_le32(p) * PRIME32_3;
h32 = xxh_rotl32(h32, 17) * PRIME32_4;
p += 4;
}
while (p < b_end) {
h32 += (*p) * PRIME32_5;
h32 = xxh_rotl32(h32, 11) * PRIME32_1;
p++;
}
h32 ^= h32 >> 15;
h32 *= PRIME32_2;
h32 ^= h32 >> 13;
h32 *= PRIME32_3;
h32 ^= h32 >> 16;
return h32;
}
static uint64_t xxh64_round(uint64_t acc, const uint64_t input)
{
acc += input * PRIME64_2;
acc = xxh_rotl64(acc, 31);
acc *= PRIME64_1;
return acc;
}
static uint64_t xxh64_merge_round(uint64_t acc, uint64_t val)
{
val = xxh64_round(0, val);
acc ^= val;
acc = acc * PRIME64_1 + PRIME64_4;
return acc;
}
XXH_API uint64_t xxh64(const void *input, const size_t len, const uint64_t seed)
{
const uint8_t *p = (const uint8_t *)input;
const uint8_t *const b_end = p + len;
uint64_t h64;
if (len >= 32) {
const uint8_t *const limit = b_end - 32;
uint64_t v1 = seed + PRIME64_1 + PRIME64_2;
uint64_t v2 = seed + PRIME64_2;
uint64_t v3 = seed + 0;
uint64_t v4 = seed - PRIME64_1;
do {
v1 = xxh64_round(v1, get_unaligned_le64(p));
p += 8;
v2 = xxh64_round(v2, get_unaligned_le64(p));
p += 8;
v3 = xxh64_round(v3, get_unaligned_le64(p));
p += 8;
v4 = xxh64_round(v4, get_unaligned_le64(p));
p += 8;
} while (p <= limit);
h64 = xxh_rotl64(v1, 1) + xxh_rotl64(v2, 7) +
xxh_rotl64(v3, 12) + xxh_rotl64(v4, 18);
h64 = xxh64_merge_round(h64, v1);
h64 = xxh64_merge_round(h64, v2);
h64 = xxh64_merge_round(h64, v3);
h64 = xxh64_merge_round(h64, v4);
} else {
h64 = seed + PRIME64_5;
}
h64 += (uint64_t)len;
while (p + 8 <= b_end) {
const uint64_t k1 = xxh64_round(0, get_unaligned_le64(p));
h64 ^= k1;
h64 = xxh_rotl64(h64, 27) * PRIME64_1 + PRIME64_4;
p += 8;
}
if (p + 4 <= b_end) {
h64 ^= (uint64_t)(get_unaligned_le32(p)) * PRIME64_1;
h64 = xxh_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
p += 4;
}
while (p < b_end) {
h64 ^= (*p) * PRIME64_5;
h64 = xxh_rotl64(h64, 11) * PRIME64_1;
p++;
}
h64 ^= h64 >> 33;
h64 *= PRIME64_2;
h64 ^= h64 >> 29;
h64 *= PRIME64_3;
h64 ^= h64 >> 32;
return h64;
}
/*-**************************************************
* Advanced Hash Functions
***************************************************/
XXH_API void xxh32_reset(struct xxh32_state *statePtr, const uint32_t seed)
{
/* use a local state for memcpy() to avoid strict-aliasing warnings */
struct xxh32_state state;
__builtin_memset(&state, 0, sizeof(state));
state.v1 = seed + PRIME32_1 + PRIME32_2;
state.v2 = seed + PRIME32_2;
state.v3 = seed + 0;
state.v4 = seed - PRIME32_1;
__builtin_memcpy(statePtr, &state, sizeof(state));
}
XXH_API void xxh64_reset(struct xxh64_state *statePtr, const uint64_t seed)
{
/* use a local state for memcpy() to avoid strict-aliasing warnings */
struct xxh64_state state;
__builtin_memset(&state, 0, sizeof(state));
state.v1 = seed + PRIME64_1 + PRIME64_2;
state.v2 = seed + PRIME64_2;
state.v3 = seed + 0;
state.v4 = seed - PRIME64_1;
__builtin_memcpy(statePtr, &state, sizeof(state));
}
XXH_API int xxh32_update(struct xxh32_state *state, const void *input, const size_t len)
{
const uint8_t *p = (const uint8_t *)input;
const uint8_t *const b_end = p + len;
if (input == NULL)
return -EINVAL;
state->total_len_32 += (uint32_t)len;
state->large_len |= (len >= 16) | (state->total_len_32 >= 16);
if (state->memsize + len < 16) { /* fill in tmp buffer */
__builtin_memcpy((uint8_t *)(state->mem32) + state->memsize, input, len);
state->memsize += (uint32_t)len;
return 0;
}
if (state->memsize) { /* some data left from previous update */
const uint32_t *p32 = state->mem32;
__builtin_memcpy((uint8_t *)(state->mem32) + state->memsize, input,
16 - state->memsize);
state->v1 = xxh32_round(state->v1, get_unaligned_le32(p32));
p32++;
state->v2 = xxh32_round(state->v2, get_unaligned_le32(p32));
p32++;
state->v3 = xxh32_round(state->v3, get_unaligned_le32(p32));
p32++;
state->v4 = xxh32_round(state->v4, get_unaligned_le32(p32));
p32++;
p += 16-state->memsize;
state->memsize = 0;
}
if (p <= b_end - 16) {
const uint8_t *const limit = b_end - 16;
uint32_t v1 = state->v1;
uint32_t v2 = state->v2;
uint32_t v3 = state->v3;
uint32_t v4 = state->v4;
do {
v1 = xxh32_round(v1, get_unaligned_le32(p));
p += 4;
v2 = xxh32_round(v2, get_unaligned_le32(p));
p += 4;
v3 = xxh32_round(v3, get_unaligned_le32(p));
p += 4;
v4 = xxh32_round(v4, get_unaligned_le32(p));
p += 4;
} while (p <= limit);
state->v1 = v1;
state->v2 = v2;
state->v3 = v3;
state->v4 = v4;
}
if (p < b_end) {
__builtin_memcpy(state->mem32, p, (size_t)(b_end-p));
state->memsize = (uint32_t)(b_end-p);
}
return 0;
}
XXH_API uint32_t xxh32_digest(const struct xxh32_state *state)
{
const uint8_t *p = (const uint8_t *)state->mem32;
const uint8_t *const b_end = (const uint8_t *)(state->mem32) +
state->memsize;
uint32_t h32;
if (state->large_len) {
h32 = xxh_rotl32(state->v1, 1) + xxh_rotl32(state->v2, 7) +
xxh_rotl32(state->v3, 12) + xxh_rotl32(state->v4, 18);
} else {
h32 = state->v3 /* == seed */ + PRIME32_5;
}
h32 += state->total_len_32;
while (p + 4 <= b_end) {
h32 += get_unaligned_le32(p) * PRIME32_3;
h32 = xxh_rotl32(h32, 17) * PRIME32_4;
p += 4;
}
while (p < b_end) {
h32 += (*p) * PRIME32_5;
h32 = xxh_rotl32(h32, 11) * PRIME32_1;
p++;
}
h32 ^= h32 >> 15;
h32 *= PRIME32_2;
h32 ^= h32 >> 13;
h32 *= PRIME32_3;
h32 ^= h32 >> 16;
return h32;
}
XXH_API int xxh64_update(struct xxh64_state *state, const void *input, const size_t len)
{
const uint8_t *p = (const uint8_t *)input;
const uint8_t *const b_end = p + len;
if (input == NULL)
return -EINVAL;
state->total_len += len;
if (state->memsize + len < 32) { /* fill in tmp buffer */
__builtin_memcpy(((uint8_t *)state->mem64) + state->memsize, input, len);
state->memsize += (uint32_t)len;
return 0;
}
if (state->memsize) { /* tmp buffer is full */
uint64_t *p64 = state->mem64;
__builtin_memcpy(((uint8_t *)p64) + state->memsize, input,
32 - state->memsize);
state->v1 = xxh64_round(state->v1, get_unaligned_le64(p64));
p64++;
state->v2 = xxh64_round(state->v2, get_unaligned_le64(p64));
p64++;
state->v3 = xxh64_round(state->v3, get_unaligned_le64(p64));
p64++;
state->v4 = xxh64_round(state->v4, get_unaligned_le64(p64));
p += 32 - state->memsize;
state->memsize = 0;
}
if (p + 32 <= b_end) {
const uint8_t *const limit = b_end - 32;
uint64_t v1 = state->v1;
uint64_t v2 = state->v2;
uint64_t v3 = state->v3;
uint64_t v4 = state->v4;
do {
v1 = xxh64_round(v1, get_unaligned_le64(p));
p += 8;
v2 = xxh64_round(v2, get_unaligned_le64(p));
p += 8;
v3 = xxh64_round(v3, get_unaligned_le64(p));
p += 8;
v4 = xxh64_round(v4, get_unaligned_le64(p));
p += 8;
} while (p <= limit);
state->v1 = v1;
state->v2 = v2;
state->v3 = v3;
state->v4 = v4;
}
if (p < b_end) {
__builtin_memcpy(state->mem64, p, (size_t)(b_end-p));
state->memsize = (uint32_t)(b_end - p);
}
return 0;
}
XXH_API uint64_t xxh64_digest(const struct xxh64_state *state)
{
const uint8_t *p = (const uint8_t *)state->mem64;
const uint8_t *const b_end = (const uint8_t *)state->mem64 +
state->memsize;
uint64_t h64;
if (state->total_len >= 32) {
const uint64_t v1 = state->v1;
const uint64_t v2 = state->v2;
const uint64_t v3 = state->v3;
const uint64_t v4 = state->v4;
h64 = xxh_rotl64(v1, 1) + xxh_rotl64(v2, 7) +
xxh_rotl64(v3, 12) + xxh_rotl64(v4, 18);
h64 = xxh64_merge_round(h64, v1);
h64 = xxh64_merge_round(h64, v2);
h64 = xxh64_merge_round(h64, v3);
h64 = xxh64_merge_round(h64, v4);
} else {
h64 = state->v3 + PRIME64_5;
}
h64 += (uint64_t)state->total_len;
while (p + 8 <= b_end) {
const uint64_t k1 = xxh64_round(0, get_unaligned_le64(p));
h64 ^= k1;
h64 = xxh_rotl64(h64, 27) * PRIME64_1 + PRIME64_4;
p += 8;
}
if (p + 4 <= b_end) {
h64 ^= (uint64_t)(get_unaligned_le32(p)) * PRIME64_1;
h64 = xxh_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
p += 4;
}
while (p < b_end) {
h64 ^= (*p) * PRIME64_5;
h64 = xxh_rotl64(h64, 11) * PRIME64_1;
p++;
}
h64 ^= h64 >> 33;
h64 *= PRIME64_2;
h64 ^= h64 >> 29;
h64 *= PRIME64_3;
h64 ^= h64 >> 32;
return h64;
}
#endif /* XXHASH_H */

View File

@@ -0,0 +1,44 @@
#!/usr/bin/env sh
set -e
SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
INCLUDE_DIR="$SCRIPT_DIR/../linux/include"
LIB_DIR="$SCRIPT_DIR/../linux/lib"
print() {
printf '%b' "${*}"
}
println() {
printf '%b\n' "${*}"
}
die() {
println "$@" 1>&2
exit 1
}
test_not_present() {
print "Testing that '$1' is not present... "
grep -r $1 "$INCLUDE_DIR" "$LIB_DIR" && die "Fail!"
println "Okay"
}
println "This test checks that the macro removal process worked as expected"
println "If this test fails, then freestanding.py wasn't able to remove one of these"
println "macros from the source code completely. You'll either need to rewrite the check"
println "or improve freestanding.py."
println ""
test_not_present "ZSTD_NO_INTRINSICS"
test_not_present "ZSTD_NO_UNUSED_FUNCTIONS"
test_not_present "ZSTD_LEGACY_SUPPORT"
test_not_present "STATIC_BMI2"
test_not_present "ZSTD_DLL_EXPORT"
test_not_present "ZSTD_DLL_IMPORT"
test_not_present "__ICCARM__"
test_not_present "_MSC_VER"
test_not_present "_WIN32"
test_not_present "__linux__"

View File

@@ -0,0 +1,52 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "decompress_sources.h"
#include <linux/zstd.h>
#define CONTROL(x) \
do { \
if (!(x)) { \
fprintf(stderr, "%s:%u: %s failed!\n", __FUNCTION__, __LINE__, #x); \
abort(); \
} \
} while (0)
static const char kEmptyZstdFrame[] = {
0x28, 0xb5, 0x2f, 0xfd, 0x24, 0x00, 0x01, 0x00, 0x00, 0x99, 0xe9, 0xd8, 0x51
};
static void test_decompress_unzstd(void) {
fprintf(stderr, "Testing decompress unzstd... ");
{
size_t const wkspSize = zstd_dctx_workspace_bound();
void* wksp = malloc(wkspSize);
ZSTD_DCtx* dctx = zstd_init_dctx(wksp, wkspSize);
CONTROL(wksp != NULL);
CONTROL(dctx != NULL);
{
size_t const dSize = zstd_decompress_dctx(dctx, NULL, 0, kEmptyZstdFrame, sizeof(kEmptyZstdFrame));
CONTROL(!zstd_is_error(dSize));
CONTROL(dSize == 0);
}
free(wksp);
}
fprintf(stderr, "Ok\n");
}
int main(void) {
test_decompress_unzstd();
return 0;
}

View File

@@ -0,0 +1,229 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <linux/zstd.h>
#define CONTROL(x) \
do { \
if (!(x)) { \
fprintf(stderr, "%s:%u: %s failed!\n", __FUNCTION__, __LINE__, #x); \
abort(); \
} \
} while (0)
typedef struct {
char *data;
char *data2;
size_t dataSize;
char *comp;
size_t compSize;
} test_data_t;
static test_data_t create_test_data(void) {
test_data_t data;
data.dataSize = 128 * 1024;
data.data = (char*)malloc(data.dataSize);
CONTROL(data.data != NULL);
data.data2 = (char*)malloc(data.dataSize);
CONTROL(data.data2 != NULL);
data.compSize = zstd_compress_bound(data.dataSize);
data.comp = (char*)malloc(data.compSize);
CONTROL(data.comp != NULL);
memset(data.data, 0, data.dataSize);
return data;
}
static void free_test_data(test_data_t const *data) {
free(data->data);
free(data->data2);
free(data->comp);
}
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))
static void test_btrfs(test_data_t const *data) {
size_t const size = MIN(data->dataSize, 128 * 1024);
fprintf(stderr, "testing btrfs use cases... ");
for (int level = -1; level < 16; ++level) {
zstd_parameters params = zstd_get_params(level, size);
size_t const workspaceSize =
MAX(zstd_cstream_workspace_bound(&params.cParams),
zstd_dstream_workspace_bound(size));
void *workspace = malloc(workspaceSize);
char const *ip = data->data;
char const *iend = ip + size;
char *op = data->comp;
char *oend = op + data->compSize;
CONTROL(params.cParams.windowLog <= 17);
CONTROL(workspace != NULL);
{
zstd_cstream *cctx = zstd_init_cstream(&params, size, workspace, workspaceSize);
zstd_out_buffer out = {NULL, 0, 0};
zstd_in_buffer in = {NULL, 0, 0};
CONTROL(cctx != NULL);
for (;;) {
if (in.pos == in.size) {
in.src = ip;
in.size = MIN(4096, iend - ip);
in.pos = 0;
ip += in.size;
}
if (out.pos == out.size) {
out.dst = op;
out.size = MIN(4096, oend - op);
out.pos = 0;
op += out.size;
}
if (ip != iend || in.pos < in.size) {
CONTROL(!zstd_is_error(zstd_compress_stream(cctx, &out, &in)));
} else {
size_t const ret = zstd_end_stream(cctx, &out);
CONTROL(!zstd_is_error(ret));
if (ret == 0) {
break;
}
}
}
op += out.pos;
}
ip = data->comp;
iend = op;
op = data->data2;
oend = op + size;
{
zstd_dstream *dctx = zstd_init_dstream(1ULL << params.cParams.windowLog, workspace, workspaceSize);
zstd_out_buffer out = {NULL, 0, 0};
zstd_in_buffer in = {NULL, 0, 0};
CONTROL(dctx != NULL);
for (;;) {
if (in.pos == in.size) {
in.src = ip;
in.size = MIN(4096, iend - ip);
in.pos = 0;
ip += in.size;
}
if (out.pos == out.size) {
out.dst = op;
out.size = MIN(4096, oend - op);
out.pos = 0;
op += out.size;
}
{
size_t const ret = zstd_decompress_stream(dctx, &out, &in);
CONTROL(!zstd_is_error(ret));
if (ret == 0) {
break;
}
}
}
}
CONTROL((size_t)(op - data->data2) == data->dataSize);
CONTROL(!memcmp(data->data, data->data2, data->dataSize));
free(workspace);
}
fprintf(stderr, "Ok\n");
}
static void test_decompress_unzstd(test_data_t const *data) {
size_t cSize;
fprintf(stderr, "Testing decompress unzstd... ");
{
zstd_parameters params = zstd_get_params(19, 0);
size_t const wkspSize = zstd_cctx_workspace_bound(&params.cParams);
void* wksp = malloc(wkspSize);
zstd_cctx* cctx = zstd_init_cctx(wksp, wkspSize);
CONTROL(wksp != NULL);
CONTROL(cctx != NULL);
cSize = zstd_compress_cctx(cctx, data->comp, data->compSize, data->data, data->dataSize, &params);
CONTROL(!zstd_is_error(cSize));
free(wksp);
}
{
size_t const wkspSize = zstd_dctx_workspace_bound();
void* wksp = malloc(wkspSize);
zstd_dctx* dctx = zstd_init_dctx(wksp, wkspSize);
CONTROL(wksp != NULL);
CONTROL(dctx != NULL);
{
size_t const dSize = zstd_decompress_dctx(dctx, data->data2, data->dataSize, data->comp, cSize);
CONTROL(!zstd_is_error(dSize));
CONTROL(dSize == data->dataSize);
}
CONTROL(!memcmp(data->data, data->data2, data->dataSize));
free(wksp);
}
fprintf(stderr, "Ok\n");
}
static void test_f2fs(void) {
fprintf(stderr, "testing f2fs uses... ");
CONTROL(zstd_min_clevel() < 0);
CONTROL(zstd_max_clevel() == 22);
fprintf(stderr, "Ok\n");
}
static char *g_stack = NULL;
static void __attribute__((noinline)) use(void *x) {
asm volatile("" : "+r"(x));
}
static void __attribute__((noinline)) fill_stack(void) {
memset(g_stack, 0x33, 8192);
}
static void __attribute__((noinline)) set_stack(void) {
char stack[8192];
g_stack = stack;
use(g_stack);
}
static void __attribute__((noinline)) check_stack(void) {
size_t cleanStack = 0;
while (cleanStack < 8192 && g_stack[cleanStack] == 0x33) {
++cleanStack;
}
{
size_t const stackSize = 8192 - cleanStack;
fprintf(stderr, "Maximum stack size: %zu\n", stackSize);
CONTROL(stackSize <= 2048 + 512);
}
}
static void test_stack_usage(test_data_t const *data) {
set_stack();
fill_stack();
test_f2fs();
test_btrfs(data);
test_decompress_unzstd(data);
check_stack();
}
int main(void) {
test_data_t data = create_test_data();
test_f2fs();
test_btrfs(&data);
test_decompress_unzstd(&data);
test_stack_usage(&data);
free_test_data(&data);
return 0;
}

View File

@@ -0,0 +1,29 @@
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#include <linux/module.h>
#include "common/huf.h"
#include "common/fse.h"
#include "common/zstd_internal.h"
// Export symbols shared by compress and decompress into a common module
#undef ZSTD_isError /* defined within zstd_internal.h */
EXPORT_SYMBOL_GPL(FSE_readNCount);
EXPORT_SYMBOL_GPL(HUF_readStats);
EXPORT_SYMBOL_GPL(HUF_readStats_wksp);
EXPORT_SYMBOL_GPL(ZSTD_isError);
EXPORT_SYMBOL_GPL(ZSTD_getErrorName);
EXPORT_SYMBOL_GPL(ZSTD_getErrorCode);
MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("Zstd Common");

View File

@@ -0,0 +1,237 @@
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/zstd.h>
#include "common/zstd_deps.h"
#include "common/zstd_internal.h"
#include "compress/zstd_compress_internal.h"
#define ZSTD_FORWARD_IF_ERR(ret) \
do { \
size_t const __ret = (ret); \
if (ZSTD_isError(__ret)) \
return __ret; \
} while (0)
static size_t zstd_cctx_init(zstd_cctx *cctx, const zstd_parameters *parameters,
unsigned long long pledged_src_size)
{
ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_reset(
cctx, ZSTD_reset_session_and_parameters));
ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setPledgedSrcSize(
cctx, pledged_src_size));
ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
cctx, ZSTD_c_windowLog, parameters->cParams.windowLog));
ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
cctx, ZSTD_c_hashLog, parameters->cParams.hashLog));
ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
cctx, ZSTD_c_chainLog, parameters->cParams.chainLog));
ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
cctx, ZSTD_c_searchLog, parameters->cParams.searchLog));
ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
cctx, ZSTD_c_minMatch, parameters->cParams.minMatch));
ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
cctx, ZSTD_c_targetLength, parameters->cParams.targetLength));
ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
cctx, ZSTD_c_strategy, parameters->cParams.strategy));
ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
cctx, ZSTD_c_contentSizeFlag, parameters->fParams.contentSizeFlag));
ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
cctx, ZSTD_c_checksumFlag, parameters->fParams.checksumFlag));
ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
cctx, ZSTD_c_dictIDFlag, !parameters->fParams.noDictIDFlag));
return 0;
}
int zstd_min_clevel(void)
{
return ZSTD_minCLevel();
}
EXPORT_SYMBOL(zstd_min_clevel);
int zstd_max_clevel(void)
{
return ZSTD_maxCLevel();
}
EXPORT_SYMBOL(zstd_max_clevel);
size_t zstd_compress_bound(size_t src_size)
{
return ZSTD_compressBound(src_size);
}
EXPORT_SYMBOL(zstd_compress_bound);
zstd_parameters zstd_get_params(int level,
unsigned long long estimated_src_size)
{
return ZSTD_getParams(level, estimated_src_size, 0);
}
EXPORT_SYMBOL(zstd_get_params);
size_t zstd_cctx_set_param(zstd_cctx *cctx, ZSTD_cParameter param, int value)
{
return ZSTD_CCtx_setParameter(cctx, param, value);
}
EXPORT_SYMBOL(zstd_cctx_set_param);
size_t zstd_cctx_workspace_bound(const zstd_compression_parameters *cparams)
{
return ZSTD_estimateCCtxSize_usingCParams(*cparams);
}
EXPORT_SYMBOL(zstd_cctx_workspace_bound);
// Used by zstd_cctx_workspace_bound_with_ext_seq_prod()
static size_t dummy_external_sequence_producer(
void *sequenceProducerState,
ZSTD_Sequence *outSeqs, size_t outSeqsCapacity,
const void *src, size_t srcSize,
const void *dict, size_t dictSize,
int compressionLevel,
size_t windowSize)
{
(void)sequenceProducerState;
(void)outSeqs; (void)outSeqsCapacity;
(void)src; (void)srcSize;
(void)dict; (void)dictSize;
(void)compressionLevel;
(void)windowSize;
return ZSTD_SEQUENCE_PRODUCER_ERROR;
}
static void init_cctx_params_from_compress_params(
ZSTD_CCtx_params *cctx_params,
const zstd_compression_parameters *compress_params)
{
ZSTD_parameters zstd_params;
memset(&zstd_params, 0, sizeof(zstd_params));
zstd_params.cParams = *compress_params;
ZSTD_CCtxParams_init_advanced(cctx_params, zstd_params);
}
size_t zstd_cctx_workspace_bound_with_ext_seq_prod(const zstd_compression_parameters *compress_params)
{
ZSTD_CCtx_params cctx_params;
init_cctx_params_from_compress_params(&cctx_params, compress_params);
ZSTD_CCtxParams_registerSequenceProducer(&cctx_params, NULL, dummy_external_sequence_producer);
return ZSTD_estimateCCtxSize_usingCCtxParams(&cctx_params);
}
EXPORT_SYMBOL(zstd_cctx_workspace_bound_with_ext_seq_prod);
size_t zstd_cstream_workspace_bound_with_ext_seq_prod(const zstd_compression_parameters *compress_params)
{
ZSTD_CCtx_params cctx_params;
init_cctx_params_from_compress_params(&cctx_params, compress_params);
ZSTD_CCtxParams_registerSequenceProducer(&cctx_params, NULL, dummy_external_sequence_producer);
return ZSTD_estimateCStreamSize_usingCCtxParams(&cctx_params);
}
EXPORT_SYMBOL(zstd_cstream_workspace_bound_with_ext_seq_prod);
zstd_cctx *zstd_init_cctx(void *workspace, size_t workspace_size)
{
if (workspace == NULL)
return NULL;
return ZSTD_initStaticCCtx(workspace, workspace_size);
}
EXPORT_SYMBOL(zstd_init_cctx);
size_t zstd_compress_cctx(zstd_cctx *cctx, void *dst, size_t dst_capacity,
const void *src, size_t src_size, const zstd_parameters *parameters)
{
ZSTD_FORWARD_IF_ERR(zstd_cctx_init(cctx, parameters, src_size));
return ZSTD_compress2(cctx, dst, dst_capacity, src, src_size);
}
EXPORT_SYMBOL(zstd_compress_cctx);
size_t zstd_cstream_workspace_bound(const zstd_compression_parameters *cparams)
{
return ZSTD_estimateCStreamSize_usingCParams(*cparams);
}
EXPORT_SYMBOL(zstd_cstream_workspace_bound);
zstd_cstream *zstd_init_cstream(const zstd_parameters *parameters,
unsigned long long pledged_src_size, void *workspace, size_t workspace_size)
{
zstd_cstream *cstream;
if (workspace == NULL)
return NULL;
cstream = ZSTD_initStaticCStream(workspace, workspace_size);
if (cstream == NULL)
return NULL;
/* 0 means unknown in linux zstd API but means 0 in new zstd API */
if (pledged_src_size == 0)
pledged_src_size = ZSTD_CONTENTSIZE_UNKNOWN;
if (ZSTD_isError(zstd_cctx_init(cstream, parameters, pledged_src_size)))
return NULL;
return cstream;
}
EXPORT_SYMBOL(zstd_init_cstream);
size_t zstd_reset_cstream(zstd_cstream *cstream,
unsigned long long pledged_src_size)
{
if (pledged_src_size == 0)
pledged_src_size = ZSTD_CONTENTSIZE_UNKNOWN;
ZSTD_FORWARD_IF_ERR( ZSTD_CCtx_reset(cstream, ZSTD_reset_session_only) );
ZSTD_FORWARD_IF_ERR( ZSTD_CCtx_setPledgedSrcSize(cstream, pledged_src_size) );
return 0;
}
EXPORT_SYMBOL(zstd_reset_cstream);
size_t zstd_compress_stream(zstd_cstream *cstream, zstd_out_buffer *output,
zstd_in_buffer *input)
{
return ZSTD_compressStream(cstream, output, input);
}
EXPORT_SYMBOL(zstd_compress_stream);
size_t zstd_flush_stream(zstd_cstream *cstream, zstd_out_buffer *output)
{
return ZSTD_flushStream(cstream, output);
}
EXPORT_SYMBOL(zstd_flush_stream);
size_t zstd_end_stream(zstd_cstream *cstream, zstd_out_buffer *output)
{
return ZSTD_endStream(cstream, output);
}
EXPORT_SYMBOL(zstd_end_stream);
void zstd_register_sequence_producer(
zstd_cctx *cctx,
void* sequence_producer_state,
zstd_sequence_producer_f sequence_producer
) {
ZSTD_registerSequenceProducer(cctx, sequence_producer_state, sequence_producer);
}
EXPORT_SYMBOL(zstd_register_sequence_producer);
size_t zstd_compress_sequences_and_literals(zstd_cctx *cctx, void* dst, size_t dst_capacity,
const zstd_sequence *in_seqs, size_t in_seqs_size,
const void* literals, size_t lit_size, size_t lit_capacity,
size_t decompressed_size)
{
return ZSTD_compressSequencesAndLiterals(cctx, dst, dst_capacity, in_seqs,
in_seqs_size, literals, lit_size,
lit_capacity, decompressed_size);
}
EXPORT_SYMBOL(zstd_compress_sequences_and_literals);
MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("Zstd Compressor");

View File

@@ -0,0 +1,105 @@
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/zstd.h>
#include "common/zstd_deps.h"
/* Common symbols. zstd_compress must depend on zstd_decompress. */
unsigned int zstd_is_error(size_t code)
{
return ZSTD_isError(code);
}
EXPORT_SYMBOL(zstd_is_error);
zstd_error_code zstd_get_error_code(size_t code)
{
return ZSTD_getErrorCode(code);
}
EXPORT_SYMBOL(zstd_get_error_code);
const char *zstd_get_error_name(size_t code)
{
return ZSTD_getErrorName(code);
}
EXPORT_SYMBOL(zstd_get_error_name);
/* Decompression symbols. */
size_t zstd_dctx_workspace_bound(void)
{
return ZSTD_estimateDCtxSize();
}
EXPORT_SYMBOL(zstd_dctx_workspace_bound);
zstd_dctx *zstd_init_dctx(void *workspace, size_t workspace_size)
{
if (workspace == NULL)
return NULL;
return ZSTD_initStaticDCtx(workspace, workspace_size);
}
EXPORT_SYMBOL(zstd_init_dctx);
size_t zstd_decompress_dctx(zstd_dctx *dctx, void *dst, size_t dst_capacity,
const void *src, size_t src_size)
{
return ZSTD_decompressDCtx(dctx, dst, dst_capacity, src, src_size);
}
EXPORT_SYMBOL(zstd_decompress_dctx);
size_t zstd_dstream_workspace_bound(size_t max_window_size)
{
return ZSTD_estimateDStreamSize(max_window_size);
}
EXPORT_SYMBOL(zstd_dstream_workspace_bound);
zstd_dstream *zstd_init_dstream(size_t max_window_size, void *workspace,
size_t workspace_size)
{
if (workspace == NULL)
return NULL;
(void)max_window_size;
return ZSTD_initStaticDStream(workspace, workspace_size);
}
EXPORT_SYMBOL(zstd_init_dstream);
size_t zstd_reset_dstream(zstd_dstream *dstream)
{
return ZSTD_DCtx_reset(dstream, ZSTD_reset_session_only);
}
EXPORT_SYMBOL(zstd_reset_dstream);
size_t zstd_decompress_stream(zstd_dstream *dstream, zstd_out_buffer *output,
zstd_in_buffer *input)
{
return ZSTD_decompressStream(dstream, output, input);
}
EXPORT_SYMBOL(zstd_decompress_stream);
size_t zstd_find_frame_compressed_size(const void *src, size_t src_size)
{
return ZSTD_findFrameCompressedSize(src, src_size);
}
EXPORT_SYMBOL(zstd_find_frame_compressed_size);
size_t zstd_get_frame_header(zstd_frame_header *header, const void *src,
size_t src_size)
{
return ZSTD_getFrameHeader(header, src, src_size);
}
EXPORT_SYMBOL(zstd_get_frame_header);
MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("Zstd Decompressor");

View File

@@ -0,0 +1,121 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/*
* This file provides common libc dependencies that zstd requires.
* The purpose is to allow replacing this file with a custom implementation
* to compile zstd without libc support.
*/
/* Need:
* NULL
* INT_MAX
* UINT_MAX
* ZSTD_memcpy()
* ZSTD_memset()
* ZSTD_memmove()
*/
#ifndef ZSTD_DEPS_COMMON
#define ZSTD_DEPS_COMMON
#include <linux/limits.h>
#include <linux/stddef.h>
#define ZSTD_memcpy(d,s,n) __builtin_memcpy((d),(s),(n))
#define ZSTD_memmove(d,s,n) __builtin_memmove((d),(s),(n))
#define ZSTD_memset(d,s,n) __builtin_memset((d),(s),(n))
#endif /* ZSTD_DEPS_COMMON */
/*
* Define malloc as always failing. That means the user must
* either use ZSTD_customMem or statically allocate memory.
* Need:
* ZSTD_malloc()
* ZSTD_free()
* ZSTD_calloc()
*/
#ifdef ZSTD_DEPS_NEED_MALLOC
#ifndef ZSTD_DEPS_MALLOC
#define ZSTD_DEPS_MALLOC
#define ZSTD_malloc(s) ({ (void)(s); NULL; })
#define ZSTD_free(p) ((void)(p))
#define ZSTD_calloc(n,s) ({ (void)(n); (void)(s); NULL; })
#endif /* ZSTD_DEPS_MALLOC */
#endif /* ZSTD_DEPS_NEED_MALLOC */
/*
* Provides 64-bit math support.
* Need:
* U64 ZSTD_div64(U64 dividend, U32 divisor)
*/
#ifdef ZSTD_DEPS_NEED_MATH64
#ifndef ZSTD_DEPS_MATH64
#define ZSTD_DEPS_MATH64
#include <linux/math64.h>
static uint64_t ZSTD_div64(uint64_t dividend, uint32_t divisor) {
return div_u64(dividend, divisor);
}
#endif /* ZSTD_DEPS_MATH64 */
#endif /* ZSTD_DEPS_NEED_MATH64 */
/*
* This is only requested when DEBUGLEVEL >= 1, meaning
* it is disabled in production.
* Need:
* assert()
*/
#ifdef ZSTD_DEPS_NEED_ASSERT
#ifndef ZSTD_DEPS_ASSERT
#define ZSTD_DEPS_ASSERT
#include <linux/kernel.h>
#define assert(x) WARN_ON(!(x))
#endif /* ZSTD_DEPS_ASSERT */
#endif /* ZSTD_DEPS_NEED_ASSERT */
/*
* This is only requested when DEBUGLEVEL >= 2, meaning
* it is disabled in production.
* Need:
* ZSTD_DEBUG_PRINT()
*/
#ifdef ZSTD_DEPS_NEED_IO
#ifndef ZSTD_DEPS_IO
#define ZSTD_DEPS_IO
#include <linux/printk.h>
#define ZSTD_DEBUG_PRINT(...) pr_debug(__VA_ARGS__)
#endif /* ZSTD_DEPS_IO */
#endif /* ZSTD_DEPS_NEED_IO */
/*
* Only requested when MSAN is enabled.
* Need:
* intptr_t
*/
#ifdef ZSTD_DEPS_NEED_STDINT
#ifndef ZSTD_DEPS_STDINT
#define ZSTD_DEPS_STDINT
/* intptr_t already provided by ZSTD_DEPS_COMMON */
#endif /* ZSTD_DEPS_STDINT */
#endif /* ZSTD_DEPS_NEED_STDINT */

View File

@@ -0,0 +1,42 @@
## Edit Distance Match Finder
```
/* This match finder leverages techniques used in file comparison algorithms
* to find matches between a dictionary and a source file.
*
* The original motivation for studying this approach was to try and optimize
* Zstandard for the use case of patching: the most common scenario being
* updating an existing software package with the next version. When patching,
* the difference between the old version of the package and the new version
* is generally tiny (most of the new file will be identical to
* the old one). In more technical terms, the edit distance (the minimal number
* of changes required to take one sequence of bytes to another) between the
* files would be small relative to the size of the file.
*
* Various 'diffing' algorithms utilize this notion of edit distance and
* the corresponding concept of a minimal edit script between two
* sequences to identify the regions within two files where they differ.
* The core algorithm used in this match finder is described in:
*
* "An O(ND) Difference Algorithm and its Variations", Eugene W. Myers,
* Algorithmica Vol. 1, 1986, pp. 251-266,
* <https://doi.org/10.1007/BF01840446>.
*
* Additional algorithmic heuristics for speed improvement have also been included.
* These we inspired from implementations of various regular and binary diffing
* algorithms such as GNU diff, bsdiff, and Xdelta.
*
* Note: after some experimentation, this approach proved to not provide enough
* utility to justify the additional CPU used in finding matches. The one area
* where this approach consistently outperforms Zstandard even on level 19 is
* when compressing small files (<10 KB) using an equally small dictionary that
* is very similar to the source file. For the use case that this was intended,
* (large similar files) this approach by itself took 5-10X longer than zstd-19 and
* generally resulted in 2-3X larger files. The core advantage that zstd-19 has
* over this approach for match finding is the overlapping matches. This approach
* cannot find any.
*
* I'm leaving this in the contrib section in case this ever becomes interesting
* to explore again.
* */
```

View File

@@ -0,0 +1,558 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/*-*************************************
* Dependencies
***************************************/
/* Currently relies on qsort when combining contiguous matches. This can probably
* be avoided but would require changes to the algorithm. The qsort is far from
* the bottleneck in this algorithm even for medium sized files so it's probably
* not worth trying to address */
#include <stdlib.h>
#include <assert.h>
#include "zstd_edist.h"
#include "mem.h"
/*-*************************************
* Constants
***************************************/
/* Just a sential for the entries of the diagonal matrix */
#define ZSTD_EDIST_DIAG_MAX (S32)(1 << 30)
/* How large should a snake be to be considered a 'big' snake.
* For an explanation of what a 'snake' is with respect to the
* edit distance matrix, see the linked paper in zstd_edist.h */
#define ZSTD_EDIST_SNAKE_THRESH 20
/* After how many iterations should we start to use the heuristic
* based on 'big' snakes */
#define ZSTD_EDIST_SNAKE_ITER_THRESH 200
/* After how many iterations should be just give up and take
* the best available edit script for this round */
#define ZSTD_EDIST_EXPENSIVE_THRESH 1024
/*-*************************************
* Structures
***************************************/
typedef struct {
U32 dictIdx;
U32 srcIdx;
U32 matchLength;
} ZSTD_eDist_match;
typedef struct {
const BYTE* dict;
const BYTE* src;
size_t dictSize;
size_t srcSize;
S32* forwardDiag; /* Entries of the forward diagonal stored here */
S32* backwardDiag; /* Entries of the backward diagonal stored here.
* Note: this buffer and the 'forwardDiag' buffer
* are contiguous. See the ZSTD_eDist_genSequences */
ZSTD_eDist_match* matches; /* Accumulate matches of length 1 in this buffer.
* In a subsequence post-processing step, we combine
* contiguous matches. */
U32 nbMatches;
} ZSTD_eDist_state;
typedef struct {
S32 dictMid; /* The mid diagonal for the dictionary */
S32 srcMid; /* The mid diagonal for the source */
int lowUseHeuristics; /* Should we use heuristics for the low part */
int highUseHeuristics; /* Should we use heuristics for the high part */
} ZSTD_eDist_partition;
/*-*************************************
* Internal
***************************************/
static void ZSTD_eDist_diag(ZSTD_eDist_state* state,
ZSTD_eDist_partition* partition,
S32 dictLow, S32 dictHigh, S32 srcLow,
S32 srcHigh, int useHeuristics)
{
S32* const forwardDiag = state->forwardDiag;
S32* const backwardDiag = state->backwardDiag;
const BYTE* const dict = state->dict;
const BYTE* const src = state->src;
S32 const diagMin = dictLow - srcHigh;
S32 const diagMax = dictHigh - srcLow;
S32 const forwardMid = dictLow - srcLow;
S32 const backwardMid = dictHigh - srcHigh;
S32 forwardMin = forwardMid;
S32 forwardMax = forwardMid;
S32 backwardMin = backwardMid;
S32 backwardMax = backwardMid;
int odd = (forwardMid - backwardMid) & 1;
U32 iterations;
forwardDiag[forwardMid] = dictLow;
backwardDiag[backwardMid] = dictHigh;
/* Main loop for updating diag entries. Unless useHeuristics is
* set to false, this loop will run until it finds the minimal
* edit script */
for (iterations = 1;;iterations++) {
S32 diag;
int bigSnake = 0;
if (forwardMin > diagMin) {
forwardMin--;
forwardDiag[forwardMin - 1] = -1;
} else {
forwardMin++;
}
if (forwardMax < diagMax) {
forwardMax++;
forwardDiag[forwardMax + 1] = -1;
} else {
forwardMax--;
}
for (diag = forwardMax; diag >= forwardMin; diag -= 2) {
S32 dictIdx;
S32 srcIdx;
S32 low = forwardDiag[diag - 1];
S32 high = forwardDiag[diag + 1];
S32 dictIdx0 = low < high ? high : low + 1;
for (dictIdx = dictIdx0, srcIdx = dictIdx0 - diag;
dictIdx < dictHigh && srcIdx < srcHigh && dict[dictIdx] == src[srcIdx];
dictIdx++, srcIdx++) continue;
if (dictIdx - dictIdx0 > ZSTD_EDIST_SNAKE_THRESH)
bigSnake = 1;
forwardDiag[diag] = dictIdx;
if (odd && backwardMin <= diag && diag <= backwardMax && backwardDiag[diag] <= dictIdx) {
partition->dictMid = dictIdx;
partition->srcMid = srcIdx;
partition->lowUseHeuristics = 0;
partition->highUseHeuristics = 0;
return;
}
}
if (backwardMin > diagMin) {
backwardMin--;
backwardDiag[backwardMin - 1] = ZSTD_EDIST_DIAG_MAX;
} else {
backwardMin++;
}
if (backwardMax < diagMax) {
backwardMax++;
backwardDiag[backwardMax + 1] = ZSTD_EDIST_DIAG_MAX;
} else {
backwardMax--;
}
for (diag = backwardMax; diag >= backwardMin; diag -= 2) {
S32 dictIdx;
S32 srcIdx;
S32 low = backwardDiag[diag - 1];
S32 high = backwardDiag[diag + 1];
S32 dictIdx0 = low < high ? low : high - 1;
for (dictIdx = dictIdx0, srcIdx = dictIdx0 - diag;
dictLow < dictIdx && srcLow < srcIdx && dict[dictIdx - 1] == src[srcIdx - 1];
dictIdx--, srcIdx--) continue;
if (dictIdx0 - dictIdx > ZSTD_EDIST_SNAKE_THRESH)
bigSnake = 1;
backwardDiag[diag] = dictIdx;
if (!odd && forwardMin <= diag && diag <= forwardMax && dictIdx <= forwardDiag[diag]) {
partition->dictMid = dictIdx;
partition->srcMid = srcIdx;
partition->lowUseHeuristics = 0;
partition->highUseHeuristics = 0;
return;
}
}
if (!useHeuristics)
continue;
/* Everything under this point is a heuristic. Using these will
* substantially speed up the match finding. In some cases, taking
* the total match finding time from several minutes to seconds.
* Of course, the caveat is that the edit script found may no longer
* be optimal */
/* Big snake heuristic */
if (iterations > ZSTD_EDIST_SNAKE_ITER_THRESH && bigSnake) {
{
S32 best = 0;
for (diag = forwardMax; diag >= forwardMin; diag -= 2) {
S32 diagDiag = diag - forwardMid;
S32 dictIdx = forwardDiag[diag];
S32 srcIdx = dictIdx - diag;
S32 v = (dictIdx - dictLow) * 2 - diagDiag;
if (v > 12 * (iterations + (diagDiag < 0 ? -diagDiag : diagDiag))) {
if (v > best
&& dictLow + ZSTD_EDIST_SNAKE_THRESH <= dictIdx && dictIdx <= dictHigh
&& srcLow + ZSTD_EDIST_SNAKE_THRESH <= srcIdx && srcIdx <= srcHigh) {
S32 k;
for (k = 1; dict[dictIdx - k] == src[srcIdx - k]; k++) {
if (k == ZSTD_EDIST_SNAKE_THRESH) {
best = v;
partition->dictMid = dictIdx;
partition->srcMid = srcIdx;
break;
}
}
}
}
}
if (best > 0) {
partition->lowUseHeuristics = 0;
partition->highUseHeuristics = 1;
return;
}
}
{
S32 best = 0;
for (diag = backwardMax; diag >= backwardMin; diag -= 2) {
S32 diagDiag = diag - backwardMid;
S32 dictIdx = backwardDiag[diag];
S32 srcIdx = dictIdx - diag;
S32 v = (dictHigh - dictIdx) * 2 + diagDiag;
if (v > 12 * (iterations + (diagDiag < 0 ? -diagDiag : diagDiag))) {
if (v > best
&& dictLow < dictIdx && dictIdx <= dictHigh - ZSTD_EDIST_SNAKE_THRESH
&& srcLow < srcIdx && srcIdx <= srcHigh - ZSTD_EDIST_SNAKE_THRESH) {
int k;
for (k = 0; dict[dictIdx + k] == src[srcIdx + k]; k++) {
if (k == ZSTD_EDIST_SNAKE_THRESH - 1) {
best = v;
partition->dictMid = dictIdx;
partition->srcMid = srcIdx;
break;
}
}
}
}
}
if (best > 0) {
partition->lowUseHeuristics = 1;
partition->highUseHeuristics = 0;
return;
}
}
}
/* More general 'too expensive' heuristic */
if (iterations >= ZSTD_EDIST_EXPENSIVE_THRESH) {
S32 forwardDictSrcBest;
S32 forwardDictBest = 0;
S32 backwardDictSrcBest;
S32 backwardDictBest = 0;
forwardDictSrcBest = -1;
for (diag = forwardMax; diag >= forwardMin; diag -= 2) {
S32 dictIdx = MIN(forwardDiag[diag], dictHigh);
S32 srcIdx = dictIdx - diag;
if (srcHigh < srcIdx) {
dictIdx = srcHigh + diag;
srcIdx = srcHigh;
}
if (forwardDictSrcBest < dictIdx + srcIdx) {
forwardDictSrcBest = dictIdx + srcIdx;
forwardDictBest = dictIdx;
}
}
backwardDictSrcBest = ZSTD_EDIST_DIAG_MAX;
for (diag = backwardMax; diag >= backwardMin; diag -= 2) {
S32 dictIdx = MAX(dictLow, backwardDiag[diag]);
S32 srcIdx = dictIdx - diag;
if (srcIdx < srcLow) {
dictIdx = srcLow + diag;
srcIdx = srcLow;
}
if (dictIdx + srcIdx < backwardDictSrcBest) {
backwardDictSrcBest = dictIdx + srcIdx;
backwardDictBest = dictIdx;
}
}
if ((dictHigh + srcHigh) - backwardDictSrcBest < forwardDictSrcBest - (dictLow + srcLow)) {
partition->dictMid = forwardDictBest;
partition->srcMid = forwardDictSrcBest - forwardDictBest;
partition->lowUseHeuristics = 0;
partition->highUseHeuristics = 1;
} else {
partition->dictMid = backwardDictBest;
partition->srcMid = backwardDictSrcBest - backwardDictBest;
partition->lowUseHeuristics = 1;
partition->highUseHeuristics = 0;
}
return;
}
}
}
static void ZSTD_eDist_insertMatch(ZSTD_eDist_state* state,
S32 const dictIdx, S32 const srcIdx)
{
state->matches[state->nbMatches].dictIdx = dictIdx;
state->matches[state->nbMatches].srcIdx = srcIdx;
state->matches[state->nbMatches].matchLength = 1;
state->nbMatches++;
}
static int ZSTD_eDist_compare(ZSTD_eDist_state* state,
S32 dictLow, S32 dictHigh, S32 srcLow,
S32 srcHigh, int useHeuristics)
{
const BYTE* const dict = state->dict;
const BYTE* const src = state->src;
/* Found matches while traversing from the low end */
while (dictLow < dictHigh && srcLow < srcHigh && dict[dictLow] == src[srcLow]) {
ZSTD_eDist_insertMatch(state, dictLow, srcLow);
dictLow++;
srcLow++;
}
/* Found matches while traversing from the high end */
while (dictLow < dictHigh && srcLow < srcHigh && dict[dictHigh - 1] == src[srcHigh - 1]) {
ZSTD_eDist_insertMatch(state, dictHigh - 1, srcHigh - 1);
dictHigh--;
srcHigh--;
}
/* If the low and high end end up touching. If we wanted to make
* note of the differences like most diffing algorithms do, we would
* do so here. In our case, we're only concerned with matches
* Note: if you wanted to find the edit distance of the algorithm,
* you could just accumulate the cost for an insertion/deletion
* below. */
if (dictLow == dictHigh) {
while (srcLow < srcHigh) {
/* Reaching this point means inserting src[srcLow] into
* the current position of dict */
srcLow++;
}
} else if (srcLow == srcHigh) {
while (dictLow < dictHigh) {
/* Reaching this point means deleting dict[dictLow] from
* the current position of dict */
dictLow++;
}
} else {
ZSTD_eDist_partition partition;
partition.dictMid = 0;
partition.srcMid = 0;
ZSTD_eDist_diag(state, &partition, dictLow, dictHigh,
srcLow, srcHigh, useHeuristics);
if (ZSTD_eDist_compare(state, dictLow, partition.dictMid,
srcLow, partition.srcMid, partition.lowUseHeuristics))
return 1;
if (ZSTD_eDist_compare(state, partition.dictMid, dictHigh,
partition.srcMid, srcHigh, partition.highUseHeuristics))
return 1;
}
return 0;
}
static int ZSTD_eDist_matchComp(const void* p, const void* q)
{
S32 const l = ((ZSTD_eDist_match*)p)->srcIdx;
S32 const r = ((ZSTD_eDist_match*)q)->srcIdx;
return (l - r);
}
/* The matches from the approach above will all be of the form
* (dictIdx, srcIdx, 1). This method combines contiguous matches
* of length MINMATCH or greater. Matches less than MINMATCH
* are discarded */
static void ZSTD_eDist_combineMatches(ZSTD_eDist_state* state)
{
/* Create a new buffer to put the combined matches into
* and memcpy to state->matches after */
ZSTD_eDist_match* combinedMatches =
ZSTD_malloc(state->nbMatches * sizeof(ZSTD_eDist_match),
ZSTD_defaultCMem);
U32 nbCombinedMatches = 1;
size_t i;
/* Make sure that the srcIdx and dictIdx are in sorted order.
* The combination step won't work otherwise */
qsort(state->matches, state->nbMatches, sizeof(ZSTD_eDist_match), ZSTD_eDist_matchComp);
memcpy(combinedMatches, state->matches, sizeof(ZSTD_eDist_match));
for (i = 1; i < state->nbMatches; i++) {
ZSTD_eDist_match const match = state->matches[i];
ZSTD_eDist_match const combinedMatch =
combinedMatches[nbCombinedMatches - 1];
if (combinedMatch.srcIdx + combinedMatch.matchLength == match.srcIdx &&
combinedMatch.dictIdx + combinedMatch.matchLength == match.dictIdx) {
combinedMatches[nbCombinedMatches - 1].matchLength++;
} else {
/* Discard matches that are less than MINMATCH */
if (combinedMatches[nbCombinedMatches - 1].matchLength < MINMATCH) {
nbCombinedMatches--;
}
memcpy(combinedMatches + nbCombinedMatches,
state->matches + i, sizeof(ZSTD_eDist_match));
nbCombinedMatches++;
}
}
memcpy(state->matches, combinedMatches, nbCombinedMatches * sizeof(ZSTD_eDist_match));
state->nbMatches = nbCombinedMatches;
ZSTD_free(combinedMatches, ZSTD_defaultCMem);
}
static size_t ZSTD_eDist_convertMatchesToSequences(ZSTD_Sequence* sequences,
ZSTD_eDist_state* state)
{
const ZSTD_eDist_match* matches = state->matches;
size_t const nbMatches = state->nbMatches;
size_t const dictSize = state->dictSize;
size_t nbSequences = 0;
size_t i;
for (i = 0; i < nbMatches; i++) {
ZSTD_eDist_match const match = matches[i];
U32 const litLength = !i ? match.srcIdx :
match.srcIdx - (matches[i - 1].srcIdx + matches[i - 1].matchLength);
U32 const offset = (match.srcIdx + dictSize) - match.dictIdx;
U32 const matchLength = match.matchLength;
sequences[nbSequences].offset = offset;
sequences[nbSequences].litLength = litLength;
sequences[nbSequences].matchLength = matchLength;
nbSequences++;
}
return nbSequences;
}
/*-*************************************
* Internal utils
***************************************/
static size_t ZSTD_eDist_hamingDist(const BYTE* const a,
const BYTE* const b, size_t n)
{
size_t i;
size_t dist = 0;
for (i = 0; i < n; i++)
dist += a[i] != b[i];
return dist;
}
/* This is a pretty naive recursive implementation that should only
* be used for quick tests obviously. Don't try and run this on a
* GB file or something. There are faster implementations. Use those
* if you need to run it for large files. */
static size_t ZSTD_eDist_levenshteinDist(const BYTE* const s,
size_t const sn, const BYTE* const t,
size_t const tn)
{
size_t a, b, c;
if (!sn)
return tn;
if (!tn)
return sn;
if (s[sn - 1] == t[tn - 1])
return ZSTD_eDist_levenshteinDist(
s, sn - 1, t, tn - 1);
a = ZSTD_eDist_levenshteinDist(s, sn - 1, t, tn - 1);
b = ZSTD_eDist_levenshteinDist(s, sn, t, tn - 1);
c = ZSTD_eDist_levenshteinDist(s, sn - 1, t, tn);
if (a > b)
a = b;
if (a > c)
a = c;
return a + 1;
}
static void ZSTD_eDist_validateMatches(ZSTD_eDist_match* matches,
size_t const nbMatches, const BYTE* const dict,
size_t const dictSize, const BYTE* const src,
size_t const srcSize)
{
size_t i;
for (i = 0; i < nbMatches; i++) {
ZSTD_eDist_match match = matches[i];
U32 const dictIdx = match.dictIdx;
U32 const srcIdx = match.srcIdx;
U32 const matchLength = match.matchLength;
assert(dictIdx + matchLength < dictSize);
assert(srcIdx + matchLength < srcSize);
assert(!memcmp(dict + dictIdx, src + srcIdx, matchLength));
}
}
/*-*************************************
* API
***************************************/
size_t ZSTD_eDist_genSequences(ZSTD_Sequence* sequences,
const void* dict, size_t dictSize,
const void* src, size_t srcSize,
int useHeuristics)
{
size_t const nbDiags = dictSize + srcSize + 3;
S32* buffer = ZSTD_malloc(nbDiags * 2 * sizeof(S32), ZSTD_defaultCMem);
ZSTD_eDist_state state;
size_t nbSequences = 0;
state.dict = (const BYTE*)dict;
state.src = (const BYTE*)src;
state.dictSize = dictSize;
state.srcSize = srcSize;
state.forwardDiag = buffer;
state.backwardDiag = buffer + nbDiags;
state.forwardDiag += srcSize + 1;
state.backwardDiag += srcSize + 1;
state.matches = ZSTD_malloc(srcSize * sizeof(ZSTD_eDist_match), ZSTD_defaultCMem);
state.nbMatches = 0;
ZSTD_eDist_compare(&state, 0, dictSize, 0, srcSize, 1);
ZSTD_eDist_combineMatches(&state);
nbSequences = ZSTD_eDist_convertMatchesToSequences(sequences, &state);
ZSTD_free(buffer, ZSTD_defaultCMem);
ZSTD_free(state.matches, ZSTD_defaultCMem);
return nbSequences;
}

View File

@@ -0,0 +1,70 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/* This match finder leverages techniques used in file comparison algorithms
* to find matches between a dictionary and a source file.
*
* The original motivation for studying this approach was to try and optimize
* Zstandard for the use case of patching: the most common scenario being
* updating an existing software package with the next version. When patching,
* the difference between the old version of the package and the new version
* is generally tiny (most of the new file will be identical to
* the old one). In more technical terms, the edit distance (the minimal number
* of changes required to take one sequence of bytes to another) between the
* files would be small relative to the size of the file.
*
* Various 'diffing' algorithms utilize this notion of edit distance and
* the corresponding concept of a minimal edit script between two
* sequences to identify the regions within two files where they differ.
* The core algorithm used in this match finder is described in:
*
* "An O(ND) Difference Algorithm and its Variations", Eugene W. Myers,
* Algorithmica Vol. 1, 1986, pp. 251-266,
* <https://doi.org/10.1007/BF01840446>.
*
* Additional algorithmic heuristics for speed improvement have also been included.
* These we inspired from implementations of various regular and binary diffing
* algorithms such as GNU diff, bsdiff, and Xdelta.
*
* Note: after some experimentation, this approach proved to not provide enough
* utility to justify the additional CPU used in finding matches. The one area
* where this approach consistently outperforms Zstandard even on level 19 is
* when compressing small files (<10 KB) using an equally small dictionary that
* is very similar to the source file. For the use case that this was intended,
* (large similar files) this approach by itself took 5-10X longer than zstd-19 and
* generally resulted in 2-3X larger files. The core advantage that zstd-19 has
* over this approach for match finding is the overlapping matches. This approach
* cannot find any.
*
* I'm leaving this in the contrib section in case this ever becomes interesting
* to explore again.
* */
#ifndef ZSTD_EDIST_H
#define ZSTD_EDIST_H
/*-*************************************
* Dependencies
***************************************/
#include <stddef.h>
#include "zstd_internal.h" /* ZSTD_Sequence */
/*! ZSTD_eDist_genSequences() :
* Will populate the provided ZSTD_Sequence buffer with sequences
* based on the optimal or near-optimal (depending on 'useHeuristics')
* edit script between 'dict' and 'src.'
* @return : the number of sequences found */
size_t ZSTD_eDist_genSequences(ZSTD_Sequence* sequences,
const void* dict, size_t dictSize,
const void* src, size_t srcSize,
int useHeuristics);
#endif

View File

@@ -0,0 +1,6 @@
-- Include zstd.lua in your GENie or premake4 file, which exposes a project_zstd function
dofile('zstd.lua')
solution 'example'
configurations { 'Debug', 'Release' }
project_zstd('../../lib/')

View File

@@ -0,0 +1,80 @@
-- This GENie/premake file copies the behavior of the Makefile in the lib folder.
-- Basic usage: project_zstd(ZSTD_DIR)
function project_zstd(dir, compression, decompression, deprecated, dictbuilder, legacy)
if compression == nil then compression = true end
if decompression == nil then decompression = true end
if deprecated == nil then deprecated = false end
if dictbuilder == nil then dictbuilder = false end
if legacy == nil then legacy = 0 end
if not compression then
dictbuilder = false
deprecated = false
end
if not decompression then
legacy = 0
deprecated = false
end
project 'zstd'
kind 'StaticLib'
language 'C'
files {
dir .. 'zstd.h',
dir .. 'common/**.c',
dir .. 'common/**.h'
}
if compression then
files {
dir .. 'compress/**.c',
dir .. 'compress/**.h'
}
end
if decompression then
files {
dir .. 'decompress/**.c',
dir .. 'decompress/**.h'
}
end
if dictbuilder then
files {
dir .. 'dictBuilder/**.c',
dir .. 'dictBuilder/**.h'
}
end
if deprecated then
files {
dir .. 'deprecated/**.c',
dir .. 'deprecated/**.h'
}
end
if legacy ~= 0 then
if legacy >= 8 then
files {
dir .. 'legacy/zstd_v0' .. (legacy - 7) .. '.*'
}
end
includedirs {
dir .. 'legacy'
}
end
includedirs {
dir,
dir .. 'common'
}
defines {
'XXH_NAMESPACE=ZSTD_',
'ZSTD_LEGACY_SUPPORT=' .. legacy
}
end

View File

@@ -0,0 +1,2 @@
# compilation result
pzstd

View File

@@ -0,0 +1,72 @@
cxx_library(
name='libpzstd',
visibility=['PUBLIC'],
header_namespace='',
exported_headers=[
'ErrorHolder.h',
'Logging.h',
'Pzstd.h',
],
headers=[
'SkippableFrame.h',
],
srcs=[
'Pzstd.cpp',
'SkippableFrame.cpp',
],
deps=[
':options',
'//contrib/pzstd/utils:utils',
'//lib:mem',
'//lib:zstd',
],
)
cxx_library(
name='options',
visibility=['PUBLIC'],
header_namespace='',
exported_headers=['Options.h'],
srcs=['Options.cpp'],
deps=[
'//contrib/pzstd/utils:scope_guard',
'//lib:zstd',
'//programs:util',
],
)
cxx_binary(
name='pzstd',
visibility=['PUBLIC'],
srcs=['main.cpp'],
deps=[
':libpzstd',
':options',
],
)
# Must run "make googletest" first
cxx_library(
name='gtest',
srcs=glob([
'googletest/googletest/src/gtest-all.cc',
'googletest/googlemock/src/gmock-all.cc',
'googletest/googlemock/src/gmock_main.cc',
]),
header_namespace='',
exported_headers=subdir_glob([
('googletest/googletest/include', '**/*.h'),
('googletest/googlemock/include', '**/*.h'),
]),
headers=subdir_glob([
('googletest/googletest', 'src/*.cc'),
('googletest/googletest', 'src/*.h'),
('googletest/googlemock', 'src/*.cc'),
('googletest/googlemock', 'src/*.h'),
]),
platform_linker_flags=[
('android', []),
('', ['-lpthread']),
],
visibility=['PUBLIC'],
)

View File

@@ -0,0 +1,54 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
#pragma once
#include <atomic>
#include <cassert>
#include <stdexcept>
#include <string>
namespace pzstd {
// Coordinates graceful shutdown of the pzstd pipeline
class ErrorHolder {
std::atomic<bool> error_;
std::string message_;
public:
ErrorHolder() : error_(false) {}
bool hasError() noexcept {
return error_.load();
}
void setError(std::string message) noexcept {
// Given multiple possibly concurrent calls, exactly one will ever succeed.
bool expected = false;
if (error_.compare_exchange_strong(expected, true)) {
message_ = std::move(message);
}
}
bool check(bool predicate, std::string message) noexcept {
if (!predicate) {
setError(std::move(message));
}
return !hasError();
}
std::string getError() noexcept {
error_.store(false);
return std::move(message_);
}
~ErrorHolder() {
assert(!hasError());
}
};
}

View File

@@ -0,0 +1,72 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
#pragma once
#include <cstdio>
#include <mutex>
namespace pzstd {
constexpr int kLogError = 1;
constexpr int kLogInfo = 2;
constexpr int kLogDebug = 3;
constexpr int kLogVerbose = 4;
class Logger {
std::mutex mutex_;
FILE* out_;
const int level_;
using Clock = std::chrono::system_clock;
Clock::time_point lastUpdate_;
std::chrono::milliseconds refreshRate_;
public:
explicit Logger(int level, FILE* out = stderr)
: out_(out), level_(level), lastUpdate_(Clock::now()),
refreshRate_(150) {}
bool logsAt(int level) {
return level <= level_;
}
template <typename... Args>
void operator()(int level, const char *fmt, Args... args) {
if (level > level_) {
return;
}
std::lock_guard<std::mutex> lock(mutex_);
std::fprintf(out_, fmt, args...);
}
template <typename... Args>
void update(int level, const char *fmt, Args... args) {
if (level > level_) {
return;
}
std::lock_guard<std::mutex> lock(mutex_);
auto now = Clock::now();
if (now - lastUpdate_ > refreshRate_) {
lastUpdate_ = now;
std::fprintf(out_, "\r");
std::fprintf(out_, fmt, args...);
}
}
void clear(int level) {
if (level > level_) {
return;
}
std::lock_guard<std::mutex> lock(mutex_);
std::fprintf(out_, "\r%79s\r", "");
}
};
}

View File

@@ -0,0 +1,424 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
#include "Options.h"
#include "util.h"
#include "utils/ScopeGuard.h"
#include <algorithm>
#include <cassert>
#include <cstdio>
#include <cstring>
#include <iterator>
#include <thread>
#include <vector>
namespace pzstd {
namespace {
unsigned defaultNumThreads() {
#ifdef PZSTD_NUM_THREADS
return PZSTD_NUM_THREADS;
#else
return std::thread::hardware_concurrency();
#endif
}
unsigned parseUnsigned(const char **arg) {
unsigned result = 0;
while (**arg >= '0' && **arg <= '9') {
result *= 10;
result += **arg - '0';
++(*arg);
}
return result;
}
const char *getArgument(const char *options, const char **argv, int &i,
int argc) {
if (options[1] != 0) {
return options + 1;
}
++i;
if (i == argc) {
std::fprintf(stderr, "Option -%c requires an argument, but none provided\n",
*options);
return nullptr;
}
return argv[i];
}
const std::string kZstdExtension = ".zst";
constexpr char kStdIn[] = "-";
constexpr char kStdOut[] = "-";
constexpr unsigned kDefaultCompressionLevel = 3;
constexpr unsigned kMaxNonUltraCompressionLevel = 19;
#ifdef _WIN32
const char nullOutput[] = "nul";
#else
const char nullOutput[] = "/dev/null";
#endif
void notSupported(const char *option) {
std::fprintf(stderr, "Operation not supported: %s\n", option);
}
void usage() {
std::fprintf(stderr, "Usage:\n");
std::fprintf(stderr, " pzstd [args] [FILE(s)]\n");
std::fprintf(stderr, "Parallel ZSTD options:\n");
std::fprintf(stderr, " -p, --processes # : number of threads to use for (de)compression (default:<numcpus>)\n");
std::fprintf(stderr, "ZSTD options:\n");
std::fprintf(stderr, " -# : # compression level (1-%d, default:%d)\n", kMaxNonUltraCompressionLevel, kDefaultCompressionLevel);
std::fprintf(stderr, " -d, --decompress : decompression\n");
std::fprintf(stderr, " -o file : result stored into `file` (only if 1 input file)\n");
std::fprintf(stderr, " -f, --force : overwrite output without prompting, (de)compress links\n");
std::fprintf(stderr, " --rm : remove source file(s) after successful (de)compression\n");
std::fprintf(stderr, " -k, --keep : preserve source file(s) (default)\n");
std::fprintf(stderr, " -h, --help : display help and exit\n");
std::fprintf(stderr, " -V, --version : display version number and exit\n");
std::fprintf(stderr, " -v, --verbose : verbose mode; specify multiple times to increase log level (default:2)\n");
std::fprintf(stderr, " -q, --quiet : suppress warnings; specify twice to suppress errors too\n");
std::fprintf(stderr, " -c, --stdout : write to standard output (even if it is the console)\n");
#ifdef UTIL_HAS_CREATEFILELIST
std::fprintf(stderr, " -r : operate recursively on directories\n");
#endif
std::fprintf(stderr, " --ultra : enable levels beyond %i, up to %i (requires more memory)\n", kMaxNonUltraCompressionLevel, ZSTD_maxCLevel());
std::fprintf(stderr, " -C, --check : integrity check (default)\n");
std::fprintf(stderr, " --no-check : no integrity check\n");
std::fprintf(stderr, " -t, --test : test compressed file integrity\n");
std::fprintf(stderr, " -- : all arguments after \"--\" are treated as files\n");
}
} // anonymous namespace
Options::Options()
: numThreads(defaultNumThreads()), maxWindowLog(23),
compressionLevel(kDefaultCompressionLevel), decompress(false),
overwrite(false), keepSource(true), writeMode(WriteMode::Auto),
checksum(true), verbosity(2) {}
Options::Status Options::parse(int argc, const char **argv) {
bool test = false;
bool recursive = false;
bool ultra = false;
bool forceStdout = false;
bool followLinks = false;
// Local copy of input files, which are pointers into argv.
std::vector<const char *> localInputFiles;
for (int i = 1; i < argc; ++i) {
const char *arg = argv[i];
// Protect against empty arguments
if (arg[0] == 0) {
continue;
}
// Everything after "--" is an input file
if (!std::strcmp(arg, "--")) {
++i;
std::copy(argv + i, argv + argc, std::back_inserter(localInputFiles));
break;
}
// Long arguments that don't have a short option
{
bool isLongOption = true;
if (!std::strcmp(arg, "--rm")) {
keepSource = false;
} else if (!std::strcmp(arg, "--ultra")) {
ultra = true;
maxWindowLog = 0;
} else if (!std::strcmp(arg, "--no-check")) {
checksum = false;
} else if (!std::strcmp(arg, "--sparse")) {
writeMode = WriteMode::Sparse;
notSupported("Sparse mode");
return Status::Failure;
} else if (!std::strcmp(arg, "--no-sparse")) {
writeMode = WriteMode::Regular;
notSupported("Sparse mode");
return Status::Failure;
} else if (!std::strcmp(arg, "--dictID")) {
notSupported(arg);
return Status::Failure;
} else if (!std::strcmp(arg, "--no-dictID")) {
notSupported(arg);
return Status::Failure;
} else {
isLongOption = false;
}
if (isLongOption) {
continue;
}
}
// Arguments with a short option simply set their short option.
const char *options = nullptr;
if (!std::strcmp(arg, "--processes")) {
options = "p";
} else if (!std::strcmp(arg, "--version")) {
options = "V";
} else if (!std::strcmp(arg, "--help")) {
options = "h";
} else if (!std::strcmp(arg, "--decompress")) {
options = "d";
} else if (!std::strcmp(arg, "--force")) {
options = "f";
} else if (!std::strcmp(arg, "--stdout")) {
options = "c";
} else if (!std::strcmp(arg, "--keep")) {
options = "k";
} else if (!std::strcmp(arg, "--verbose")) {
options = "v";
} else if (!std::strcmp(arg, "--quiet")) {
options = "q";
} else if (!std::strcmp(arg, "--check")) {
options = "C";
} else if (!std::strcmp(arg, "--test")) {
options = "t";
} else if (arg[0] == '-' && arg[1] != 0) {
options = arg + 1;
} else {
localInputFiles.emplace_back(arg);
continue;
}
assert(options != nullptr);
bool finished = false;
while (!finished && *options != 0) {
// Parse the compression level
if (*options >= '0' && *options <= '9') {
compressionLevel = parseUnsigned(&options);
continue;
}
switch (*options) {
case 'h':
case 'H':
usage();
return Status::Message;
case 'V':
std::fprintf(stderr, "PZSTD version: %s.\n", ZSTD_VERSION_STRING);
return Status::Message;
case 'p': {
finished = true;
const char *optionArgument = getArgument(options, argv, i, argc);
if (optionArgument == nullptr) {
return Status::Failure;
}
if (*optionArgument < '0' || *optionArgument > '9') {
std::fprintf(stderr, "Option -p expects a number, but %s provided\n",
optionArgument);
return Status::Failure;
}
numThreads = parseUnsigned(&optionArgument);
if (*optionArgument != 0) {
std::fprintf(stderr,
"Option -p expects a number, but %u%s provided\n",
numThreads, optionArgument);
return Status::Failure;
}
break;
}
case 'o': {
finished = true;
const char *optionArgument = getArgument(options, argv, i, argc);
if (optionArgument == nullptr) {
return Status::Failure;
}
outputFile = optionArgument;
break;
}
case 'C':
checksum = true;
break;
case 'k':
keepSource = true;
break;
case 'd':
decompress = true;
break;
case 'f':
overwrite = true;
forceStdout = true;
followLinks = true;
break;
case 't':
test = true;
decompress = true;
break;
#ifdef UTIL_HAS_CREATEFILELIST
case 'r':
recursive = true;
break;
#endif
case 'c':
outputFile = kStdOut;
forceStdout = true;
break;
case 'v':
++verbosity;
break;
case 'q':
--verbosity;
// Ignore them for now
break;
// Unsupported options from Zstd
case 'D':
case 's':
notSupported("Zstd dictionaries.");
return Status::Failure;
case 'b':
case 'e':
case 'i':
case 'B':
notSupported("Zstd benchmarking options.");
return Status::Failure;
default:
std::fprintf(stderr, "Invalid argument: %s\n", arg);
return Status::Failure;
}
if (!finished) {
++options;
}
} // while (*options != 0);
} // for (int i = 1; i < argc; ++i);
// Set options for test mode
if (test) {
outputFile = nullOutput;
keepSource = true;
}
// Input file defaults to standard input if not provided.
if (localInputFiles.empty()) {
localInputFiles.emplace_back(kStdIn);
}
// Check validity of input files
if (localInputFiles.size() > 1) {
const auto it = std::find(localInputFiles.begin(), localInputFiles.end(),
std::string{kStdIn});
if (it != localInputFiles.end()) {
std::fprintf(
stderr,
"Cannot specify standard input when handling multiple files\n");
return Status::Failure;
}
}
if (localInputFiles.size() > 1 || recursive) {
if (!outputFile.empty() && outputFile != nullOutput) {
std::fprintf(
stderr,
"Cannot specify an output file when handling multiple inputs\n");
return Status::Failure;
}
}
g_utilDisplayLevel = verbosity;
// Remove local input files that are symbolic links
if (!followLinks) {
localInputFiles.erase(std::remove_if(localInputFiles.begin(), localInputFiles.end(),
[&](const char *path) {
bool isLink = UTIL_isLink(path);
if (isLink && verbosity >= 2) {
std::fprintf(
stderr,
"Warning : %s is symbolic link, ignoring\n",
path);
}
return isLink;
}), localInputFiles.end());
}
// Translate input files/directories into files to (de)compress
if (recursive) {
FileNamesTable* const files = UTIL_createExpandedFNT(localInputFiles.data(), localInputFiles.size(), followLinks);
if (files == nullptr) {
std::fprintf(stderr, "Error traversing directories\n");
return Status::Failure;
}
auto guard =
makeScopeGuard([&] { UTIL_freeFileNamesTable(files); });
if (files->tableSize == 0) {
std::fprintf(stderr, "No files found\n");
return Status::Failure;
}
inputFiles.resize(files->tableSize);
std::copy(files->fileNames, files->fileNames + files->tableSize, inputFiles.begin());
} else {
inputFiles.resize(localInputFiles.size());
std::copy(localInputFiles.begin(), localInputFiles.end(),
inputFiles.begin());
}
localInputFiles.clear();
assert(!inputFiles.empty());
// If reading from standard input, default to standard output
if (inputFiles[0] == kStdIn && outputFile.empty()) {
assert(inputFiles.size() == 1);
outputFile = "-";
}
if (inputFiles[0] == kStdIn && IS_CONSOLE(stdin)) {
assert(inputFiles.size() == 1);
std::fprintf(stderr, "Cannot read input from interactive console\n");
return Status::Failure;
}
if (outputFile == "-" && IS_CONSOLE(stdout) && !(forceStdout && decompress)) {
std::fprintf(stderr, "Will not write to console stdout unless -c or -f is "
"specified and decompressing\n");
return Status::Failure;
}
// Check compression level
{
unsigned maxCLevel =
ultra ? ZSTD_maxCLevel() : kMaxNonUltraCompressionLevel;
if (compressionLevel > maxCLevel || compressionLevel == 0) {
std::fprintf(stderr, "Invalid compression level %u.\n", compressionLevel);
return Status::Failure;
}
}
// Check that numThreads is set
if (numThreads == 0) {
std::fprintf(stderr, "Invalid arguments: # of threads not specified "
"and unable to determine hardware concurrency.\n");
return Status::Failure;
}
// Modify verbosity
// If we are piping input and output, turn off interaction
if (inputFiles[0] == kStdIn && outputFile == kStdOut && verbosity == 2) {
verbosity = 1;
}
// If we are in multi-file mode, turn off interaction
if (inputFiles.size() > 1 && verbosity == 2) {
verbosity = 1;
}
return Status::Success;
}
std::string Options::getOutputFile(const std::string &inputFile) const {
if (!outputFile.empty()) {
return outputFile;
}
// Attempt to add/remove zstd extension from the input file
if (decompress) {
int stemSize = inputFile.size() - kZstdExtension.size();
if (stemSize > 0 && inputFile.substr(stemSize) == kZstdExtension) {
return inputFile.substr(0, stemSize);
} else {
return "";
}
} else {
return inputFile + kZstdExtension;
}
}
}

View File

@@ -0,0 +1,71 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
#pragma once
#define ZSTD_STATIC_LINKING_ONLY
#define ZSTD_DISABLE_DEPRECATE_WARNINGS /* No deprecation warnings, pzstd itself is deprecated
* and uses deprecated functions
*/
#include "zstd.h"
#undef ZSTD_STATIC_LINKING_ONLY
#include <cstdint>
#include <string>
#include <vector>
namespace pzstd {
struct Options {
enum class WriteMode { Regular, Auto, Sparse };
unsigned numThreads;
unsigned maxWindowLog;
unsigned compressionLevel;
bool decompress;
std::vector<std::string> inputFiles;
std::string outputFile;
bool overwrite;
bool keepSource;
WriteMode writeMode;
bool checksum;
int verbosity;
enum class Status {
Success, // Successfully parsed options
Failure, // Failure to parse options
Message // Options specified to print a message (e.g. "-h")
};
Options();
Options(unsigned numThreads, unsigned maxWindowLog, unsigned compressionLevel,
bool decompress, std::vector<std::string> inputFiles,
std::string outputFile, bool overwrite, bool keepSource,
WriteMode writeMode, bool checksum, int verbosity)
: numThreads(numThreads), maxWindowLog(maxWindowLog),
compressionLevel(compressionLevel), decompress(decompress),
inputFiles(std::move(inputFiles)), outputFile(std::move(outputFile)),
overwrite(overwrite), keepSource(keepSource), writeMode(writeMode),
checksum(checksum), verbosity(verbosity) {}
Status parse(int argc, const char **argv);
ZSTD_parameters determineParameters() const {
ZSTD_parameters params = ZSTD_getParams(compressionLevel, 0, 0);
params.fParams.contentSizeFlag = 0;
params.fParams.checksumFlag = checksum;
if (maxWindowLog != 0 && params.cParams.windowLog > maxWindowLog) {
params.cParams.windowLog = maxWindowLog;
params.cParams = ZSTD_adjustCParams(params.cParams, 0, 0);
}
return params;
}
std::string getOutputFile(const std::string &inputFile) const;
};
}

View File

@@ -0,0 +1,626 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
#include "platform.h" /* Large Files support, SET_BINARY_MODE */
#include "Pzstd.h"
#include "SkippableFrame.h"
#include "utils/FileSystem.h"
#include "utils/Portability.h"
#include "utils/Range.h"
#include "utils/ScopeGuard.h"
#include "utils/ThreadPool.h"
#include "utils/WorkQueue.h"
#include <algorithm>
#include <chrono>
#include <cinttypes>
#include <cstddef>
#include <cstdio>
#include <memory>
#include <string>
namespace pzstd {
namespace {
#ifdef _WIN32
const std::string nullOutput = "nul";
#else
const std::string nullOutput = "/dev/null";
#endif
}
using std::size_t;
static std::uintmax_t fileSizeOrZero(const std::string &file) {
if (file == "-") {
return 0;
}
std::error_code ec;
auto size = file_size(file, ec);
if (ec) {
size = 0;
}
return size;
}
static std::uint64_t handleOneInput(const Options &options,
const std::string &inputFile,
FILE* inputFd,
const std::string &outputFile,
FILE* outputFd,
SharedState& state) {
auto inputSize = fileSizeOrZero(inputFile);
// WorkQueue outlives ThreadPool so in the case of error we are certain
// we don't accidentally try to call push() on it after it is destroyed
WorkQueue<std::shared_ptr<BufferWorkQueue>> outs{options.numThreads + 1};
std::uint64_t bytesRead;
std::uint64_t bytesWritten;
{
// Initialize the (de)compression thread pool with numThreads
ThreadPool executor(options.numThreads);
// Run the reader thread on an extra thread
ThreadPool readExecutor(1);
if (!options.decompress) {
// Add a job that reads the input and starts all the compression jobs
readExecutor.add(
[&state, &outs, &executor, inputFd, inputSize, &options, &bytesRead] {
bytesRead = asyncCompressChunks(
state,
outs,
executor,
inputFd,
inputSize,
options.numThreads,
options.determineParameters());
});
// Start writing
bytesWritten = writeFile(state, outs, outputFd, options.decompress);
} else {
// Add a job that reads the input and starts all the decompression jobs
readExecutor.add([&state, &outs, &executor, inputFd, &bytesRead] {
bytesRead = asyncDecompressFrames(state, outs, executor, inputFd);
});
// Start writing
bytesWritten = writeFile(state, outs, outputFd, options.decompress);
}
}
if (!state.errorHolder.hasError()) {
std::string inputFileName = inputFile == "-" ? "stdin" : inputFile;
std::string outputFileName = outputFile == "-" ? "stdout" : outputFile;
if (!options.decompress) {
double ratio = static_cast<double>(bytesWritten) /
static_cast<double>(bytesRead + !bytesRead);
state.log(kLogInfo, "%-20s :%6.2f%% (%6" PRIu64 " => %6" PRIu64
" bytes, %s)\n",
inputFileName.c_str(), ratio * 100, bytesRead, bytesWritten,
outputFileName.c_str());
} else {
state.log(kLogInfo, "%-20s: %" PRIu64 " bytes \n",
inputFileName.c_str(),bytesWritten);
}
}
return bytesWritten;
}
static FILE *openInputFile(const std::string &inputFile,
ErrorHolder &errorHolder) {
if (inputFile == "-") {
SET_BINARY_MODE(stdin);
return stdin;
}
// Check if input file is a directory
{
std::error_code ec;
if (is_directory(inputFile, ec)) {
errorHolder.setError("Output file is a directory -- ignored");
return nullptr;
}
}
auto inputFd = std::fopen(inputFile.c_str(), "rb");
if (!errorHolder.check(inputFd != nullptr, "Failed to open input file")) {
return nullptr;
}
return inputFd;
}
static FILE *openOutputFile(const Options &options,
const std::string &outputFile,
SharedState& state) {
if (outputFile == "-") {
SET_BINARY_MODE(stdout);
return stdout;
}
// Check if the output file exists and then open it
if (!options.overwrite && outputFile != nullOutput) {
auto outputFd = std::fopen(outputFile.c_str(), "rb");
if (outputFd != nullptr) {
std::fclose(outputFd);
if (!state.log.logsAt(kLogInfo)) {
state.errorHolder.setError("Output file exists");
return nullptr;
}
state.log(
kLogInfo,
"pzstd: %s already exists; do you wish to overwrite (y/n) ? ",
outputFile.c_str());
int c = getchar();
if (c != 'y' && c != 'Y') {
state.errorHolder.setError("Not overwritten");
return nullptr;
}
}
}
auto outputFd = std::fopen(outputFile.c_str(), "wb");
if (!state.errorHolder.check(
outputFd != nullptr, "Failed to open output file")) {
return nullptr;
}
return outputFd;
}
int pzstdMain(const Options &options) {
int returnCode = 0;
SharedState state(options);
for (const auto& input : options.inputFiles) {
// Setup the shared state
auto printErrorGuard = makeScopeGuard([&] {
if (state.errorHolder.hasError()) {
returnCode = 1;
state.log(kLogError, "pzstd: %s: %s.\n", input.c_str(),
state.errorHolder.getError().c_str());
}
});
// Open the input file
auto inputFd = openInputFile(input, state.errorHolder);
if (inputFd == nullptr) {
continue;
}
auto closeInputGuard = makeScopeGuard([&] { std::fclose(inputFd); });
// Open the output file
auto outputFile = options.getOutputFile(input);
if (!state.errorHolder.check(outputFile != "",
"Input file does not have extension .zst")) {
continue;
}
auto outputFd = openOutputFile(options, outputFile, state);
if (outputFd == nullptr) {
continue;
}
auto closeOutputGuard = makeScopeGuard([&] { std::fclose(outputFd); });
// (de)compress the file
handleOneInput(options, input, inputFd, outputFile, outputFd, state);
if (state.errorHolder.hasError()) {
continue;
}
// Delete the input file if necessary
if (!options.keepSource) {
// Be sure that we are done and have written everything before we delete
if (!state.errorHolder.check(std::fclose(inputFd) == 0,
"Failed to close input file")) {
continue;
}
closeInputGuard.dismiss();
if (!state.errorHolder.check(std::fclose(outputFd) == 0,
"Failed to close output file")) {
continue;
}
closeOutputGuard.dismiss();
if (std::remove(input.c_str()) != 0) {
state.errorHolder.setError("Failed to remove input file");
continue;
}
}
}
// Returns 1 if any of the files failed to (de)compress.
return returnCode;
}
/// Construct a `ZSTD_inBuffer` that points to the data in `buffer`.
static ZSTD_inBuffer makeZstdInBuffer(const Buffer& buffer) {
return ZSTD_inBuffer{buffer.data(), buffer.size(), 0};
}
/**
* Advance `buffer` and `inBuffer` by the amount of data read, as indicated by
* `inBuffer.pos`.
*/
void advance(Buffer& buffer, ZSTD_inBuffer& inBuffer) {
auto pos = inBuffer.pos;
inBuffer.src = static_cast<const unsigned char*>(inBuffer.src) + pos;
inBuffer.size -= pos;
inBuffer.pos = 0;
return buffer.advance(pos);
}
/// Construct a `ZSTD_outBuffer` that points to the data in `buffer`.
static ZSTD_outBuffer makeZstdOutBuffer(Buffer& buffer) {
return ZSTD_outBuffer{buffer.data(), buffer.size(), 0};
}
/**
* Split `buffer` and advance `outBuffer` by the amount of data written, as
* indicated by `outBuffer.pos`.
*/
Buffer split(Buffer& buffer, ZSTD_outBuffer& outBuffer) {
auto pos = outBuffer.pos;
outBuffer.dst = static_cast<unsigned char*>(outBuffer.dst) + pos;
outBuffer.size -= pos;
outBuffer.pos = 0;
return buffer.splitAt(pos);
}
/**
* Stream chunks of input from `in`, compress it, and stream it out to `out`.
*
* @param state The shared state
* @param in Queue that we `pop()` input buffers from
* @param out Queue that we `push()` compressed output buffers to
* @param maxInputSize An upper bound on the size of the input
*/
static void compress(
SharedState& state,
std::shared_ptr<BufferWorkQueue> in,
std::shared_ptr<BufferWorkQueue> out,
size_t maxInputSize) {
auto& errorHolder = state.errorHolder;
auto guard = makeScopeGuard([&] {
in->finish();
out->finish();
});
// Initialize the CCtx
auto ctx = state.cStreamPool->get();
if (!errorHolder.check(ctx != nullptr, "Failed to allocate ZSTD_CStream")) {
return;
}
{
auto err = ZSTD_CCtx_reset(ctx.get(), ZSTD_reset_session_only);
if (!errorHolder.check(!ZSTD_isError(err), ZSTD_getErrorName(err))) {
return;
}
}
// Allocate space for the result
auto outBuffer = Buffer(ZSTD_compressBound(maxInputSize));
auto zstdOutBuffer = makeZstdOutBuffer(outBuffer);
{
Buffer inBuffer;
// Read a buffer in from the input queue
while (in->pop(inBuffer) && !errorHolder.hasError()) {
auto zstdInBuffer = makeZstdInBuffer(inBuffer);
// Compress the whole buffer and send it to the output queue
while (!inBuffer.empty() && !errorHolder.hasError()) {
if (!errorHolder.check(
!outBuffer.empty(), "ZSTD_compressBound() was too small")) {
return;
}
// Compress
auto err =
ZSTD_compressStream(ctx.get(), &zstdOutBuffer, &zstdInBuffer);
if (!errorHolder.check(!ZSTD_isError(err), ZSTD_getErrorName(err))) {
return;
}
// Split the compressed data off outBuffer and pass to the output queue
out->push(split(outBuffer, zstdOutBuffer));
// Forget about the data we already compressed
advance(inBuffer, zstdInBuffer);
}
}
}
// Write the epilog
size_t bytesLeft;
do {
if (!errorHolder.check(
!outBuffer.empty(), "ZSTD_compressBound() was too small")) {
return;
}
bytesLeft = ZSTD_endStream(ctx.get(), &zstdOutBuffer);
if (!errorHolder.check(
!ZSTD_isError(bytesLeft), ZSTD_getErrorName(bytesLeft))) {
return;
}
out->push(split(outBuffer, zstdOutBuffer));
} while (bytesLeft != 0 && !errorHolder.hasError());
}
/**
* Calculates how large each independently compressed frame should be.
*
* @param size The size of the source if known, 0 otherwise
* @param numThreads The number of threads available to run compression jobs on
* @param params The zstd parameters to be used for compression
*/
static size_t calculateStep(
std::uintmax_t size,
size_t numThreads,
const ZSTD_parameters &params) {
(void)size;
(void)numThreads;
// Not validated to work correctly for window logs > 23.
// It will definitely fail if windowLog + 2 is >= 4GB because
// the skippable frame can only store sizes up to 4GB.
assert(params.cParams.windowLog <= 23);
return size_t{1} << (params.cParams.windowLog + 2);
}
namespace {
enum class FileStatus { Continue, Done, Error };
/// Determines the status of the file descriptor `fd`.
FileStatus fileStatus(FILE* fd) {
if (std::feof(fd)) {
return FileStatus::Done;
} else if (std::ferror(fd)) {
return FileStatus::Error;
}
return FileStatus::Continue;
}
} // anonymous namespace
/**
* Reads `size` data in chunks of `chunkSize` and puts it into `queue`.
* Will read less if an error or EOF occurs.
* Returns the status of the file after all of the reads have occurred.
*/
static FileStatus
readData(BufferWorkQueue& queue, size_t chunkSize, size_t size, FILE* fd,
std::uint64_t *totalBytesRead) {
Buffer buffer(size);
while (!buffer.empty()) {
auto bytesRead =
std::fread(buffer.data(), 1, std::min(chunkSize, buffer.size()), fd);
*totalBytesRead += bytesRead;
queue.push(buffer.splitAt(bytesRead));
auto status = fileStatus(fd);
if (status != FileStatus::Continue) {
return status;
}
}
return FileStatus::Continue;
}
std::uint64_t asyncCompressChunks(
SharedState& state,
WorkQueue<std::shared_ptr<BufferWorkQueue>>& chunks,
ThreadPool& executor,
FILE* fd,
std::uintmax_t size,
size_t numThreads,
ZSTD_parameters params) {
auto chunksGuard = makeScopeGuard([&] { chunks.finish(); });
std::uint64_t bytesRead = 0;
// Break the input up into chunks of size `step` and compress each chunk
// independently.
size_t step = calculateStep(size, numThreads, params);
state.log(kLogDebug, "Chosen frame size: %zu\n", step);
auto status = FileStatus::Continue;
while (status == FileStatus::Continue && !state.errorHolder.hasError()) {
// Make a new input queue that we will put the chunk's input data into.
auto in = std::make_shared<BufferWorkQueue>();
auto inGuard = makeScopeGuard([&] { in->finish(); });
// Make a new output queue that compress will put the compressed data into.
auto out = std::make_shared<BufferWorkQueue>();
// Start compression in the thread pool
executor.add([&state, in, out, step] {
return compress(
state, std::move(in), std::move(out), step);
});
// Pass the output queue to the writer thread.
chunks.push(std::move(out));
state.log(kLogVerbose, "%s\n", "Starting a new frame");
// Fill the input queue for the compression job we just started
status = readData(*in, ZSTD_CStreamInSize(), step, fd, &bytesRead);
}
state.errorHolder.check(status != FileStatus::Error, "Error reading input");
return bytesRead;
}
/**
* Decompress a frame, whose data is streamed into `in`, and stream the output
* to `out`.
*
* @param state The shared state
* @param in Queue that we `pop()` input buffers from. It contains
* exactly one compressed frame.
* @param out Queue that we `push()` decompressed output buffers to
*/
static void decompress(
SharedState& state,
std::shared_ptr<BufferWorkQueue> in,
std::shared_ptr<BufferWorkQueue> out) {
auto& errorHolder = state.errorHolder;
auto guard = makeScopeGuard([&] {
in->finish();
out->finish();
});
// Initialize the DCtx
auto ctx = state.dStreamPool->get();
if (!errorHolder.check(ctx != nullptr, "Failed to allocate ZSTD_DStream")) {
return;
}
{
auto err = ZSTD_DCtx_reset(ctx.get(), ZSTD_reset_session_only);
if (!errorHolder.check(!ZSTD_isError(err), ZSTD_getErrorName(err))) {
return;
}
}
const size_t outSize = ZSTD_DStreamOutSize();
Buffer inBuffer;
size_t returnCode = 0;
// Read a buffer in from the input queue
while (in->pop(inBuffer) && !errorHolder.hasError()) {
auto zstdInBuffer = makeZstdInBuffer(inBuffer);
// Decompress the whole buffer and send it to the output queue
while (!inBuffer.empty() && !errorHolder.hasError()) {
// Allocate a buffer with at least outSize bytes.
Buffer outBuffer(outSize);
auto zstdOutBuffer = makeZstdOutBuffer(outBuffer);
// Decompress
returnCode =
ZSTD_decompressStream(ctx.get(), &zstdOutBuffer, &zstdInBuffer);
if (!errorHolder.check(
!ZSTD_isError(returnCode), ZSTD_getErrorName(returnCode))) {
return;
}
// Pass the buffer with the decompressed data to the output queue
out->push(split(outBuffer, zstdOutBuffer));
// Advance past the input we already read
advance(inBuffer, zstdInBuffer);
if (returnCode == 0) {
// The frame is over, prepare to (maybe) start a new frame
ZSTD_initDStream(ctx.get());
}
}
}
if (!errorHolder.check(returnCode <= 1, "Incomplete block")) {
return;
}
// We've given ZSTD_decompressStream all of our data, but there may still
// be data to read.
while (returnCode == 1) {
// Allocate a buffer with at least outSize bytes.
Buffer outBuffer(outSize);
auto zstdOutBuffer = makeZstdOutBuffer(outBuffer);
// Pass in no input.
ZSTD_inBuffer zstdInBuffer{nullptr, 0, 0};
// Decompress
returnCode =
ZSTD_decompressStream(ctx.get(), &zstdOutBuffer, &zstdInBuffer);
if (!errorHolder.check(
!ZSTD_isError(returnCode), ZSTD_getErrorName(returnCode))) {
return;
}
// Pass the buffer with the decompressed data to the output queue
out->push(split(outBuffer, zstdOutBuffer));
}
}
std::uint64_t asyncDecompressFrames(
SharedState& state,
WorkQueue<std::shared_ptr<BufferWorkQueue>>& frames,
ThreadPool& executor,
FILE* fd) {
auto framesGuard = makeScopeGuard([&] { frames.finish(); });
std::uint64_t totalBytesRead = 0;
// Split the source up into its component frames.
// If we find our recognized skippable frame we know the next frames size
// which means that we can decompress each standard frame in independently.
// Otherwise, we will decompress using only one decompression task.
const size_t chunkSize = ZSTD_DStreamInSize();
auto status = FileStatus::Continue;
while (status == FileStatus::Continue && !state.errorHolder.hasError()) {
// Make a new input queue that we will put the frames's bytes into.
auto in = std::make_shared<BufferWorkQueue>();
auto inGuard = makeScopeGuard([&] { in->finish(); });
// Make a output queue that decompress will put the decompressed data into
auto out = std::make_shared<BufferWorkQueue>();
size_t frameSize;
{
// Calculate the size of the next frame.
// frameSize is 0 if the frame info can't be decoded.
Buffer buffer(SkippableFrame::kSize);
auto bytesRead = std::fread(buffer.data(), 1, buffer.size(), fd);
totalBytesRead += bytesRead;
status = fileStatus(fd);
if (bytesRead == 0 && status != FileStatus::Continue) {
break;
}
buffer.subtract(buffer.size() - bytesRead);
frameSize = SkippableFrame::tryRead(buffer.range());
in->push(std::move(buffer));
}
if (frameSize == 0) {
// We hit a non SkippableFrame, so this will be the last job.
// Make sure that we don't use too much memory
in->setMaxSize(64);
out->setMaxSize(64);
}
// Start decompression in the thread pool
executor.add([&state, in, out] {
return decompress(state, std::move(in), std::move(out));
});
// Pass the output queue to the writer thread
frames.push(std::move(out));
if (frameSize == 0) {
// We hit a non SkippableFrame ==> not compressed by pzstd or corrupted
// Pass the rest of the source to this decompression task
state.log(kLogVerbose, "%s\n",
"Input not in pzstd format, falling back to serial decompression");
while (status == FileStatus::Continue && !state.errorHolder.hasError()) {
status = readData(*in, chunkSize, chunkSize, fd, &totalBytesRead);
}
break;
}
state.log(kLogVerbose, "Decompressing a frame of size %zu", frameSize);
// Fill the input queue for the decompression job we just started
status = readData(*in, chunkSize, frameSize, fd, &totalBytesRead);
}
state.errorHolder.check(status != FileStatus::Error, "Error reading input");
return totalBytesRead;
}
/// Write `data` to `fd`, returns true iff success.
static bool writeData(ByteRange data, FILE* fd) {
while (!data.empty()) {
data.advance(std::fwrite(data.begin(), 1, data.size(), fd));
if (std::ferror(fd)) {
return false;
}
}
return true;
}
std::uint64_t writeFile(
SharedState& state,
WorkQueue<std::shared_ptr<BufferWorkQueue>>& outs,
FILE* outputFd,
bool decompress) {
auto& errorHolder = state.errorHolder;
auto outsFinishGuard = makeScopeGuard([&outs] { outs.finish(); });
auto lineClearGuard = makeScopeGuard([&state] {
state.log.clear(kLogInfo);
});
std::uint64_t bytesWritten = 0;
std::shared_ptr<BufferWorkQueue> out;
// Grab the output queue for each decompression job (in order).
while (outs.pop(out)) {
auto outFinishGuard = makeScopeGuard([&out] { out->finish(); });
if (errorHolder.hasError()) {
continue;
}
if (!decompress) {
// If we are compressing and want to write skippable frames we can't
// start writing before compression is done because we need to know the
// compressed size.
// Wait for the compressed size to be available and write skippable frame
assert(uint64_t(out->size()) < uint64_t(1) << 32);
SkippableFrame frame(uint32_t(out->size()));
if (!writeData(frame.data(), outputFd)) {
errorHolder.setError("Failed to write output");
return bytesWritten;
}
bytesWritten += frame.kSize;
}
// For each chunk of the frame: Pop it from the queue and write it
Buffer buffer;
while (out->pop(buffer) && !errorHolder.hasError()) {
if (!writeData(buffer.range(), outputFd)) {
errorHolder.setError("Failed to write output");
return bytesWritten;
}
bytesWritten += buffer.size();
state.log.update(kLogInfo, "Written: %u MB ",
static_cast<std::uint32_t>(bytesWritten >> 20));
}
}
return bytesWritten;
}
}

View File

@@ -0,0 +1,153 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
#pragma once
#include "ErrorHolder.h"
#include "Logging.h"
#include "Options.h"
#include "utils/Buffer.h"
#include "utils/Range.h"
#include "utils/ResourcePool.h"
#include "utils/ThreadPool.h"
#include "utils/WorkQueue.h"
#define ZSTD_STATIC_LINKING_ONLY
#define ZSTD_DISABLE_DEPRECATE_WARNINGS /* No deprecation warnings, pzstd itself is deprecated
* and uses deprecated functions
*/
#include "zstd.h"
#undef ZSTD_STATIC_LINKING_ONLY
#include <cstddef>
#include <cstdint>
#include <memory>
namespace pzstd {
/**
* Runs pzstd with `options` and returns the number of bytes written.
* An error occurred if `errorHandler.hasError()`.
*
* @param options The pzstd options to use for (de)compression
* @returns 0 upon success and non-zero on failure.
*/
int pzstdMain(const Options& options);
class SharedState {
public:
SharedState(const Options& options) : log(options.verbosity) {
if (!options.decompress) {
auto parameters = options.determineParameters();
cStreamPool.reset(new ResourcePool<ZSTD_CStream>{
[this, parameters]() -> ZSTD_CStream* {
this->log(kLogVerbose, "%s\n", "Creating new ZSTD_CStream");
auto zcs = ZSTD_createCStream();
if (zcs) {
auto err = ZSTD_initCStream_advanced(
zcs, nullptr, 0, parameters, 0);
if (ZSTD_isError(err)) {
ZSTD_freeCStream(zcs);
return nullptr;
}
}
return zcs;
},
[](ZSTD_CStream *zcs) {
ZSTD_freeCStream(zcs);
}});
} else {
dStreamPool.reset(new ResourcePool<ZSTD_DStream>{
[this]() -> ZSTD_DStream* {
this->log(kLogVerbose, "%s\n", "Creating new ZSTD_DStream");
auto zds = ZSTD_createDStream();
if (zds) {
auto err = ZSTD_initDStream(zds);
if (ZSTD_isError(err)) {
ZSTD_freeDStream(zds);
return nullptr;
}
}
return zds;
},
[](ZSTD_DStream *zds) {
ZSTD_freeDStream(zds);
}});
}
}
~SharedState() {
// The resource pools have references to this, so destroy them first.
cStreamPool.reset();
dStreamPool.reset();
}
Logger log;
ErrorHolder errorHolder;
std::unique_ptr<ResourcePool<ZSTD_CStream>> cStreamPool;
std::unique_ptr<ResourcePool<ZSTD_DStream>> dStreamPool;
};
/**
* Streams input from `fd`, breaks input up into chunks, and compresses each
* chunk independently. Output of each chunk gets streamed to a queue, and
* the output queues get put into `chunks` in order.
*
* @param state The shared state
* @param chunks Each compression jobs output queue gets `pushed()` here
* as soon as it is available
* @param executor The thread pool to run compression jobs in
* @param fd The input file descriptor
* @param size The size of the input file if known, 0 otherwise
* @param numThreads The number of threads in the thread pool
* @param parameters The zstd parameters to use for compression
* @returns The number of bytes read from the file
*/
std::uint64_t asyncCompressChunks(
SharedState& state,
WorkQueue<std::shared_ptr<BufferWorkQueue>>& chunks,
ThreadPool& executor,
FILE* fd,
std::uintmax_t size,
std::size_t numThreads,
ZSTD_parameters parameters);
/**
* Streams input from `fd`. If pzstd headers are available it breaks the input
* up into independent frames. It sends each frame to an independent
* decompression job. Output of each frame gets streamed to a queue, and
* the output queues get put into `frames` in order.
*
* @param state The shared state
* @param frames Each decompression jobs output queue gets `pushed()` here
* as soon as it is available
* @param executor The thread pool to run compression jobs in
* @param fd The input file descriptor
* @returns The number of bytes read from the file
*/
std::uint64_t asyncDecompressFrames(
SharedState& state,
WorkQueue<std::shared_ptr<BufferWorkQueue>>& frames,
ThreadPool& executor,
FILE* fd);
/**
* Streams input in from each queue in `outs` in order, and writes the data to
* `outputFd`.
*
* @param state The shared state
* @param outs A queue of output queues, one for each
* (de)compression job.
* @param outputFd The file descriptor to write to
* @param decompress Are we decompressing?
* @returns The number of bytes written
*/
std::uint64_t writeFile(
SharedState& state,
WorkQueue<std::shared_ptr<BufferWorkQueue>>& outs,
FILE* outputFd,
bool decompress);
}

View File

@@ -0,0 +1,56 @@
# Parallel Zstandard (PZstandard)
Parallel Zstandard is a Pigz-like tool for Zstandard.
It provides Zstandard format compatible compression and decompression that is able to utilize multiple cores.
It breaks the input up into equal sized chunks and compresses each chunk independently into a Zstandard frame.
It then concatenates the frames together to produce the final compressed output.
Pzstandard will write a 12 byte header for each frame that is a skippable frame in the Zstandard format, which tells PZstandard the size of the next compressed frame.
PZstandard supports parallel decompression of files compressed with PZstandard.
When decompressing files compressed with Zstandard, PZstandard does IO in one thread, and decompression in another.
## Usage
PZstandard supports the same command line interface as Zstandard, but also provides the `-p` option to specify the number of threads.
Dictionary mode is not currently supported.
Basic usage
pzstd input-file -o output-file -p num-threads -# # Compression
pzstd -d input-file -o output-file -p num-threads # Decompression
PZstandard also supports piping and fifo pipes
cat input-file | pzstd -p num-threads -# -c > /dev/null
For more options
pzstd --help
PZstandard tries to pick a smart default number of threads if not specified (displayed in `pzstd --help`).
If this number is not suitable, during compilation you can define `PZSTD_NUM_THREADS` to the number of threads you prefer.
## Benchmarks
As a reference, PZstandard and Pigz were compared on an Intel Core i7 @ 3.1 GHz, each using 4 threads, with the [Silesia compression corpus](https://sun.aei.polsl.pl//~sdeor/index.php?page=silesia).
Compression Speed vs Ratio with 4 Threads | Decompression Speed with 4 Threads
------------------------------------------|-----------------------------------
![Compression Speed vs Ratio](images/Cspeed.png "Compression Speed vs Ratio") | ![Decompression Speed](images/Dspeed.png "Decompression Speed")
The test procedure was to run each of the following commands 2 times for each compression level, and take the minimum time.
time pzstd -# -p 4 -c silesia.tar > silesia.tar.zst
time pzstd -d -p 4 -c silesia.tar.zst > /dev/null
time pigz -# -p 4 -k -c silesia.tar > silesia.tar.gz
time pigz -d -p 4 -k -c silesia.tar.gz > /dev/null
PZstandard was tested using compression levels 1-19, and Pigz was tested using compression levels 1-9.
Pigz cannot do parallel decompression, it simply does each of reading, decompression, and writing on separate threads.
## Tests
Tests require that you have [gtest](https://github.com/google/googletest) installed.
Set `GTEST_INC` and `GTEST_LIB` in `Makefile` to specify the location of the gtest headers and libraries.
Alternatively, run `make googletest`, which will clone googletest and build it.
Run `make tests && make check` to run tests.

View File

@@ -0,0 +1,30 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
#include "SkippableFrame.h"
#include "mem.h"
#include "utils/Range.h"
#include <cstdio>
using namespace pzstd;
SkippableFrame::SkippableFrame(std::uint32_t size) : frameSize_(size) {
MEM_writeLE32(data_.data(), kSkippableFrameMagicNumber);
MEM_writeLE32(data_.data() + 4, kFrameContentsSize);
MEM_writeLE32(data_.data() + 8, frameSize_);
}
/* static */ std::size_t SkippableFrame::tryRead(ByteRange bytes) {
if (bytes.size() < SkippableFrame::kSize ||
MEM_readLE32(bytes.begin()) != kSkippableFrameMagicNumber ||
MEM_readLE32(bytes.begin() + 4) != kFrameContentsSize) {
return 0;
}
return MEM_readLE32(bytes.begin() + 8);
}

View File

@@ -0,0 +1,64 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
#pragma once
#include "utils/Range.h"
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstdio>
namespace pzstd {
/**
* We put a skippable frame before each frame.
* It contains a skippable frame magic number, the size of the skippable frame,
* and the size of the next frame.
* Each skippable frame is exactly 12 bytes in little endian format.
* The first 8 bytes are for compatibility with the ZSTD format.
* If we have N threads, the output will look like
*
* [0x184D2A50|4|size1] [frame1 of size size1]
* [0x184D2A50|4|size2] [frame2 of size size2]
* ...
* [0x184D2A50|4|sizeN] [frameN of size sizeN]
*
* Each sizeX is 4 bytes.
*
* These skippable frames should allow us to skip through the compressed file
* and only load at most N pages.
*/
class SkippableFrame {
public:
static constexpr std::size_t kSize = 12;
private:
std::uint32_t frameSize_;
std::array<std::uint8_t, kSize> data_;
static constexpr std::uint32_t kSkippableFrameMagicNumber = 0x184D2A50;
// Could be improved if the size fits in less bytes
static constexpr std::uint32_t kFrameContentsSize = kSize - 8;
public:
// Write the skippable frame to data_ in LE format.
explicit SkippableFrame(std::uint32_t size);
// Read the skippable frame from bytes in LE format.
static std::size_t tryRead(ByteRange bytes);
ByteRange data() const {
return {data_.data(), data_.size()};
}
// Size of the next frame.
std::size_t frameSize() const {
return frameSize_;
}
};
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 68 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

View File

@@ -0,0 +1,27 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
#include "ErrorHolder.h"
#include "Options.h"
#include "Pzstd.h"
using namespace pzstd;
int main(int argc, const char** argv) {
Options options;
switch (options.parse(argc, argv)) {
case Options::Status::Failure:
return 1;
case Options::Status::Message:
return 0;
default:
break;
}
return pzstdMain(options);
}

View File

@@ -0,0 +1,37 @@
cxx_test(
name='options_test',
srcs=['OptionsTest.cpp'],
deps=['//contrib/pzstd:options'],
)
cxx_test(
name='pzstd_test',
srcs=['PzstdTest.cpp'],
deps=[
':round_trip',
'//contrib/pzstd:libpzstd',
'//contrib/pzstd/utils:scope_guard',
'//programs:datagen',
],
)
cxx_binary(
name='round_trip_test',
srcs=['RoundTripTest.cpp'],
deps=[
':round_trip',
'//contrib/pzstd/utils:scope_guard',
'//programs:datagen',
]
)
cxx_library(
name='round_trip',
header_namespace='test',
exported_headers=['RoundTrip.h'],
deps=[
'//contrib/pzstd:libpzstd',
'//contrib/pzstd:options',
'//contrib/pzstd/utils:scope_guard',
]
)

View File

@@ -0,0 +1,536 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
#include "Options.h"
#include <array>
#include <gtest/gtest.h>
using namespace pzstd;
namespace pzstd {
bool operator==(const Options &lhs, const Options &rhs) {
return lhs.numThreads == rhs.numThreads &&
lhs.maxWindowLog == rhs.maxWindowLog &&
lhs.compressionLevel == rhs.compressionLevel &&
lhs.decompress == rhs.decompress && lhs.inputFiles == rhs.inputFiles &&
lhs.outputFile == rhs.outputFile && lhs.overwrite == rhs.overwrite &&
lhs.keepSource == rhs.keepSource && lhs.writeMode == rhs.writeMode &&
lhs.checksum == rhs.checksum && lhs.verbosity == rhs.verbosity;
}
std::ostream &operator<<(std::ostream &out, const Options &opt) {
out << "{";
{
out << "\n\t"
<< "numThreads: " << opt.numThreads;
out << ",\n\t"
<< "maxWindowLog: " << opt.maxWindowLog;
out << ",\n\t"
<< "compressionLevel: " << opt.compressionLevel;
out << ",\n\t"
<< "decompress: " << opt.decompress;
out << ",\n\t"
<< "inputFiles: {";
{
bool first = true;
for (const auto &file : opt.inputFiles) {
if (!first) {
out << ",";
}
first = false;
out << "\n\t\t" << file;
}
}
out << "\n\t}";
out << ",\n\t"
<< "outputFile: " << opt.outputFile;
out << ",\n\t"
<< "overwrite: " << opt.overwrite;
out << ",\n\t"
<< "keepSource: " << opt.keepSource;
out << ",\n\t"
<< "writeMode: " << static_cast<int>(opt.writeMode);
out << ",\n\t"
<< "checksum: " << opt.checksum;
out << ",\n\t"
<< "verbosity: " << opt.verbosity;
}
out << "\n}";
return out;
}
}
namespace {
#ifdef _WIN32
const char nullOutput[] = "nul";
#else
const char nullOutput[] = "/dev/null";
#endif
constexpr auto autoMode = Options::WriteMode::Auto;
} // anonymous namespace
#define EXPECT_SUCCESS(...) EXPECT_EQ(Options::Status::Success, __VA_ARGS__)
#define EXPECT_FAILURE(...) EXPECT_EQ(Options::Status::Failure, __VA_ARGS__)
#define EXPECT_MESSAGE(...) EXPECT_EQ(Options::Status::Message, __VA_ARGS__)
template <typename... Args>
std::array<const char *, sizeof...(Args) + 1> makeArray(Args... args) {
return {{nullptr, args...}};
}
TEST(Options, ValidInputs) {
{
Options options;
auto args = makeArray("--processes", "5", "-o", "x", "y", "-f");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
Options expected = {5, 23, 3, false, {"y"}, "x",
true, true, autoMode, true, 2};
EXPECT_EQ(expected, options);
}
{
Options options;
auto args = makeArray("-p", "1", "input", "-19");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
Options expected = {1, 23, 19, false, {"input"}, "",
false, true, autoMode, true, 2};
EXPECT_EQ(expected, options);
}
{
Options options;
auto args =
makeArray("--ultra", "-22", "-p", "1", "-o", "x", "-d", "x.zst", "-f");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
Options expected = {1, 0, 22, true, {"x.zst"}, "x",
true, true, autoMode, true, 2};
EXPECT_EQ(expected, options);
}
{
Options options;
auto args = makeArray("--processes", "100", "hello.zst", "--decompress",
"--force");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
Options expected = {100, 23, 3, true, {"hello.zst"}, "", true,
true, autoMode, true, 2};
EXPECT_EQ(expected, options);
}
{
Options options;
auto args = makeArray("x", "-dp", "1", "-c");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
Options expected = {1, 23, 3, true, {"x"}, "-",
false, true, autoMode, true, 2};
EXPECT_EQ(expected, options);
}
{
Options options;
auto args = makeArray("x", "-dp", "1", "--stdout");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
Options expected = {1, 23, 3, true, {"x"}, "-",
false, true, autoMode, true, 2};
EXPECT_EQ(expected, options);
}
{
Options options;
auto args = makeArray("-p", "1", "x", "-5", "-fo", "-", "--ultra", "-d");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
Options expected = {1, 0, 5, true, {"x"}, "-",
true, true, autoMode, true, 2};
EXPECT_EQ(expected, options);
}
{
Options options;
auto args = makeArray("silesia.tar", "-o", "silesia.tar.pzstd", "-p", "2");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
Options expected = {2,
23,
3,
false,
{"silesia.tar"},
"silesia.tar.pzstd",
false,
true,
autoMode,
true,
2};
EXPECT_EQ(expected, options);
}
{
Options options;
auto args = makeArray("x", "-p", "1");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
}
{
Options options;
auto args = makeArray("x", "-p", "1");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
}
}
TEST(Options, GetOutputFile) {
{
Options options;
auto args = makeArray("x");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ("x.zst", options.getOutputFile(options.inputFiles[0]));
}
{
Options options;
auto args = makeArray("x", "y", "-o", nullOutput);
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ(nullOutput, options.getOutputFile(options.inputFiles[0]));
}
{
Options options;
auto args = makeArray("x.zst", "-do", nullOutput);
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ(nullOutput, options.getOutputFile(options.inputFiles[0]));
}
{
Options options;
auto args = makeArray("x.zst", "-d");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ("x", options.getOutputFile(options.inputFiles[0]));
}
{
Options options;
auto args = makeArray("xzst", "-d");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ("", options.getOutputFile(options.inputFiles[0]));
}
{
Options options;
auto args = makeArray("xzst", "-doxx");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ("xx", options.getOutputFile(options.inputFiles[0]));
}
}
TEST(Options, MultipleFiles) {
{
Options options;
auto args = makeArray("x", "y", "z");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
Options expected;
expected.inputFiles = {"x", "y", "z"};
expected.verbosity = 1;
EXPECT_EQ(expected, options);
}
{
Options options;
auto args = makeArray("x", "y", "z", "-o", nullOutput);
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
Options expected;
expected.inputFiles = {"x", "y", "z"};
expected.outputFile = nullOutput;
expected.verbosity = 1;
EXPECT_EQ(expected, options);
}
{
Options options;
auto args = makeArray("x", "y", "-o-");
EXPECT_FAILURE(options.parse(args.size(), args.data()));
}
{
Options options;
auto args = makeArray("x", "y", "-o", "file");
EXPECT_FAILURE(options.parse(args.size(), args.data()));
}
{
Options options;
auto args = makeArray("-qqvd12qp4", "-f", "x", "--", "--rm", "-c");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
Options expected = {4, 23, 12, true, {"x", "--rm", "-c"},
"", true, true, autoMode, true,
0};
EXPECT_EQ(expected, options);
}
}
TEST(Options, NumThreads) {
{
Options options;
auto args = makeArray("x", "-dfo", "-");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
}
{
Options options;
auto args = makeArray("x", "-p", "0", "-fo", "-");
EXPECT_FAILURE(options.parse(args.size(), args.data()));
}
{
Options options;
auto args = makeArray("-f", "-p", "-o", "-");
EXPECT_FAILURE(options.parse(args.size(), args.data()));
}
}
TEST(Options, BadCompressionLevel) {
{
Options options;
auto args = makeArray("x", "-20");
EXPECT_FAILURE(options.parse(args.size(), args.data()));
}
{
Options options;
auto args = makeArray("x", "--ultra", "-23");
EXPECT_FAILURE(options.parse(args.size(), args.data()));
}
{
Options options;
auto args = makeArray("x", "--1"); // negative 1?
EXPECT_FAILURE(options.parse(args.size(), args.data()));
}
}
TEST(Options, InvalidOption) {
{
Options options;
auto args = makeArray("x", "-x");
EXPECT_FAILURE(options.parse(args.size(), args.data()));
}
}
TEST(Options, BadOutputFile) {
{
Options options;
auto args = makeArray("notzst", "-d", "-p", "1");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ("", options.getOutputFile(options.inputFiles.front()));
}
}
TEST(Options, BadOptionsWithArguments) {
{
Options options;
auto args = makeArray("x", "-pf");
EXPECT_FAILURE(options.parse(args.size(), args.data()));
}
{
Options options;
auto args = makeArray("x", "-p", "10f");
EXPECT_FAILURE(options.parse(args.size(), args.data()));
}
{
Options options;
auto args = makeArray("x", "-p");
EXPECT_FAILURE(options.parse(args.size(), args.data()));
}
{
Options options;
auto args = makeArray("x", "-o");
EXPECT_FAILURE(options.parse(args.size(), args.data()));
}
{
Options options;
auto args = makeArray("x", "-o");
EXPECT_FAILURE(options.parse(args.size(), args.data()));
}
}
TEST(Options, KeepSource) {
{
Options options;
auto args = makeArray("x", "--rm", "-k");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ(true, options.keepSource);
}
{
Options options;
auto args = makeArray("x", "--rm", "--keep");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ(true, options.keepSource);
}
{
Options options;
auto args = makeArray("x");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ(true, options.keepSource);
}
{
Options options;
auto args = makeArray("x", "--rm");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ(false, options.keepSource);
}
}
TEST(Options, Verbosity) {
{
Options options;
auto args = makeArray("x");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ(2, options.verbosity);
}
{
Options options;
auto args = makeArray("--quiet", "-qq", "x");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ(-1, options.verbosity);
}
{
Options options;
auto args = makeArray("x", "y");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ(1, options.verbosity);
}
{
Options options;
auto args = makeArray("--", "x", "y");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ(1, options.verbosity);
}
{
Options options;
auto args = makeArray("-qv", "x", "y");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ(1, options.verbosity);
}
{
Options options;
auto args = makeArray("-v", "x", "y");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ(3, options.verbosity);
}
{
Options options;
auto args = makeArray("-v", "x");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ(3, options.verbosity);
}
}
TEST(Options, TestMode) {
{
Options options;
auto args = makeArray("x", "-t");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ(true, options.keepSource);
EXPECT_EQ(true, options.decompress);
EXPECT_EQ(nullOutput, options.outputFile);
}
{
Options options;
auto args = makeArray("x", "--test", "--rm", "-ohello");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ(true, options.keepSource);
EXPECT_EQ(true, options.decompress);
EXPECT_EQ(nullOutput, options.outputFile);
}
}
TEST(Options, Checksum) {
{
Options options;
auto args = makeArray("x.zst", "--no-check", "-Cd");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ(true, options.checksum);
}
{
Options options;
auto args = makeArray("x");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ(true, options.checksum);
}
{
Options options;
auto args = makeArray("x", "--no-check", "--check");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ(true, options.checksum);
}
{
Options options;
auto args = makeArray("x", "--no-check");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ(false, options.checksum);
}
}
TEST(Options, InputFiles) {
{
Options options;
auto args = makeArray("-cd");
options.parse(args.size(), args.data());
EXPECT_EQ(1, options.inputFiles.size());
EXPECT_EQ("-", options.inputFiles[0]);
EXPECT_EQ("-", options.outputFile);
}
{
Options options;
auto args = makeArray();
options.parse(args.size(), args.data());
EXPECT_EQ(1, options.inputFiles.size());
EXPECT_EQ("-", options.inputFiles[0]);
EXPECT_EQ("-", options.outputFile);
}
{
Options options;
auto args = makeArray("-d");
options.parse(args.size(), args.data());
EXPECT_EQ(1, options.inputFiles.size());
EXPECT_EQ("-", options.inputFiles[0]);
EXPECT_EQ("-", options.outputFile);
}
{
Options options;
auto args = makeArray("x", "-");
EXPECT_FAILURE(options.parse(args.size(), args.data()));
}
}
TEST(Options, InvalidOptions) {
{
Options options;
auto args = makeArray("-ibasdf");
EXPECT_FAILURE(options.parse(args.size(), args.data()));
}
{
Options options;
auto args = makeArray("- ");
EXPECT_FAILURE(options.parse(args.size(), args.data()));
}
{
Options options;
auto args = makeArray("-n15");
EXPECT_FAILURE(options.parse(args.size(), args.data()));
}
{
Options options;
auto args = makeArray("-0", "x");
EXPECT_FAILURE(options.parse(args.size(), args.data()));
}
}
TEST(Options, Extras) {
{
Options options;
auto args = makeArray("-h");
EXPECT_MESSAGE(options.parse(args.size(), args.data()));
}
{
Options options;
auto args = makeArray("-H");
EXPECT_MESSAGE(options.parse(args.size(), args.data()));
}
{
Options options;
auto args = makeArray("-V");
EXPECT_MESSAGE(options.parse(args.size(), args.data()));
}
{
Options options;
auto args = makeArray("--help");
EXPECT_MESSAGE(options.parse(args.size(), args.data()));
}
{
Options options;
auto args = makeArray("--version");
EXPECT_MESSAGE(options.parse(args.size(), args.data()));
}
}

View File

@@ -0,0 +1,147 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
#include "Pzstd.h"
#include "datagen.h"
#include "test/RoundTrip.h"
#include "utils/ScopeGuard.h"
#include <cstddef>
#include <cstdio>
#include <gtest/gtest.h>
#include <memory>
#include <random>
using namespace std;
using namespace pzstd;
TEST(Pzstd, SmallSizes) {
unsigned seed = std::random_device{}();
std::fprintf(stderr, "Pzstd.SmallSizes seed: %u\n", seed);
std::mt19937 gen(seed);
for (unsigned len = 1; len < 256; ++len) {
if (len % 16 == 0) {
std::fprintf(stderr, "%u / 16\n", len / 16);
}
std::string inputFile = std::tmpnam(nullptr);
auto guard = makeScopeGuard([&] { std::remove(inputFile.c_str()); });
{
static uint8_t buf[256];
RDG_genBuffer(buf, len, 0.5, 0.0, gen());
auto fd = std::fopen(inputFile.c_str(), "wb");
auto written = std::fwrite(buf, 1, len, fd);
std::fclose(fd);
ASSERT_EQ(written, len);
}
for (unsigned numThreads = 1; numThreads <= 2; ++numThreads) {
for (unsigned level = 1; level <= 4; level *= 4) {
auto errorGuard = makeScopeGuard([&] {
std::fprintf(stderr, "# threads: %u\n", numThreads);
std::fprintf(stderr, "compression level: %u\n", level);
});
Options options;
options.overwrite = true;
options.inputFiles = {inputFile};
options.numThreads = numThreads;
options.compressionLevel = level;
options.verbosity = 1;
ASSERT_TRUE(roundTrip(options));
errorGuard.dismiss();
}
}
}
}
TEST(Pzstd, LargeSizes) {
unsigned seed = std::random_device{}();
std::fprintf(stderr, "Pzstd.LargeSizes seed: %u\n", seed);
std::mt19937 gen(seed);
for (unsigned len = 1 << 20; len <= (1 << 24); len *= 2) {
std::string inputFile = std::tmpnam(nullptr);
auto guard = makeScopeGuard([&] { std::remove(inputFile.c_str()); });
{
std::unique_ptr<uint8_t[]> buf(new uint8_t[len]);
RDG_genBuffer(buf.get(), len, 0.5, 0.0, gen());
auto fd = std::fopen(inputFile.c_str(), "wb");
auto written = std::fwrite(buf.get(), 1, len, fd);
std::fclose(fd);
ASSERT_EQ(written, len);
}
for (unsigned numThreads = 1; numThreads <= 16; numThreads *= 4) {
for (unsigned level = 1; level <= 4; level *= 4) {
auto errorGuard = makeScopeGuard([&] {
std::fprintf(stderr, "# threads: %u\n", numThreads);
std::fprintf(stderr, "compression level: %u\n", level);
});
Options options;
options.overwrite = true;
options.inputFiles = {inputFile};
options.numThreads = std::min(numThreads, options.numThreads);
options.compressionLevel = level;
options.verbosity = 1;
ASSERT_TRUE(roundTrip(options));
errorGuard.dismiss();
}
}
}
}
TEST(Pzstd, DISABLED_ExtremelyLargeSize) {
unsigned seed = std::random_device{}();
std::fprintf(stderr, "Pzstd.ExtremelyLargeSize seed: %u\n", seed);
std::mt19937 gen(seed);
std::string inputFile = std::tmpnam(nullptr);
auto guard = makeScopeGuard([&] { std::remove(inputFile.c_str()); });
{
// Write 4GB + 64 MB
constexpr size_t kLength = 1 << 26;
std::unique_ptr<uint8_t[]> buf(new uint8_t[kLength]);
auto fd = std::fopen(inputFile.c_str(), "wb");
auto closeGuard = makeScopeGuard([&] { std::fclose(fd); });
for (size_t i = 0; i < (1 << 6) + 1; ++i) {
RDG_genBuffer(buf.get(), kLength, 0.5, 0.0, gen());
auto written = std::fwrite(buf.get(), 1, kLength, fd);
if (written != kLength) {
std::fprintf(stderr, "Failed to write file, skipping test\n");
return;
}
}
}
Options options;
options.overwrite = true;
options.inputFiles = {inputFile};
options.compressionLevel = 1;
if (options.numThreads == 0) {
options.numThreads = 1;
}
ASSERT_TRUE(roundTrip(options));
}
TEST(Pzstd, ExtremelyCompressible) {
std::string inputFile = std::tmpnam(nullptr);
auto guard = makeScopeGuard([&] { std::remove(inputFile.c_str()); });
{
std::unique_ptr<uint8_t[]> buf(new uint8_t[10000]);
std::memset(buf.get(), 'a', 10000);
auto fd = std::fopen(inputFile.c_str(), "wb");
auto written = std::fwrite(buf.get(), 1, 10000, fd);
std::fclose(fd);
ASSERT_EQ(written, 10000);
}
Options options;
options.overwrite = true;
options.inputFiles = {inputFile};
options.numThreads = 1;
options.compressionLevel = 1;
ASSERT_TRUE(roundTrip(options));
}

View File

@@ -0,0 +1,86 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
#pragma once
#include "Options.h"
#include "Pzstd.h"
#include "utils/ScopeGuard.h"
#include <cstdio>
#include <string>
#include <cstdint>
#include <memory>
namespace pzstd {
inline bool check(std::string source, std::string decompressed) {
std::unique_ptr<std::uint8_t[]> sBuf(new std::uint8_t[1024]);
std::unique_ptr<std::uint8_t[]> dBuf(new std::uint8_t[1024]);
auto sFd = std::fopen(source.c_str(), "rb");
auto dFd = std::fopen(decompressed.c_str(), "rb");
auto guard = makeScopeGuard([&] {
std::fclose(sFd);
std::fclose(dFd);
});
size_t sRead, dRead;
do {
sRead = std::fread(sBuf.get(), 1, 1024, sFd);
dRead = std::fread(dBuf.get(), 1, 1024, dFd);
if (std::ferror(sFd) || std::ferror(dFd)) {
return false;
}
if (sRead != dRead) {
return false;
}
for (size_t i = 0; i < sRead; ++i) {
if (sBuf.get()[i] != dBuf.get()[i]) {
return false;
}
}
} while (sRead == 1024);
if (!std::feof(sFd) || !std::feof(dFd)) {
return false;
}
return true;
}
inline bool roundTrip(Options& options) {
if (options.inputFiles.size() != 1) {
return false;
}
std::string source = options.inputFiles.front();
std::string compressedFile = std::tmpnam(nullptr);
std::string decompressedFile = std::tmpnam(nullptr);
auto guard = makeScopeGuard([&] {
std::remove(compressedFile.c_str());
std::remove(decompressedFile.c_str());
});
{
options.outputFile = compressedFile;
options.decompress = false;
if (pzstdMain(options) != 0) {
return false;
}
}
{
options.decompress = true;
options.inputFiles.front() = compressedFile;
options.outputFile = decompressedFile;
if (pzstdMain(options) != 0) {
return false;
}
}
return check(source, decompressedFile);
}
}

Some files were not shown because too many files have changed in this diff Show More