dropshell release 2025.0513.2134
Some checks failed
Dropshell Test / Build_and_Test (push) Has been cancelled
Some checks failed
Dropshell Test / Build_and_Test (push) Has been cancelled
This commit is contained in:
42
build_amd64/_deps/zstd-src/contrib/seekable_format/README.md
Normal file
42
build_amd64/_deps/zstd-src/contrib/seekable_format/README.md
Normal file
@ -0,0 +1,42 @@
|
||||
# Zstandard Seekable Format
|
||||
|
||||
The seekable format splits compressed data into a series of independent "frames",
|
||||
each compressed individually,
|
||||
so that decompression of a section in the middle of an archive
|
||||
only requires zstd to decompress at most a frame's worth of extra data,
|
||||
instead of the entire archive.
|
||||
|
||||
The frames are appended, so that the decompression of the entire payload
|
||||
still regenerates the original content, using any compliant zstd decoder.
|
||||
|
||||
On top of that, the seekable format generates a jump table,
|
||||
which makes it possible to jump directly to the position of the relevant frame
|
||||
when requesting only a segment of the data.
|
||||
The jump table is simply ignored by zstd decoders unaware of the seekable format.
|
||||
|
||||
The format is delivered with an API to create seekable archives
|
||||
and to retrieve arbitrary segments inside the archive.
|
||||
|
||||
### Maximum Frame Size parameter
|
||||
|
||||
When creating a seekable archive, the main parameter is the maximum frame size.
|
||||
|
||||
At compression time, the user can manually select the boundaries between segments,
|
||||
but they don't have to: long segments will be automatically split
|
||||
when larger than selected maximum frame size.
|
||||
|
||||
Small frame sizes reduce decompression cost when requesting small segments,
|
||||
because the decoder will nonetheless have to decompress an entire frame
|
||||
to recover just a single byte from it.
|
||||
|
||||
A good rule of thumb is to select a maximum frame size roughly equivalent
|
||||
to the access pattern when it's known.
|
||||
For example, if the application tends to request 4 KB blocks,
|
||||
then it's a good idea to set a maximum frame size in the vicinity of 4 KB.
|
||||
|
||||
But small frame sizes also reduce compression ratio,
|
||||
and increase the cost for the jump table,
|
||||
so there is a balance to find.
|
||||
|
||||
In general, try to avoid really tiny frame sizes (<1 KB),
|
||||
which would have a large negative impact on compression ratio.
|
5
build_amd64/_deps/zstd-src/contrib/seekable_format/examples/.gitignore
vendored
Normal file
5
build_amd64/_deps/zstd-src/contrib/seekable_format/examples/.gitignore
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
seekable_compression
|
||||
seekable_decompression
|
||||
seekable_decompression_mem
|
||||
parallel_processing
|
||||
parallel_compression
|
@ -0,0 +1,214 @@
|
||||
/*
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
*/
|
||||
|
||||
#include <stdlib.h> // malloc, free, exit, atoi
|
||||
#include <stdio.h> // fprintf, perror, feof, fopen, etc.
|
||||
#include <string.h> // strlen, memset, strcat
|
||||
#define ZSTD_STATIC_LINKING_ONLY
|
||||
#include <zstd.h> // presumes zstd library is installed
|
||||
#include <zstd_errors.h>
|
||||
#if defined(WIN32) || defined(_WIN32)
|
||||
# include <windows.h>
|
||||
# define SLEEP(x) Sleep(x)
|
||||
#else
|
||||
# include <unistd.h>
|
||||
# define SLEEP(x) usleep(x * 1000)
|
||||
#endif
|
||||
|
||||
#include "xxhash.h"
|
||||
|
||||
#include "pool.h" // use zstd thread pool for demo
|
||||
|
||||
#include "../zstd_seekable.h"
|
||||
|
||||
/* Allocates `size` bytes or terminates the process with exit code 1.
 * A zero-byte request is rounded up to one byte, so a NULL result from
 * malloc() always means failure rather than an implementation-defined
 * malloc(0). The returned pointer must be released with free(). */
static void* malloc_orDie(size_t size)
{
    void* const buff = malloc(size ? size : 1);
    if (buff) return buff;
    /* error */
    perror("malloc");   /* perror() appends ": <reason>" on its own */
    exit(1);
}
|
||||
|
||||
/* Opens `filename` using fopen() mode `instruction`.
 * On failure, reports the file name through perror() and exits with code 3. */
static FILE* fopen_orDie(const char *filename, const char *instruction)
{
    FILE* const stream = fopen(filename, instruction);
    if (stream == NULL) {
        perror(filename);
        exit(3);
    }
    return stream;
}
|
||||
|
||||
/* Reads up to `sizeToRead` bytes from `file` into `buffer`.
 * Returns the number of bytes read; a short count is accepted only when the
 * stream is at end of file. Any other short read exits with code 4. */
static size_t fread_orDie(void* buffer, size_t sizeToRead, FILE* file)
{
    size_t const got = fread(buffer, 1, sizeToRead, file);
    if (got != sizeToRead && !feof(file)) {
        perror("fread");
        exit(4);
    }
    return got;
}
|
||||
|
||||
/* Writes exactly `sizeToWrite` bytes from `buffer` to `file`.
 * Returns `sizeToWrite`; a short write exits with code 5. */
static size_t fwrite_orDie(const void* buffer, size_t sizeToWrite, FILE* file)
{
    size_t const emitted = fwrite(buffer, 1, sizeToWrite, file);
    if (emitted != sizeToWrite) {
        perror("fwrite");
        exit(5);
    }
    return sizeToWrite;
}
|
||||
|
||||
/* Closes `file`; returns 0 on success, exits with code 6 if fclose() fails. */
static size_t fclose_orDie(FILE* file)
{
    int const status = fclose(file);
    if (status != 0) {
        perror("fclose");
        exit(6);
    }
    return 0;
}
|
||||
|
||||
/* Repositions `file` with fseek(offset, origin) and then calls fflush() on it.
 * Exits with code 7 if either call fails. */
static void fseek_orDie(FILE* file, long int offset, int origin)
{
    /* fflush() is attempted only after a successful seek */
    if (fseek(file, offset, origin) == 0 && fflush(file) == 0) return;
    /* error */
    perror("fseek");
    exit(7);
}
|
||||
|
||||
static long int ftell_orDie(FILE* file)
|
||||
{
|
||||
long int off = ftell(file);
|
||||
if (off != -1) return off;
|
||||
/* error */
|
||||
perror("ftell");
|
||||
exit(8);
|
||||
}
|
||||
|
||||
/* One frame's worth of compression work. Instances are filled in by
 * compressFile_orDie() and processed by compressFrame(). */
struct job {
    /* input bytes and their count */
    const void* src;
    size_t srcSize;
    /* output buffer; dstSize is its capacity on entry and is overwritten by
     * compressFrame() with the compressed size */
    void* dst;
    size_t dstSize;

    /* result of XXH64() over src, as assigned by compressFrame() */
    unsigned checksum;

    /* level handed to ZSTD_compress() */
    int compressionLevel;
    /* 0 while pending; compressFrame() stores 1 as its last action.
     * NOTE(review): plain int polled from another thread - confirm this is acceptable. */
    int done;
};
|
||||
|
||||
static void compressFrame(void* opaque)
|
||||
{
|
||||
struct job* job = opaque;
|
||||
|
||||
job->checksum = XXH64(job->src, job->srcSize, 0);
|
||||
|
||||
size_t ret = ZSTD_compress(job->dst, job->dstSize, job->src, job->srcSize, job->compressionLevel);
|
||||
if (ZSTD_isError(ret)) {
|
||||
fprintf(stderr, "ZSTD_compress() error : %s \n", ZSTD_getErrorName(ret));
|
||||
exit(20);
|
||||
}
|
||||
|
||||
job->dstSize = ret;
|
||||
job->done = 1;
|
||||
}
|
||||
|
||||
static void compressFile_orDie(const char* fname, const char* outName, int cLevel, unsigned frameSize, int nbThreads)
|
||||
{
|
||||
POOL_ctx* pool = POOL_create(nbThreads, nbThreads);
|
||||
if (pool == NULL) { fprintf(stderr, "POOL_create() error \n"); exit(9); }
|
||||
|
||||
FILE* const fin = fopen_orDie(fname, "rb");
|
||||
FILE* const fout = fopen_orDie(outName, "wb");
|
||||
|
||||
if (ZSTD_compressBound(frameSize) > 0xFFFFFFFFU) { fprintf(stderr, "Frame size too large \n"); exit(10); }
|
||||
unsigned dstSize = ZSTD_compressBound(frameSize);
|
||||
|
||||
|
||||
fseek_orDie(fin, 0, SEEK_END);
|
||||
long int length = ftell_orDie(fin);
|
||||
fseek_orDie(fin, 0, SEEK_SET);
|
||||
|
||||
size_t numFrames = (length + frameSize - 1) / frameSize;
|
||||
|
||||
struct job* jobs = malloc_orDie(sizeof(struct job) * numFrames);
|
||||
|
||||
size_t i;
|
||||
for(i = 0; i < numFrames; i++) {
|
||||
void* in = malloc_orDie(frameSize);
|
||||
void* out = malloc_orDie(dstSize);
|
||||
|
||||
size_t inSize = fread_orDie(in, frameSize, fin);
|
||||
|
||||
jobs[i].src = in;
|
||||
jobs[i].srcSize = inSize;
|
||||
jobs[i].dst = out;
|
||||
jobs[i].dstSize = dstSize;
|
||||
jobs[i].compressionLevel = cLevel;
|
||||
jobs[i].done = 0;
|
||||
POOL_add(pool, compressFrame, &jobs[i]);
|
||||
}
|
||||
|
||||
ZSTD_frameLog* fl = ZSTD_seekable_createFrameLog(1);
|
||||
if (fl == NULL) { fprintf(stderr, "ZSTD_seekable_createFrameLog() failed \n"); exit(11); }
|
||||
for (i = 0; i < numFrames; i++) {
|
||||
while (!jobs[i].done) SLEEP(5); /* wake up every 5 milliseconds to check */
|
||||
fwrite_orDie(jobs[i].dst, jobs[i].dstSize, fout);
|
||||
free((void*)jobs[i].src);
|
||||
free(jobs[i].dst);
|
||||
|
||||
size_t ret = ZSTD_seekable_logFrame(fl, jobs[i].dstSize, jobs[i].srcSize, jobs[i].checksum);
|
||||
if (ZSTD_isError(ret)) { fprintf(stderr, "ZSTD_seekable_logFrame() error : %s \n", ZSTD_getErrorName(ret)); }
|
||||
}
|
||||
|
||||
{ unsigned char seekTableBuff[1024];
|
||||
ZSTD_outBuffer out = {seekTableBuff, 1024, 0};
|
||||
while (ZSTD_seekable_writeSeekTable(fl, &out) != 0) {
|
||||
fwrite_orDie(seekTableBuff, out.pos, fout);
|
||||
out.pos = 0;
|
||||
}
|
||||
fwrite_orDie(seekTableBuff, out.pos, fout);
|
||||
}
|
||||
|
||||
ZSTD_seekable_freeFrameLog(fl);
|
||||
free(jobs);
|
||||
fclose_orDie(fout);
|
||||
fclose_orDie(fin);
|
||||
}
|
||||
|
||||
/* Builds "<filename>.zst" in a freshly allocated buffer obtained from
 * malloc_orDie(). The result has strlen(filename) + 5 bytes, terminator included. */
static const char* createOutFilename_orDie(const char* filename)
{
    static const char suffix[] = ".zst";            /* 4 characters + NUL */
    size_t const nameLen = strlen(filename);
    size_t const total = nameLen + 5;
    char* const result = malloc_orDie(total);

    memcpy(result, filename, nameLen);
    memcpy(result + nameLen, suffix, sizeof(suffix)); /* copies the terminator as well */
    return (const char*)result;
}
|
||||
|
||||
/* Usage: <exe> FILE FRAME_SIZE NB_THREADS
 * Compresses FILE into FILE.zst at level 5, in frames of at most FRAME_SIZE
 * bytes, using NB_THREADS workers. Returns 1 on bad arguments, 0 otherwise
 * (runtime failures exit inside the *_orDie helpers). */
int main(int argc, const char** argv) {
    const char* const exeName = argv[0];
    if (argc!=4) {
        printf("wrong arguments\n");
        printf("usage:\n");
        printf("%s FILE FRAME_SIZE NB_THREADS\n", exeName);
        return 1;
    }

    {   const char* const inFileName = argv[1];
        int const frameSizeArg = atoi(argv[2]);
        int const nbThreads = atoi(argv[3]);

        /* a zero or negative value (including unparsable text, which atoi() maps to 0)
         * cannot be used as a frame size or a thread count */
        if (frameSizeArg <= 0 || nbThreads <= 0) {
            printf("FRAME_SIZE and NB_THREADS must be positive integers\n");
            printf("usage:\n");
            printf("%s FILE FRAME_SIZE NB_THREADS\n", exeName);
            return 1;
        }

        const char* const outFileName = createOutFilename_orDie(inFileName);
        compressFile_orDie(inFileName, outFileName, 5, (unsigned)frameSizeArg, nbThreads);
        free((void*)outFileName);   /* the name was heap-allocated by createOutFilename_orDie() */
    }

    return 0;
}
|
@ -0,0 +1,194 @@
|
||||
/*
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
*/
|
||||
|
||||
/*
|
||||
* A simple demo that sums up all the bytes in the file in parallel using
|
||||
* seekable decompression and the zstd thread pool
|
||||
*/
|
||||
|
||||
#include <stdlib.h> // malloc, exit
|
||||
#include <stdio.h> // fprintf, perror, feof
|
||||
#include <string.h> // strerror
|
||||
#include <errno.h> // errno
|
||||
#define ZSTD_STATIC_LINKING_ONLY
|
||||
#include <zstd.h> // presumes zstd library is installed
|
||||
#include <zstd_errors.h>
|
||||
#if defined(_WIN32)
|
||||
# include <windows.h>
|
||||
# define SLEEP(x) Sleep(x)
|
||||
#else
|
||||
# include <unistd.h>
|
||||
# define SLEEP(x) usleep(x * 1000)
|
||||
#endif
|
||||
|
||||
#include "pool.h" // use zstd thread pool for demo
|
||||
|
||||
#include "../zstd_seekable.h"
|
||||
|
||||
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||
|
||||
/* malloc() wrapper: returns the allocation, or reports through perror()
 * and exits with code 1 when malloc() returns NULL. */
static void* malloc_orDie(size_t size)
{
    void* const block = malloc(size);
    if (block == NULL) {
        perror("malloc");
        exit(1);
    }
    return block;
}
|
||||
|
||||
/* realloc() wrapper: returns the resized allocation, or reports through
 * perror() and exits with code 1 when realloc() returns NULL. */
static void* realloc_orDie(void* ptr, size_t size)
{
    void* const resized = realloc(ptr, size);
    if (resized == NULL) {
        perror("realloc");
        exit(1);
    }
    return resized;
}
|
||||
|
||||
/* fopen() wrapper. Returns the opened stream; if fopen() fails, prints the
 * file name with perror() and exits with code 3. */
static FILE* fopen_orDie(const char *filename, const char *instruction)
{
    FILE* const handle = fopen(filename, instruction);
    if (!handle) {
        perror(filename);
        exit(3);
    }
    return handle;
}
|
||||
|
||||
/* fread() wrapper returning the byte count actually read. A count below
 * `sizeToRead` is fine at end of file; otherwise the process exits with code 4. */
static size_t fread_orDie(void* buffer, size_t sizeToRead, FILE* file)
{
    size_t const nbRead = fread(buffer, 1, sizeToRead, file);
    int const complete = (nbRead == sizeToRead);
    if (complete || feof(file)) {
        return nbRead;
    }
    perror("fread");
    exit(4);
}
|
||||
|
||||
/* fwrite() wrapper: writes `sizeToWrite` bytes and returns that count,
 * or exits with code 5 if fewer bytes were written. */
static size_t fwrite_orDie(const void* buffer, size_t sizeToWrite, FILE* file)
{
    if (fwrite(buffer, 1, sizeToWrite, file) != sizeToWrite) {
        perror("fwrite");
        exit(5);
    }
    return sizeToWrite;
}
|
||||
|
||||
/* fclose() wrapper: returns 0 when the stream closed cleanly,
 * otherwise reports through perror() and exits with code 6. */
static size_t fclose_orDie(FILE* file)
{
    if (fclose(file) != 0) {
        perror("fclose");
        exit(6);
    }
    return 0;
}
|
||||
|
||||
/* Seeks `file` to (offset, origin), then flushes it.
 * If fseek() or the following fflush() fails, exits with code 7. */
static void fseek_orDie(FILE* file, long int offset, int origin)
{
    int const seekFailed = fseek(file, offset, origin);
    if (!seekFailed) {
        int const flushFailed = fflush(file);
        if (!flushFailed) return;
    }
    perror("fseek");
    exit(7);
}
|
||||
|
||||
/* Work item for sumFrame(): identifies one frame of a seekable archive and
 * receives its byte sum. sumFile_orDie() initializes it positionally, so the
 * member order must not change. */
struct sum_job {
    /* path of the archive; sumFrame() opens it itself */
    const char* fname;
    /* sum of the frame's decompressed bytes, written by sumFrame() */
    unsigned long long sum;
    /* index of the frame to decompress */
    unsigned frameNb;
    /* set to 1 by sumFrame() once `sum` is available */
    int done;
};
|
||||
|
||||
static void sumFrame(void* opaque)
|
||||
{
|
||||
struct sum_job* job = (struct sum_job*)opaque;
|
||||
job->done = 0;
|
||||
|
||||
FILE* const fin = fopen_orDie(job->fname, "rb");
|
||||
|
||||
ZSTD_seekable* const seekable = ZSTD_seekable_create();
|
||||
if (seekable==NULL) { fprintf(stderr, "ZSTD_seekable_create() error \n"); exit(10); }
|
||||
|
||||
size_t const initResult = ZSTD_seekable_initFile(seekable, fin);
|
||||
if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_seekable_init() error : %s \n", ZSTD_getErrorName(initResult)); exit(11); }
|
||||
|
||||
size_t const frameSize = ZSTD_seekable_getFrameDecompressedSize(seekable, job->frameNb);
|
||||
unsigned char* data = malloc_orDie(frameSize);
|
||||
|
||||
size_t result = ZSTD_seekable_decompressFrame(seekable, data, frameSize, job->frameNb);
|
||||
if (ZSTD_isError(result)) { fprintf(stderr, "ZSTD_seekable_decompressFrame() error : %s \n", ZSTD_getErrorName(result)); exit(12); }
|
||||
|
||||
unsigned long long sum = 0;
|
||||
size_t i;
|
||||
for (i = 0; i < frameSize; i++) {
|
||||
sum += data[i];
|
||||
}
|
||||
job->sum = sum;
|
||||
job->done = 1;
|
||||
|
||||
fclose(fin);
|
||||
ZSTD_seekable_free(seekable);
|
||||
free(data);
|
||||
}
|
||||
|
||||
static void sumFile_orDie(const char* fname, int nbThreads)
|
||||
{
|
||||
POOL_ctx* pool = POOL_create(nbThreads, nbThreads);
|
||||
if (pool == NULL) { fprintf(stderr, "POOL_create() error \n"); exit(9); }
|
||||
|
||||
FILE* const fin = fopen_orDie(fname, "rb");
|
||||
|
||||
ZSTD_seekable* const seekable = ZSTD_seekable_create();
|
||||
if (seekable==NULL) { fprintf(stderr, "ZSTD_seekable_create() error \n"); exit(10); }
|
||||
|
||||
size_t const initResult = ZSTD_seekable_initFile(seekable, fin);
|
||||
if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_seekable_init() error : %s \n", ZSTD_getErrorName(initResult)); exit(11); }
|
||||
|
||||
unsigned const numFrames = ZSTD_seekable_getNumFrames(seekable);
|
||||
struct sum_job* jobs = (struct sum_job*)malloc(numFrames * sizeof(struct sum_job));
|
||||
|
||||
unsigned fnb;
|
||||
for (fnb = 0; fnb < numFrames; fnb++) {
|
||||
jobs[fnb] = (struct sum_job){ fname, 0, fnb, 0 };
|
||||
POOL_add(pool, sumFrame, &jobs[fnb]);
|
||||
}
|
||||
|
||||
unsigned long long total = 0;
|
||||
|
||||
for (fnb = 0; fnb < numFrames; fnb++) {
|
||||
while (!jobs[fnb].done) SLEEP(5); /* wake up every 5 milliseconds to check */
|
||||
total += jobs[fnb].sum;
|
||||
}
|
||||
|
||||
printf("Sum: %llu\n", total);
|
||||
|
||||
POOL_free(pool);
|
||||
ZSTD_seekable_free(seekable);
|
||||
fclose(fin);
|
||||
free(jobs);
|
||||
}
|
||||
|
||||
|
||||
/* Usage: <exe> FILE NB_THREADS
 * Sums the bytes of the seekable archive FILE using NB_THREADS workers.
 * Returns 1 on a wrong argument count, 0 otherwise. */
int main(int argc, const char** argv)
{
    if (argc == 3) {
        const char* const archive = argv[1];
        int const workers = atoi(argv[2]);
        sumFile_orDie(archive, workers);
        return 0;
    }

    fprintf(stderr, "wrong arguments\n");
    fprintf(stderr, "usage:\n");
    fprintf(stderr, "%s FILE NB_THREADS\n", argv[0]);
    return 1;
}
|
@ -0,0 +1,136 @@
|
||||
/*
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
*/
|
||||
|
||||
#include <stdlib.h> // malloc, free, exit, atoi
|
||||
#include <stdio.h> // fprintf, perror, feof, fopen, etc.
|
||||
#include <string.h> // strlen, memset, strcat
|
||||
#define ZSTD_STATIC_LINKING_ONLY
|
||||
#include <zstd.h> // presumes zstd library is installed
|
||||
|
||||
#include "../zstd_seekable.h"
|
||||
|
||||
/* Returns a block of at least `size` bytes, or exits with code 1 when the
 * allocation fails. Requests of 0 bytes are served as 1 byte, because
 * malloc(0) may return NULL without any failure having occurred.
 * Release the block with free(). */
static void* malloc_orDie(size_t size)
{
    size_t const request = (size == 0) ? 1 : size;
    void* const buff = malloc(request);
    if (buff == NULL) {
        perror("malloc");   /* no trailing ':' here, perror() adds the separator */
        exit(1);
    }
    return buff;
}
|
||||
|
||||
/* Returns fopen(filename, instruction); when that is NULL, prints the file
 * name via perror() and exits with code 3. */
static FILE* fopen_orDie(const char *filename, const char *instruction)
{
    FILE* const opened = fopen(filename, instruction);
    if (opened != NULL) {
        return opened;
    }
    perror(filename);
    exit(3);
}
|
||||
|
||||
/* Reads at most `sizeToRead` bytes into `buffer` and returns how many were
 * read. Fewer bytes than requested without end-of-file exits with code 4. */
static size_t fread_orDie(void* buffer, size_t sizeToRead, FILE* file)
{
    size_t const count = fread(buffer, 1, sizeToRead, file);
    if (count < sizeToRead || count > sizeToRead) {
        /* short read: legitimate only at end of file */
        if (!feof(file)) {
            perror("fread");
            exit(4);
        }
    }
    return count;
}
|
||||
|
||||
/* Writes `sizeToWrite` bytes of `buffer` to `file` and returns `sizeToWrite`.
 * An incomplete write exits with code 5. */
static size_t fwrite_orDie(const void* buffer, size_t sizeToWrite, FILE* file)
{
    size_t const nbWritten = fwrite(buffer, 1, sizeToWrite, file);
    int const complete = (nbWritten == sizeToWrite);
    if (!complete) {
        perror("fwrite");
        exit(5);
    }
    return sizeToWrite;
}
|
||||
|
||||
/* Closes `file`. Returns 0 on success; exits with code 6 on failure. */
static size_t fclose_orDie(FILE* file)
{
    int const failed = fclose(file);
    if (failed) {
        perror("fclose");
        exit(6);
    }
    return 0;
}
|
||||
|
||||
static void compressFile_orDie(const char* fname, const char* outName, int cLevel, unsigned frameSize)
|
||||
{
|
||||
FILE* const fin = fopen_orDie(fname, "rb");
|
||||
FILE* const fout = fopen_orDie(outName, "wb");
|
||||
size_t const buffInSize = ZSTD_CStreamInSize(); /* can always read one full block */
|
||||
void* const buffIn = malloc_orDie(buffInSize);
|
||||
size_t const buffOutSize = ZSTD_CStreamOutSize(); /* can always flush a full block */
|
||||
void* const buffOut = malloc_orDie(buffOutSize);
|
||||
|
||||
ZSTD_seekable_CStream* const cstream = ZSTD_seekable_createCStream();
|
||||
if (cstream==NULL) { fprintf(stderr, "ZSTD_seekable_createCStream() error \n"); exit(10); }
|
||||
size_t const initResult = ZSTD_seekable_initCStream(cstream, cLevel, 1, frameSize);
|
||||
if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_seekable_initCStream() error : %s \n", ZSTD_getErrorName(initResult)); exit(11); }
|
||||
|
||||
size_t read, toRead = buffInSize;
|
||||
while( (read = fread_orDie(buffIn, toRead, fin)) ) {
|
||||
ZSTD_inBuffer input = { buffIn, read, 0 };
|
||||
while (input.pos < input.size) {
|
||||
ZSTD_outBuffer output = { buffOut, buffOutSize, 0 };
|
||||
toRead = ZSTD_seekable_compressStream(cstream, &output , &input); /* toRead is guaranteed to be <= ZSTD_CStreamInSize() */
|
||||
if (ZSTD_isError(toRead)) { fprintf(stderr, "ZSTD_seekable_compressStream() error : %s \n", ZSTD_getErrorName(toRead)); exit(12); }
|
||||
if (toRead > buffInSize) toRead = buffInSize; /* Safely handle case when `buffInSize` is manually changed to a value < ZSTD_CStreamInSize()*/
|
||||
fwrite_orDie(buffOut, output.pos, fout);
|
||||
}
|
||||
}
|
||||
|
||||
while (1) {
|
||||
ZSTD_outBuffer output = { buffOut, buffOutSize, 0 };
|
||||
size_t const remainingToFlush = ZSTD_seekable_endStream(cstream, &output); /* close stream */
|
||||
if (ZSTD_isError(remainingToFlush)) { fprintf(stderr, "ZSTD_seekable_endStream() error : %s \n", ZSTD_getErrorName(remainingToFlush)); exit(13); }
|
||||
fwrite_orDie(buffOut, output.pos, fout);
|
||||
if (!remainingToFlush) break;
|
||||
}
|
||||
|
||||
ZSTD_seekable_freeCStream(cstream);
|
||||
fclose_orDie(fout);
|
||||
fclose_orDie(fin);
|
||||
free(buffIn);
|
||||
free(buffOut);
|
||||
}
|
||||
|
||||
/* Returns a newly allocated copy of `filename` with ".zst" appended.
 * The buffer comes from malloc_orDie() and is strlen(filename) + 5 bytes long. */
static char* createOutFilename_orDie(const char* filename)
{
    size_t const baseLen = strlen(filename);
    size_t const capacity = baseLen + 5;            /* ".zst" plus terminator */
    char* const name = malloc_orDie(capacity);

    memcpy(name, filename, baseLen);
    memcpy(name + baseLen, ".zst", 5);              /* 5th byte is the literal's NUL */
    return name;
}
|
||||
|
||||
#define CLEVEL_DEFAULT 5
|
||||
int main(int argc, const char** argv)
|
||||
{
|
||||
const char* const exeName = argv[0];
|
||||
if (argc<3 || argc>4) {
|
||||
printf("wrong arguments \n");
|
||||
printf("usage: \n");
|
||||
printf("%s FILE FRAME_SIZE [LEVEL] \n", exeName);
|
||||
return 1;
|
||||
}
|
||||
|
||||
{ const char* const inFileName = argv[1];
|
||||
unsigned const frameSize = (unsigned)atoi(argv[2]);
|
||||
int const cLevel = (argc==4) ? atoi(argv[3]) : CLEVEL_DEFAULT;
|
||||
|
||||
char* const outFileName = createOutFilename_orDie(inFileName);
|
||||
compressFile_orDie(inFileName, outFileName, cLevel, frameSize);
|
||||
free(outFileName);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
@ -0,0 +1,141 @@
|
||||
/*
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
*/
|
||||
|
||||
|
||||
#include <stdlib.h> // malloc, exit
|
||||
#include <stdio.h> // fprintf, perror, feof
|
||||
#include <string.h> // strerror
|
||||
#include <errno.h> // errno
|
||||
#define ZSTD_STATIC_LINKING_ONLY
|
||||
#include <zstd.h> // presumes zstd library is installed
|
||||
#include <zstd_errors.h>
|
||||
|
||||
#include "../zstd_seekable.h"
|
||||
|
||||
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||
|
||||
/* Allocates `size` bytes with malloc(); a NULL result is reported with
 * perror() and ends the process with exit code 1. */
static void* malloc_orDie(size_t size)
{
    void* const mem = malloc(size);
    if (!mem) {
        perror("malloc");
        exit(1);
    }
    return mem;
}
|
||||
|
||||
/* Resizes `ptr` to `size` bytes with realloc(); a NULL result is reported
 * with perror() and ends the process with exit code 1. */
static void* realloc_orDie(void* ptr, size_t size)
{
    void* const grown = realloc(ptr, size);
    if (grown != NULL) {
        return grown;
    }
    perror("realloc");
    exit(1);
}
|
||||
|
||||
/* Opens `filename` in mode `instruction`; on failure prints the name with
 * perror() and exits with code 3. */
static FILE* fopen_orDie(const char *filename, const char *instruction)
{
    FILE* fp = fopen(filename, instruction);
    if (fp == NULL) {
        perror(filename);
        exit(3);
    }
    return fp;
}
|
||||
|
||||
/* Returns the number of bytes read from `file` into `buffer` (at most
 * `sizeToRead`). Exits with code 4 on a short read that is not end of file. */
static size_t fread_orDie(void* buffer, size_t sizeToRead, FILE* file)
{
    size_t const readSize = fread(buffer, 1, sizeToRead, file);
    if (readSize != sizeToRead) {
        if (!feof(file)) {
            perror("fread");
            exit(4);
        }
    }
    return readSize;
}
|
||||
|
||||
/* Writes the whole of `buffer` (`sizeToWrite` bytes) to `file`.
 * Returns `sizeToWrite`, or exits with code 5 when the write falls short. */
static size_t fwrite_orDie(const void* buffer, size_t sizeToWrite, FILE* file)
{
    size_t const out = fwrite(buffer, 1, sizeToWrite, file);
    if (out == sizeToWrite) {
        return sizeToWrite;
    }
    perror("fwrite");
    exit(5);
}
|
||||
|
||||
/* Closes `file`, returning 0; a failing fclose() exits with code 6. */
static size_t fclose_orDie(FILE* file)
{
    if (fclose(file)) {
        perror("fclose");
        exit(6);
    }
    return 0;
}
|
||||
|
||||
/* Moves the position of `file` via fseek() and flushes the stream.
 * Exits with code 7 if the seek or the flush reports an error. */
static void fseek_orDie(FILE* file, long int offset, int origin) {
    int ok = (fseek(file, offset, origin) == 0);
    if (ok) {
        ok = (fflush(file) == 0);
    }
    if (ok) return;
    /* error */
    perror("fseek");
    exit(7);
}
|
||||
|
||||
|
||||
static void decompressFile_orDie(const char* fname, off_t startOffset, off_t endOffset)
|
||||
{
|
||||
FILE* const fin = fopen_orDie(fname, "rb");
|
||||
FILE* const fout = stdout;
|
||||
size_t const buffOutSize = ZSTD_DStreamOutSize(); /* Guarantee to successfully flush at least one complete compressed block in all circumstances. */
|
||||
void* const buffOut = malloc_orDie(buffOutSize);
|
||||
|
||||
ZSTD_seekable* const seekable = ZSTD_seekable_create();
|
||||
if (seekable==NULL) { fprintf(stderr, "ZSTD_seekable_create() error \n"); exit(10); }
|
||||
|
||||
size_t const initResult = ZSTD_seekable_initFile(seekable, fin);
|
||||
if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_seekable_init() error : %s \n", ZSTD_getErrorName(initResult)); exit(11); }
|
||||
|
||||
while (startOffset < endOffset) {
|
||||
size_t const result = ZSTD_seekable_decompress(seekable, buffOut, MIN(endOffset - startOffset, buffOutSize), startOffset);
|
||||
if (!result) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (ZSTD_isError(result)) {
|
||||
fprintf(stderr, "ZSTD_seekable_decompress() error : %s \n",
|
||||
ZSTD_getErrorName(result));
|
||||
exit(12);
|
||||
}
|
||||
fwrite_orDie(buffOut, result, fout);
|
||||
startOffset += result;
|
||||
}
|
||||
|
||||
ZSTD_seekable_free(seekable);
|
||||
fclose_orDie(fin);
|
||||
fclose_orDie(fout);
|
||||
free(buffOut);
|
||||
}
|
||||
|
||||
|
||||
/* Usage: <exe> FILE START END
 * Decompresses bytes [START, END) of the seekable archive FILE to stdout.
 * Returns 1 on a wrong argument count, 0 otherwise. */
int main(int argc, const char** argv)
{
    if (argc == 4) {
        const char* const archive = argv[1];
        off_t const from = atoll(argv[2]);
        off_t const to = atoll(argv[3]);
        decompressFile_orDie(archive, from, to);
        return 0;
    }

    fprintf(stderr, "wrong arguments\n");
    fprintf(stderr, "usage:\n");
    fprintf(stderr, "%s FILE START END\n", argv[0]);
    return 1;
}
|
@ -0,0 +1,147 @@
|
||||
/*
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
*/
|
||||
|
||||
|
||||
#include <stdlib.h> // malloc, exit
|
||||
#include <stdio.h> // fprintf, perror, feof
|
||||
#include <string.h> // strerror
|
||||
#include <errno.h> // errno
|
||||
#define ZSTD_STATIC_LINKING_ONLY
|
||||
#include <zstd.h> // presumes zstd library is installed
|
||||
#include <zstd_errors.h>
|
||||
|
||||
#include "zstd_seekable.h"
|
||||
|
||||
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||
|
||||
#define MAX_FILE_SIZE (8 * 1024 * 1024)
|
||||
|
||||
/* Thin malloc() wrapper: hands back the allocation, or calls perror() and
 * exits with code 1 if malloc() returned NULL. */
static void* malloc_orDie(size_t size)
{
    void* allocation = malloc(size);
    if (allocation != NULL) {
        return allocation;
    }
    perror("malloc");
    exit(1);
}
|
||||
|
||||
/* Thin realloc() wrapper: hands back the resized block, or calls perror()
 * and exits with code 1 if realloc() returned NULL. */
static void* realloc_orDie(void* ptr, size_t size)
{
    void* const moved = realloc(ptr, size);
    if (!moved) {
        perror("realloc");
        exit(1);
    }
    return moved;
}
|
||||
|
||||
/* Thin fopen() wrapper: returns the stream, or prints `filename` through
 * perror() and exits with code 3 if the file cannot be opened. */
static FILE* fopen_orDie(const char *filename, const char *instruction)
{
    FILE* const f = fopen(filename, instruction);
    if (f == NULL) {
        perror(filename);
        exit(3);
    }
    return f;
}
|
||||
|
||||
/* Thin fread() wrapper: returns the byte count read. A count smaller than
 * `sizeToRead` is tolerated at end of file and fatal (exit code 4) otherwise. */
static size_t fread_orDie(void* buffer, size_t sizeToRead, FILE* file)
{
    size_t const n = fread(buffer, 1, sizeToRead, file);
    int const acceptable = (n == sizeToRead) || feof(file);
    if (!acceptable) {
        perror("fread");
        exit(4);
    }
    return n;
}
|
||||
|
||||
/* Thin fwrite() wrapper: returns `sizeToWrite` after a complete write,
 * exits with code 5 after an incomplete one. */
static size_t fwrite_orDie(const void* buffer, size_t sizeToWrite, FILE* file)
{
    size_t const n = fwrite(buffer, 1, sizeToWrite, file);
    if (n != sizeToWrite) {
        perror("fwrite");
        exit(5);
    }
    return sizeToWrite;
}
|
||||
|
||||
/* Thin fclose() wrapper: returns 0 on success, exits with code 6 on error. */
static size_t fclose_orDie(FILE* file)
{
    int const rc = fclose(file);
    if (rc == 0) {
        return 0;
    }
    perror("fclose");
    exit(6);
}
|
||||
|
||||
/* Thin fseek() wrapper that also flushes the stream after a successful seek.
 * A failure of either step exits with code 7. */
static void fseek_orDie(FILE* file, long int offset, int origin) {
    if (fseek(file, offset, origin) != 0 || fflush(file) != 0) {
        perror("fseek");
        exit(7);
    }
}
|
||||
|
||||
|
||||
static void decompressFile_orDie(const char* fname, off_t startOffset, off_t endOffset)
|
||||
{
|
||||
FILE* const fin = fopen_orDie(fname, "rb");
|
||||
FILE* const fout = stdout;
|
||||
// Just for demo purposes, assume file is <= MAX_FILE_SIZE
|
||||
void* const buffIn = malloc_orDie(MAX_FILE_SIZE);
|
||||
size_t const inSize = fread_orDie(buffIn, MAX_FILE_SIZE, fin);
|
||||
size_t const buffOutSize = ZSTD_DStreamOutSize(); /* Guarantee to successfully flush at least one complete compressed block in all circumstances. */
|
||||
void* const buffOut = malloc_orDie(buffOutSize);
|
||||
|
||||
ZSTD_seekable* const seekable = ZSTD_seekable_create();
|
||||
if (seekable==NULL) { fprintf(stderr, "ZSTD_seekable_create() error \n"); exit(10); }
|
||||
|
||||
size_t const initResult = ZSTD_seekable_initBuff(seekable, buffIn, inSize);
|
||||
if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_seekable_init() error : %s \n", ZSTD_getErrorName(initResult)); exit(11); }
|
||||
|
||||
while (startOffset < endOffset) {
|
||||
size_t const result = ZSTD_seekable_decompress(seekable, buffOut, MIN(endOffset - startOffset, buffOutSize), startOffset);
|
||||
if (!result) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (ZSTD_isError(result)) {
|
||||
fprintf(stderr, "ZSTD_seekable_decompress() error : %s \n",
|
||||
ZSTD_getErrorName(result));
|
||||
exit(12);
|
||||
}
|
||||
fwrite_orDie(buffOut, result, fout);
|
||||
startOffset += result;
|
||||
}
|
||||
|
||||
ZSTD_seekable_free(seekable);
|
||||
fclose_orDie(fin);
|
||||
fclose_orDie(fout);
|
||||
free(buffIn);
|
||||
free(buffOut);
|
||||
}
|
||||
|
||||
|
||||
/* Usage: <exe> FILE START END
 * Decompresses bytes [START, END) of the in-memory seekable archive FILE to
 * stdout. Returns 1 on a wrong argument count, 0 otherwise. */
int main(int argc, const char** argv)
{
    if (argc != 4) {
        const char* const self = argv[0];
        fprintf(stderr, "wrong arguments\n");
        fprintf(stderr, "usage:\n");
        fprintf(stderr, "%s FILE START END\n", self);
        return 1;
    }

    off_t const first = atoll(argv[2]);
    off_t const last = atoll(argv[3]);
    decompressFile_orDie(argv[1], first, last);

    return 0;
}
|
1
build_amd64/_deps/zstd-src/contrib/seekable_format/tests/.gitignore
vendored
Normal file
1
build_amd64/_deps/zstd-src/contrib/seekable_format/tests/.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
seekable_tests
|
@ -0,0 +1,363 @@
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h> // malloc
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "../zstd_seekable.h"
|
||||
|
||||
|
||||
/* ZSTD_seekable_customFile implementation that reads/seeks a buffer while keeping track of total bytes read */
|
||||
/* State of an in-memory "file": a read-only buffer, a cursor, and a running
 * total of the bytes handed out by readBuffWithTotal(). */
typedef struct {
    const void *ptr;     /* start of the backing buffer */
    size_t size;         /* number of bytes available at ptr */
    size_t pos;          /* current cursor; seekBuffWithTotal() keeps it <= size */
    size_t totalRead;    /* bytes copied out so far by readBuffWithTotal() */
} buffWrapperWithTotal_t;

/* Read callback: copies `n` bytes from the cursor into `buffer`, then advances
 * the cursor and the running total.
 * Returns 0 on success, or -1 (state untouched) when fewer than `n` bytes remain. */
static int readBuffWithTotal(void* opaque, void* buffer, size_t n)
{
    buffWrapperWithTotal_t* const buff = (buffWrapperWithTotal_t*)opaque;
    assert(buff != NULL);
    /* compare against the remaining byte count: `pos + n` can wrap around for a huge n */
    if (buff->pos > buff->size || n > buff->size - buff->pos) return -1;
    memcpy(buffer, (const char*)buff->ptr + buff->pos, n);
    buff->pos += n;
    buff->totalRead += n;
    return 0;
}

/* Seek callback: moves the cursor relative to `origin` (SEEK_SET, SEEK_CUR or
 * SEEK_END). Returns 0 on success, or -1 when the target lies outside
 * [0, size]; the cursor is then left unchanged. */
static int seekBuffWithTotal(void* opaque, long long offset, int origin)
{
    buffWrapperWithTotal_t* const buff = (buffWrapperWithTotal_t*) opaque;
    unsigned long long newOffset;
    assert(buff != NULL);
    switch (origin) {
    case SEEK_SET:
        assert(offset >= 0);
        newOffset = (unsigned long long)offset;
        break;
    case SEEK_CUR:
        /* a negative result becomes a huge unsigned value, rejected by the range check below */
        newOffset = (unsigned long long)((long long)buff->pos + offset);
        break;
    case SEEK_END:
        newOffset = (unsigned long long)((long long)buff->size + offset);
        break;
    default:
        assert(0);  /* not possible */
        return -1;  /* with NDEBUG the assert vanishes; never fall through with newOffset unset */
    }
    if (newOffset > buff->size) {
        return -1;
    }
    buff->pos = newOffset;
    return 0;
}
|
||||
|
||||
/* Basic unit tests for zstd seekable format */
|
||||
int main(int argc, const char** argv)
|
||||
{
|
||||
unsigned testNb = 1;
|
||||
(void)argc; (void)argv;
|
||||
printf("Beginning zstd seekable format tests...\n");
|
||||
|
||||
printf("Test %u - simple round trip: ", testNb++);
|
||||
{ size_t const inSize = 4000;
|
||||
void* const inBuffer = malloc(inSize);
|
||||
assert(inBuffer != NULL);
|
||||
|
||||
size_t const seekCapacity = 5000;
|
||||
void* const seekBuffer = malloc(seekCapacity);
|
||||
assert(seekBuffer != NULL);
|
||||
size_t seekSize;
|
||||
|
||||
size_t const outCapacity = inSize;
|
||||
void* const outBuffer = malloc(outCapacity);
|
||||
assert(outBuffer != NULL);
|
||||
|
||||
ZSTD_seekable_CStream* const zscs = ZSTD_seekable_createCStream();
|
||||
assert(zscs != NULL);
|
||||
|
||||
{ size_t const initStatus = ZSTD_seekable_initCStream(zscs, 9, 0 /* checksumFlag */, (unsigned)inSize /* maxFrameSize */);
|
||||
assert(!ZSTD_isError(initStatus));
|
||||
}
|
||||
|
||||
{ ZSTD_outBuffer outb = { .dst=seekBuffer, .pos=0, .size=seekCapacity };
|
||||
ZSTD_inBuffer inb = { .src=inBuffer, .pos=0, .size=inSize };
|
||||
|
||||
size_t const cStatus = ZSTD_seekable_compressStream(zscs, &outb, &inb);
|
||||
assert(!ZSTD_isError(cStatus));
|
||||
assert(inb.pos == inb.size);
|
||||
|
||||
size_t const endStatus = ZSTD_seekable_endStream(zscs, &outb);
|
||||
assert(!ZSTD_isError(endStatus));
|
||||
seekSize = outb.pos;
|
||||
}
|
||||
|
||||
ZSTD_seekable* const stream = ZSTD_seekable_create();
|
||||
assert(stream != NULL);
|
||||
{ size_t const initStatus = ZSTD_seekable_initBuff(stream, seekBuffer, seekSize);
|
||||
assert(!ZSTD_isError(initStatus)); }
|
||||
|
||||
{ size_t const decStatus = ZSTD_seekable_decompress(stream, outBuffer, outCapacity, 0);
|
||||
assert(decStatus == inSize); }
|
||||
|
||||
/* unit test ZSTD_seekTable functions */
|
||||
ZSTD_seekTable* const zst = ZSTD_seekTable_create_fromSeekable(stream);
|
||||
assert(zst != NULL);
|
||||
|
||||
unsigned const nbFrames = ZSTD_seekTable_getNumFrames(zst);
|
||||
assert(nbFrames > 0);
|
||||
|
||||
unsigned long long const frame0Offset = ZSTD_seekTable_getFrameCompressedOffset(zst, 0);
|
||||
assert(frame0Offset == 0);
|
||||
|
||||
unsigned long long const content0Offset = ZSTD_seekTable_getFrameDecompressedOffset(zst, 0);
|
||||
assert(content0Offset == 0);
|
||||
|
||||
size_t const cSize = ZSTD_seekTable_getFrameCompressedSize(zst, 0);
|
||||
assert(!ZSTD_isError(cSize));
|
||||
assert(cSize <= seekCapacity);
|
||||
|
||||
size_t const origSize = ZSTD_seekTable_getFrameDecompressedSize(zst, 0);
|
||||
assert(origSize == inSize);
|
||||
|
||||
unsigned const fo1idx = ZSTD_seekTable_offsetToFrameIndex(zst, 1);
|
||||
assert(fo1idx == 0);
|
||||
|
||||
free(inBuffer);
|
||||
free(seekBuffer);
|
||||
free(outBuffer);
|
||||
ZSTD_seekable_freeCStream(zscs);
|
||||
ZSTD_seekTable_free(zst);
|
||||
ZSTD_seekable_free(stream);
|
||||
}
|
||||
printf("Success!\n");
|
||||
|
||||
|
||||
printf("Test %u - check that seekable decompress does not hang: ", testNb++);
|
||||
{ /* Github issue #2335 */
|
||||
const size_t compressed_size = 17;
|
||||
const uint8_t compressed_data[17] = {
|
||||
'^',
|
||||
'*',
|
||||
'M',
|
||||
'\x18',
|
||||
'\t',
|
||||
'\x00',
|
||||
'\x00',
|
||||
'\x00',
|
||||
'\x00',
|
||||
'\x00',
|
||||
'\x00',
|
||||
'\x00',
|
||||
(uint8_t)('\x03'),
|
||||
(uint8_t)('\xb1'),
|
||||
(uint8_t)('\xea'),
|
||||
(uint8_t)('\x92'),
|
||||
(uint8_t)('\x8f'),
|
||||
};
|
||||
const size_t uncompressed_size = 32;
|
||||
uint8_t uncompressed_data[32];
|
||||
|
||||
ZSTD_seekable* const stream = ZSTD_seekable_create();
|
||||
assert(stream != NULL);
|
||||
{ size_t const status = ZSTD_seekable_initBuff(stream, compressed_data, compressed_size);
|
||||
if (ZSTD_isError(status)) {
|
||||
ZSTD_seekable_free(stream);
|
||||
goto _test_error;
|
||||
} }
|
||||
|
||||
/* Should return an error, but not hang */
|
||||
{ const size_t offset = 2;
|
||||
size_t const status = ZSTD_seekable_decompress(stream, uncompressed_data, uncompressed_size, offset);
|
||||
if (!ZSTD_isError(status)) {
|
||||
ZSTD_seekable_free(stream);
|
||||
goto _test_error;
|
||||
} }
|
||||
|
||||
ZSTD_seekable_free(stream);
|
||||
}
|
||||
printf("Success!\n");
|
||||
|
||||
printf("Test %u - check #2 that seekable decompress does not hang: ", testNb++);
|
||||
{ /* Github issue #FIXME */
|
||||
const size_t compressed_size = 27;
|
||||
const uint8_t compressed_data[27] = {
|
||||
(uint8_t)'\x28',
|
||||
(uint8_t)'\xb5',
|
||||
(uint8_t)'\x2f',
|
||||
(uint8_t)'\xfd',
|
||||
(uint8_t)'\x00',
|
||||
(uint8_t)'\x32',
|
||||
(uint8_t)'\x91',
|
||||
(uint8_t)'\x00',
|
||||
(uint8_t)'\x00',
|
||||
(uint8_t)'\x00',
|
||||
(uint8_t)'\x5e',
|
||||
(uint8_t)'\x2a',
|
||||
(uint8_t)'\x4d',
|
||||
(uint8_t)'\x18',
|
||||
(uint8_t)'\x09',
|
||||
(uint8_t)'\x00',
|
||||
(uint8_t)'\x00',
|
||||
(uint8_t)'\x00',
|
||||
(uint8_t)'\x00',
|
||||
(uint8_t)'\x00',
|
||||
(uint8_t)'\x00',
|
||||
(uint8_t)'\x00',
|
||||
(uint8_t)'\x00',
|
||||
(uint8_t)'\xb1',
|
||||
(uint8_t)'\xea',
|
||||
(uint8_t)'\x92',
|
||||
(uint8_t)'\x8f',
|
||||
};
|
||||
const size_t uncompressed_size = 400;
|
||||
uint8_t uncompressed_data[400];
|
||||
|
||||
ZSTD_seekable* stream = ZSTD_seekable_create();
|
||||
size_t status = ZSTD_seekable_initBuff(stream, compressed_data, compressed_size);
|
||||
if (ZSTD_isError(status)) {
|
||||
ZSTD_seekable_free(stream);
|
||||
goto _test_error;
|
||||
}
|
||||
|
||||
const size_t offset = 2;
|
||||
/* Should return an error, but not hang */
|
||||
status = ZSTD_seekable_decompress(stream, uncompressed_data, uncompressed_size, offset);
|
||||
if (!ZSTD_isError(status)) {
|
||||
ZSTD_seekable_free(stream);
|
||||
goto _test_error;
|
||||
}
|
||||
|
||||
ZSTD_seekable_free(stream);
|
||||
}
|
||||
printf("Success!\n");
|
||||
|
||||
|
||||
printf("Test %u - check ZSTD magic in compressing empty string: ", testNb++);
|
||||
{ // compressing empty string should return a zstd header
|
||||
size_t const capacity = 255;
|
||||
char* inBuffer = malloc(capacity);
|
||||
assert(inBuffer != NULL);
|
||||
inBuffer[0] = '\0';
|
||||
void* const outBuffer = malloc(capacity);
|
||||
assert(outBuffer != NULL);
|
||||
|
||||
ZSTD_seekable_CStream *s = ZSTD_seekable_createCStream();
|
||||
ZSTD_seekable_initCStream(s, 1, 1, 255);
|
||||
|
||||
ZSTD_inBuffer input = { .src=inBuffer, .pos=0, .size=0 };
|
||||
ZSTD_outBuffer output = { .dst=outBuffer, .pos=0, .size=capacity };
|
||||
|
||||
ZSTD_seekable_compressStream(s, &output, &input);
|
||||
ZSTD_seekable_endStream(s, &output);
|
||||
|
||||
if((((char*)output.dst)[0] != '\x28') | (((char*)output.dst)[1] != '\xb5') | (((char*)output.dst)[2] != '\x2f') | (((char*)output.dst)[3] != '\xfd')) {
|
||||
printf("%#02x %#02x %#02x %#02x\n", ((char*)output.dst)[0], ((char*)output.dst)[1] , ((char*)output.dst)[2] , ((char*)output.dst)[3] );
|
||||
|
||||
free(inBuffer);
|
||||
free(outBuffer);
|
||||
ZSTD_seekable_freeCStream(s);
|
||||
goto _test_error;
|
||||
}
|
||||
|
||||
free(inBuffer);
|
||||
free(outBuffer);
|
||||
ZSTD_seekable_freeCStream(s);
|
||||
}
|
||||
printf("Success!\n");
|
||||
|
||||
|
||||
printf("Test %u - multiple decompress calls: ", testNb++);
|
||||
{ char const inBuffer[] = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt";
|
||||
size_t const inSize = sizeof(inBuffer);
|
||||
|
||||
size_t const seekCapacity = 5000;
|
||||
void* const seekBuffer = malloc(seekCapacity);
|
||||
assert(seekBuffer != NULL);
|
||||
size_t seekSize;
|
||||
|
||||
size_t const outCapacity = inSize;
|
||||
char* const outBuffer = malloc(outCapacity);
|
||||
assert(outBuffer != NULL);
|
||||
|
||||
ZSTD_seekable_CStream* const zscs = ZSTD_seekable_createCStream();
|
||||
assert(zscs != NULL);
|
||||
|
||||
/* compress test data with a small frame size to ensure multiple frames in the output */
|
||||
unsigned const maxFrameSize = 40;
|
||||
{ size_t const initStatus = ZSTD_seekable_initCStream(zscs, 9, 0 /* checksumFlag */, maxFrameSize);
|
||||
assert(!ZSTD_isError(initStatus));
|
||||
}
|
||||
|
||||
{ ZSTD_outBuffer outb = { .dst=seekBuffer, .pos=0, .size=seekCapacity };
|
||||
ZSTD_inBuffer inb = { .src=inBuffer, .pos=0, .size=inSize };
|
||||
|
||||
while (inb.pos < inb.size) {
|
||||
size_t const cStatus = ZSTD_seekable_compressStream(zscs, &outb, &inb);
|
||||
assert(!ZSTD_isError(cStatus));
|
||||
}
|
||||
|
||||
size_t const endStatus = ZSTD_seekable_endStream(zscs, &outb);
|
||||
assert(!ZSTD_isError(endStatus));
|
||||
seekSize = outb.pos;
|
||||
}
|
||||
|
||||
ZSTD_seekable* const stream = ZSTD_seekable_create();
|
||||
assert(stream != NULL);
|
||||
buffWrapperWithTotal_t buffWrapper = {seekBuffer, seekSize, 0, 0};
|
||||
{ ZSTD_seekable_customFile srcFile = {&buffWrapper, &readBuffWithTotal, &seekBuffWithTotal};
|
||||
size_t const initStatus = ZSTD_seekable_initAdvanced(stream, srcFile);
|
||||
assert(!ZSTD_isError(initStatus)); }
|
||||
|
||||
/* Perform a series of small reads and seeks (repeatedly read 1 byte and skip 1 byte)
|
||||
and check that we didn't reread input data unnecessarily */
|
||||
size_t pos;
|
||||
for (pos = 0; pos < inSize; pos += 2) {
|
||||
size_t const decStatus = ZSTD_seekable_decompress(stream, outBuffer, 1, pos);
|
||||
if (decStatus != 1 || outBuffer[0] != inBuffer[pos]) {
|
||||
goto _test_error;
|
||||
}
|
||||
}
|
||||
if (buffWrapper.totalRead > seekSize) {
|
||||
/* We read more than the compressed size, meaning there were some rereads.
|
||||
This is unneeded because we only seeked forward. */
|
||||
printf("Too much data read: %zu read, with compressed size %zu\n", buffWrapper.totalRead, seekSize);
|
||||
goto _test_error;
|
||||
}
|
||||
|
||||
/* Perform some reads and seeks to ensure correctness */
|
||||
struct {
|
||||
size_t offset;
|
||||
size_t size;
|
||||
} const tests[] = { /* Assume the frame size is 40 */
|
||||
{20, 40}, /* read partial data from two frames */
|
||||
{60, 10}, /* continue reading from the same offset */
|
||||
{50, 20}, /* seek backward within the same frame */
|
||||
{10, 10}, /* seek backward to a different frame */
|
||||
{25, 10}, /* seek forward within the same frame */
|
||||
{60, 10}, /* seek forward to a different frame */
|
||||
};
|
||||
size_t idx;
|
||||
for (idx = 0; idx < sizeof(tests) / sizeof(tests[0]); idx++) {
|
||||
size_t const decStatus = ZSTD_seekable_decompress(stream, outBuffer, tests[idx].size, tests[idx].offset);
|
||||
if (decStatus != tests[idx].size || memcmp(outBuffer, inBuffer + tests[idx].offset, tests[idx].size) != 0) {
|
||||
goto _test_error;
|
||||
}
|
||||
}
|
||||
|
||||
free(seekBuffer);
|
||||
free(outBuffer);
|
||||
ZSTD_seekable_freeCStream(zscs);
|
||||
ZSTD_seekable_free(stream);
|
||||
}
|
||||
printf("Success!\n");
|
||||
|
||||
/* TODO: Add more tests */
|
||||
printf("Finished tests\n");
|
||||
return 0;
|
||||
|
||||
_test_error:
|
||||
printf("test failed! Exiting..\n");
|
||||
return 1;
|
||||
}
|
@ -0,0 +1,226 @@
|
||||
#ifndef SEEKABLE_H
|
||||
#define SEEKABLE_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include "zstd.h" /* ZSTDLIB_API */
|
||||
|
||||
#if defined (__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
#define ZSTD_seekTableFooterSize 9
|
||||
|
||||
#define ZSTD_SEEKABLE_MAGICNUMBER 0x8F92EAB1
|
||||
|
||||
#define ZSTD_SEEKABLE_MAXFRAMES 0x8000000U
|
||||
|
||||
/* Limit maximum size to avoid potential issues storing the compressed size */
|
||||
#define ZSTD_SEEKABLE_MAX_FRAME_DECOMPRESSED_SIZE 0x40000000U
|
||||
|
||||
/*-****************************************************************************
|
||||
* Seekable Format
|
||||
*
|
||||
* The seekable format splits the compressed data into a series of "frames",
|
||||
* each compressed individually so that decompression of a section in the
|
||||
* middle of an archive only requires zstd to decompress at most a frame's
|
||||
* worth of extra data, instead of the entire archive.
|
||||
******************************************************************************/
|
||||
|
||||
typedef struct ZSTD_seekable_CStream_s ZSTD_seekable_CStream;
|
||||
typedef struct ZSTD_seekable_s ZSTD_seekable;
|
||||
typedef struct ZSTD_seekTable_s ZSTD_seekTable;
|
||||
|
||||
/*-****************************************************************************
|
||||
* Seekable compression - HowTo
|
||||
* A ZSTD_seekable_CStream object is required to track the streaming operation.
|
||||
* Use ZSTD_seekable_createCStream() and ZSTD_seekable_freeCStream() to create/
|
||||
* release resources.
|
||||
*
|
||||
* Streaming objects are reusable to avoid allocation and deallocation,
|
||||
* to start a new compression operation call ZSTD_seekable_initCStream() on the
|
||||
* compressor.
|
||||
*
|
||||
* Data streamed to the seekable compressor will automatically be split into
|
||||
* frames of size `maxFrameSize` (provided in ZSTD_seekable_initCStream()),
|
||||
* or if none is provided, will be cut off whenever ZSTD_seekable_endFrame() is
|
||||
* called or when the default maximum frame size (2GB) is reached.
|
||||
*
|
||||
* Use ZSTD_seekable_initCStream() to initialize a ZSTD_seekable_CStream object
|
||||
* for a new compression operation.
|
||||
* - `maxFrameSize` indicates the size at which to automatically start a new
|
||||
* seekable frame.
|
||||
* `maxFrameSize == 0` implies the default maximum size.
|
||||
* Smaller frame sizes allow faster decompression of small segments,
|
||||
* since retrieving a single byte requires decompression of
|
||||
* the full frame where the byte belongs.
|
||||
* In general, size the frames to roughly correspond to
|
||||
* the access granularity (when it's known).
|
||||
* But small sizes also reduce compression ratio.
|
||||
* Avoid really tiny frame sizes (< 1 KB),
|
||||
* that would hurt compression ratio considerably.
|
||||
* - `checksumFlag` indicates whether or not the seek table should include frame
|
||||
* checksums on the uncompressed data for verification.
|
||||
* @return : a size hint for input to provide for compression, or an error code
|
||||
* checkable with ZSTD_isError()
|
||||
*
|
||||
* Use ZSTD_seekable_compressStream() repetitively to consume input stream.
|
||||
* The function will automatically update both `pos` fields.
|
||||
* Note that it may not consume the entire input, in which case `pos < size`,
|
||||
* and it's up to the caller to present again remaining data.
|
||||
* @return : a size hint, preferred nb of bytes to use as input for next
|
||||
* function call or an error code, which can be tested using
|
||||
* ZSTD_isError().
|
||||
* Note 1 : it's just a hint, to help latency a little, any other
|
||||
* value will work fine.
|
||||
*
|
||||
* At any time, call ZSTD_seekable_endFrame() to end the current frame and
|
||||
* start a new one.
|
||||
*
|
||||
* ZSTD_seekable_endStream() will end the current frame, and then write the seek
|
||||
* table so that decompressors can efficiently find compressed frames.
|
||||
* ZSTD_seekable_endStream() may return a number > 0 if it was unable to flush
|
||||
* all the necessary data to `output`. In this case, it should be called again
|
||||
* until all remaining data is flushed out and 0 is returned.
|
||||
******************************************************************************/
|
||||
|
||||
/*===== Seekable compressor management =====*/
|
||||
ZSTDLIB_API ZSTD_seekable_CStream* ZSTD_seekable_createCStream(void);
|
||||
ZSTDLIB_API size_t ZSTD_seekable_freeCStream(ZSTD_seekable_CStream* zcs);
|
||||
|
||||
/*===== Seekable compression functions =====*/
|
||||
ZSTDLIB_API size_t ZSTD_seekable_initCStream(ZSTD_seekable_CStream* zcs, int compressionLevel, int checksumFlag, unsigned maxFrameSize);
|
||||
ZSTDLIB_API size_t ZSTD_seekable_compressStream(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
|
||||
ZSTDLIB_API size_t ZSTD_seekable_endFrame(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output);
|
||||
ZSTDLIB_API size_t ZSTD_seekable_endStream(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output);
|
||||
|
||||
/*= Raw seek table API
|
||||
* These functions allow for the seek table to be constructed directly.
|
||||
* This table can then be appended to a file of concatenated frames.
|
||||
* This allows the frames to be compressed independently, even in parallel,
|
||||
* and compiled together afterward into a seekable archive.
|
||||
*
|
||||
* Use ZSTD_seekable_createFrameLog() to allocate and initialize a tracking
|
||||
* structure.
|
||||
*
|
||||
* Call ZSTD_seekable_logFrame() once for each frame in the archive.
|
||||
* checksum is optional, and will not be used if checksumFlag was 0 when the
|
||||
* frame log was created. If present, it should be the least significant 32
|
||||
* bits of the XXH64 hash of the uncompressed data.
|
||||
*
|
||||
* Call ZSTD_seekable_writeSeekTable to serialize the data into a seek table.
|
||||
* If the entire table was written, the return value will be 0. Otherwise,
|
||||
* it will be equal to the number of bytes left to write. */
|
||||
typedef struct ZSTD_frameLog_s ZSTD_frameLog;
|
||||
ZSTDLIB_API ZSTD_frameLog* ZSTD_seekable_createFrameLog(int checksumFlag);
|
||||
ZSTDLIB_API size_t ZSTD_seekable_freeFrameLog(ZSTD_frameLog* fl);
|
||||
ZSTDLIB_API size_t ZSTD_seekable_logFrame(ZSTD_frameLog* fl, unsigned compressedSize, unsigned decompressedSize, unsigned checksum);
|
||||
ZSTDLIB_API size_t ZSTD_seekable_writeSeekTable(ZSTD_frameLog* fl, ZSTD_outBuffer* output);
|
||||
|
||||
|
||||
/*-****************************************************************************
|
||||
* Seekable decompression - HowTo
|
||||
* A ZSTD_seekable object is required to track the seek table.
|
||||
*
|
||||
* Call ZSTD_seekable_init* to initialize a ZSTD_seekable object with the
|
||||
* seek table provided in the input.
|
||||
* There are three modes for ZSTD_seekable_init:
|
||||
* - ZSTD_seekable_initBuff() : An in-memory API. The data contained in
|
||||
* `src` should be the entire seekable file, including the seek table.
|
||||
* `src` should be kept alive and unmodified until the ZSTD_seekable object
|
||||
* is freed or reset.
|
||||
* - ZSTD_seekable_initFile() : A simplified file API using stdio. fread and
|
||||
* fseek will be used to access the required data for building the seek
|
||||
* table and doing decompression operations. `src` should not be closed
|
||||
* or modified until the ZSTD_seekable object is freed or reset.
|
||||
* - ZSTD_seekable_initAdvanced() : A general API allowing the client to
|
||||
* provide its own read and seek callbacks.
|
||||
* + ZSTD_seekable_read() : read exactly `n` bytes into `buffer`.
|
||||
* Premature EOF should be treated as an error.
|
||||
* + ZSTD_seekable_seek() : seek the read head to `offset` from `origin`,
|
||||
* where origin is either SEEK_SET (beginning of
|
||||
* file), or SEEK_END (end of file).
|
||||
* Both functions should return a non-negative value in case of success, and a
|
||||
* negative value in case of failure. If implementing using this API and
|
||||
* stdio, be careful with files larger than 4GB and fseek. All of these
|
||||
* functions return an error code checkable with ZSTD_isError().
|
||||
*
|
||||
* Call ZSTD_seekable_decompress to decompress `dstSize` bytes at decompressed
|
||||
* offset `offset`. ZSTD_seekable_decompress may have to decompress the entire
|
||||
* prefix of the frame before the desired data if it has not already processed
|
||||
* this section. If ZSTD_seekable_decompress is called multiple times for a
|
||||
* consecutive range of data, it will efficiently retain the decompressor object
|
||||
* and avoid redecompressing frame prefixes. The return value is the number of
|
||||
* bytes decompressed, or an error code checkable with ZSTD_isError().
|
||||
*
|
||||
* The seek table access functions can be used to obtain the data contained
|
||||
* in the seek table. If frameIndex is larger than the value returned by
|
||||
* ZSTD_seekable_getNumFrames(), they will return error codes checkable with
|
||||
* ZSTD_isError(). Note that since the offset access functions return
|
||||
* unsigned long long instead of size_t, in this case they will instead return
|
||||
* the value ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE.
|
||||
******************************************************************************/
|
||||
|
||||
/*===== Seekable decompressor management =====*/
|
||||
ZSTDLIB_API ZSTD_seekable* ZSTD_seekable_create(void);
|
||||
ZSTDLIB_API size_t ZSTD_seekable_free(ZSTD_seekable* zs);
|
||||
|
||||
/*===== Seekable decompression functions =====*/
|
||||
ZSTDLIB_API size_t ZSTD_seekable_initBuff(ZSTD_seekable* zs, const void* src, size_t srcSize);
|
||||
ZSTDLIB_API size_t ZSTD_seekable_initFile(ZSTD_seekable* zs, FILE* src);
|
||||
ZSTDLIB_API size_t ZSTD_seekable_decompress(ZSTD_seekable* zs, void* dst, size_t dstSize, unsigned long long offset);
|
||||
ZSTDLIB_API size_t ZSTD_seekable_decompressFrame(ZSTD_seekable* zs, void* dst, size_t dstSize, unsigned frameIndex);
|
||||
|
||||
#define ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE (0ULL-2)
|
||||
/*===== Seekable seek table access functions =====*/
|
||||
ZSTDLIB_API unsigned ZSTD_seekable_getNumFrames(const ZSTD_seekable* zs);
|
||||
ZSTDLIB_API unsigned long long ZSTD_seekable_getFrameCompressedOffset(const ZSTD_seekable* zs, unsigned frameIndex);
|
||||
ZSTDLIB_API unsigned long long ZSTD_seekable_getFrameDecompressedOffset(const ZSTD_seekable* zs, unsigned frameIndex);
|
||||
ZSTDLIB_API size_t ZSTD_seekable_getFrameCompressedSize(const ZSTD_seekable* zs, unsigned frameIndex);
|
||||
ZSTDLIB_API size_t ZSTD_seekable_getFrameDecompressedSize(const ZSTD_seekable* zs, unsigned frameIndex);
|
||||
ZSTDLIB_API unsigned ZSTD_seekable_offsetToFrameIndex(const ZSTD_seekable* zs, unsigned long long offset);
|
||||
|
||||
|
||||
/*-****************************************************************************
|
||||
* Direct exploitation of the seekTable
|
||||
*
|
||||
* Memory constrained use cases that manage multiple archives
|
||||
* benefit from retaining multiple archive seek tables
|
||||
* without retaining a ZSTD_seekable instance for each.
|
||||
*
|
||||
* The API below allows the above-mentioned use cases
|
||||
* to initialize a ZSTD_seekable, extract its (smaller) ZSTD_seekTable,
|
||||
* then throw the ZSTD_seekable away to save memory.
|
||||
*
|
||||
* Standard ZSTD operations can then be used
|
||||
* to decompress frames based on seek table offsets.
|
||||
******************************************************************************/
|
||||
|
||||
/*===== Independent seek table management =====*/
|
||||
ZSTDLIB_API ZSTD_seekTable* ZSTD_seekTable_create_fromSeekable(const ZSTD_seekable* zs);
|
||||
ZSTDLIB_API size_t ZSTD_seekTable_free(ZSTD_seekTable* st);
|
||||
|
||||
/*===== Direct seek table access functions =====*/
|
||||
ZSTDLIB_API unsigned ZSTD_seekTable_getNumFrames(const ZSTD_seekTable* st);
|
||||
ZSTDLIB_API unsigned long long ZSTD_seekTable_getFrameCompressedOffset(const ZSTD_seekTable* st, unsigned frameIndex);
|
||||
ZSTDLIB_API unsigned long long ZSTD_seekTable_getFrameDecompressedOffset(const ZSTD_seekTable* st, unsigned frameIndex);
|
||||
ZSTDLIB_API size_t ZSTD_seekTable_getFrameCompressedSize(const ZSTD_seekTable* st, unsigned frameIndex);
|
||||
ZSTDLIB_API size_t ZSTD_seekTable_getFrameDecompressedSize(const ZSTD_seekTable* st, unsigned frameIndex);
|
||||
ZSTDLIB_API unsigned ZSTD_seekTable_offsetToFrameIndex(const ZSTD_seekTable* st, unsigned long long offset);
|
||||
|
||||
|
||||
/*===== Seekable advanced I/O API =====*/
|
||||
/* Read callback: must read exactly `n` bytes into `buffer`.
 * Returns a non-negative value on success, a negative value on failure. */
typedef int (ZSTD_seekable_read)(void* opaque, void* buffer, size_t n);

/* Seek callback: moves the read head to `offset` relative to `origin`.
 * Returns a non-negative value on success, a negative value on failure. */
typedef int (ZSTD_seekable_seek)(void* opaque, long long offset, int origin);

/* Client-supplied source for ZSTD_seekable_initAdvanced(). */
typedef struct {
    void* opaque;               /* handed back unchanged as the first argument of both callbacks */
    ZSTD_seekable_read* read;   /* read callback */
    ZSTD_seekable_seek* seek;   /* seek callback */
} ZSTD_seekable_customFile;
|
||||
ZSTDLIB_API size_t ZSTD_seekable_initAdvanced(ZSTD_seekable* zs, ZSTD_seekable_customFile src);
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
@ -0,0 +1,116 @@
|
||||
# Zstandard Seekable Format
|
||||
|
||||
### Notices
|
||||
|
||||
Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
|
||||
Permission is granted to copy and distribute this document
|
||||
for any purpose and without charge,
|
||||
including translations into other languages
|
||||
and incorporation into compilations,
|
||||
provided that the copyright notice and this notice are preserved,
|
||||
and that any substantive changes or deletions from the original
|
||||
are clearly marked.
|
||||
Distribution of this document is unlimited.
|
||||
|
||||
### Version
|
||||
0.1.0 (11/04/17)
|
||||
|
||||
## Introduction
|
||||
This document defines a format for compressed data to be stored so that subranges of the data can be efficiently decompressed without requiring the entire document to be decompressed.
|
||||
This is done by splitting up the input data into frames,
|
||||
each of which is compressed independently,
|
||||
and so can be decompressed independently.
|
||||
Decompression then takes advantage of a provided 'seek table', which allows the decompressor to immediately jump to the desired data. This is done in a way that is compatible with the original Zstandard format by placing the seek table in a Zstandard skippable frame.
|
||||
|
||||
### Overall conventions
|
||||
In this document:
|
||||
- square brackets i.e. `[` and `]` are used to indicate optional fields or parameters.
|
||||
- the naming convention for identifiers is `Mixed_Case_With_Underscores`
|
||||
- All numeric fields are little-endian unless specified otherwise
|
||||
|
||||
## Format
|
||||
|
||||
The format consists of a number of frames (Zstandard compressed frames and skippable frames), followed by a final skippable frame at the end containing the seek table.
|
||||
|
||||
### Seek Table Format
|
||||
The structure of the seek table frame is as follows:
|
||||
|
||||
|`Skippable_Magic_Number`|`Frame_Size`|`[Seek_Table_Entries]`|`Seek_Table_Footer`|
|
||||
|------------------------|------------|----------------------|-------------------|
|
||||
| 4 bytes | 4 bytes | 8-12 bytes each | 9 bytes |
|
||||
|
||||
__`Skippable_Magic_Number`__
|
||||
|
||||
Value : 0x184D2A5E.
|
||||
This is for compatibility with [Zstandard skippable frames].
|
||||
Since it is legal for other Zstandard skippable frames to use the same
|
||||
magic number, it is not recommended for a decoder to recognize frames
|
||||
solely on this.
|
||||
|
||||
__`Frame_Size`__
|
||||
|
||||
The total size of the skippable frame, not including the `Skippable_Magic_Number` or `Frame_Size`.
|
||||
This is for compatibility with [Zstandard skippable frames].
|
||||
|
||||
[Zstandard skippable frames]: https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#skippable-frames
|
||||
|
||||
#### `Seek_Table_Footer`
|
||||
The seek table footer format is as follows:
|
||||
|
||||
|`Number_Of_Frames`|`Seek_Table_Descriptor`|`Seekable_Magic_Number`|
|
||||
|------------------|-----------------------|-----------------------|
|
||||
| 4 bytes | 1 byte | 4 bytes |
|
||||
|
||||
__`Seekable_Magic_Number`__
|
||||
|
||||
Value : 0x8F92EAB1.
|
||||
This value must be the last bytes present in the compressed file so that decoders
|
||||
can efficiently find it and determine if there is an actual seek table present.
|
||||
|
||||
__`Number_Of_Frames`__
|
||||
|
||||
The number of stored frames in the data.
|
||||
|
||||
__`Seek_Table_Descriptor`__
|
||||
|
||||
A bitfield describing the format of the seek table.
|
||||
|
||||
| Bit number | Field name |
|
||||
| ---------- | ---------- |
|
||||
| 7 | `Checksum_Flag` |
|
||||
| 6-2 | `Reserved_Bits` |
|
||||
| 1-0 | `Unused_Bits` |
|
||||
|
||||
While only `Checksum_Flag` currently exists, there are 7 other bits in this field that can be used for future changes to the format,
|
||||
for example the addition of inline dictionaries.
|
||||
|
||||
__`Checksum_Flag`__
|
||||
|
||||
If the checksum flag is set, each of the seek table entries contains a 4 byte checksum of the uncompressed data contained in its frame.
|
||||
|
||||
`Reserved_Bits` are not currently used but may be used in the future for breaking changes, so a compliant decoder should ensure they are set to 0. `Unused_Bits` may be used in the future for non-breaking changes, so a compliant decoder should not interpret these bits.
|
||||
|
||||
#### __`Seek_Table_Entries`__
|
||||
|
||||
`Seek_Table_Entries` consists of `Number_Of_Frames` (one for each frame in the data, not including the seek table frame) entries of the following form, in sequence:
|
||||
|
||||
|`Compressed_Size`|`Decompressed_Size`|`[Checksum]`|
|
||||
|-----------------|-------------------|------------|
|
||||
| 4 bytes | 4 bytes | 4 bytes |
|
||||
|
||||
__`Compressed_Size`__
|
||||
|
||||
The compressed size of the frame.
|
||||
The cumulative sum of the `Compressed_Size` fields of frames `0` to `i` gives the offset in the compressed file of frame `i+1`.
|
||||
|
||||
__`Decompressed_Size`__
|
||||
|
||||
The size of the decompressed data contained in the frame. For skippable or otherwise empty frames, this value is 0.
|
||||
|
||||
__`Checksum`__
|
||||
|
||||
Only present if `Checksum_Flag` is set in the `Seek_Table_Descriptor`. Value : the least significant 32 bits of the XXH64 digest of the uncompressed data, stored in little-endian format.
|
||||
|
||||
## Version Changes
|
||||
- 0.1.0: initial version
|
@ -0,0 +1,365 @@
|
||||
/*
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
*/
|
||||
|
||||
#include <stdlib.h> /* malloc, free */
|
||||
#include <limits.h> /* UINT_MAX */
|
||||
#include <assert.h>
|
||||
|
||||
#define XXH_STATIC_LINKING_ONLY
|
||||
#include "xxhash.h"
|
||||
|
||||
#define ZSTD_STATIC_LINKING_ONLY
|
||||
#include "zstd.h"
|
||||
#include "zstd_errors.h"
|
||||
#include "mem.h"
|
||||
|
||||
#include "zstd_seekable.h"
|
||||
|
||||
#define CHECK_Z(f) { size_t const ret = (f); if (ret != 0) return ret; }
|
||||
|
||||
#undef ERROR
|
||||
#define ERROR(name) ((size_t)-ZSTD_error_##name)
|
||||
|
||||
#undef MIN
|
||||
#undef MAX
|
||||
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
typedef struct {
|
||||
U32 cSize;
|
||||
U32 dSize;
|
||||
U32 checksum;
|
||||
} framelogEntry_t;
|
||||
|
||||
struct ZSTD_frameLog_s {
|
||||
framelogEntry_t* entries;
|
||||
U32 size;
|
||||
U32 capacity;
|
||||
|
||||
int checksumFlag;
|
||||
|
||||
/* for use when streaming out the seek table */
|
||||
U32 seekTablePos;
|
||||
U32 seekTableIndex;
|
||||
} framelog_t;
|
||||
|
||||
struct ZSTD_seekable_CStream_s {
|
||||
ZSTD_CStream* cstream;
|
||||
ZSTD_frameLog framelog;
|
||||
|
||||
U32 frameCSize;
|
||||
U32 frameDSize;
|
||||
|
||||
XXH64_state_t xxhState;
|
||||
|
||||
U32 maxFrameSize;
|
||||
|
||||
int writingSeekTable;
|
||||
};
|
||||
|
||||
/* Allocates the initial entries array of a frame log.
 * @return : 0 on success, or a memory_allocation error code.
 * On failure fl->entries is NULL and fl->capacity is left untouched. */
static size_t ZSTD_seekable_frameLog_allocVec(ZSTD_frameLog* fl)
{
    enum { kInitialEntries = 16 };   /* starting room; logFrame() doubles it when full */
    framelogEntry_t* const vec =
        (framelogEntry_t*)malloc(sizeof(framelogEntry_t) * kInitialEntries);

    fl->entries = vec;
    if (vec == NULL) return ERROR(memory_allocation);

    fl->capacity = (U32)kInitialEntries;
    return 0;
}
|
||||
|
||||
/* Releases the entries array of a frame log (not the log object itself).
 * Accepts NULL. Always returns 0. */
static size_t ZSTD_seekable_frameLog_freeVec(ZSTD_frameLog* fl)
{
    if (fl == NULL) return 0;
    free(fl->entries);
    return 0;
}
|
||||
|
||||
ZSTD_frameLog* ZSTD_seekable_createFrameLog(int checksumFlag)
|
||||
{
|
||||
ZSTD_frameLog* const fl = (ZSTD_frameLog*)malloc(sizeof(ZSTD_frameLog));
|
||||
if (fl == NULL) return NULL;
|
||||
|
||||
if (ZSTD_isError(ZSTD_seekable_frameLog_allocVec(fl))) {
|
||||
free(fl);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
fl->checksumFlag = checksumFlag;
|
||||
fl->seekTablePos = 0;
|
||||
fl->seekTableIndex = 0;
|
||||
fl->size = 0;
|
||||
|
||||
return fl;
|
||||
}
|
||||
|
||||
/* Frees a frame log and its entries array. Accepts NULL. Always returns 0. */
size_t ZSTD_seekable_freeFrameLog(ZSTD_frameLog* fl)
{
    (void)ZSTD_seekable_frameLog_freeVec(fl);   /* entries first (NULL-safe) */
    free(fl);                                   /* then the log object */
    return 0;
}
|
||||
|
||||
ZSTD_seekable_CStream* ZSTD_seekable_createCStream(void)
|
||||
{
|
||||
ZSTD_seekable_CStream* const zcs = (ZSTD_seekable_CStream*)malloc(sizeof(ZSTD_seekable_CStream));
|
||||
if (zcs == NULL) return NULL;
|
||||
|
||||
memset(zcs, 0, sizeof(*zcs));
|
||||
|
||||
zcs->cstream = ZSTD_createCStream();
|
||||
if (zcs->cstream == NULL) goto failed1;
|
||||
|
||||
if (ZSTD_isError(ZSTD_seekable_frameLog_allocVec(&zcs->framelog))) goto failed2;
|
||||
|
||||
return zcs;
|
||||
|
||||
failed2:
|
||||
ZSTD_freeCStream(zcs->cstream);
|
||||
failed1:
|
||||
free(zcs);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Frees a seekable compression stream and everything it owns.
 * Accepts NULL. Always returns 0. */
size_t ZSTD_seekable_freeCStream(ZSTD_seekable_CStream* zcs)
{
    if (zcs != NULL) {
        ZSTD_freeCStream(zcs->cstream);
        ZSTD_seekable_frameLog_freeVec(&zcs->framelog);
        free(zcs);
    }
    return 0;
}
|
||||
|
||||
/* (Re)initialises a seekable compression stream for a new archive.
 * @param compressionLevel : forwarded to ZSTD_initCStream().
 * @param checksumFlag     : non-zero to hash each frame's input.
 * @param maxFrameSize     : frame size limit; 0 selects
 *                           ZSTD_SEEKABLE_MAX_FRAME_DECOMPRESSED_SIZE.
 * @return : result of ZSTD_initCStream(), or frameParameter_unsupported
 *           when maxFrameSize exceeds the allowed maximum. */
size_t ZSTD_seekable_initCStream(ZSTD_seekable_CStream* zcs,
                                 int compressionLevel,
                                 int checksumFlag,
                                 unsigned maxFrameSize)
{
    ZSTD_frameLog* const log = &zcs->framelog;

    /* the per-frame counters are cleared even when the size check below fails */
    log->size = 0;
    zcs->frameCSize = 0;
    zcs->frameDSize = 0;

    /* make sure maxFrameSize has a reasonable value */
    if (maxFrameSize > ZSTD_SEEKABLE_MAX_FRAME_DECOMPRESSED_SIZE) {
        return ERROR(frameParameter_unsupported);
    }

    if (maxFrameSize == 0) {
        zcs->maxFrameSize = ZSTD_SEEKABLE_MAX_FRAME_DECOMPRESSED_SIZE;
    } else {
        zcs->maxFrameSize = maxFrameSize;
    }

    log->checksumFlag = checksumFlag;
    if (checksumFlag) {
        XXH64_reset(&zcs->xxhState, 0);
    }

    log->seekTablePos = 0;
    log->seekTableIndex = 0;
    zcs->writingSeekTable = 0;

    return ZSTD_initCStream(zcs->cstream, compressionLevel);
}
|
||||
|
||||
/* Appends one frame record to the log, doubling the array when it is full.
 * @return : 0 on success, frameIndex_tooLarge once ZSTD_SEEKABLE_MAXFRAMES
 *           records exist, or memory_allocation if the array cannot grow
 *           (the existing array is left intact in that case). */
size_t ZSTD_seekable_logFrame(ZSTD_frameLog* fl,
                              unsigned compressedSize,
                              unsigned decompressedSize,
                              unsigned checksum)
{
    framelogEntry_t* slot;

    if (fl->size == ZSTD_SEEKABLE_MAXFRAMES)
        return ERROR(frameIndex_tooLarge);

    if (fl->size == fl->capacity) {
        /* doubling keeps the amortised cost of an append constant */
        size_t const doubled = fl->capacity * 2;
        framelogEntry_t* const grown = (framelogEntry_t*)realloc(
                fl->entries, sizeof(framelogEntry_t) * doubled);

        if (grown == NULL) return ERROR(memory_allocation);

        fl->entries = grown;
        assert(doubled <= UINT_MAX);
        fl->capacity = (U32)doubled;
    }

    slot = &fl->entries[fl->size];
    slot->cSize = compressedSize;
    slot->dSize = decompressedSize;
    slot->checksum = checksum;
    fl->size++;

    return 0;
}
|
||||
|
||||
/* Ends the current frame and, once it is fully flushed, logs it and resets
 * the per-frame state.
 * @return : 0 when the frame is complete and logged; otherwise the non-zero
 *           value from ZSTD_endStream() (more to flush, or an error) or an
 *           error from ZSTD_seekable_logFrame(). */
size_t ZSTD_seekable_endFrame(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output)
{
    size_t const startPos = output->pos;
    size_t status = ZSTD_endStream(zcs->cstream, output);
    U32 frameChecksum = 0;

    /* whatever was emitted belongs to the current frame, finished or not */
    zcs->frameCSize += (U32)(output->pos - startPos);

    /* not fully flushed yet (or error): the caller has to come back */
    if (status != 0) return status;

    /* frame done: remember its sizes (and checksum) for the seek table */
    if (zcs->framelog.checksumFlag) {
        frameChecksum = (U32)(XXH64_digest(&zcs->xxhState) & 0xFFFFFFFFU);
    }
    status = ZSTD_seekable_logFrame(&zcs->framelog,
                                    zcs->frameCSize, zcs->frameDSize,
                                    frameChecksum);
    if (status != 0) return status;

    /* start the next frame from a clean slate */
    zcs->frameCSize = 0;
    zcs->frameDSize = 0;

    ZSTD_CCtx_reset(zcs->cstream, ZSTD_reset_session_only);
    if (zcs->framelog.checksumFlag) XXH64_reset(&zcs->xxhState, 0);

    return 0;
}
|
||||
|
||||
/* Compresses input into the current frame, never letting a frame take more
 * than maxFrameSize input bytes; closes the frame when the limit is reached.
 * @return : an error code, or the number of input bytes the current frame
 *           can still take (maxFrameSize right after a frame was closed). */
size_t ZSTD_seekable_compressStream(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
{
    const BYTE* const srcStart = (const BYTE*) input->src + input->pos;
    size_t const available = input->size - input->pos;
    size_t chunkLen;

    assert(zcs->maxFrameSize < INT_MAX);
    ZSTD_CCtx_setParameter(zcs->cstream, ZSTD_c_srcSizeHint, (int)zcs->maxFrameSize);

    /* feed at most what still fits in the current frame */
    {   size_t const roomInFrame = (size_t)(zcs->maxFrameSize - zcs->frameDSize);
        chunkLen = MIN(available, roomInFrame);
    }

    /* if we haven't finished flushing the last frame, don't start writing a new one */
    if (chunkLen > 0) {
        ZSTD_inBuffer chunk = { srcStart, chunkLen, 0 };
        size_t const startPos = output->pos;
        size_t const status = ZSTD_compressStream(zcs->cstream, output, &chunk);

        if (zcs->framelog.checksumFlag) {
            XXH64_update(&zcs->xxhState, srcStart, chunk.pos);
        }

        /* account for the progress before looking at the status */
        zcs->frameCSize += (U32)(output->pos - startPos);
        zcs->frameDSize += (U32)chunk.pos;
        input->pos += chunk.pos;

        if (ZSTD_isError(status)) return status;
    }

    if (zcs->frameDSize != zcs->maxFrameSize) {
        return (size_t)(zcs->maxFrameSize - zcs->frameDSize);
    }

    /* frame is full: log it and start over */
    {   size_t const status = ZSTD_seekable_endFrame(zcs, output);
        if (ZSTD_isError(status)) return status;
    }

    /* get the client ready for the next frame */
    return (size_t)zcs->maxFrameSize;
}
|
||||
|
||||
static inline size_t ZSTD_seekable_seekTableSize(const ZSTD_frameLog* fl)
|
||||
{
|
||||
size_t const sizePerFrame = 8 + (fl->checksumFlag?4:0);
|
||||
size_t const seekTableLen = ZSTD_SKIPPABLEHEADERSIZE +
|
||||
sizePerFrame * fl->size +
|
||||
ZSTD_seekTableFooterSize;
|
||||
|
||||
return seekTableLen;
|
||||
}
|
||||
|
||||
static inline size_t ZSTD_stwrite32(ZSTD_frameLog* fl,
|
||||
ZSTD_outBuffer* output, U32 const value,
|
||||
U32 const offset)
|
||||
{
|
||||
if (fl->seekTablePos < offset + 4) {
|
||||
BYTE tmp[4]; /* so that we can work with buffers too small to write a whole word to */
|
||||
size_t const lenWrite =
|
||||
MIN(output->size - output->pos, offset + 4 - fl->seekTablePos);
|
||||
MEM_writeLE32(tmp, value);
|
||||
memcpy((BYTE*)output->dst + output->pos,
|
||||
tmp + (fl->seekTablePos - offset), lenWrite);
|
||||
output->pos += lenWrite;
|
||||
fl->seekTablePos += (U32)lenWrite;
|
||||
|
||||
if (lenWrite < 4) return ZSTD_seekable_seekTableSize(fl) - fl->seekTablePos;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Writes the seek table (a skippable frame) into `output`, resuming where a
 * previous call stopped.
 *
 * Progress lives in the frame log:
 *   seekTableIndex : index of the record currently being written
 *   seekTablePos   : number of seek-table bytes written so far
 * so a call that runs out of output space can simply return early and be
 * repeated with more space.
 *
 * @return : 0 once the whole table has been written, a non-zero byte count
 *           when more output space is needed, or an error code. */
size_t ZSTD_seekable_writeSeekTable(ZSTD_frameLog* fl, ZSTD_outBuffer* output)
{
    size_t const recordBytes = 8 + (fl->checksumFlag?4:0);
    size_t const totalLen = ZSTD_seekable_seekTableSize(fl);

    /* skippable frame header: magic number, then content size */
    CHECK_Z(ZSTD_stwrite32(fl, output, ZSTD_MAGIC_SKIPPABLE_START | 0xE, 0));
    assert(totalLen <= (size_t)UINT_MAX);
    CHECK_Z(ZSTD_stwrite32(fl, output, (U32)totalLen - ZSTD_SKIPPABLEHEADERSIZE, 4));

    /* one record per logged frame; the index only advances after a record
     * has been written completely (CHECK_Z leaves the function otherwise) */
    for ( ; fl->seekTableIndex < fl->size; fl->seekTableIndex++) {
        unsigned long long const recordStart =
                ZSTD_SKIPPABLEHEADERSIZE + recordBytes * fl->seekTableIndex;
        assert(recordStart + 8 <= UINT_MAX);

        CHECK_Z(ZSTD_stwrite32(fl, output,
                               fl->entries[fl->seekTableIndex].cSize,
                               (U32)recordStart + 0));

        CHECK_Z(ZSTD_stwrite32(fl, output,
                               fl->entries[fl->seekTableIndex].dSize,
                               (U32)recordStart + 4));

        if (fl->checksumFlag) {
            CHECK_Z(ZSTD_stwrite32(
                    fl, output, fl->entries[fl->seekTableIndex].checksum,
                    (U32)recordStart + 8));
        }
    }

    /* footer: number of frames, descriptor byte, seekable magic number */
    assert(totalLen <= UINT_MAX);
    CHECK_Z(ZSTD_stwrite32(fl, output, fl->size,
                           (U32)totalLen - ZSTD_seekTableFooterSize));

    if (output->size - output->pos < 1) return totalLen - fl->seekTablePos;
    if (fl->seekTablePos < totalLen - 4) {
        /* descriptor byte: the checksum flag goes into the top bit */
        BYTE const descriptor = (BYTE)((fl->checksumFlag) << 7);

        ((BYTE*)output->dst)[output->pos] = descriptor;
        output->pos++;
        fl->seekTablePos++;
    }

    CHECK_Z(ZSTD_stwrite32(fl, output, ZSTD_SEEKABLE_MAGICNUMBER,
                           (U32)totalLen - 4));

    if (fl->seekTablePos != totalLen) return ERROR(GENERIC);
    return 0;
}
|
||||
|
||||
/* Finishes the archive: ends the current frame, then writes the seek table.
 * May need several calls when `output` is small.
 * @return : 0 when everything has been written, an error code, or a
 *           non-zero hint of how many bytes are still to be written. */
size_t ZSTD_seekable_endStream(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output)
{
    if (!zcs->writingSeekTable) {
        size_t const frameStatus = ZSTD_seekable_endFrame(zcs, output);
        if (ZSTD_isError(frameStatus)) return frameStatus;
        if (frameStatus != 0) {
            /* frame not flushed yet: return an accurate size hint that also
             * counts the seek table that will follow */
            return frameStatus + ZSTD_seekable_seekTableSize(&zcs->framelog);
        }
    }

    /* from now on, calls go straight to the seek table */
    zcs->writingSeekTable = 1;

    return ZSTD_seekable_writeSeekTable(&zcs->framelog, output);
}
|
@ -0,0 +1,600 @@
|
||||
/*
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
/* *********************************************************
|
||||
* Turn on Large Files support (>4GB) for 32-bit Linux/Unix
|
||||
***********************************************************/
|
||||
#if !defined(__64BIT__) || defined(__MINGW32__) /* No point defining Large file for 64 bit but MinGW-w64 requires it */
|
||||
# if !defined(_FILE_OFFSET_BITS)
|
||||
# define _FILE_OFFSET_BITS 64 /* turn off_t into a 64-bit type for ftello, fseeko */
|
||||
# endif
|
||||
# if !defined(_LARGEFILE_SOURCE) /* obsolete macro, replaced with _FILE_OFFSET_BITS */
|
||||
# define _LARGEFILE_SOURCE 1 /* Large File Support extension (LFS) - fseeko, ftello */
|
||||
# endif
|
||||
# if defined(_AIX) || defined(__hpux)
|
||||
# define _LARGE_FILES /* Large file support on 32-bits AIX and HP-UX */
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* ************************************************************
|
||||
* Detect POSIX version
|
||||
* PLATFORM_POSIX_VERSION = 0 for non-Unix e.g. Windows
|
||||
* PLATFORM_POSIX_VERSION = 1 for Unix-like but non-POSIX
|
||||
* PLATFORM_POSIX_VERSION > 1 is equal to found _POSIX_VERSION
|
||||
* Value of PLATFORM_POSIX_VERSION can be forced on command line
|
||||
***************************************************************/
|
||||
#ifndef PLATFORM_POSIX_VERSION
|
||||
|
||||
# if (defined(__APPLE__) && defined(__MACH__)) || defined(__SVR4) || defined(_AIX) || defined(__hpux) /* POSIX.1-2001 (SUSv3) conformant */ \
|
||||
|| defined(__DragonFly__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) /* BSD distros */
|
||||
/* exception rule : force posix version to 200112L,
|
||||
* note: it's better to use unistd.h's _POSIX_VERSION whenever possible */
|
||||
# define PLATFORM_POSIX_VERSION 200112L
|
||||
|
||||
/* try to determine posix version through official unistd.h's _POSIX_VERSION (https://pubs.opengroup.org/onlinepubs/7908799/xsh/unistd.h.html).
|
||||
* note : there is no simple way to know in advance if <unistd.h> is present or not on target system,
|
||||
* Posix specification mandates its presence and its content, but target system must respect this spec.
|
||||
* It's necessary to _not_ #include <unistd.h> whenever target OS is not unix-like
|
||||
* otherwise it will block preprocessing stage.
|
||||
* The following list of build macros tries to "guess" if target OS is likely unix-like, and therefore can #include <unistd.h>
|
||||
*/
|
||||
# elif !defined(_WIN32) \
|
||||
&& ( defined(__unix__) || defined(__unix) \
|
||||
|| defined(__midipix__) || defined(__VMS) || defined(__HAIKU__) )
|
||||
|
||||
# if defined(__linux__) || defined(__linux) || defined(__CYGWIN__)
|
||||
# ifndef _POSIX_C_SOURCE
|
||||
# define _POSIX_C_SOURCE 200809L /* feature test macro : https://www.gnu.org/software/libc/manual/html_node/Feature-Test-Macros.html */
|
||||
# endif
|
||||
# endif
|
||||
# include <unistd.h> /* declares _POSIX_VERSION */
|
||||
# if defined(_POSIX_VERSION) /* POSIX compliant */
|
||||
# define PLATFORM_POSIX_VERSION _POSIX_VERSION
|
||||
# else
|
||||
# define PLATFORM_POSIX_VERSION 1
|
||||
# endif
|
||||
|
||||
# ifdef __UCLIBC__
|
||||
# ifndef __USE_MISC
|
||||
# define __USE_MISC /* enable st_mtim on uclibc */
|
||||
# endif
|
||||
# endif
|
||||
|
||||
# else /* non-unix target platform (like Windows) */
|
||||
# define PLATFORM_POSIX_VERSION 0
|
||||
# endif
|
||||
|
||||
#endif /* PLATFORM_POSIX_VERSION */
|
||||
|
||||
|
||||
/* ************************************************************
|
||||
* Avoid fseek()'s 2GiB barrier with MSVC, macOS, *BSD, MinGW
|
||||
***************************************************************/
|
||||
#if defined(LIBC_NO_FSEEKO)
|
||||
/* Some older libc implementations don't include these functions (e.g. Bionic < 24) */
|
||||
# define LONG_SEEK fseek
|
||||
#elif defined(_MSC_VER) && _MSC_VER >= 1400
|
||||
# define LONG_SEEK _fseeki64
|
||||
#elif !defined(__64BIT__) && (PLATFORM_POSIX_VERSION >= 200112L) /* No point defining Large file for 64 bit */
|
||||
# define LONG_SEEK fseeko
|
||||
#elif defined(__MINGW32__) && !defined(__STRICT_ANSI__) && !defined(__NO_MINGW_LFS) && defined(__MSVCRT__)
|
||||
# define LONG_SEEK fseeko64
|
||||
#elif defined(_WIN32) && !defined(__DJGPP__)
|
||||
# include <windows.h>
|
||||
/* 64-bit seek built on SetFilePointerEx(), used when no libc variant was
 * selected above. Any origin other than SEEK_END / SEEK_CUR is treated as
 * a seek from the beginning of the file.
 * @return : 0 on success, -1 on failure. */
static int LONG_SEEK(FILE* file, __int64 offset, int origin) {
    LARGE_INTEGER distance;
    DWORD moveMethod = FILE_BEGIN;

    distance.QuadPart = offset;
    if (origin == SEEK_END) {
        moveMethod = FILE_END;
    } else if (origin == SEEK_CUR) {
        moveMethod = FILE_CURRENT;
    }

    return SetFilePointerEx((HANDLE) _get_osfhandle(_fileno(file)), distance, NULL, moveMethod)
         ? 0 : -1;
}
|
||||
#else
|
||||
# define LONG_SEEK fseek
|
||||
#endif
|
||||
|
||||
#include <stdlib.h> /* malloc, free */
|
||||
#include <stdio.h> /* FILE* */
|
||||
#include <limits.h> /* UINT_MAX */
|
||||
#include <assert.h>
|
||||
|
||||
#define XXH_STATIC_LINKING_ONLY
|
||||
#include "xxhash.h"
|
||||
|
||||
#define ZSTD_STATIC_LINKING_ONLY
|
||||
#include "zstd.h"
|
||||
#include "zstd_errors.h"
|
||||
#include "mem.h"
|
||||
#include "zstd_seekable.h"
|
||||
|
||||
#undef ERROR
|
||||
#define ERROR(name) ((size_t)-ZSTD_error_##name)
|
||||
|
||||
#define CHECK_IO(f) { int const errcod = (f); if (errcod < 0) return ERROR(seekableIO); }
|
||||
|
||||
#undef MIN
|
||||
#undef MAX
|
||||
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
#define ZSTD_SEEKABLE_NO_OUTPUT_PROGRESS_MAX 16
|
||||
|
||||
/* Special-case callbacks for FILE* and in-memory modes, so that we can treat
|
||||
* them the same way as the advanced API */
|
||||
/* Read callback for FILE* sources.
 * `opaque` is the FILE*. A short read counts as a failure.
 * @return : 0 when exactly n bytes were read, -1 otherwise. */
static int ZSTD_seekable_read_FILE(void* opaque, void* buffer, size_t n)
{
    FILE* const stream = (FILE*)opaque;
    return (fread(buffer, 1, n, stream) == n) ? 0 : -1;
}
|
||||
|
||||
/* Seek callback for FILE* sources.
 * `opaque` is the FILE*. The stream is flushed after a successful seek.
 * @return : the non-zero LONG_SEEK result on failure, else the fflush() result. */
static int ZSTD_seekable_seek_FILE(void* opaque, long long offset, int origin)
{
    FILE* const stream = (FILE*)opaque;
    int const seekStatus = LONG_SEEK(stream, offset, origin);

    return seekStatus ? seekStatus : fflush(stream);
}
|
||||
|
||||
/* In-memory source: a read-only buffer and a cursor into it.
 * The read/seek callbacks below keep pos <= size. */
typedef struct {
    const void *ptr;    /* start of the buffer */
    size_t size;        /* total number of bytes in the buffer */
    size_t pos;         /* current read position */
} buffWrapper_t;

/* Read callback for in-memory sources.
 * Copies n bytes from the cursor into `buffer` and advances the cursor.
 * @return : 0 on success, -1 (nothing copied, cursor unchanged) when fewer
 *           than n bytes remain. */
static int ZSTD_seekable_read_buff(void* opaque, void* buffer, size_t n)
{
    buffWrapper_t* const buff = (buffWrapper_t*)opaque;
    assert(buff != NULL);
    /* compare n with the bytes that remain, rather than pos + n with size:
     * the sum can wrap around for a huge n and slip past the check */
    if (buff->pos > buff->size || n > buff->size - buff->pos) return -1;
    memcpy(buffer, (const unsigned char*)buff->ptr + buff->pos, n);
    buff->pos += n;
    return 0;
}

/* Seek callback for in-memory sources.
 * Moves the cursor to `offset` relative to `origin` (SEEK_SET, SEEK_CUR or
 * SEEK_END).
 * @return : 0 on success, -1 (cursor unchanged) when `origin` is not one of
 *           the three values or the target lies outside [0, size]. */
static int ZSTD_seekable_seek_buff(void* opaque, long long offset, int origin)
{
    buffWrapper_t* const buff = (buffWrapper_t*) opaque;
    long long base;
    assert(buff != NULL);

    switch (origin) {
    case SEEK_SET:
        base = 0;
        break;
    case SEEK_CUR:
        base = (long long)buff->pos;
        break;
    case SEEK_END:
        base = (long long)buff->size;
        break;
    default:
        /* unknown origin: fail instead of using an unset offset */
        return -1;
    }

    if (offset < 0) {
        /* would land before the start of the buffer */
        if (offset < -base) return -1;
    } else {
        /* would land past the end; base <= size, so the subtraction is safe */
        if ((unsigned long long)offset
                > (unsigned long long)buff->size - (unsigned long long)base) {
            return -1;
        }
    }

    buff->pos = (size_t)(base + offset);
    return 0;
}
|
||||
|
||||
typedef struct {
|
||||
U64 cOffset;
|
||||
U64 dOffset;
|
||||
U32 checksum;
|
||||
} seekEntry_t;
|
||||
|
||||
struct ZSTD_seekTable_s {
|
||||
seekEntry_t* entries;
|
||||
size_t tableLen;
|
||||
|
||||
int checksumFlag;
|
||||
};
|
||||
|
||||
#define SEEKABLE_BUFF_SIZE ZSTD_BLOCKSIZE_MAX
|
||||
|
||||
struct ZSTD_seekable_s {
|
||||
ZSTD_DStream* dstream;
|
||||
ZSTD_seekTable seekTable;
|
||||
ZSTD_seekable_customFile src;
|
||||
|
||||
U64 decompressedOffset;
|
||||
U32 curFrame;
|
||||
|
||||
BYTE inBuff[SEEKABLE_BUFF_SIZE]; /* need to do our own input buffering */
|
||||
BYTE outBuff[SEEKABLE_BUFF_SIZE]; /* so we can efficiently decompress the
|
||||
starts of chunks before we get to the
|
||||
desired section */
|
||||
ZSTD_inBuffer in; /* maintain continuity across ZSTD_seekable_decompress operations */
|
||||
buffWrapper_t buffWrapper; /* for `src.opaque` in in-memory mode */
|
||||
|
||||
XXH64_state_t xxhState;
|
||||
};
|
||||
|
||||
ZSTD_seekable* ZSTD_seekable_create(void)
|
||||
{
|
||||
ZSTD_seekable* const zs = (ZSTD_seekable*)malloc(sizeof(ZSTD_seekable));
|
||||
if (zs == NULL) return NULL;
|
||||
|
||||
/* also initializes stage to zsds_init */
|
||||
memset(zs, 0, sizeof(*zs));
|
||||
|
||||
zs->dstream = ZSTD_createDStream();
|
||||
if (zs->dstream == NULL) {
|
||||
free(zs);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return zs;
|
||||
}
|
||||
|
||||
/* Frees a seekable decompressor, its DStream and its seek table entries.
 * Accepts NULL. Always returns 0. */
size_t ZSTD_seekable_free(ZSTD_seekable* zs)
{
    if (zs != NULL) {
        ZSTD_freeDStream(zs->dstream);
        free(zs->seekTable.entries);
        free(zs);
    }
    return 0;
}
|
||||
|
||||
ZSTD_seekTable* ZSTD_seekTable_create_fromSeekable(const ZSTD_seekable* zs)
|
||||
{
|
||||
assert(zs != NULL);
|
||||
if (zs->seekTable.entries == NULL) return NULL;
|
||||
ZSTD_seekTable* const st = (ZSTD_seekTable*)malloc(sizeof(ZSTD_seekTable));
|
||||
if (st==NULL) return NULL;
|
||||
|
||||
st->checksumFlag = zs->seekTable.checksumFlag;
|
||||
st->tableLen = zs->seekTable.tableLen;
|
||||
|
||||
/* Allocate an extra entry at the end to match logic of initial allocation */
|
||||
size_t const entriesSize = sizeof(seekEntry_t) * (zs->seekTable.tableLen + 1);
|
||||
seekEntry_t* const entries = (seekEntry_t*)malloc(entriesSize);
|
||||
if (entries==NULL) {
|
||||
free(st);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
memcpy(entries, zs->seekTable.entries, entriesSize);
|
||||
st->entries = entries;
|
||||
return st;
|
||||
}
|
||||
|
||||
/* Frees a seek table and its entries. Accepts NULL. Always returns 0. */
size_t ZSTD_seekTable_free(ZSTD_seekTable* st)
{
    if (st != NULL) {
        free(st->entries);
        free(st);
    }
    return 0;
}
|
||||
|
||||
/** ZSTD_seekable_offsetToFrameIndex() :
|
||||
* Performs a binary search to find the last frame with a decompressed offset
|
||||
* <= pos
|
||||
* @return : the frame's index */
|
||||
unsigned ZSTD_seekable_offsetToFrameIndex(const ZSTD_seekable* zs, unsigned long long pos)
|
||||
{
|
||||
return ZSTD_seekTable_offsetToFrameIndex(&zs->seekTable, pos);
|
||||
}
|
||||
|
||||
unsigned ZSTD_seekTable_offsetToFrameIndex(const ZSTD_seekTable* st, unsigned long long pos)
|
||||
{
|
||||
U32 lo = 0;
|
||||
U32 hi = (U32)st->tableLen;
|
||||
assert(st->tableLen <= UINT_MAX);
|
||||
|
||||
if (pos >= st->entries[st->tableLen].dOffset) {
|
||||
return (unsigned)st->tableLen;
|
||||
}
|
||||
|
||||
while (lo + 1 < hi) {
|
||||
U32 const mid = lo + ((hi - lo) >> 1);
|
||||
if (st->entries[mid].dOffset <= pos) {
|
||||
lo = mid;
|
||||
} else {
|
||||
hi = mid;
|
||||
}
|
||||
}
|
||||
return lo;
|
||||
}
|
||||
|
||||
unsigned ZSTD_seekable_getNumFrames(const ZSTD_seekable* zs)
|
||||
{
|
||||
return ZSTD_seekTable_getNumFrames(&zs->seekTable);
|
||||
}
|
||||
|
||||
unsigned ZSTD_seekTable_getNumFrames(const ZSTD_seekTable* st)
|
||||
{
|
||||
assert(st->tableLen <= UINT_MAX);
|
||||
return (unsigned)st->tableLen;
|
||||
}
|
||||
|
||||
unsigned long long ZSTD_seekable_getFrameCompressedOffset(const ZSTD_seekable* zs, unsigned frameIndex)
|
||||
{
|
||||
return ZSTD_seekTable_getFrameCompressedOffset(&zs->seekTable, frameIndex);
|
||||
}
|
||||
|
||||
unsigned long long ZSTD_seekTable_getFrameCompressedOffset(const ZSTD_seekTable* st, unsigned frameIndex)
|
||||
{
|
||||
if (frameIndex >= st->tableLen) return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE;
|
||||
return st->entries[frameIndex].cOffset;
|
||||
}
|
||||
|
||||
unsigned long long ZSTD_seekable_getFrameDecompressedOffset(const ZSTD_seekable* zs, unsigned frameIndex)
|
||||
{
|
||||
return ZSTD_seekTable_getFrameDecompressedOffset(&zs->seekTable, frameIndex);
|
||||
}
|
||||
|
||||
unsigned long long ZSTD_seekTable_getFrameDecompressedOffset(const ZSTD_seekTable* st, unsigned frameIndex)
|
||||
{
|
||||
if (frameIndex >= st->tableLen) return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE;
|
||||
return st->entries[frameIndex].dOffset;
|
||||
}
|
||||
|
||||
size_t ZSTD_seekable_getFrameCompressedSize(const ZSTD_seekable* zs, unsigned frameIndex)
|
||||
{
|
||||
return ZSTD_seekTable_getFrameCompressedSize(&zs->seekTable, frameIndex);
|
||||
}
|
||||
|
||||
size_t ZSTD_seekTable_getFrameCompressedSize(const ZSTD_seekTable* st, unsigned frameIndex)
|
||||
{
|
||||
if (frameIndex >= st->tableLen) return ERROR(frameIndex_tooLarge);
|
||||
return st->entries[frameIndex + 1].cOffset -
|
||||
st->entries[frameIndex].cOffset;
|
||||
}
|
||||
|
||||
size_t ZSTD_seekable_getFrameDecompressedSize(const ZSTD_seekable* zs, unsigned frameIndex)
|
||||
{
|
||||
return ZSTD_seekTable_getFrameDecompressedSize(&zs->seekTable, frameIndex);
|
||||
}
|
||||
|
||||
size_t ZSTD_seekTable_getFrameDecompressedSize(const ZSTD_seekTable* st, unsigned frameIndex)
|
||||
{
|
||||
if (frameIndex > st->tableLen) return ERROR(frameIndex_tooLarge);
|
||||
return st->entries[frameIndex + 1].dOffset -
|
||||
st->entries[frameIndex].dOffset;
|
||||
}
|
||||
|
||||
/* Reads the seek table stored at the end of `zs->src` and turns it into the
 * cumulative-offset table `zs->seekTable`.
 *
 * Steps: read and validate the fixed-size footer, seek back to the start of
 * the skippable frame, validate its header, then read the per-frame records
 * through zs->inBuff (refilled as needed), accumulating the offsets.
 *
 * @return : 0 on success; seekableIO when a seek/read callback fails,
 *           prefix_unknown on a bad magic number or frame size,
 *           corruption_detected on reserved bits or an impossible frame
 *           count, memory_allocation when the table cannot be allocated.
 *           On failure zs->seekTable is left unchanged. */
static size_t ZSTD_seekable_loadSeekTable(ZSTD_seekable* zs)
{
    int checksumFlag;
    ZSTD_seekable_customFile src = zs->src;

    /* read the footer, fixed size */
    CHECK_IO(src.seek(src.opaque, -(int)ZSTD_seekTableFooterSize, SEEK_END));
    CHECK_IO(src.read(src.opaque, zs->inBuff, ZSTD_seekTableFooterSize));

    if (MEM_readLE32(zs->inBuff + 5) != ZSTD_SEEKABLE_MAGICNUMBER) {
        return ERROR(prefix_unknown);
    }

    {   BYTE const sfd = zs->inBuff[4];
        checksumFlag = sfd >> 7;

        /* check reserved bits */
        if ((sfd >> 2) & 0x1f) {
            return ERROR(corruption_detected);
    }   }

    {   U32 const numFrames = MEM_readLE32(zs->inBuff);
        U32 const sizePerEntry = 8 + (checksumFlag?4:0);

        /* The writer never logs more than ZSTD_SEEKABLE_MAXFRAMES frames.
         * Rejecting larger counts also keeps the 32-bit size arithmetic and
         * the allocation size below from wrapping around. */
        if (numFrames > ZSTD_SEEKABLE_MAXFRAMES) {
            return ERROR(corruption_detected);
        }

        {   U32 const tableSize = sizePerEntry * numFrames;
            U32 const frameSize = tableSize + ZSTD_seekTableFooterSize + ZSTD_SKIPPABLEHEADERSIZE;

            U32 remaining = frameSize - ZSTD_seekTableFooterSize; /* don't need to re-read footer */
            {   U32 const toRead = MIN(remaining, SEEKABLE_BUFF_SIZE);
                CHECK_IO(src.seek(src.opaque, -(S64)frameSize, SEEK_END));
                CHECK_IO(src.read(src.opaque, zs->inBuff, toRead));
                remaining -= toRead;
            }

            if (MEM_readLE32(zs->inBuff) != (ZSTD_MAGIC_SKIPPABLE_START | 0xE)) {
                return ERROR(prefix_unknown);
            }
            if (MEM_readLE32(zs->inBuff+4) + ZSTD_SKIPPABLEHEADERSIZE != frameSize) {
                return ERROR(prefix_unknown);
            }

            {   /* Allocate an extra entry at the end so that we can do size
                 * computations on the last element without special case */
                seekEntry_t* const entries =
                    (seekEntry_t*)malloc(sizeof(seekEntry_t) * ((size_t)numFrames + 1));

                U32 idx = 0;
                U32 pos = 8;    /* records start right after the 8-byte header */

                U64 cOffset = 0;
                U64 dOffset = 0;

                if (entries == NULL) return ERROR(memory_allocation);

                /* compute cumulative positions */
                for (; idx < numFrames; idx++) {
                    if (pos + sizePerEntry > SEEKABLE_BUFF_SIZE) {
                        /* next record is not entirely in the buffer: refill */
                        U32 const offset = SEEKABLE_BUFF_SIZE - pos;
                        U32 const toRead = MIN(remaining, SEEKABLE_BUFF_SIZE - offset);
                        memmove(zs->inBuff, zs->inBuff + pos, offset); /* move any data we haven't read yet */
                        if (src.read(src.opaque, zs->inBuff+offset, toRead) < 0) {
                            /* do not leak the partially built table */
                            free(entries);
                            return ERROR(seekableIO);
                        }
                        remaining -= toRead;
                        pos = 0;
                    }
                    entries[idx].cOffset = cOffset;
                    entries[idx].dOffset = dOffset;

                    cOffset += MEM_readLE32(zs->inBuff + pos);
                    pos += 4;
                    dOffset += MEM_readLE32(zs->inBuff + pos);
                    pos += 4;
                    if (checksumFlag) {
                        entries[idx].checksum = MEM_readLE32(zs->inBuff + pos);
                        pos += 4;
                    } else {
                        entries[idx].checksum = 0;  /* keep the field defined */
                    }
                }
                entries[numFrames].cOffset = cOffset;
                entries[numFrames].dOffset = dOffset;
                entries[numFrames].checksum = 0;

                /* drop a table left over from an earlier init of this object
                 * (entries is NULL on first use, and free(NULL) is a no-op) */
                free(zs->seekTable.entries);
                zs->seekTable.entries = entries;
                zs->seekTable.tableLen = numFrames;
                zs->seekTable.checksumFlag = checksumFlag;
                return 0;
            }
        }
    }
}
|
||||
|
||||
/* Initialises `zs` to read a seekable archive held in memory.
 * The buffer is not copied: `zs` keeps pointing at `src`.
 * @return : result of ZSTD_seekable_initAdvanced(). */
size_t ZSTD_seekable_initBuff(ZSTD_seekable* zs, const void* src, size_t srcSize)
{
    ZSTD_seekable_customFile memFile;

    /* the wrapper lives inside zs, with its cursor at the start of the buffer */
    zs->buffWrapper.ptr = src;
    zs->buffWrapper.size = srcSize;
    zs->buffWrapper.pos = 0;

    memFile.opaque = &zs->buffWrapper;
    memFile.read = &ZSTD_seekable_read_buff;
    memFile.seek = &ZSTD_seekable_seek_buff;

    return ZSTD_seekable_initAdvanced(zs, memFile);
}
|
||||
|
||||
/* Initialises `zs` to read a seekable archive from a FILE*.
 * @return : result of ZSTD_seekable_initAdvanced(). */
size_t ZSTD_seekable_initFile(ZSTD_seekable* zs, FILE* src)
{
    ZSTD_seekable_customFile fileSrc;

    fileSrc.opaque = src;
    fileSrc.read = &ZSTD_seekable_read_FILE;
    fileSrc.seek = &ZSTD_seekable_seek_FILE;

    return ZSTD_seekable_initAdvanced(zs, fileSrc);
}
|
||||
|
||||
/* Initialises `zs` to read a seekable archive through user callbacks:
 * loads the seek table, then resets the decoding position and the DStream.
 * @return : 0 on success, or the error code of the step that failed. */
size_t ZSTD_seekable_initAdvanced(ZSTD_seekable* zs, ZSTD_seekable_customFile src)
{
    size_t status;

    zs->src = src;

    status = ZSTD_seekable_loadSeekTable(zs);
    if (ZSTD_isError(status)) return status;

    /* all-ones markers: no frame is currently being decoded */
    zs->decompressedOffset = (U64)-1;
    zs->curFrame = (U32)-1;

    status = ZSTD_initDStream(zs->dstream);
    if (ZSTD_isError(status)) return status;

    return 0;
}
|
||||
|
||||
/* ZSTD_seekable_decompress():
 * Regenerates up to `len` bytes of decompressed content, starting at
 * decompressed position `offset`, into `dst`.
 *
 * The request is clamped to the end of the data described by the seek table
 * (the dOffset of the sentinel entry at index tableLen). A request starting
 * at or beyond that position produces no data.
 *
 * When the target frame is the one currently being decoded and `offset` is
 * not behind the current position, decoding continues from where the previous
 * call stopped; otherwise the source is repositioned to the start of the
 * target frame and the stream/checksum state is reset. Bytes located before
 * `offset` inside the frame are decoded into the scratch output buffer and
 * discarded.
 *
 * @return : number of bytes written into `dst` (possibly less than `len`,
 *           0 when `offset` is at or past the end of the data),
 *           or an error code, which can be tested with ZSTD_isError(). */
size_t ZSTD_seekable_decompress(ZSTD_seekable* zs, void* dst, size_t len, unsigned long long offset)
{
    unsigned long long const eos = zs->seekTable.entries[zs->seekTable.tableLen].dOffset;
    U32 targetFrame;
    U32 noOutputProgressCount = 0;
    size_t srcBytesRead = 0;

    /* Nothing to deliver at or past the end of the data.
     * Checking this first also prevents `eos - offset` from wrapping around. */
    if (offset >= eos) {
        return 0;
    }
    /* Clamp the request to the available data.
     * Written as a subtraction so that `offset + len` cannot overflow. */
    if (len > eos - offset) {
        len = (size_t)(eos - offset);
    }

    targetFrame = ZSTD_seekable_offsetToFrameIndex(zs, offset);
    do {
        /* check if we can continue from a previous decompress job */
        if (targetFrame != zs->curFrame || offset < zs->decompressedOffset) {
            zs->decompressedOffset = zs->seekTable.entries[targetFrame].dOffset;
            zs->curFrame = targetFrame;

            assert(zs->seekTable.entries[targetFrame].cOffset < LLONG_MAX);
            CHECK_IO(zs->src.seek(zs->src.opaque,
                                  (long long)zs->seekTable.entries[targetFrame].cOffset,
                                  SEEK_SET));
            zs->in = (ZSTD_inBuffer){zs->inBuff, 0, 0};
            XXH64_reset(&zs->xxhState, 0);
            ZSTD_DCtx_reset(zs->dstream, ZSTD_reset_session_only);
            /* when a buffer source size is recorded, refuse to consume more
             * compressed bytes than that size within a single call */
            if (zs->buffWrapper.size && srcBytesRead > zs->buffWrapper.size) {
                return ERROR(seekableIO);
            }
        }

        while (zs->decompressedOffset < offset + len) {
            size_t toRead;
            ZSTD_outBuffer outTmp;
            size_t prevOutPos;
            size_t prevInPos;
            size_t forwardProgress;
            if (zs->decompressedOffset < offset) {
                /* dummy decompressions until we get to the target offset */
                outTmp = (ZSTD_outBuffer){zs->outBuff, (size_t) (MIN(SEEKABLE_BUFF_SIZE, offset - zs->decompressedOffset)), 0};
            } else {
                outTmp = (ZSTD_outBuffer){dst, len, (size_t) (zs->decompressedOffset - offset)};
            }

            prevOutPos = outTmp.pos;
            prevInPos = zs->in.pos;
            toRead = ZSTD_decompressStream(zs->dstream, &outTmp, &zs->in);
            if (ZSTD_isError(toRead)) {
                return toRead;
            }

            /* the checksum covers every regenerated byte of the frame,
             * including the discarded ones */
            if (zs->seekTable.checksumFlag) {
                XXH64_update(&zs->xxhState, (BYTE*)outTmp.dst + prevOutPos,
                             outTmp.pos - prevOutPos);
            }
            forwardProgress = outTmp.pos - prevOutPos;
            if (forwardProgress == 0) {
                /* give up after too many consecutive rounds without output */
                if (noOutputProgressCount++ > ZSTD_SEEKABLE_NO_OUTPUT_PROGRESS_MAX) {
                    return ERROR(seekableIO);
                }
            } else {
                noOutputProgressCount = 0;
            }
            zs->decompressedOffset += forwardProgress;
            srcBytesRead += zs->in.pos - prevInPos;

            if (toRead == 0) {
                /* frame complete */

                /* verify checksum : low 32 bits of XXH64 against the seek table entry */
                if (zs->seekTable.checksumFlag &&
                    (XXH64_digest(&zs->xxhState) & 0xFFFFFFFFU) !=
                            zs->seekTable.entries[targetFrame].checksum) {
                    return ERROR(corruption_detected);
                }

                if (zs->decompressedOffset < offset + len) {
                    /* go back to the start and force a reset of the stream */
                    targetFrame = ZSTD_seekable_offsetToFrameIndex(zs, zs->decompressedOffset);
                    /* in this case it will fail later with corruption_detected, since last block does not have checksum */
                    assert(targetFrame != zs->seekTable.tableLen);
                }
                break;
            }

            /* read in more data if we're done with this buffer */
            if (zs->in.pos == zs->in.size) {
                toRead = MIN(toRead, SEEKABLE_BUFF_SIZE);
                CHECK_IO(zs->src.read(zs->src.opaque, zs->inBuff, toRead));
                zs->in.size = toRead;
                zs->in.pos = 0;
            }
        }  /* while (zs->decompressedOffset < offset + len) */
    } while (zs->decompressedOffset != offset + len);

    return len;
}
|
||||
|
||||
/* ZSTD_seekable_decompressFrame():
 * Regenerates the full content of frame number `frameIndex` into `dst`.
 * The frame's decompressed size is the distance between its own dOffset and
 * the next entry's dOffset in the seek table.
 * @return : result of ZSTD_seekable_decompress() for that range,
 *           or ERROR(frameIndex_tooLarge) if `frameIndex` is not a valid frame,
 *           or ERROR(dstSize_tooSmall) if `dstSize` cannot hold the frame. */
size_t ZSTD_seekable_decompressFrame(ZSTD_seekable* zs, void* dst, size_t dstSize, unsigned frameIndex)
{
    size_t frameContentSize;

    if (!(frameIndex < zs->seekTable.tableLen)) {
        return ERROR(frameIndex_tooLarge);
    }

    frameContentSize = zs->seekTable.entries[frameIndex + 1].dOffset
                     - zs->seekTable.entries[frameIndex].dOffset;
    if (frameContentSize > dstSize) {
        return ERROR(dstSize_tooSmall);
    }

    return ZSTD_seekable_decompress(zs, dst, frameContentSize,
                                    zs->seekTable.entries[frameIndex].dOffset);
}
|
Reference in New Issue
Block a user