// File: dshash/tests/test_stress.cpp
// Listing metadata: 2025-09-02 16:30:58 +12:00, 276 lines, 8.8 KiB, C++

#include "../src/dshash.hpp"

#include <algorithm>
#include <cassert>
#include <chrono>
#include <cstdint>
#include <cstdlib>
#include <filesystem>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <random>
#include <set>
#include <stdexcept>
#include <string>
#include <system_error>
#include <vector>
// Hashes a 10,000-byte random buffer in one shot, then rebuilds the same bytes
// from fixed-size chunks and checks that every rebuilt copy yields the same
// digest string.
//
// NOTE(review): each chunked run concatenates the pieces into a single string
// and hashes that string with one DSHash construction, so no incremental
// update API is exercised here -- confirm whether DSHash exposes one.
//
// The RNG seed is printed on mismatch so that a failing input can be replayed.
// Terminates the process with exit status 1 on the first mismatch.
void test_incremental_vs_single() {
    std::cout << "Testing incremental vs single update..." << std::endl;

    // Generate random data. The seed is drawn explicitly (instead of passing
    // rd() straight to the engine) so it can be reported if the test fails.
    std::random_device rd;
    const auto seed = rd();
    std::mt19937 gen(seed);
    std::uniform_int_distribution<> dis(0, 255);
    std::vector<uint8_t> data(10000);
    for (auto& byte : data) {
        // The distribution is bounded to [0, 255], so the narrowing is lossless.
        byte = static_cast<uint8_t>(dis(gen));
    }

    // Hash all at once
    const std::string whole(data.begin(), data.end());
    DSHash hasher1(whole);
    const std::string hash1 = hasher1.toString();

    // Rebuild the input in various chunk sizes and hash each rebuilt copy
    const std::vector<size_t> chunk_sizes = {1, 7, 13, 64, 100, 1000, 3333};
    for (const size_t chunk_size : chunk_sizes) {
        std::string accumulated;
        accumulated.reserve(whole.size());
        for (size_t offset = 0; offset < whole.size(); offset += chunk_size) {
            // append(str, pos, count) stops at the end of str, so the final
            // short chunk needs no explicit length clamp.
            accumulated.append(whole, offset, chunk_size);
        }

        DSHash hasher2(accumulated);
        const std::string hash2 = hasher2.toString();
        if (hash1 != hash2) {
            std::cerr << "✗ Mismatch with chunk size " << chunk_size << std::endl;
            std::cerr << " Single: " << hash1 << std::endl;
            std::cerr << " Chunked: " << hash2 << std::endl;
            std::cerr << " Seed: " << seed << std::endl;
            std::exit(1);
        }
    }

    std::cout << "✓ All chunk sizes produce identical results" << std::endl;
}
void test_boundary_conditions() {
std::cout << "Testing boundary conditions..." << std::endl;
// Test data sizes around block boundaries
std::vector<size_t> sizes = {
0, 1, 31, 32, 33, // Near 32 bytes
54, 55, 56, 57, // Near padding boundary
63, 64, 65, // Block boundary
119, 120, 121, // Near 2 blocks
127, 128, 129, // 2 block boundary
511, 512, 513, // 8 block boundary
1023, 1024, 1025 // 16 block boundary
};
for (size_t size : sizes) {
std::string data(size, 'X');
DSHash hasher(data);
std::string hash = hasher.toString();
// Just verify it doesn't crash and produces valid hash
assert(hash.length() == 64);
for (char c : hash) {
assert((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f'));
}
}
std::cout << "✓ All boundary conditions handled correctly" << std::endl;
}
// Writes pattern-filled files of 1, 5, 10 and 50 MiB to a temporary path,
// hashes each one through DSHash's filesystem-path constructor, prints the
// elapsed time in milliseconds, and asserts that the digest is 64 characters
// drawn from [0-9a-f].
//
// Throws std::runtime_error if the temporary file cannot be opened or written.
void test_large_files() {
    std::cout << "Testing large file handling..." << std::endl;

    const std::string tempFile = "/tmp/test_large.bin";

    // Best-effort cleanup if an exception unwinds through this function, so a
    // failed run does not leave a multi-megabyte file behind. Errors are
    // ignored here because a destructor must not throw.
    struct TempFileGuard {
        std::filesystem::path path;
        ~TempFileGuard() {
            std::error_code ignored;
            std::filesystem::remove(path, ignored);
        }
    } guard{tempFile};

    // The fill pattern is identical for every file size, so build it once.
    std::vector<uint8_t> buffer(8192);
    for (size_t i = 0; i < buffer.size(); ++i) {
        buffer[i] = static_cast<uint8_t>(i % 256);
    }

    // Test various large sizes
    const std::vector<size_t> mb_sizes = {1, 5, 10, 50};
    for (const size_t mb : mb_sizes) {
        const size_t size = mb * 1024 * 1024;

        // Create file with pattern
        {
            std::ofstream out(tempFile, std::ios::binary);
            if (!out) {
                throw std::runtime_error("cannot open " + tempFile + " for writing");
            }

            size_t written = 0;
            // Stop early once the stream has failed instead of spinning
            // through writes that can no longer succeed.
            while (out && written < size) {
                const size_t to_write = std::min(buffer.size(), size - written);
                out.write(reinterpret_cast<const char*>(buffer.data()),
                          static_cast<std::streamsize>(to_write));
                written += to_write;
            }

            out.close();
            if (!out) {
                // Hashing a truncated file would validate the wrong input.
                throw std::runtime_error("failed to write " + tempFile);
            }
        }

        // Hash the file. steady_clock is monotonic, which is what interval
        // measurement needs.
        const auto start = std::chrono::steady_clock::now();
        DSHash hasher{std::filesystem::path(tempFile)};
        const std::string hash = hasher.toString();
        const auto end = std::chrono::steady_clock::now();
        const auto duration =
            std::chrono::duration_cast<std::chrono::milliseconds>(end - start);

        std::cout << " " << mb << "MB file: " << duration.count() << "ms";

        // Verify hash format
        assert(hash.length() == 64);
        for (const char c : hash) {
            assert((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f'));
        }

        std::cout << std::endl;
    }

    // Normal-path removal; unlike the guard, this reports a failure to delete.
    std::filesystem::remove(tempFile);
    std::cout << "✓ Large files handled successfully" << std::endl;
}
void test_special_characters_in_strings() {
std::cout << "Testing special characters..." << std::endl;
struct TestCase {
std::string description;
std::string input;
};
std::vector<TestCase> cases = {
{"Null bytes", std::string("abc\0def", 7)},
{"High ASCII", "\x80\x90\xA0\xB0\xC0\xD0\xE0\xF0"},
{"Control characters", "\x01\x02\x03\x04\x05\x06\x07\x08"},
{"Mixed binary", std::string("\x00\xFF\x00\xFF", 4)},
{"UTF-8 emoji", "🎉🎊🎈🎆🎇"},
{"Tabs and newlines", "line1\t\tline2\n\rline3"},
{"Quotes and escapes", "\"'\\`${}[]()"},
};
for (const auto& tc : cases) {
DSHash hasher(tc.input);
std::string hash = hasher.toString();
// Verify valid hash
assert(hash.length() == 64);
for (char c : hash) {
assert((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f'));
}
std::cout << " " << tc.description << "" << std::endl;
}
std::cout << "✓ Special characters handled correctly" << std::endl;
}
void test_consistency() {
std::cout << "Testing hash consistency..." << std::endl;
// Test that same input always produces same output
std::string test_data = "consistency test data";
std::string first_hash;
for (int i = 0; i < 100; i++) {
DSHash hasher(test_data);
std::string hash = hasher.toString();
if (i == 0) {
first_hash = hash;
} else {
assert(hash == first_hash);
}
}
std::cout << "✓ Hash is consistent across 100 iterations" << std::endl;
}
void test_known_collisions() {
std::cout << "Testing uniqueness..." << std::endl;
// Generate many similar strings and verify they produce different hashes
std::set<std::string> hashes;
for (int i = 0; i < 1000; i++) {
std::string data = "test" + std::to_string(i);
DSHash hasher(data);
std::string hash = hasher.toString();
if (hashes.find(hash) != hashes.end()) {
std::cerr << "✗ Collision found for: " << data << std::endl;
exit(1);
}
hashes.insert(hash);
}
std::cout << "✓ 1000 similar strings produced unique hashes" << std::endl;
}
// Hashes a freshly created, empty directory and asserts that the digest is 64
// characters drawn from [0-9a-f]. The directory is removed afterwards.
void test_empty_directory() {
    std::cout << "Testing empty directory..." << std::endl;

    std::filesystem::path tempDir = "/tmp/test_empty_dir";

    // An earlier aborted run may have left this directory behind with entries
    // in it; clear it first so the directory under test really is empty.
    std::filesystem::remove_all(tempDir);
    std::filesystem::create_directories(tempDir);

    DSHash hasher(tempDir);
    const std::string hash = hasher.toString();

    // Should produce valid hash for empty directory: same length and alphabet
    // checks as the other tests in this file.
    assert(hash.length() == 64);
    for (const char c : hash) {
        assert((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f'));
    }

    std::filesystem::remove_all(tempDir);
    std::cout << "✓ Empty directory handled correctly" << std::endl;
}
void test_directory_with_subdirs() {
std::cout << "Testing complex directory structure..." << std::endl;
std::filesystem::path tempDir = "/tmp/test_complex_dir";
std::filesystem::remove_all(tempDir);
// Create complex structure
std::filesystem::create_directories(tempDir / "a" / "b" / "c");
std::filesystem::create_directories(tempDir / "x" / "y");
std::filesystem::create_directories(tempDir / "empty");
// Add files at various levels
std::ofstream(tempDir / "root.txt") << "root";
std::ofstream(tempDir / "a" / "file_a.txt") << "a";
std::ofstream(tempDir / "a" / "b" / "file_b.txt") << "b";
std::ofstream(tempDir / "a" / "b" / "c" / "file_c.txt") << "c";
std::ofstream(tempDir / "x" / "file_x.txt") << "x";
std::ofstream(tempDir / "x" / "y" / "file_y.txt") << "y";
// Hash multiple times to ensure consistency
std::string hash1, hash2;
DSHash hasher1(tempDir);
hash1 = hasher1.toString();
DSHash hasher2(tempDir);
hash2 = hasher2.toString();
assert(hash1 == hash2);
std::filesystem::remove_all(tempDir);
std::cout << "✓ Complex directory structure handled correctly" << std::endl;
}
// Runs every stress test in a fixed order. Returns 0 when all of them
// complete, or 1 if one of them throws an exception derived from
// std::exception.
int main() {
    using TestFn = void (*)();

    // Execution order matches the order of definition in this file.
    const TestFn tests[] = {
        test_incremental_vs_single,
        test_boundary_conditions,
        test_large_files,
        test_special_characters_in_strings,
        test_consistency,
        test_known_collisions,
        test_empty_directory,
        test_directory_with_subdirs,
    };

    try {
        std::cout << "\n=== Running stress tests ===\n" << std::endl;
        for (const TestFn run_test : tests) {
            run_test();
        }
        std::cout << "\n✓ All stress tests passed!\n" << std::endl;
    } catch (const std::exception& e) {
        std::cerr << "\n✗ Test failed with exception: " << e.what() << std::endl;
        return 1;
    }

    return 0;
}