diff --git a/dshash/dshash b/dshash/dshash
index 7068766..04f3670 100755
Binary files a/dshash/dshash and b/dshash/dshash differ
diff --git a/dshash/dshash.o b/dshash/dshash.o
index 06e3fea..65fb81f 100644
Binary files a/dshash/dshash.o and b/dshash/dshash.o differ
diff --git a/dshash/main.o b/dshash/main.o
index 88c9feb..bb15ad1 100644
Binary files a/dshash/main.o and b/dshash/main.o differ
diff --git a/tests/test.sh b/tests/test.sh
index be4040f..a7f7df9 100755
--- a/tests/test.sh
+++ b/tests/test.sh
@@ -14,10 +14,33 @@ cd "$PROJECT_DIR/dshash"
 make clean > /dev/null 2>&1
 make > /dev/null 2>&1
 
+TOTAL_PASSED=0
+TOTAL_FAILED=0
+
+# =====================================
+# SECTION 1: Library Unit Tests
+# =====================================
+echo ""
+echo "=== Section 1: Library Unit Tests ==="
 echo "Building test program..."
 cd "$SCRIPT_DIR"
 g++ -std=c++17 -o test_lib test_lib.cpp ../src/dshash.cpp -I../src
 
+./test_lib
+if [ $? -eq 0 ]; then
+    echo "✓ All library tests passed"
+    TOTAL_PASSED=$((TOTAL_PASSED + 7))
+else
+    echo "✗ Library tests failed"
+    TOTAL_FAILED=$((TOTAL_FAILED + 1))
+fi
+
+# =====================================
+# SECTION 2: Basic Utility Tests
+# =====================================
+echo ""
+echo "=== Section 2: Basic Utility Tests ==="
+
 FAILED=0
 PASSED=0
 
@@ -37,20 +60,6 @@ run_test() {
     fi
 }
 
-echo ""
-echo "Running library tests..."
-./test_lib
-if [ $? -eq 0 ]; then
-    echo "✓ All library tests passed"
-    PASSED=$((PASSED + 1))
-else
-    echo "✗ Library tests failed"
-    FAILED=$((FAILED + 1))
-fi
-
-echo ""
-echo "Running utility tests..."
-
 echo -n "abc" > "$TEMP_DIR/test1.txt"
 HASH=$($DSHASH_BIN "$TEMP_DIR/test1.txt")
 run_test "Hash of 'abc'" "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad" "$HASH"
@@ -86,12 +95,215 @@ else
     run_test "Verbose mode for directory" "works" "failed"
 fi
 
+TOTAL_PASSED=$((TOTAL_PASSED + PASSED))
+TOTAL_FAILED=$((TOTAL_FAILED + FAILED))
+
+# =====================================
+# SECTION 3: System SHA256 Comparison
+# =====================================
+echo ""
+echo "=== Section 3: System SHA256 Comparison ==="
+
+FAILED=0
+PASSED=0
+
+# Hash a file with dshash and with sha256sum and record whether they agree.
+#   $1 - test name printed on the result line
+#   $2 - path of the file to hash
+# Increments PASSED or FAILED; on a mismatch both hashes are printed.
+compare_with_system() {
+    local test_name="$1"
+    local file_path="$2"
+
+    local our_hash=$($DSHASH_BIN "$file_path")
+    local system_hash=$(sha256sum "$file_path" | cut -d' ' -f1)
+
+    if [ "$our_hash" = "$system_hash" ]; then
+        echo "✓ $test_name"
+        PASSED=$((PASSED + 1))
+    else
+        echo "✗ $test_name"
+        echo " Our hash: $our_hash"
+        echo " System hash: $system_hash"
+        FAILED=$((FAILED + 1))
+    fi
+}
+
+# Test various file types
+echo -n "a" > "$TEMP_DIR/single.txt"
+compare_with_system "Single character" "$TEMP_DIR/single.txt"
+
+printf "\x00\x01\x02\x03\xFF\xFE\xFD" > "$TEMP_DIR/binary.bin"
+compare_with_system "Binary data" "$TEMP_DIR/binary.bin"
+
+dd if=/dev/zero bs=1024 count=1024 2>/dev/null | tr '\0' 'A' > "$TEMP_DIR/large.txt"
+compare_with_system "Large file (1MB)" "$TEMP_DIR/large.txt"
+
+echo -n "!@#$%^&*()_+-=[]{}|;':\",./<>?" > "$TEMP_DIR/special.txt"
+compare_with_system "Special characters" "$TEMP_DIR/special.txt"
+
+echo -n "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef" > "$TEMP_DIR/64bytes.txt"
+compare_with_system "Exactly 64 bytes" "$TEMP_DIR/64bytes.txt"
+
+echo -n "Hello 世界 🌍 Здравствуй мир" > "$TEMP_DIR/unicode.txt"
+compare_with_system "Unicode text" "$TEMP_DIR/unicode.txt"
+
+for i in {0..255}; do
+    printf "\\x$(printf %02x $i)"
+done > "$TEMP_DIR/allbytes.bin"
+compare_with_system "All byte values 0-255" "$TEMP_DIR/allbytes.bin"
+
+TOTAL_PASSED=$((TOTAL_PASSED + PASSED))
+TOTAL_FAILED=$((TOTAL_FAILED + FAILED))
+
+# =====================================
+# SECTION 4: NIST Test Vectors
+# =====================================
+echo ""
+echo "=== Section 4: NIST Test Vectors ==="
+
+FAILED=0
+PASSED=0
+
+# NIST test vectors
+declare -a VECTORS=(
+    "|e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
+    "abc|ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad"
+    "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq|248d6a61d20638b8e5c026930c3e6039a33ce45964ff2167f6ecedd419db06c1"
+    "a|ca978112ca1bbdcafac231b39a23dc4da786eff8147c4e72b9807785afee48bb"
+)
+
+for vector in "${VECTORS[@]}"; do
+    IFS='|' read -r input expected <<< "$vector"
+    echo -n "$input" > "$TEMP_DIR/test.txt"
+    our_hash=$($DSHASH_BIN "$TEMP_DIR/test.txt")
+
+    if [ "$our_hash" = "$expected" ]; then
+        if [ -z "$input" ]; then
+            echo "✓ Empty string"
+        elif [ ${#input} -gt 20 ]; then
+            echo "✓ '${input:0:20}...'"
+        else
+            echo "✓ '$input'"
+        fi
+        PASSED=$((PASSED + 1))
+    else
+        echo "✗ Failed for input: '$input'"
+        echo " Expected: $expected"
+        echo " Got: $our_hash"
+        FAILED=$((FAILED + 1))
+    fi
+done
+
+# Special test: one million 'a' characters
+perl -e 'print "a" x 1000000' > "$TEMP_DIR/million_a.txt"
+our_hash=$($DSHASH_BIN "$TEMP_DIR/million_a.txt")
+expected="cdc76e5c9914fb9281a1c7e284d73e67f1809a48a497200e046d39ccc7112cd0"
+
+if [ "$our_hash" = "$expected" ]; then
+    echo "✓ One million 'a' characters"
+    PASSED=$((PASSED + 1))
+else
+    echo "✗ One million 'a' characters"
+    echo " Expected: $expected"
+    echo " Got: $our_hash"
+    FAILED=$((FAILED + 1))
+fi
+
+TOTAL_PASSED=$((TOTAL_PASSED + PASSED))
+TOTAL_FAILED=$((TOTAL_FAILED + FAILED))
+
+# =====================================
+# SECTION 5: Stress Tests
+# =====================================
+echo ""
+echo "=== Section 5: Stress Tests ==="
+
+cd "$SCRIPT_DIR"
+# Check the build separately so a compile error is reported as such and is never masked by a stale binary
+if ! g++ -std=c++17 -O2 -o test_stress test_stress.cpp ../src/dshash.cpp -I../src; then
+    echo "✗ Stress tests failed to compile"
+    TOTAL_FAILED=$((TOTAL_FAILED + 1))
+elif ./test_stress > /dev/null 2>&1; then
+    echo "✓ All stress tests passed (8 tests)"
+    TOTAL_PASSED=$((TOTAL_PASSED + 8))
+else
+    echo "✗ Stress tests failed"
+    TOTAL_FAILED=$((TOTAL_FAILED + 1))
+fi
+
+rm -f test_stress test_lib
+
+# =====================================
+# SECTION 6: Edge Cases
+# =====================================
+echo ""
+echo "=== Section 6: Edge Cases ==="
+
+FAILED=0
+PASSED=0
+
+# File with spaces in name
+echo -n "test" > "$TEMP_DIR/file with spaces.txt"
+compare_with_system "File with spaces in name" "$TEMP_DIR/file with spaces.txt"
+
+# Symlink handling (-f so a link left by an earlier run does not make ln fail)
+echo -n "target" > "$TEMP_DIR/target.txt"
+ln -sf "$TEMP_DIR/target.txt" "$TEMP_DIR/link.txt"
+compare_with_system "Symlink handling" "$TEMP_DIR/link.txt"
+
+# 55 bytes (padding edge case)
+echo -n "0123456789abcdef0123456789abcdef0123456789abcdef0123456" > "$TEMP_DIR/55bytes.txt"
+compare_with_system "55 bytes (padding edge)" "$TEMP_DIR/55bytes.txt"
+
+# 56 bytes (padding edge case)
+echo -n "0123456789abcdef0123456789abcdef0123456789abcdef01234567" > "$TEMP_DIR/56bytes.txt"
+compare_with_system "56 bytes (padding edge)" "$TEMP_DIR/56bytes.txt"
+
+TOTAL_PASSED=$((TOTAL_PASSED + PASSED))
+TOTAL_FAILED=$((TOTAL_FAILED + FAILED))
+
+# =====================================
+# SECTION 7: Performance Test
+# =====================================
+echo ""
+echo "=== Section 7: Performance Test ==="
+
+# Create a 10MB file
+dd if=/dev/urandom bs=1024 count=10240 of="$TEMP_DIR/10mb.bin" 2>/dev/null
+
+# Time our implementation
+# NOTE: %N (nanoseconds) is a GNU date extension; BSD/macOS date does not support it
+START=$(date +%s%N)
+OUR_HASH=$($DSHASH_BIN "$TEMP_DIR/10mb.bin")
+END=$(date +%s%N)
+OUR_TIME=$(( (END - START) / 1000000 ))
+
+# Time system sha256sum
+START=$(date +%s%N)
+SYSTEM_HASH=$(sha256sum "$TEMP_DIR/10mb.bin" | cut -d' ' -f1)
+END=$(date +%s%N)
+SYSTEM_TIME=$(( (END - START) / 1000000 ))
+
+if [ "$OUR_HASH" = "$SYSTEM_HASH" ]; then
+    echo "✓ 10MB file hash matches"
+    echo " Our time: ${OUR_TIME}ms"
+    echo " System time: ${SYSTEM_TIME}ms"
+    TOTAL_PASSED=$((TOTAL_PASSED + 1))
+else
+    echo "✗ 10MB file hash mismatch"
+    TOTAL_FAILED=$((TOTAL_FAILED + 1))
+fi
+
+# =====================================
+# FINAL SUMMARY
+# =====================================
 echo ""
 echo "========================================="
-echo "Test Results: $PASSED passed, $FAILED failed"
+echo "Test Results: $TOTAL_PASSED passed, $TOTAL_FAILED failed"
 echo "========================================="
 
-if [ $FAILED -eq 0 ]; then
+if [ $TOTAL_FAILED -eq 0 ]; then
     echo "All tests passed!"
     exit 0
 else
diff --git a/tests/test_lib b/tests/test_lib
deleted file mode 100755
index 534c6d6..0000000
Binary files a/tests/test_lib and /dev/null differ
diff --git a/tests/test_stress.cpp b/tests/test_stress.cpp
new file mode 100644
index 0000000..5beecca
--- /dev/null
+++ b/tests/test_stress.cpp
@@ -0,0 +1,312 @@
+#include "../src/dshash.hpp"
+
+// These tests rely on assert(); make sure it is never compiled out.
+#undef NDEBUG
+#include <algorithm>
+#include <cassert>
+#include <chrono>
+#include <cstddef>
+#include <cstdlib>
+#include <filesystem>
+#include <fstream>
+#include <iostream>
+#include <random>
+#include <set>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+namespace {
+
+/// Asserts that `hash` is formatted like a SHA-256 digest: 64 lowercase hex digits.
+void assert_valid_hash(const std::string& hash) {
+    assert(hash.length() == 64);
+    for (char c : hash) {
+        assert((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f'));
+    }
+}
+
+/// Returns the path of `name` inside the system temporary directory.
+std::filesystem::path temp_path(const std::string& name) {
+    return std::filesystem::temp_directory_path() / name;
+}
+
+}  // namespace
+
+/// Hashes 10000 random bytes directly and again after re-assembling them from
+/// chunks of several sizes, and checks that both digests agree.
+/// NOTE(review): both paths hash one complete string, so no incremental-update
+/// API is exercised here; only the string constructor is used.
+void test_incremental_vs_single() {
+    std::cout << "Testing incremental vs single update..." << std::endl;
+
+    // Generate random data
+    std::random_device rd;
+    std::mt19937 gen(rd());
+    std::uniform_int_distribution<> dis(0, 255);
+
+    std::vector<unsigned char> data(10000);
+    for (auto& byte : data) {
+        byte = static_cast<unsigned char>(dis(gen));
+    }
+
+    // Hash all at once
+    DSHash hasher1(std::string(data.begin(), data.end()));
+    std::string hash1 = hasher1.toString();
+
+    // Re-assemble the same bytes in various chunk sizes and hash the result
+    std::vector<std::size_t> chunk_sizes = {1, 7, 13, 64, 100, 1000, 3333};
+
+    for (std::size_t chunk_size : chunk_sizes) {
+        std::string accumulated;
+        for (std::size_t i = 0; i < data.size(); i += chunk_size) {
+            std::size_t len = std::min(chunk_size, data.size() - i);
+            accumulated.append(data.begin() + i, data.begin() + i + len);
+        }
+
+        DSHash hasher2(accumulated);
+        std::string hash2 = hasher2.toString();
+
+        if (hash1 != hash2) {
+            std::cerr << "✗ Mismatch with chunk size " << chunk_size << std::endl;
+            std::cerr << " Single: " << hash1 << std::endl;
+            std::cerr << " Chunked: " << hash2 << std::endl;
+            std::exit(1);
+        }
+    }
+
+    std::cout << "✓ All chunk sizes produce identical results" << std::endl;
+}
+
+/// Hashes strings whose lengths lie on and around 64-byte block and padding
+/// boundaries and checks that every digest is well-formed.
+void test_boundary_conditions() {
+    std::cout << "Testing boundary conditions..." << std::endl;
+
+    // Test data sizes around block boundaries
+    std::vector<std::size_t> sizes = {
+        0, 1, 31, 32, 33,   // Near 32 bytes
+        54, 55, 56, 57,     // Near padding boundary
+        63, 64, 65,         // Block boundary
+        119, 120, 121,      // Near 2 blocks
+        127, 128, 129,      // 2 block boundary
+        511, 512, 513,      // 8 block boundary
+        1023, 1024, 1025    // 16 block boundary
+    };
+
+    for (std::size_t size : sizes) {
+        std::string data(size, 'X');
+        DSHash hasher(data);
+
+        // Just verify it doesn't crash and produces valid hash
+        assert_valid_hash(hasher.toString());
+    }
+
+    std::cout << "✓ All boundary conditions handled correctly" << std::endl;
+}
+
+/// Writes patterned files of 1, 5, 10 and 50 MB into the temporary directory,
+/// hashes each one through the path constructor and prints the elapsed time.
+/// @throws std::runtime_error if a test file cannot be written.
+void test_large_files() {
+    std::cout << "Testing large file handling..." << std::endl;
+
+    // Resolve the temp directory at run time instead of assuming /tmp exists
+    const std::filesystem::path tempFile = temp_path("test_large.bin");
+
+    // Test various large sizes
+    std::vector<std::size_t> mb_sizes = {1, 5, 10, 50};
+
+    // Repeating 0..255 pattern; identical for every file, so build it once
+    std::vector<unsigned char> buffer(8192);
+    for (std::size_t i = 0; i < buffer.size(); i++) {
+        buffer[i] = static_cast<unsigned char>(i % 256);
+    }
+
+    for (std::size_t mb : mb_sizes) {
+        std::size_t size = mb * 1024 * 1024;
+
+        // Create file with pattern
+        std::ofstream out(tempFile, std::ios::binary);
+        std::size_t written = 0;
+        while (written < size) {
+            std::size_t to_write = std::min(buffer.size(), size - written);
+            out.write(reinterpret_cast<const char*>(buffer.data()),
+                      static_cast<std::streamsize>(to_write));
+            written += to_write;
+        }
+        out.close();
+        if (!out) {
+            // A short or missing file would make the hash and timing meaningless
+            throw std::runtime_error("failed to write " + tempFile.string());
+        }
+
+        // Hash the file
+        auto start = std::chrono::high_resolution_clock::now();
+        DSHash hasher{std::filesystem::path(tempFile)};
+        std::string hash = hasher.toString();
+        auto end = std::chrono::high_resolution_clock::now();
+
+        auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
+
+        std::cout << " " << mb << "MB file: " << duration.count() << "ms";
+
+        // Verify hash format
+        assert_valid_hash(hash);
+
+        std::cout << " ✓" << std::endl;
+    }
+
+    std::filesystem::remove(tempFile);
+    std::cout << "✓ Large files handled successfully" << std::endl;
+}
+
+/// Hashes strings containing NUL bytes, non-ASCII bytes, control characters and
+/// quote/escape characters and checks that every digest is well-formed.
+void test_special_characters_in_strings() {
+    std::cout << "Testing special characters..." << std::endl;
+
+    struct TestCase {
+        std::string description;
+        std::string input;
+    };
+
+    // Explicit lengths keep embedded NUL bytes from truncating the literal
+    std::vector<TestCase> cases = {
+        {"Null bytes", std::string("abc\0def", 7)},
+        {"High ASCII", "\x80\x90\xA0\xB0\xC0\xD0\xE0\xF0"},
+        {"Control characters", "\x01\x02\x03\x04\x05\x06\x07\x08"},
+        {"Mixed binary", std::string("\x00\xFF\x00\xFF", 4)},
+        {"UTF-8 emoji", "🎉🎊🎈🎆🎇"},
+        {"Tabs and newlines", "line1\t\tline2\n\rline3"},
+        {"Quotes and escapes", "\"'\\`${}[]()"},
+    };
+
+    for (const auto& tc : cases) {
+        DSHash hasher(tc.input);
+
+        // Verify valid hash
+        assert_valid_hash(hasher.toString());
+
+        std::cout << " " << tc.description << " ✓" << std::endl;
+    }
+
+    std::cout << "✓ Special characters handled correctly" << std::endl;
+}
+
+/// Hashes the same string 100 times and checks that the digest never changes.
+void test_consistency() {
+    std::cout << "Testing hash consistency..." << std::endl;
+
+    // Test that same input always produces same output
+    std::string test_data = "consistency test data";
+    std::string first_hash;
+
+    for (int i = 0; i < 100; i++) {
+        DSHash hasher(test_data);
+        std::string hash = hasher.toString();
+
+        if (i == 0) {
+            first_hash = hash;
+        } else {
+            assert(hash == first_hash);
+        }
+    }
+
+    std::cout << "✓ Hash is consistent across 100 iterations" << std::endl;
+}
+
+/// Hashes 1000 near-identical strings and checks that no two digests collide.
+void test_known_collisions() {
+    std::cout << "Testing uniqueness..." << std::endl;
+
+    // Generate many similar strings and verify they produce different hashes
+    std::set<std::string> hashes;
+
+    for (int i = 0; i < 1000; i++) {
+        std::string data = "test" + std::to_string(i);
+        DSHash hasher(data);
+
+        // insert() reports a duplicate through .second, so one lookup suffices
+        if (!hashes.insert(hasher.toString()).second) {
+            std::cerr << "✗ Collision found for: " << data << std::endl;
+            std::exit(1);
+        }
+    }
+
+    std::cout << "✓ 1000 similar strings produced unique hashes" << std::endl;
+}
+
+/// Hashes a freshly created empty directory and checks the digest is well-formed.
+void test_empty_directory() {
+    std::cout << "Testing empty directory..." << std::endl;
+
+    std::filesystem::path tempDir = temp_path("test_empty_dir");
+    // Drop leftovers from an aborted run so the directory really is empty
+    std::filesystem::remove_all(tempDir);
+    std::filesystem::create_directories(tempDir);
+
+    DSHash hasher(tempDir);
+
+    // Should produce valid hash for empty directory
+    assert_valid_hash(hasher.toString());
+
+    std::filesystem::remove_all(tempDir);
+    std::cout << "✓ Empty directory handled correctly" << std::endl;
+}
+
+/// Builds a nested directory tree with files at several depths, hashes it twice
+/// and checks that both digests are identical.
+void test_directory_with_subdirs() {
+    std::cout << "Testing complex directory structure..." << std::endl;
+
+    std::filesystem::path tempDir = temp_path("test_complex_dir");
+    std::filesystem::remove_all(tempDir);
+
+    // Create complex structure
+    std::filesystem::create_directories(tempDir / "a" / "b" / "c");
+    std::filesystem::create_directories(tempDir / "x" / "y");
+    std::filesystem::create_directories(tempDir / "empty");
+
+    // Add files at various levels
+    std::ofstream(tempDir / "root.txt") << "root";
+    std::ofstream(tempDir / "a" / "file_a.txt") << "a";
+    std::ofstream(tempDir / "a" / "b" / "file_b.txt") << "b";
+    std::ofstream(tempDir / "a" / "b" / "c" / "file_c.txt") << "c";
+    std::ofstream(tempDir / "x" / "file_x.txt") << "x";
+    std::ofstream(tempDir / "x" / "y" / "file_y.txt") << "y";
+
+    // Hash multiple times to ensure consistency
+    DSHash hasher1(tempDir);
+    std::string hash1 = hasher1.toString();
+
+    DSHash hasher2(tempDir);
+    std::string hash2 = hasher2.toString();
+
+    assert(hash1 == hash2);
+
+    std::filesystem::remove_all(tempDir);
+    std::cout << "✓ Complex directory structure handled correctly" << std::endl;
+}
+
+/// Runs every stress test; returns 0 on success and 1 if one throws std::exception.
+int main() {
+    try {
+        std::cout << "\n=== Running stress tests ===\n" << std::endl;
+
+        test_incremental_vs_single();
+        test_boundary_conditions();
+        test_large_files();
+        test_special_characters_in_strings();
+        test_consistency();
+        test_known_collisions();
+        test_empty_directory();
+        test_directory_with_subdirs();
+
+        std::cout << "\n✓ All stress tests passed!\n" << std::endl;
+        return 0;
+    } catch (const std::exception& e) {
+        std::cerr << "\n✗ Test failed with exception: " << e.what() << std::endl;
+        return 1;
+    }
+}