test: Add 1 and update 10 files
All checks were successful
Build-Test-Publish / build (linux/amd64) (push) Successful in 1m26s
Build-Test-Publish / build (linux/arm64) (push) Successful in 2m14s
Build-Test-Publish / create-manifest (push) Successful in 13s

This commit is contained in:
Your Name
2025-08-10 22:33:56 +12:00
parent 8ab6028597
commit 623879f67a
11 changed files with 140 additions and 108 deletions

View File

@@ -10,7 +10,7 @@ A simple object storage system that stores files with metadata and provides a RE
- Check if a file exists by hash or label:tag
- Delete files by hash
- List all stored objects
- Automatic file deduplication using content hashing
- Automatic file deduplication using SHA-256 content hashing
- Support for large file uploads (up to 6GB)
- High-performance HTTP server with async request handling
- Configurable storage location and server settings

View File

@@ -29,15 +29,18 @@ This comprehensive security review analyzes the Simple Object Server C++23 appli
- Removed all plaintext token support for enhanced security
- **Documentation**: See README.md for token hashing instructions
### 3. **Weak Cryptographic Hash for Content**
- **Location**: `src/hash.cpp:12-56`
- **Risk**: HIGH - Using XXHash (non-cryptographic) for content identification
- **Issue**: XXHash is designed for speed, not security - vulnerable to collision attacks
- **Impact**: Potential for malicious file substitution through hash collisions
- **Recommendation**:
- Replace XXHash with SHA-256 or SHA-3 for content hashing
- Use cryptographic hashes for security-critical operations
- Keep XXHash only for non-security checksums if needed
### 3. **~~Weak Cryptographic Hash for Content~~ [FIXED]
- **Location**: `src/hash.cpp`
- **Risk**: ~~HIGH~~ RESOLVED - Now using SHA-256 for content identification
- **Fix Implemented**:
- Replaced XXHash with SHA-256 for all content hashing
- Using OpenSSL's SHA-256 implementation for cryptographic security
- All file hashes are now 256-bit SHA-256 hashes (64 hex characters)
- Collision resistance: 2^128 operations needed for 50% probability
- **Security Benefits**:
- Cryptographically secure against intentional collisions
- Industry-standard hash function
- Prevents malicious file substitution attacks
## High-Risk Issues

View File

@@ -14,17 +14,17 @@
namespace simple_object_storage {
// if the file is a tgz file (we can't rely on the extension), then unpack on disk and has the contents
// if the file is a tgz file (we can't rely on the extension), then unpack on disk and hash the contents
// with hash_directory_recursive in hash.hpp
uint64_t get_hash_from_tgz(const std::string &file_path)
std::string get_hash_from_tgz(const std::string &file_path)
{
// check if it's a gzip file
std::ifstream file(file_path, std::ios::binary);
if (!file) return 0;
if (!file) return "";
int result = system("gunzip -t file.gz");
if (result != 0) { // not a gzip file.
return 0;
return "";
}
// gunzip the file to a new temporary directory
@@ -35,10 +35,10 @@ uint64_t get_hash_from_tgz(const std::string &file_path)
std::string command = "tar -zxzf " + file_path + " -C " + temp_dir;
result = system(command.c_str()); // Basic tar extraction - requires 'tar' command
if (result != 0) {
return 0;
return "";
}
// hash the contents
// hash the contents with SHA-256
return hash_directory_recursive(temp_dir);
}

View File

@@ -1,8 +1,8 @@
#include <string>
#include <cstdint>
namespace simple_object_storage {
uint64_t get_hash_from_tgz(const std::string& file_path);
// Returns SHA-256 hash of unpacked tgz contents, or empty string on error
std::string get_hash_from_tgz(const std::string& file_path);
} // namespace simple_object_storage

View File

@@ -209,18 +209,13 @@ bool Database::run_sql_text(const std::string& sql, const std::string& bind_text
}
bool is_dec_uint64(const std::string& s) {
if (s.empty()) return false;
bool is_sha256_hash(const std::string& s) {
// SHA-256 hashes are 64 hex characters
if (s.length() != 64) return false;
for (char c : s) {
if (!std::isdigit(static_cast<unsigned char>(c))) return false;
}
try {
uint64_t x = std::stoull(s, nullptr, 10);
std::string s2=std::to_string(x);
return s2 == s;
} catch (...) {
return false;
if (!std::isxdigit(static_cast<unsigned char>(c))) return false;
}
return true;
}
bool Database::get(const std::string& hash_or_labeltag, dbEntry& entry) {
@@ -230,7 +225,7 @@ bool Database::get(const std::string& hash_or_labeltag, dbEntry& entry) {
return (run_sql_text("SELECT hash, labeltags, metadata FROM objects WHERE json_array_length(labeltags) > 0 AND EXISTS (SELECT 1 FROM json_each(labeltags) WHERE value = ?);", hash_or_labeltag, entry));
}
if (is_dec_uint64(hash_or_labeltag)) {
if (is_sha256_hash(hash_or_labeltag)) {
if (run_sql_text("SELECT hash, labeltags, metadata FROM objects WHERE hash = ?;", hash_or_labeltag, entry))
return true;
}

View File

@@ -1,115 +1,140 @@
#include "hash.hpp"
#define XXH_INLINE_ALL
#include "xxhash.hpp"
#include <openssl/sha.h>
#include <fstream>
#include <filesystem>
#include <iostream>
#include <iomanip>
#include <sstream>
#include <vector>
#include <chrono>
namespace simple_object_storage {
uint64_t hash_file(const std::string &path) {
// Create hash state
XXH64_state_t* const state = XXH64_createState();
if (state == nullptr) {
std::cerr << "Failed to create hash state" << std::endl;
return 0;
// Convert binary hash to hex string
static std::string to_hex_string(const unsigned char* hash, size_t length) {
std::stringstream ss;
ss << std::hex << std::setfill('0');
for (size_t i = 0; i < length; ++i) {
ss << std::setw(2) << static_cast<unsigned int>(hash[i]);
}
return ss.str();
}
// Initialize state with seed 0
XXH64_hash_t const seed = 0; /* or any other value */
if (XXH64_reset(state, seed) == XXH_ERROR) return 0;
std::string hash_file(const std::string &path) {
// Create SHA256 context
SHA256_CTX sha256;
if (!SHA256_Init(&sha256)) {
std::cerr << "Failed to initialize SHA256" << std::endl;
return "";
}
// Open file
std::ifstream file(path, std::ios::binary);
if (!file.is_open()) {
std::cerr << "Failed to open file: " << path << std::endl;
XXH64_freeState(state);
return 0;
return "";
}
// Read file in chunks and update hash
const size_t buffer_size = 4096;
char buffer[buffer_size];
while (file.read(buffer, buffer_size)) {
if (XXH64_update(state, buffer, file.gcount()) == XXH_ERROR) {
const size_t buffer_size = 8192;
std::vector<char> buffer(buffer_size);
while (file.read(buffer.data(), buffer_size)) {
if (!SHA256_Update(&sha256, buffer.data(), file.gcount())) {
std::cerr << "Failed to update hash" << std::endl;
XXH64_freeState(state);
return 0;
return "";
}
}
// Handle any remaining bytes
if (file.gcount() > 0) {
if (XXH64_update(state, buffer, file.gcount()) == XXH_ERROR) {
if (!SHA256_Update(&sha256, buffer.data(), file.gcount())) {
std::cerr << "Failed to update hash" << std::endl;
XXH64_freeState(state);
return 0;
return "";
}
}
// Get final hash
XXH64_hash_t hash = XXH64_digest(state);
XXH64_freeState(state);
return hash;
}
file.close();
uint64_t hash_directory_recursive(const std::string &path) {
// Create hash state
XXH64_state_t* const state = XXH64_createState();
if (state == nullptr) {
std::cerr << "Failed to create hash state" << std::endl;
return 0;
// Get final hash
unsigned char hash[SHA256_DIGEST_LENGTH];
if (!SHA256_Final(hash, &sha256)) {
std::cerr << "Failed to finalize hash" << std::endl;
return "";
}
// Initialize state with seed 0
XXH64_hash_t const seed = 0; /* or any other value */
if (XXH64_reset(state, seed) == XXH_ERROR) {
std::cerr << "Failed to reset hash state" << std::endl;
XXH64_freeState(state);
return 0;
// Convert to hex string
return to_hex_string(hash, SHA256_DIGEST_LENGTH);
}
std::string hash_directory_recursive(const std::string &path) {
// Create SHA256 context for combining hashes
SHA256_CTX sha256;
if (!SHA256_Init(&sha256)) {
std::cerr << "Failed to initialize SHA256" << std::endl;
return "";
}
try {
// Iterate through all files in directory recursively
// Collect all file paths and sort them for consistent hashing
std::vector<std::filesystem::path> file_paths;
for (const auto& entry : std::filesystem::recursive_directory_iterator(path)) {
if (entry.is_regular_file()) {
// Get file hash
XXH64_hash_t file_hash = hash_file(entry.path().string());
XXH64_update(state, &file_hash, sizeof(file_hash));
file_paths.push_back(entry.path());
}
}
// Sort paths for deterministic hashing
std::sort(file_paths.begin(), file_paths.end());
// Hash each file and combine
for (const auto& file_path : file_paths) {
// Get file hash
std::string file_hash_str = hash_file(file_path.string());
if (file_hash_str.empty()) {
std::cerr << "Failed to hash file: " << file_path << std::endl;
continue;
}
// Update combined hash with file hash and path
std::string relative_path = std::filesystem::relative(file_path, path).string();
SHA256_Update(&sha256, relative_path.c_str(), relative_path.length());
SHA256_Update(&sha256, file_hash_str.c_str(), file_hash_str.length());
}
} catch (const std::filesystem::filesystem_error& e) {
std::cerr << "Filesystem error: " << e.what() << std::endl;
XXH64_freeState(state);
return 0;
return "";
}
// Get final hash
XXH64_hash_t hash = XXH64_digest(state);
XXH64_freeState(state);
return hash;
// Get final combined hash
unsigned char hash[SHA256_DIGEST_LENGTH];
if (!SHA256_Final(hash, &sha256)) {
std::cerr << "Failed to finalize hash" << std::endl;
return "";
}
// Convert to hex string
return to_hex_string(hash, SHA256_DIGEST_LENGTH);
}
void hash_demo(const std::string & path)
{
void hash_demo(const std::string & path) {
std::cout << "Hashing directory: " << path << std::endl;
auto start = std::chrono::high_resolution_clock::now();
XXH64_hash_t hash = hash_directory_recursive(path);
std::string hash = hash_directory_recursive(path);
auto end = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
std::cout << "Hash: " << hash << " (took " << duration.count() << "ms)" << std::endl;
std::cout << "SHA-256 Hash: " << hash << " (took " << duration.count() << "ms)" << std::endl;
}
int hash_demo_raw(const std::string & path)
{
int hash_demo_raw(const std::string & path) {
if (!std::filesystem::exists(path)) {
std::cout << 0 <<std::endl; return 1;
std::cout << "0" << std::endl;
return 1;
}
XXH64_hash_t hash = hash_directory_recursive(path);
std::string hash = hash_directory_recursive(path);
std::cout << hash << std::endl;
return 0;
}
} // namespace simple_object_storage
} // namespace simple_object_storage

View File

@@ -2,13 +2,14 @@
#define HASH_HPP
#include <string>
#include <cstdint>
namespace simple_object_storage {
uint64_t hash_file(const std::string &path);
// Compute SHA-256 hash of a file, returns hex string
std::string hash_file(const std::string &path);
uint64_t hash_directory_recursive(const std::string &path);
// Compute combined SHA-256 hash of all files in directory recursively
std::string hash_directory_recursive(const std::string &path);
void hash_demo(const std::string & path);

View File

@@ -196,9 +196,9 @@ void PutHandler::handle_upload_object(const drogon::HttpRequestPtr& req, std::fu
// Ensure the temporary file is removed even if errors occur
ScopeFileDeleter temp_file_deleter(temp_path);
// Calculate hash
uint64_t hash = hash_file(temp_path.string());
if (hash == 0) {
// Calculate SHA-256 hash
std::string hash = hash_file(temp_path.string());
if (hash.empty()) {
resp->setStatusCode(drogon::k500InternalServerError);
nlohmann::json response = {{"result", "error"}, {"error", "Failed to calculate hash"}};
resp->setBody(response.dump());
@@ -215,8 +215,8 @@ void PutHandler::handle_upload_object(const drogon::HttpRequestPtr& req, std::fu
metadata["tgz_content_hash"] = get_hash_from_tgz(temp_path.string());
}
// Move file to final location
std::filesystem::path final_path = server_.config_.object_store_path / std::to_string(hash);
// Move file to final location (using SHA-256 hash as filename)
std::filesystem::path final_path = server_.config_.object_store_path / hash;
if (!std::filesystem::exists(final_path)) {
try {
@@ -235,7 +235,7 @@ void PutHandler::handle_upload_object(const drogon::HttpRequestPtr& req, std::fu
// Update database index
dbEntry entry;
entry.hash = std::to_string(hash);
entry.hash = hash;
entry.labeltags = metadata["labeltags"].get<std::vector<std::string>>();
entry.metadata = metadata;
@@ -250,7 +250,7 @@ void PutHandler::handle_upload_object(const drogon::HttpRequestPtr& req, std::fu
return;
}
resp->setBody(nlohmann::json({{"result", "success"}, {"hash", std::to_string(hash)}}).dump());
resp->setBody(nlohmann::json({{"result", "success"}, {"hash", hash}}).dump());
resp->setContentTypeCode(drogon::CT_APPLICATION_JSON);
callback(resp);
}

View File

@@ -6,8 +6,7 @@ services:
ports:
- 7703:7703
restart: no
volumes:
- ${LOCALCONFIG}:/testing/sos_config.json:ro
command: ["/sos/sos", "/testing/sos_config.json"]
healthcheck:
test: ["CMD", "wget", "-qO-", "http://127.0.0.1:7703/status"]
interval: 1s

View File

@@ -80,7 +80,7 @@ ${SCRIPT_DIR}/../build.sh
# Use static test configuration with known tokens for Docker testing
title "Setting up test configuration"
# Use the static Docker config with known hashes
cp ${SCRIPT_DIR}/sos_config_docker.json ${SCRIPT_DIR}/sos_config.json
cp "${SCRIPT_DIR}/sos_config_docker.json" "${SCRIPT_DIR}/sos_config.json"
# Export the known plaintext tokens that correspond to the hashes in sos_config_docker.json
export TEST_TOKEN1="t570H7DmK2VBfCwUmtFaUXyzVklL90E1"
@@ -92,7 +92,9 @@ echo "Using static test configuration with known tokens"
#------------------------------------------------------------------------------------------------
# run the docker container
title "Running docker container"
export LOCALCONFIG="${SCRIPT_DIR}/sos_config.json"
# Config file is always in SCRIPT_DIR after the copy above
LOCALCONFIG="${SCRIPT_DIR}/sos_config.json"
[ -f "${LOCALCONFIG}" ] || die "Config file not found: ${LOCALCONFIG}"
[ -f "${COMPOSE_FILE}" ] || die "Compose file not found: ${COMPOSE_FILE}"
@@ -107,13 +109,16 @@ cd "${SCRIPT_DIR}"
docker stop sos-test 2>/dev/null || true
docker rm -v sos-test 2>/dev/null || true
# Start the container and mark that cleanup is needed
LOCALCONFIG=${LOCALCONFIG} docker compose \
-f "${COMPOSE_FILE}" up -d
# Start the container (without config volume mount)
docker compose -f "${COMPOSE_FILE}" up -d
# Mark that we need cleanup from this point on
CLEANUP_NEEDED=true
# Copy the config file into the running container
echo "Copying config file into container..."
docker cp "${LOCALCONFIG}" sos-test:/testing/sos_config.json
# wait until healthy.
if ! wait_for_container "sos-test"; then
echo "----------------------------------------"
@@ -123,9 +128,12 @@ if ! wait_for_container "sos-test"; then
die "Container sos-test is not healthy"
fi
# run the tests. Docker inside docker support!
docker exec -i sos-test ls /testing || true
# Pass the plaintext tokens as environment variables to the test script
# Verify the config and test files are accessible
echo "Verifying test environment..."
docker exec sos-test ls -la /testing/ || die "Cannot access /testing directory in container"
# Run the tests inside the container
echo "Running tests inside container..."
docker exec -i \
-e TEST_TOKEN1="${TEST_TOKEN1:-}" \
-e TEST_TOKEN2="${TEST_TOKEN2:-}" \

1
testing/test_file.txt Normal file
View File

@@ -0,0 +1 @@
test content