test: Add 1 and update 10 files
This commit is contained in:
@@ -10,7 +10,7 @@ A simple object storage system that stores files with metadata and provides a RE
|
||||
- Check if a file exists by hash or label:tag
|
||||
- Delete files by hash
|
||||
- List all stored objects
|
||||
- Automatic file deduplication using content hashing
|
||||
- Automatic file deduplication using SHA-256 content hashing
|
||||
- Support for large file uploads (up to 6GB)
|
||||
- High-performance HTTP server with async request handling
|
||||
- Configurable storage location and server settings
|
||||
|
@@ -29,15 +29,18 @@ This comprehensive security review analyzes the Simple Object Server C++23 appli
|
||||
- Removed all plaintext token support for enhanced security
|
||||
- **Documentation**: See README.md for token hashing instructions
|
||||
|
||||
### 3. **Weak Cryptographic Hash for Content**
|
||||
- **Location**: `src/hash.cpp:12-56`
|
||||
- **Risk**: HIGH - Using XXHash (non-cryptographic) for content identification
|
||||
- **Issue**: XXHash is designed for speed, not security - vulnerable to collision attacks
|
||||
- **Impact**: Potential for malicious file substitution through hash collisions
|
||||
- **Recommendation**:
|
||||
- Replace XXHash with SHA-256 or SHA-3 for content hashing
|
||||
- Use cryptographic hashes for security-critical operations
|
||||
- Keep XXHash only for non-security checksums if needed
|
||||
### 3. **~~Weak Cryptographic Hash for Content~~ [FIXED]**
|
||||
- **Location**: `src/hash.cpp`
|
||||
- **Risk**: ~~HIGH~~ RESOLVED - Now using SHA-256 for content identification
|
||||
- **Fix Implemented**:
|
||||
- Replaced XXHash with SHA-256 for all content hashing
|
||||
- Using OpenSSL's SHA-256 implementation for cryptographic security
|
||||
- All file hashes are now 256-bit SHA-256 hashes (64 hex characters)
|
||||
- Collision resistance: roughly 2^128 hash operations are needed for a 50% chance of finding a collision (birthday bound)
|
||||
- **Security Benefits**:
|
||||
- Cryptographically secure against intentional collisions
|
||||
- Industry-standard hash function
|
||||
- Prevents malicious file substitution attacks
|
||||
|
||||
## High-Risk Issues
|
||||
|
||||
|
@@ -14,17 +14,17 @@
|
||||
|
||||
namespace simple_object_storage {
|
||||
|
||||
// if the file is a tgz file (we can't rely on the extension), then unpack on disk and has the contents
|
||||
// if the file is a tgz file (we can't rely on the extension), then unpack on disk and hash the contents
|
||||
// with hash_directory_recursive in hash.hpp
|
||||
uint64_t get_hash_from_tgz(const std::string &file_path)
|
||||
std::string get_hash_from_tgz(const std::string &file_path)
|
||||
{
|
||||
// check if it's a gzip file
|
||||
std::ifstream file(file_path, std::ios::binary);
|
||||
if (!file) return 0;
|
||||
if (!file) return "";
|
||||
|
||||
int result = system("gunzip -t file.gz");
|
||||
if (result != 0) { // not a gzip file.
|
||||
return 0;
|
||||
return "";
|
||||
}
|
||||
|
||||
// gunzip the file to a new temporary directory
|
||||
@@ -35,10 +35,10 @@ uint64_t get_hash_from_tgz(const std::string &file_path)
|
||||
std::string command = "tar -zxzf " + file_path + " -C " + temp_dir;
|
||||
result = system(command.c_str()); // Basic tar extraction - requires 'tar' command
|
||||
if (result != 0) {
|
||||
return 0;
|
||||
return "";
|
||||
}
|
||||
|
||||
// hash the contents
|
||||
// hash the contents with SHA-256
|
||||
return hash_directory_recursive(temp_dir);
|
||||
}
|
||||
|
||||
|
@@ -1,8 +1,8 @@
|
||||
#include <string>
|
||||
#include <cstdint>
|
||||
|
||||
namespace simple_object_storage {
|
||||
|
||||
uint64_t get_hash_from_tgz(const std::string& file_path);
|
||||
// Returns SHA-256 hash of unpacked tgz contents, or empty string on error
|
||||
std::string get_hash_from_tgz(const std::string& file_path);
|
||||
|
||||
} // namespace simple_object_storage
|
@@ -209,18 +209,13 @@ bool Database::run_sql_text(const std::string& sql, const std::string& bind_text
|
||||
}
|
||||
|
||||
|
||||
bool is_dec_uint64(const std::string& s) {
|
||||
if (s.empty()) return false;
|
||||
bool is_sha256_hash(const std::string& s) {
|
||||
// SHA-256 hashes are 64 hex characters
|
||||
if (s.length() != 64) return false;
|
||||
for (char c : s) {
|
||||
if (!std::isdigit(static_cast<unsigned char>(c))) return false;
|
||||
}
|
||||
try {
|
||||
uint64_t x = std::stoull(s, nullptr, 10);
|
||||
std::string s2=std::to_string(x);
|
||||
return s2 == s;
|
||||
} catch (...) {
|
||||
return false;
|
||||
if (!std::isxdigit(static_cast<unsigned char>(c))) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Database::get(const std::string& hash_or_labeltag, dbEntry& entry) {
|
||||
@@ -230,7 +225,7 @@ bool Database::get(const std::string& hash_or_labeltag, dbEntry& entry) {
|
||||
return (run_sql_text("SELECT hash, labeltags, metadata FROM objects WHERE json_array_length(labeltags) > 0 AND EXISTS (SELECT 1 FROM json_each(labeltags) WHERE value = ?);", hash_or_labeltag, entry));
|
||||
}
|
||||
|
||||
if (is_dec_uint64(hash_or_labeltag)) {
|
||||
if (is_sha256_hash(hash_or_labeltag)) {
|
||||
if (run_sql_text("SELECT hash, labeltags, metadata FROM objects WHERE hash = ?;", hash_or_labeltag, entry))
|
||||
return true;
|
||||
}
|
||||
|
143
src/hash.cpp
143
src/hash.cpp
@@ -1,115 +1,140 @@
|
||||
#include "hash.hpp"
|
||||
|
||||
#define XXH_INLINE_ALL
|
||||
#include "xxhash.hpp"
|
||||
|
||||
#include <openssl/sha.h>
|
||||
#include <fstream>
|
||||
#include <filesystem>
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
#include <chrono>
|
||||
|
||||
namespace simple_object_storage {
|
||||
|
||||
uint64_t hash_file(const std::string &path) {
|
||||
// Create hash state
|
||||
XXH64_state_t* const state = XXH64_createState();
|
||||
if (state == nullptr) {
|
||||
std::cerr << "Failed to create hash state" << std::endl;
|
||||
return 0;
|
||||
// Convert binary hash to hex string
|
||||
static std::string to_hex_string(const unsigned char* hash, size_t length) {
|
||||
std::stringstream ss;
|
||||
ss << std::hex << std::setfill('0');
|
||||
for (size_t i = 0; i < length; ++i) {
|
||||
ss << std::setw(2) << static_cast<unsigned int>(hash[i]);
|
||||
}
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
// Initialize state with seed 0
|
||||
XXH64_hash_t const seed = 0; /* or any other value */
|
||||
if (XXH64_reset(state, seed) == XXH_ERROR) return 0;
|
||||
std::string hash_file(const std::string &path) {
|
||||
// Create SHA256 context
|
||||
SHA256_CTX sha256;
|
||||
if (!SHA256_Init(&sha256)) {
|
||||
std::cerr << "Failed to initialize SHA256" << std::endl;
|
||||
return "";
|
||||
}
|
||||
|
||||
// Open file
|
||||
std::ifstream file(path, std::ios::binary);
|
||||
if (!file.is_open()) {
|
||||
std::cerr << "Failed to open file: " << path << std::endl;
|
||||
XXH64_freeState(state);
|
||||
return 0;
|
||||
return "";
|
||||
}
|
||||
|
||||
// Read file in chunks and update hash
|
||||
const size_t buffer_size = 4096;
|
||||
char buffer[buffer_size];
|
||||
while (file.read(buffer, buffer_size)) {
|
||||
if (XXH64_update(state, buffer, file.gcount()) == XXH_ERROR) {
|
||||
const size_t buffer_size = 8192;
|
||||
std::vector<char> buffer(buffer_size);
|
||||
|
||||
while (file.read(buffer.data(), buffer_size)) {
|
||||
if (!SHA256_Update(&sha256, buffer.data(), file.gcount())) {
|
||||
std::cerr << "Failed to update hash" << std::endl;
|
||||
XXH64_freeState(state);
|
||||
return 0;
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
// Handle any remaining bytes
|
||||
if (file.gcount() > 0) {
|
||||
if (XXH64_update(state, buffer, file.gcount()) == XXH_ERROR) {
|
||||
if (!SHA256_Update(&sha256, buffer.data(), file.gcount())) {
|
||||
std::cerr << "Failed to update hash" << std::endl;
|
||||
XXH64_freeState(state);
|
||||
return 0;
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
// Get final hash
|
||||
XXH64_hash_t hash = XXH64_digest(state);
|
||||
XXH64_freeState(state);
|
||||
return hash;
|
||||
}
|
||||
file.close();
|
||||
|
||||
uint64_t hash_directory_recursive(const std::string &path) {
|
||||
// Create hash state
|
||||
XXH64_state_t* const state = XXH64_createState();
|
||||
if (state == nullptr) {
|
||||
std::cerr << "Failed to create hash state" << std::endl;
|
||||
return 0;
|
||||
// Get final hash
|
||||
unsigned char hash[SHA256_DIGEST_LENGTH];
|
||||
if (!SHA256_Final(hash, &sha256)) {
|
||||
std::cerr << "Failed to finalize hash" << std::endl;
|
||||
return "";
|
||||
}
|
||||
|
||||
// Initialize state with seed 0
|
||||
XXH64_hash_t const seed = 0; /* or any other value */
|
||||
if (XXH64_reset(state, seed) == XXH_ERROR) {
|
||||
std::cerr << "Failed to reset hash state" << std::endl;
|
||||
XXH64_freeState(state);
|
||||
return 0;
|
||||
// Convert to hex string
|
||||
return to_hex_string(hash, SHA256_DIGEST_LENGTH);
|
||||
}
|
||||
|
||||
std::string hash_directory_recursive(const std::string &path) {
|
||||
// Create SHA256 context for combining hashes
|
||||
SHA256_CTX sha256;
|
||||
if (!SHA256_Init(&sha256)) {
|
||||
std::cerr << "Failed to initialize SHA256" << std::endl;
|
||||
return "";
|
||||
}
|
||||
|
||||
try {
|
||||
// Iterate through all files in directory recursively
|
||||
// Collect all file paths and sort them for consistent hashing
|
||||
std::vector<std::filesystem::path> file_paths;
|
||||
for (const auto& entry : std::filesystem::recursive_directory_iterator(path)) {
|
||||
if (entry.is_regular_file()) {
|
||||
// Get file hash
|
||||
XXH64_hash_t file_hash = hash_file(entry.path().string());
|
||||
XXH64_update(state, &file_hash, sizeof(file_hash));
|
||||
file_paths.push_back(entry.path());
|
||||
}
|
||||
}
|
||||
|
||||
// Sort paths for deterministic hashing
|
||||
std::sort(file_paths.begin(), file_paths.end());
|
||||
|
||||
// Hash each file and combine
|
||||
for (const auto& file_path : file_paths) {
|
||||
// Get file hash
|
||||
std::string file_hash_str = hash_file(file_path.string());
|
||||
if (file_hash_str.empty()) {
|
||||
std::cerr << "Failed to hash file: " << file_path << std::endl;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Update combined hash with file hash and path
|
||||
std::string relative_path = std::filesystem::relative(file_path, path).string();
|
||||
SHA256_Update(&sha256, relative_path.c_str(), relative_path.length());
|
||||
SHA256_Update(&sha256, file_hash_str.c_str(), file_hash_str.length());
|
||||
}
|
||||
} catch (const std::filesystem::filesystem_error& e) {
|
||||
std::cerr << "Filesystem error: " << e.what() << std::endl;
|
||||
XXH64_freeState(state);
|
||||
return 0;
|
||||
return "";
|
||||
}
|
||||
|
||||
// Get final hash
|
||||
XXH64_hash_t hash = XXH64_digest(state);
|
||||
XXH64_freeState(state);
|
||||
return hash;
|
||||
// Get final combined hash
|
||||
unsigned char hash[SHA256_DIGEST_LENGTH];
|
||||
if (!SHA256_Final(hash, &sha256)) {
|
||||
std::cerr << "Failed to finalize hash" << std::endl;
|
||||
return "";
|
||||
}
|
||||
|
||||
// Convert to hex string
|
||||
return to_hex_string(hash, SHA256_DIGEST_LENGTH);
|
||||
}
|
||||
|
||||
void hash_demo(const std::string & path)
|
||||
{
|
||||
void hash_demo(const std::string & path) {
|
||||
std::cout << "Hashing directory: " << path << std::endl;
|
||||
auto start = std::chrono::high_resolution_clock::now();
|
||||
XXH64_hash_t hash = hash_directory_recursive(path);
|
||||
std::string hash = hash_directory_recursive(path);
|
||||
auto end = std::chrono::high_resolution_clock::now();
|
||||
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
|
||||
std::cout << "Hash: " << hash << " (took " << duration.count() << "ms)" << std::endl;
|
||||
std::cout << "SHA-256 Hash: " << hash << " (took " << duration.count() << "ms)" << std::endl;
|
||||
}
|
||||
|
||||
int hash_demo_raw(const std::string & path)
|
||||
{
|
||||
int hash_demo_raw(const std::string & path) {
|
||||
if (!std::filesystem::exists(path)) {
|
||||
std::cout << 0 <<std::endl; return 1;
|
||||
std::cout << "0" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
XXH64_hash_t hash = hash_directory_recursive(path);
|
||||
std::string hash = hash_directory_recursive(path);
|
||||
std::cout << hash << std::endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
} // namespace simple_object_storage
|
||||
} // namespace simple_object_storage
|
@@ -2,13 +2,14 @@
|
||||
#define HASH_HPP
|
||||
|
||||
#include <string>
|
||||
#include <cstdint>
|
||||
|
||||
namespace simple_object_storage {
|
||||
|
||||
uint64_t hash_file(const std::string &path);
|
||||
// Compute SHA-256 hash of a file, returns hex string
|
||||
std::string hash_file(const std::string &path);
|
||||
|
||||
uint64_t hash_directory_recursive(const std::string &path);
|
||||
// Compute combined SHA-256 hash of all files in directory recursively
|
||||
std::string hash_directory_recursive(const std::string &path);
|
||||
|
||||
void hash_demo(const std::string & path);
|
||||
|
||||
|
@@ -196,9 +196,9 @@ void PutHandler::handle_upload_object(const drogon::HttpRequestPtr& req, std::fu
|
||||
// Ensure the temporary file is removed even if errors occur
|
||||
ScopeFileDeleter temp_file_deleter(temp_path);
|
||||
|
||||
// Calculate hash
|
||||
uint64_t hash = hash_file(temp_path.string());
|
||||
if (hash == 0) {
|
||||
// Calculate SHA-256 hash
|
||||
std::string hash = hash_file(temp_path.string());
|
||||
if (hash.empty()) {
|
||||
resp->setStatusCode(drogon::k500InternalServerError);
|
||||
nlohmann::json response = {{"result", "error"}, {"error", "Failed to calculate hash"}};
|
||||
resp->setBody(response.dump());
|
||||
@@ -215,8 +215,8 @@ void PutHandler::handle_upload_object(const drogon::HttpRequestPtr& req, std::fu
|
||||
metadata["tgz_content_hash"] = get_hash_from_tgz(temp_path.string());
|
||||
}
|
||||
|
||||
// Move file to final location
|
||||
std::filesystem::path final_path = server_.config_.object_store_path / std::to_string(hash);
|
||||
// Move file to final location (using SHA-256 hash as filename)
|
||||
std::filesystem::path final_path = server_.config_.object_store_path / hash;
|
||||
|
||||
if (!std::filesystem::exists(final_path)) {
|
||||
try {
|
||||
@@ -235,7 +235,7 @@ void PutHandler::handle_upload_object(const drogon::HttpRequestPtr& req, std::fu
|
||||
|
||||
// Update database index
|
||||
dbEntry entry;
|
||||
entry.hash = std::to_string(hash);
|
||||
entry.hash = hash;
|
||||
entry.labeltags = metadata["labeltags"].get<std::vector<std::string>>();
|
||||
entry.metadata = metadata;
|
||||
|
||||
@@ -250,7 +250,7 @@ void PutHandler::handle_upload_object(const drogon::HttpRequestPtr& req, std::fu
|
||||
return;
|
||||
}
|
||||
|
||||
resp->setBody(nlohmann::json({{"result", "success"}, {"hash", std::to_string(hash)}}).dump());
|
||||
resp->setBody(nlohmann::json({{"result", "success"}, {"hash", hash}}).dump());
|
||||
resp->setContentTypeCode(drogon::CT_APPLICATION_JSON);
|
||||
callback(resp);
|
||||
}
|
||||
|
@@ -6,8 +6,7 @@ services:
|
||||
ports:
|
||||
- 7703:7703
|
||||
restart: no
|
||||
volumes:
|
||||
- ${LOCALCONFIG}:/testing/sos_config.json:ro
|
||||
command: ["/sos/sos", "/testing/sos_config.json"]
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "-qO-", "http://127.0.0.1:7703/status"]
|
||||
interval: 1s
|
||||
|
@@ -80,7 +80,7 @@ ${SCRIPT_DIR}/../build.sh
|
||||
# Use static test configuration with known tokens for Docker testing
|
||||
title "Setting up test configuration"
|
||||
# Use the static Docker config with known hashes
|
||||
cp ${SCRIPT_DIR}/sos_config_docker.json ${SCRIPT_DIR}/sos_config.json
|
||||
cp "${SCRIPT_DIR}/sos_config_docker.json" "${SCRIPT_DIR}/sos_config.json"
|
||||
|
||||
# Export the known plaintext tokens that correspond to the hashes in sos_config_docker.json
|
||||
export TEST_TOKEN1="t570H7DmK2VBfCwUmtFaUXyzVklL90E1"
|
||||
@@ -92,7 +92,9 @@ echo "Using static test configuration with known tokens"
|
||||
#------------------------------------------------------------------------------------------------
|
||||
# run the docker container
|
||||
title "Running docker container"
|
||||
export LOCALCONFIG="${SCRIPT_DIR}/sos_config.json"
|
||||
|
||||
# Config file is always in SCRIPT_DIR after the copy above
|
||||
LOCALCONFIG="${SCRIPT_DIR}/sos_config.json"
|
||||
|
||||
[ -f "${LOCALCONFIG}" ] || die "Config file not found: ${LOCALCONFIG}"
|
||||
[ -f "${COMPOSE_FILE}" ] || die "Compose file not found: ${COMPOSE_FILE}"
|
||||
@@ -107,13 +109,16 @@ cd "${SCRIPT_DIR}"
|
||||
docker stop sos-test 2>/dev/null || true
|
||||
docker rm -v sos-test 2>/dev/null || true
|
||||
|
||||
# Start the container and mark that cleanup is needed
|
||||
LOCALCONFIG=${LOCALCONFIG} docker compose \
|
||||
-f "${COMPOSE_FILE}" up -d
|
||||
# Start the container (without config volume mount)
|
||||
docker compose -f "${COMPOSE_FILE}" up -d
|
||||
|
||||
# Mark that we need cleanup from this point on
|
||||
CLEANUP_NEEDED=true
|
||||
|
||||
# Copy the config file into the running container
|
||||
echo "Copying config file into container..."
|
||||
docker cp "${LOCALCONFIG}" sos-test:/testing/sos_config.json
|
||||
|
||||
# wait until healthy.
|
||||
if ! wait_for_container "sos-test"; then
|
||||
echo "----------------------------------------"
|
||||
@@ -123,9 +128,12 @@ if ! wait_for_container "sos-test"; then
|
||||
die "Container sos-test is not healthy"
|
||||
fi
|
||||
|
||||
# run the tests. Docker inside docker support!
|
||||
docker exec -i sos-test ls /testing || true
|
||||
# Pass the plaintext tokens as environment variables to the test script
|
||||
# Verify the config and test files are accessible
|
||||
echo "Verifying test environment..."
|
||||
docker exec sos-test ls -la /testing/ || die "Cannot access /testing directory in container"
|
||||
|
||||
# Run the tests inside the container
|
||||
echo "Running tests inside container..."
|
||||
docker exec -i \
|
||||
-e TEST_TOKEN1="${TEST_TOKEN1:-}" \
|
||||
-e TEST_TOKEN2="${TEST_TOKEN2:-}" \
|
||||
|
1
testing/test_file.txt
Normal file
1
testing/test_file.txt
Normal file
@@ -0,0 +1 @@
|
||||
test content
|
Reference in New Issue
Block a user