test: Add 1 and update 10 files
All checks were successful
Build-Test-Publish / build (linux/amd64) (push) Successful in 1m26s
Build-Test-Publish / build (linux/arm64) (push) Successful in 2m14s
Build-Test-Publish / create-manifest (push) Successful in 13s

This commit is contained in:
Your Name
2025-08-10 22:33:56 +12:00
parent 8ab6028597
commit 623879f67a
11 changed files with 140 additions and 108 deletions

View File

@@ -14,17 +14,17 @@
namespace simple_object_storage {
// if the file is a tgz file (we can't rely on the extension), then unpack on disk and has the contents
// if the file is a tgz file (we can't rely on the extension), then unpack on disk and hash the contents
// with hash_directory_recursive in hash.hpp
uint64_t get_hash_from_tgz(const std::string &file_path)
std::string get_hash_from_tgz(const std::string &file_path)
{
// check if it's a gzip file
std::ifstream file(file_path, std::ios::binary);
if (!file) return 0;
if (!file) return "";
int result = system("gunzip -t file.gz");
if (result != 0) { // not a gzip file.
return 0;
return "";
}
// gunzip the file to a new temporary directory
@@ -35,10 +35,10 @@ uint64_t get_hash_from_tgz(const std::string &file_path)
std::string command = "tar -zxzf " + file_path + " -C " + temp_dir;
result = system(command.c_str()); // Basic tar extraction - requires 'tar' command
if (result != 0) {
return 0;
return "";
}
// hash the contents
// hash the contents with SHA-256
return hash_directory_recursive(temp_dir);
}

View File

@@ -1,8 +1,8 @@
#include <string>
#include <cstdint>
namespace simple_object_storage {
uint64_t get_hash_from_tgz(const std::string& file_path);
// Returns SHA-256 hash of unpacked tgz contents, or empty string on error
std::string get_hash_from_tgz(const std::string& file_path);
} // namespace simple_object_storage

View File

@@ -209,18 +209,13 @@ bool Database::run_sql_text(const std::string& sql, const std::string& bind_text
}
bool is_dec_uint64(const std::string& s) {
if (s.empty()) return false;
bool is_sha256_hash(const std::string& s) {
// SHA-256 hashes are 64 hex characters
if (s.length() != 64) return false;
for (char c : s) {
if (!std::isdigit(static_cast<unsigned char>(c))) return false;
}
try {
uint64_t x = std::stoull(s, nullptr, 10);
std::string s2=std::to_string(x);
return s2 == s;
} catch (...) {
return false;
if (!std::isxdigit(static_cast<unsigned char>(c))) return false;
}
return true;
}
bool Database::get(const std::string& hash_or_labeltag, dbEntry& entry) {
@@ -230,7 +225,7 @@ bool Database::get(const std::string& hash_or_labeltag, dbEntry& entry) {
return (run_sql_text("SELECT hash, labeltags, metadata FROM objects WHERE json_array_length(labeltags) > 0 AND EXISTS (SELECT 1 FROM json_each(labeltags) WHERE value = ?);", hash_or_labeltag, entry));
}
if (is_dec_uint64(hash_or_labeltag)) {
if (is_sha256_hash(hash_or_labeltag)) {
if (run_sql_text("SELECT hash, labeltags, metadata FROM objects WHERE hash = ?;", hash_or_labeltag, entry))
return true;
}

View File

@@ -1,115 +1,140 @@
#include "hash.hpp"
#define XXH_INLINE_ALL
#include "xxhash.hpp"
#include <openssl/sha.h>
#include <fstream>
#include <filesystem>
#include <iostream>
#include <iomanip>
#include <sstream>
#include <vector>
#include <chrono>
namespace simple_object_storage {
uint64_t hash_file(const std::string &path) {
// Create hash state
XXH64_state_t* const state = XXH64_createState();
if (state == nullptr) {
std::cerr << "Failed to create hash state" << std::endl;
return 0;
// Render `length` raw bytes starting at `hash` as a lowercase hexadecimal
// string, emitting exactly two characters per input byte (high nibble first).
// A zero `length` yields an empty string.
static std::string to_hex_string(const unsigned char* hash, size_t length) {
static const char kHexDigits[] = "0123456789abcdef";

std::string hex;
hex.reserve(length * 2);
for (size_t idx = 0; idx != length; ++idx) {
const unsigned char byte = hash[idx];
hex.push_back(kHexDigits[byte >> 4]);
hex.push_back(kHexDigits[byte & 0x0F]);
}
return hex;
}
// Initialize state with seed 0
XXH64_hash_t const seed = 0; /* or any other value */
if (XXH64_reset(state, seed) == XXH_ERROR) return 0;
std::string hash_file(const std::string &path) {
// Create SHA256 context
SHA256_CTX sha256;
if (!SHA256_Init(&sha256)) {
std::cerr << "Failed to initialize SHA256" << std::endl;
return "";
}
// Open file
std::ifstream file(path, std::ios::binary);
if (!file.is_open()) {
std::cerr << "Failed to open file: " << path << std::endl;
XXH64_freeState(state);
return 0;
return "";
}
// Read file in chunks and update hash
const size_t buffer_size = 4096;
char buffer[buffer_size];
while (file.read(buffer, buffer_size)) {
if (XXH64_update(state, buffer, file.gcount()) == XXH_ERROR) {
const size_t buffer_size = 8192;
std::vector<char> buffer(buffer_size);
while (file.read(buffer.data(), buffer_size)) {
if (!SHA256_Update(&sha256, buffer.data(), file.gcount())) {
std::cerr << "Failed to update hash" << std::endl;
XXH64_freeState(state);
return 0;
return "";
}
}
// Handle any remaining bytes
if (file.gcount() > 0) {
if (XXH64_update(state, buffer, file.gcount()) == XXH_ERROR) {
if (!SHA256_Update(&sha256, buffer.data(), file.gcount())) {
std::cerr << "Failed to update hash" << std::endl;
XXH64_freeState(state);
return 0;
return "";
}
}
// Get final hash
XXH64_hash_t hash = XXH64_digest(state);
XXH64_freeState(state);
return hash;
}
file.close();
uint64_t hash_directory_recursive(const std::string &path) {
// Create hash state
XXH64_state_t* const state = XXH64_createState();
if (state == nullptr) {
std::cerr << "Failed to create hash state" << std::endl;
return 0;
// Get final hash
unsigned char hash[SHA256_DIGEST_LENGTH];
if (!SHA256_Final(hash, &sha256)) {
std::cerr << "Failed to finalize hash" << std::endl;
return "";
}
// Initialize state with seed 0
XXH64_hash_t const seed = 0; /* or any other value */
if (XXH64_reset(state, seed) == XXH_ERROR) {
std::cerr << "Failed to reset hash state" << std::endl;
XXH64_freeState(state);
return 0;
// Convert to hex string
return to_hex_string(hash, SHA256_DIGEST_LENGTH);
}
std::string hash_directory_recursive(const std::string &path) {
// Create SHA256 context for combining hashes
SHA256_CTX sha256;
if (!SHA256_Init(&sha256)) {
std::cerr << "Failed to initialize SHA256" << std::endl;
return "";
}
try {
// Iterate through all files in directory recursively
// Collect all file paths and sort them for consistent hashing
std::vector<std::filesystem::path> file_paths;
for (const auto& entry : std::filesystem::recursive_directory_iterator(path)) {
if (entry.is_regular_file()) {
// Get file hash
XXH64_hash_t file_hash = hash_file(entry.path().string());
XXH64_update(state, &file_hash, sizeof(file_hash));
file_paths.push_back(entry.path());
}
}
// Sort paths for deterministic hashing
std::sort(file_paths.begin(), file_paths.end());
// Hash each file and combine
for (const auto& file_path : file_paths) {
// Get file hash
std::string file_hash_str = hash_file(file_path.string());
if (file_hash_str.empty()) {
std::cerr << "Failed to hash file: " << file_path << std::endl;
continue;
}
// Update combined hash with file hash and path
std::string relative_path = std::filesystem::relative(file_path, path).string();
SHA256_Update(&sha256, relative_path.c_str(), relative_path.length());
SHA256_Update(&sha256, file_hash_str.c_str(), file_hash_str.length());
}
} catch (const std::filesystem::filesystem_error& e) {
std::cerr << "Filesystem error: " << e.what() << std::endl;
XXH64_freeState(state);
return 0;
return "";
}
// Get final hash
XXH64_hash_t hash = XXH64_digest(state);
XXH64_freeState(state);
return hash;
// Get final combined hash
unsigned char hash[SHA256_DIGEST_LENGTH];
if (!SHA256_Final(hash, &sha256)) {
std::cerr << "Failed to finalize hash" << std::endl;
return "";
}
// Convert to hex string
return to_hex_string(hash, SHA256_DIGEST_LENGTH);
}
void hash_demo(const std::string & path)
{
void hash_demo(const std::string & path) {
std::cout << "Hashing directory: " << path << std::endl;
auto start = std::chrono::high_resolution_clock::now();
XXH64_hash_t hash = hash_directory_recursive(path);
std::string hash = hash_directory_recursive(path);
auto end = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
std::cout << "Hash: " << hash << " (took " << duration.count() << "ms)" << std::endl;
std::cout << "SHA-256 Hash: " << hash << " (took " << duration.count() << "ms)" << std::endl;
}
int hash_demo_raw(const std::string & path)
{
int hash_demo_raw(const std::string & path) {
if (!std::filesystem::exists(path)) {
std::cout << 0 <<std::endl; return 1;
std::cout << "0" << std::endl;
return 1;
}
XXH64_hash_t hash = hash_directory_recursive(path);
std::string hash = hash_directory_recursive(path);
std::cout << hash << std::endl;
return 0;
}
} // namespace simple_object_storage
} // namespace simple_object_storage

View File

@@ -2,13 +2,14 @@
#define HASH_HPP
#include <string>
#include <cstdint>
namespace simple_object_storage {
uint64_t hash_file(const std::string &path);
// Compute SHA-256 hash of a file, returns hex string
std::string hash_file(const std::string &path);
uint64_t hash_directory_recursive(const std::string &path);
// Compute combined SHA-256 hash of all files in directory recursively
std::string hash_directory_recursive(const std::string &path);
void hash_demo(const std::string & path);

View File

@@ -196,9 +196,9 @@ void PutHandler::handle_upload_object(const drogon::HttpRequestPtr& req, std::fu
// Ensure the temporary file is removed even if errors occur
ScopeFileDeleter temp_file_deleter(temp_path);
// Calculate hash
uint64_t hash = hash_file(temp_path.string());
if (hash == 0) {
// Calculate SHA-256 hash
std::string hash = hash_file(temp_path.string());
if (hash.empty()) {
resp->setStatusCode(drogon::k500InternalServerError);
nlohmann::json response = {{"result", "error"}, {"error", "Failed to calculate hash"}};
resp->setBody(response.dump());
@@ -215,8 +215,8 @@ void PutHandler::handle_upload_object(const drogon::HttpRequestPtr& req, std::fu
metadata["tgz_content_hash"] = get_hash_from_tgz(temp_path.string());
}
// Move file to final location
std::filesystem::path final_path = server_.config_.object_store_path / std::to_string(hash);
// Move file to final location (using SHA-256 hash as filename)
std::filesystem::path final_path = server_.config_.object_store_path / hash;
if (!std::filesystem::exists(final_path)) {
try {
@@ -235,7 +235,7 @@ void PutHandler::handle_upload_object(const drogon::HttpRequestPtr& req, std::fu
// Update database index
dbEntry entry;
entry.hash = std::to_string(hash);
entry.hash = hash;
entry.labeltags = metadata["labeltags"].get<std::vector<std::string>>();
entry.metadata = metadata;
@@ -250,7 +250,7 @@ void PutHandler::handle_upload_object(const drogon::HttpRequestPtr& req, std::fu
return;
}
resp->setBody(nlohmann::json({{"result", "success"}, {"hash", std::to_string(hash)}}).dump());
resp->setBody(nlohmann::json({{"result", "success"}, {"hash", hash}}).dump());
resp->setContentTypeCode(drogon::CT_APPLICATION_JSON);
callback(resp);
}