test: Add 1 and update 10 files
This commit is contained in:
@@ -14,17 +14,17 @@
|
||||
|
||||
namespace simple_object_storage {
|
||||
|
||||
// if the file is a tgz file (we can't rely on the extension), then unpack on disk and has the contents
|
||||
// if the file is a tgz file (we can't rely on the extension), then unpack on disk and hash the contents
|
||||
// with hash_directory_recursive in hash.hpp
|
||||
uint64_t get_hash_from_tgz(const std::string &file_path)
|
||||
std::string get_hash_from_tgz(const std::string &file_path)
|
||||
{
|
||||
// check if it's a gzip file
|
||||
std::ifstream file(file_path, std::ios::binary);
|
||||
if (!file) return 0;
|
||||
if (!file) return "";
|
||||
|
||||
int result = system("gunzip -t file.gz");
|
||||
if (result != 0) { // not a gzip file.
|
||||
return 0;
|
||||
return "";
|
||||
}
|
||||
|
||||
// gunzip the file to a new temporary directory
|
||||
@@ -35,10 +35,10 @@ uint64_t get_hash_from_tgz(const std::string &file_path)
|
||||
std::string command = "tar -zxzf " + file_path + " -C " + temp_dir;
|
||||
result = system(command.c_str()); // Basic tar extraction - requires 'tar' command
|
||||
if (result != 0) {
|
||||
return 0;
|
||||
return "";
|
||||
}
|
||||
|
||||
// hash the contents
|
||||
// hash the contents with SHA-256
|
||||
return hash_directory_recursive(temp_dir);
|
||||
}
|
||||
|
||||
|
@@ -1,8 +1,8 @@
|
||||
#include <string>
|
||||
#include <cstdint>
|
||||
|
||||
namespace simple_object_storage {
|
||||
|
||||
uint64_t get_hash_from_tgz(const std::string& file_path);
|
||||
// Returns SHA-256 hash of unpacked tgz contents, or empty string on error
|
||||
std::string get_hash_from_tgz(const std::string& file_path);
|
||||
|
||||
} // namespace simple_object_storage
|
@@ -209,18 +209,13 @@ bool Database::run_sql_text(const std::string& sql, const std::string& bind_text
|
||||
}
|
||||
|
||||
|
||||
bool is_dec_uint64(const std::string& s) {
|
||||
if (s.empty()) return false;
|
||||
bool is_sha256_hash(const std::string& s) {
|
||||
// SHA-256 hashes are 64 hex characters
|
||||
if (s.length() != 64) return false;
|
||||
for (char c : s) {
|
||||
if (!std::isdigit(static_cast<unsigned char>(c))) return false;
|
||||
}
|
||||
try {
|
||||
uint64_t x = std::stoull(s, nullptr, 10);
|
||||
std::string s2=std::to_string(x);
|
||||
return s2 == s;
|
||||
} catch (...) {
|
||||
return false;
|
||||
if (!std::isxdigit(static_cast<unsigned char>(c))) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Database::get(const std::string& hash_or_labeltag, dbEntry& entry) {
|
||||
@@ -230,7 +225,7 @@ bool Database::get(const std::string& hash_or_labeltag, dbEntry& entry) {
|
||||
return (run_sql_text("SELECT hash, labeltags, metadata FROM objects WHERE json_array_length(labeltags) > 0 AND EXISTS (SELECT 1 FROM json_each(labeltags) WHERE value = ?);", hash_or_labeltag, entry));
|
||||
}
|
||||
|
||||
if (is_dec_uint64(hash_or_labeltag)) {
|
||||
if (is_sha256_hash(hash_or_labeltag)) {
|
||||
if (run_sql_text("SELECT hash, labeltags, metadata FROM objects WHERE hash = ?;", hash_or_labeltag, entry))
|
||||
return true;
|
||||
}
|
||||
|
143
src/hash.cpp
143
src/hash.cpp
@@ -1,115 +1,140 @@
|
||||
#include "hash.hpp"
|
||||
|
||||
#define XXH_INLINE_ALL
|
||||
#include "xxhash.hpp"
|
||||
|
||||
#include <openssl/sha.h>
|
||||
#include <fstream>
|
||||
#include <filesystem>
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
#include <chrono>
|
||||
|
||||
namespace simple_object_storage {
|
||||
|
||||
uint64_t hash_file(const std::string &path) {
|
||||
// Create hash state
|
||||
XXH64_state_t* const state = XXH64_createState();
|
||||
if (state == nullptr) {
|
||||
std::cerr << "Failed to create hash state" << std::endl;
|
||||
return 0;
|
||||
// Convert binary hash to hex string
|
||||
static std::string to_hex_string(const unsigned char* hash, size_t length) {
|
||||
std::stringstream ss;
|
||||
ss << std::hex << std::setfill('0');
|
||||
for (size_t i = 0; i < length; ++i) {
|
||||
ss << std::setw(2) << static_cast<unsigned int>(hash[i]);
|
||||
}
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
// Initialize state with seed 0
|
||||
XXH64_hash_t const seed = 0; /* or any other value */
|
||||
if (XXH64_reset(state, seed) == XXH_ERROR) return 0;
|
||||
std::string hash_file(const std::string &path) {
|
||||
// Create SHA256 context
|
||||
SHA256_CTX sha256;
|
||||
if (!SHA256_Init(&sha256)) {
|
||||
std::cerr << "Failed to initialize SHA256" << std::endl;
|
||||
return "";
|
||||
}
|
||||
|
||||
// Open file
|
||||
std::ifstream file(path, std::ios::binary);
|
||||
if (!file.is_open()) {
|
||||
std::cerr << "Failed to open file: " << path << std::endl;
|
||||
XXH64_freeState(state);
|
||||
return 0;
|
||||
return "";
|
||||
}
|
||||
|
||||
// Read file in chunks and update hash
|
||||
const size_t buffer_size = 4096;
|
||||
char buffer[buffer_size];
|
||||
while (file.read(buffer, buffer_size)) {
|
||||
if (XXH64_update(state, buffer, file.gcount()) == XXH_ERROR) {
|
||||
const size_t buffer_size = 8192;
|
||||
std::vector<char> buffer(buffer_size);
|
||||
|
||||
while (file.read(buffer.data(), buffer_size)) {
|
||||
if (!SHA256_Update(&sha256, buffer.data(), file.gcount())) {
|
||||
std::cerr << "Failed to update hash" << std::endl;
|
||||
XXH64_freeState(state);
|
||||
return 0;
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
// Handle any remaining bytes
|
||||
if (file.gcount() > 0) {
|
||||
if (XXH64_update(state, buffer, file.gcount()) == XXH_ERROR) {
|
||||
if (!SHA256_Update(&sha256, buffer.data(), file.gcount())) {
|
||||
std::cerr << "Failed to update hash" << std::endl;
|
||||
XXH64_freeState(state);
|
||||
return 0;
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
// Get final hash
|
||||
XXH64_hash_t hash = XXH64_digest(state);
|
||||
XXH64_freeState(state);
|
||||
return hash;
|
||||
}
|
||||
file.close();
|
||||
|
||||
uint64_t hash_directory_recursive(const std::string &path) {
|
||||
// Create hash state
|
||||
XXH64_state_t* const state = XXH64_createState();
|
||||
if (state == nullptr) {
|
||||
std::cerr << "Failed to create hash state" << std::endl;
|
||||
return 0;
|
||||
// Get final hash
|
||||
unsigned char hash[SHA256_DIGEST_LENGTH];
|
||||
if (!SHA256_Final(hash, &sha256)) {
|
||||
std::cerr << "Failed to finalize hash" << std::endl;
|
||||
return "";
|
||||
}
|
||||
|
||||
// Initialize state with seed 0
|
||||
XXH64_hash_t const seed = 0; /* or any other value */
|
||||
if (XXH64_reset(state, seed) == XXH_ERROR) {
|
||||
std::cerr << "Failed to reset hash state" << std::endl;
|
||||
XXH64_freeState(state);
|
||||
return 0;
|
||||
// Convert to hex string
|
||||
return to_hex_string(hash, SHA256_DIGEST_LENGTH);
|
||||
}
|
||||
|
||||
std::string hash_directory_recursive(const std::string &path) {
|
||||
// Create SHA256 context for combining hashes
|
||||
SHA256_CTX sha256;
|
||||
if (!SHA256_Init(&sha256)) {
|
||||
std::cerr << "Failed to initialize SHA256" << std::endl;
|
||||
return "";
|
||||
}
|
||||
|
||||
try {
|
||||
// Iterate through all files in directory recursively
|
||||
// Collect all file paths and sort them for consistent hashing
|
||||
std::vector<std::filesystem::path> file_paths;
|
||||
for (const auto& entry : std::filesystem::recursive_directory_iterator(path)) {
|
||||
if (entry.is_regular_file()) {
|
||||
// Get file hash
|
||||
XXH64_hash_t file_hash = hash_file(entry.path().string());
|
||||
XXH64_update(state, &file_hash, sizeof(file_hash));
|
||||
file_paths.push_back(entry.path());
|
||||
}
|
||||
}
|
||||
|
||||
// Sort paths for deterministic hashing
|
||||
std::sort(file_paths.begin(), file_paths.end());
|
||||
|
||||
// Hash each file and combine
|
||||
for (const auto& file_path : file_paths) {
|
||||
// Get file hash
|
||||
std::string file_hash_str = hash_file(file_path.string());
|
||||
if (file_hash_str.empty()) {
|
||||
std::cerr << "Failed to hash file: " << file_path << std::endl;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Update combined hash with file hash and path
|
||||
std::string relative_path = std::filesystem::relative(file_path, path).string();
|
||||
SHA256_Update(&sha256, relative_path.c_str(), relative_path.length());
|
||||
SHA256_Update(&sha256, file_hash_str.c_str(), file_hash_str.length());
|
||||
}
|
||||
} catch (const std::filesystem::filesystem_error& e) {
|
||||
std::cerr << "Filesystem error: " << e.what() << std::endl;
|
||||
XXH64_freeState(state);
|
||||
return 0;
|
||||
return "";
|
||||
}
|
||||
|
||||
// Get final hash
|
||||
XXH64_hash_t hash = XXH64_digest(state);
|
||||
XXH64_freeState(state);
|
||||
return hash;
|
||||
// Get final combined hash
|
||||
unsigned char hash[SHA256_DIGEST_LENGTH];
|
||||
if (!SHA256_Final(hash, &sha256)) {
|
||||
std::cerr << "Failed to finalize hash" << std::endl;
|
||||
return "";
|
||||
}
|
||||
|
||||
// Convert to hex string
|
||||
return to_hex_string(hash, SHA256_DIGEST_LENGTH);
|
||||
}
|
||||
|
||||
void hash_demo(const std::string & path)
|
||||
{
|
||||
void hash_demo(const std::string & path) {
|
||||
std::cout << "Hashing directory: " << path << std::endl;
|
||||
auto start = std::chrono::high_resolution_clock::now();
|
||||
XXH64_hash_t hash = hash_directory_recursive(path);
|
||||
std::string hash = hash_directory_recursive(path);
|
||||
auto end = std::chrono::high_resolution_clock::now();
|
||||
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
|
||||
std::cout << "Hash: " << hash << " (took " << duration.count() << "ms)" << std::endl;
|
||||
std::cout << "SHA-256 Hash: " << hash << " (took " << duration.count() << "ms)" << std::endl;
|
||||
}
|
||||
|
||||
int hash_demo_raw(const std::string & path)
|
||||
{
|
||||
int hash_demo_raw(const std::string & path) {
|
||||
if (!std::filesystem::exists(path)) {
|
||||
std::cout << 0 <<std::endl; return 1;
|
||||
std::cout << "0" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
XXH64_hash_t hash = hash_directory_recursive(path);
|
||||
std::string hash = hash_directory_recursive(path);
|
||||
std::cout << hash << std::endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
} // namespace simple_object_storage
|
||||
} // namespace simple_object_storage
|
@@ -2,13 +2,14 @@
|
||||
#define HASH_HPP
|
||||
|
||||
#include <string>
|
||||
#include <cstdint>
|
||||
|
||||
namespace simple_object_storage {
|
||||
|
||||
uint64_t hash_file(const std::string &path);
|
||||
// Compute SHA-256 hash of a file, returns hex string
|
||||
std::string hash_file(const std::string &path);
|
||||
|
||||
uint64_t hash_directory_recursive(const std::string &path);
|
||||
// Compute combined SHA-256 hash of all files in directory recursively
|
||||
std::string hash_directory_recursive(const std::string &path);
|
||||
|
||||
void hash_demo(const std::string & path);
|
||||
|
||||
|
@@ -196,9 +196,9 @@ void PutHandler::handle_upload_object(const drogon::HttpRequestPtr& req, std::fu
|
||||
// Ensure the temporary file is removed even if errors occur
|
||||
ScopeFileDeleter temp_file_deleter(temp_path);
|
||||
|
||||
// Calculate hash
|
||||
uint64_t hash = hash_file(temp_path.string());
|
||||
if (hash == 0) {
|
||||
// Calculate SHA-256 hash
|
||||
std::string hash = hash_file(temp_path.string());
|
||||
if (hash.empty()) {
|
||||
resp->setStatusCode(drogon::k500InternalServerError);
|
||||
nlohmann::json response = {{"result", "error"}, {"error", "Failed to calculate hash"}};
|
||||
resp->setBody(response.dump());
|
||||
@@ -215,8 +215,8 @@ void PutHandler::handle_upload_object(const drogon::HttpRequestPtr& req, std::fu
|
||||
metadata["tgz_content_hash"] = get_hash_from_tgz(temp_path.string());
|
||||
}
|
||||
|
||||
// Move file to final location
|
||||
std::filesystem::path final_path = server_.config_.object_store_path / std::to_string(hash);
|
||||
// Move file to final location (using SHA-256 hash as filename)
|
||||
std::filesystem::path final_path = server_.config_.object_store_path / hash;
|
||||
|
||||
if (!std::filesystem::exists(final_path)) {
|
||||
try {
|
||||
@@ -235,7 +235,7 @@ void PutHandler::handle_upload_object(const drogon::HttpRequestPtr& req, std::fu
|
||||
|
||||
// Update database index
|
||||
dbEntry entry;
|
||||
entry.hash = std::to_string(hash);
|
||||
entry.hash = hash;
|
||||
entry.labeltags = metadata["labeltags"].get<std::vector<std::string>>();
|
||||
entry.metadata = metadata;
|
||||
|
||||
@@ -250,7 +250,7 @@ void PutHandler::handle_upload_object(const drogon::HttpRequestPtr& req, std::fu
|
||||
return;
|
||||
}
|
||||
|
||||
resp->setBody(nlohmann::json({{"result", "success"}, {"hash", std::to_string(hash)}}).dump());
|
||||
resp->setBody(nlohmann::json({{"result", "success"}, {"hash", hash}}).dump());
|
||||
resp->setContentTypeCode(drogon::CT_APPLICATION_JSON);
|
||||
callback(resp);
|
||||
}
|
||||
|
Reference in New Issue
Block a user