Add metadata!

This commit is contained in:
Your Name
2025-05-03 10:14:16 +12:00
parent 16754a48d4
commit 24a4c66c13
8 changed files with 283 additions and 149 deletions

View File

@@ -5,19 +5,17 @@ set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# Find required packages
find_package(Threads REQUIRED)
find_package(SQLite3 REQUIRED)
find_package(ZLIB REQUIRED)
# Find all source files in src directory
file(GLOB_RECURSE SOURCES
"src/*.cpp"
"hash.cpp"
)
# Find all header files in src directory
file(GLOB_RECURSE HEADERS
"src/*.hpp"
"hash.hpp"
)
# Add include directories
@@ -31,8 +29,8 @@ add_executable(simple_object_storage ${SOURCES})
# Link libraries
target_link_libraries(simple_object_storage
Threads::Threads
SQLite::SQLite3
ZLIB::ZLIB
)
# Install target

View File

@@ -10,13 +10,15 @@ Read access is public.
Write access is controlled by tokens.
- Objects are accessed via a label and tag, or via their hash. For example:
- `wget http://dtr.jde.nz/object/squashkiwi:latest`
- `wget http://dtr.jde.nz/object/4528400792837739857`
- `wget http://localhost:8123/object/squashkiwi:latest`
- `wget http://localhost:8123/object/4528400792837739857`
- The hash is calculated using `uint64_t hash_file(const std::string &path);` in hash.hpp.
- You can retrieve the hash for a given label and tag with, e.g.:
- `curl http://dtr.jde.nz/hash/squashkiwi:latest`
- `curl http://localhost:8123/hash/squashkiwi:latest`
- you can get a full list of {label:tag,hash} entries (one tag per entry) with:
- `curl http://dtr.jde.nz/dir`
- `curl http://localhost:8123/dir`
- get all metadata for a tag:
- `curl http://localhost:8123/meta/squashkiwi:latest`
- a simple welcome page is served at `/index.html` for those browsing to the site.
- to upload a file (via http put)
- `curl -T object_file http://dtr.jde.nz/WRITE_TOKEN/LABEL:TAG`

83
src/compress.cpp Normal file
View File

@@ -0,0 +1,83 @@
#include "compress.hpp"

#include <chrono>
#include <cstdint>
#include <cstdlib>
#include <filesystem>
#include <fstream>
#include <iostream> // For error reporting in TempDirectory destructor
#include <iterator>
#include <limits>
#include <random>
#include <stdexcept>
#include <string>
#include <vector>

#include "hash.hpp"
#include "temp_directory.hpp"
namespace simple_object_storage {
std::string decompress_gzip(const std::string& file_path) {
std::ifstream file(file_path, std::ios::binary);
if (!file) return {};
std::vector<char> compressed((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
if (compressed.size() < 2) return {};
// Skip gzip header (10 bytes)
size_t pos = 10;
if (compressed.size() <= pos) return {};
// Prepare zlib stream
z_stream strm = {};
strm.next_in = reinterpret_cast<Bytef*>(compressed.data() + pos);
strm.avail_in = compressed.size() - pos;
if (inflateInit2(&strm, 16 + MAX_WBITS) != Z_OK) return {};
std::string out;
char buffer[4096];
int ret;
do {
strm.next_out = reinterpret_cast<Bytef*>(buffer);
strm.avail_out = sizeof(buffer);
ret = inflate(&strm, Z_NO_FLUSH);
if (ret == Z_STREAM_ERROR || ret == Z_DATA_ERROR || ret == Z_MEM_ERROR) {
inflateEnd(&strm);
return {};
}
out.append(buffer, sizeof(buffer) - strm.avail_out);
} while (ret != Z_STREAM_END);
inflateEnd(&strm);
return out;
}
// Wraps `arg` in single quotes so a POSIX shell treats it as exactly one word.
// Embedded single quotes are emitted as '\'' (close quote, escaped quote, reopen).
static std::string shell_quote(const std::string& arg) {
    std::string quoted = "'";
    for (const char c : arg) {
        if (c == '\'') {
            quoted += "'\\''";
        } else {
            quoted += c;
        }
    }
    quoted += '\'';
    return quoted;
}

// If the file is a tgz file (we can't rely on the extension), unpack it on disk
// and hash the contents with hash_directory_recursive from hash.hpp.
//
// Returns 0 when the file cannot be opened, does not start with the gzip magic
// bytes (0x1F 0x8B), or when the external `tar` command fails; otherwise returns
// the value produced by hash_directory_recursive for the extracted tree.
// Exceptions thrown by TempDirectory's constructor propagate to the caller.
// Requires a `tar` executable reachable through the shell used by std::system.
uint64_t get_hash_from_tgz(const std::string &file_path)
{
    // Check for the gzip magic number. The bytes are compared as unsigned
    // values: with a plain (possibly signed) char, 0x8B would read back as a
    // negative number and never compare equal.
    {
        std::ifstream file(file_path, std::ios::binary);
        if (!file) return 0;

        unsigned char magic[2] = {0, 0};
        file.read(reinterpret_cast<char*>(magic), sizeof(magic));
        if (file.gcount() != static_cast<std::streamsize>(sizeof(magic))) return 0;
        if (magic[0] != 0x1F || magic[1] != 0x8B) return 0;
    } // stream closed before tar opens the file

    // Extract into a fresh temporary directory; it is removed again when
    // temp_dir_manager goes out of scope.
    TempDirectory temp_dir_manager("tgz_unpack_");
    const std::string temp_dir = temp_dir_manager.string();

    // Both paths are quoted so spaces or shell metacharacters in them cannot
    // split the arguments or inject additional commands.
    const std::string command =
        "tar -xzf " + shell_quote(file_path) + " -C " + shell_quote(temp_dir);
    const int result = std::system(command.c_str());
    if (result != 0) {
        std::cerr << "Error unpacking tgz file: " << file_path << std::endl;
        return 0;
    }

    // hash the contents
    return hash_directory_recursive(temp_dir);
}
} // namespace simple_object_storage

11
src/compress.hpp Normal file
View File

@@ -0,0 +1,11 @@
#ifndef COMPRESS_HPP
#define COMPRESS_HPP

#include <zlib.h>
#include <string>
#include <cstdint>

namespace simple_object_storage {

// Reads the gzip file at `file_path` and returns the decompressed bytes.
// Returns an empty string if the file cannot be opened or decompression fails.
std::string decompress_gzip(const std::string& file_path);

// If `file_path` starts with the gzip magic bytes, unpacks it with `tar` into a
// temporary directory and returns the result of hash_directory_recursive on the
// extracted contents. Returns 0 if the file is not gzip or unpacking fails.
uint64_t get_hash_from_tgz(const std::string& file_path);

} // namespace simple_object_storage

#endif // COMPRESS_HPP

View File

@@ -1,5 +1,3 @@
#include "server.hpp"
#include "hash.hpp"
#include <filesystem>
#include <iostream>
#include <sstream>
@@ -11,6 +9,12 @@
#include <sqlite3.h> // Include SQLite
#include <stdexcept> // For std::runtime_error
#include "server.hpp"
#include "hash.hpp"
#include "compress.hpp"
namespace simple_object_storage {
// Simple RAII helper for file deletion
@@ -46,36 +50,18 @@ void execute_sql(sqlite3* db, const char* sql, const std::string& error_msg_pref
}
bool Server::init_db() {
db_path_ = config_.object_store_path / "index.db";
int rc = sqlite3_open(db_path_.c_str(), &db_);
if (rc != SQLITE_OK) {
std::cerr << "Failed to open/create SQLite database '" << db_path_ << "': " << sqlite3_errmsg(db_) << std::endl;
db_ = nullptr; // Ensure db_ is null if open failed
return false;
}
try {
// Enable WAL mode for better concurrency
execute_sql(db_, "PRAGMA journal_mode=WAL;", "Failed to set WAL mode");
// Create table if it doesn't exist
const char* create_table_sql =
"CREATE TABLE IF NOT EXISTS objects ("
"label_tag TEXT PRIMARY KEY UNIQUE NOT NULL, "
"hash TEXT NOT NULL);";
execute_sql(db_, create_table_sql, "Failed to create objects table");
std::filesystem::path db_path = config_.object_store_path / "index.db";
db_ = std::make_unique<Database>(db_path);
return true;
} catch (const std::runtime_error& e) {
std::cerr << "Database initialization error: " << e.what() << std::endl;
sqlite3_close(db_);
db_ = nullptr;
return false;
}
return true;
}
Server::Server(const ServerConfig& config)
: config_(config), running_(false), db_(nullptr) {
: config_(config), running_(false) {
// Ensure object store directory exists
try {
std::filesystem::create_directories(config_.object_store_path);
@@ -93,10 +79,6 @@ Server::Server(const ServerConfig& config)
Server::~Server() {
stop();
if (db_) {
sqlite3_close(db_); // Close the database connection
db_ = nullptr;
}
}
bool Server::start() {
@@ -151,9 +133,14 @@ void Server::setup_routes() {
});
// Upload object
server_.Put("/([^/]+)/(.*)", [this](const httplib::Request& req, httplib::Response& res) { // Adjusted regex slightly for label:tag
server_.Put("/([^/]+)/(.*)", [this](const httplib::Request& req, httplib::Response& res) {
handle_put_object(req, res);
});
// Get metadata for label:tag
server_.Get("/meta/(.*)", [this](const httplib::Request& req, httplib::Response& res) {
handle_get_metadata(req, res);
});
}
void Server::handle_get_object(const httplib::Request& req, httplib::Response& res) {
@@ -170,51 +157,23 @@ void Server::handle_get_object(const httplib::Request& req, httplib::Response& r
}
if (!is_hash_lookup) {
// Lookup by label:tag in the SQLite database
sqlite3_stmt* stmt = nullptr;
const char* sql = "SELECT hash FROM objects WHERE label_tag = ?;";
int rc = sqlite3_prepare_v2(db_, sql, -1, &stmt, nullptr);
if (rc != SQLITE_OK) {
std::cerr << "Failed to prepare statement (get hash): " << sqlite3_errmsg(db_) << std::endl;
res.status = 500;
res.set_content("Database error preparing statement", "text/plain");
return;
}
sqlite3_bind_text(stmt, 1, key.c_str(), -1, SQLITE_STATIC);
rc = sqlite3_step(stmt);
if (rc == SQLITE_ROW) {
const unsigned char* text = sqlite3_column_text(stmt, 0);
if (text) {
hash_str = reinterpret_cast<const char*>(text);
}
} else if (rc == SQLITE_DONE) {
// Not found
sqlite3_finalize(stmt);
// Lookup by label:tag in the database
dbEntry entry;
if (!db_->get(key, entry)) {
res.status = 404;
res.set_content("Object not found (label:tag)", "text/plain");
return;
} else {
std::cerr << "Failed to execute statement (get hash): " << sqlite3_errmsg(db_) << std::endl;
sqlite3_finalize(stmt);
res.status = 500;
res.set_content("Database error executing statement", "text/plain");
return;
}
sqlite3_finalize(stmt);
hash_str = entry.hash;
} else {
// Lookup directly by hash
hash_str = key;
}
if (hash_str.empty()) {
// Should have been caught earlier if not found, but as a safeguard
res.status = 404;
res.set_content("Object hash could not be determined", "text/plain");
return;
res.status = 404;
res.set_content("Object hash could not be determined", "text/plain");
return;
}
// Construct the file path using the hash string
@@ -233,68 +192,30 @@ void Server::handle_get_object(const httplib::Request& req, httplib::Response& r
void Server::handle_get_hash(const httplib::Request& req, httplib::Response& res) {
const auto& label_tag = req.matches[1].str();
sqlite3_stmt* stmt = nullptr;
const char* sql = "SELECT hash FROM objects WHERE label_tag = ?;";
int rc = sqlite3_prepare_v2(db_, sql, -1, &stmt, nullptr);
if (rc != SQLITE_OK) {
std::cerr << "Failed to prepare statement (get hash direct): " << sqlite3_errmsg(db_) << std::endl;
res.status = 500;
res.set_content("Database error preparing statement", "text/plain");
dbEntry entry;
if (!db_->get(label_tag, entry)) {
res.status = 404;
res.set_content("Label:tag not found", "text/plain");
return;
}
sqlite3_bind_text(stmt, 1, label_tag.c_str(), -1, SQLITE_STATIC);
rc = sqlite3_step(stmt);
if (rc == SQLITE_ROW) {
const unsigned char* text = sqlite3_column_text(stmt, 0);
if (text) {
res.set_content(reinterpret_cast<const char*>(text), "text/plain");
}
} else if (rc == SQLITE_DONE) {
res.status = 404;
res.set_content("Label:tag not found", "text/plain");
} else {
std::cerr << "Failed to execute statement (get hash direct): " << sqlite3_errmsg(db_) << std::endl;
res.status = 500;
res.set_content("Database error executing statement", "text/plain");
}
sqlite3_finalize(stmt);
res.set_content(entry.hash, "text/plain");
}
void Server::handle_get_directory(const httplib::Request& /*req*/, httplib::Response& res) {
std::stringstream ss;
sqlite3_stmt* stmt = nullptr;
const char* sql = "SELECT label_tag, hash FROM objects;";
int rc = sqlite3_prepare_v2(db_, sql, -1, &stmt, nullptr);
if (rc != SQLITE_OK) {
std::cerr << "Failed to prepare statement (get dir): " << sqlite3_errmsg(db_) << std::endl;
std::vector<dbEntry> entries;
if (!db_->list(entries)) {
res.status = 500;
res.set_content("Database error preparing statement", "text/plain");
res.set_content("Database error retrieving directory", "text/plain");
return;
}
while ((rc = sqlite3_step(stmt)) == SQLITE_ROW) {
const unsigned char* label_tag_text = sqlite3_column_text(stmt, 0);
const unsigned char* hash_text = sqlite3_column_text(stmt, 1);
if (label_tag_text && hash_text) {
ss << reinterpret_cast<const char*>(label_tag_text) << ","
<< reinterpret_cast<const char*>(hash_text) << "\n";
}
for (const auto& entry : entries) {
ss << entry.label_tag << "," << entry.hash << "\n";
}
if (rc != SQLITE_DONE) {
std::cerr << "Failed to execute/iterate statement (get dir): " << sqlite3_errmsg(db_) << std::endl;
// Don't overwrite potential results, but log error
if (ss.str().empty()) { // Only send error if no data was retrieved
res.status = 500;
res.set_content("Database error executing statement", "text/plain");
}
}
sqlite3_finalize(stmt);
res.set_content(ss.str(), "text/plain");
}
@@ -345,6 +266,8 @@ void Server::handle_put_object(const httplib::Request& req, httplib::Response& r
return;
}
nlohmann::json metadata = get_metadata(temp_path.string());
// Move file to final location
std::string hash_str = std::to_string(hash);
std::filesystem::path final_path = config_.object_store_path / hash_str;
@@ -360,27 +283,13 @@ void Server::handle_put_object(const httplib::Request& req, httplib::Response& r
}
}
// Update SQLite index (INSERT OR REPLACE)
sqlite3_stmt* stmt = nullptr;
const char* sql = "INSERT OR REPLACE INTO objects (label_tag, hash) VALUES (?, ?);";
int rc = sqlite3_prepare_v2(db_, sql, -1, &stmt, nullptr);
if (rc != SQLITE_OK) {
std::cerr << "Failed to prepare statement (put object): " << sqlite3_errmsg(db_) << std::endl;
res.status = 500;
res.set_content("Database error preparing statement", "text/plain");
// Attempt to clean up the moved file if index fails
try { if (std::filesystem::exists(final_path)) std::filesystem::remove(final_path); } catch(...) {};
return;
}
// Update database index
dbEntry entry;
entry.label_tag = label_tag;
entry.hash = hash_str;
entry.metadata = nlohmann::json::object(); // Empty metadata for now
sqlite3_bind_text(stmt, 1, label_tag.c_str(), -1, SQLITE_STATIC);
sqlite3_bind_text(stmt, 2, hash_str.c_str(), -1, SQLITE_STATIC);
rc = sqlite3_step(stmt);
sqlite3_finalize(stmt);
if (rc != SQLITE_DONE) {
std::cerr << "Failed to execute statement (put object): " << sqlite3_errmsg(db_) << std::endl;
if (!db_->insert(entry)) {
res.status = 500;
res.set_content("Failed to update database index", "text/plain");
// Attempt to clean up the moved file if index fails
@@ -391,6 +300,25 @@ void Server::handle_put_object(const httplib::Request& req, httplib::Response& r
res.set_content(hash_str, "text/plain");
}
// GET /meta/<label:tag> handler.
// Looks the captured label:tag up via db_->get(); responds 404 (text/plain)
// when the lookup reports failure, otherwise responds with the entry's
// metadata serialized as JSON. A JSON serialization error is logged to stderr
// and answered with a 500.
void Server::handle_get_metadata(const httplib::Request& req, httplib::Response& res) {
    const auto& label_tag = req.matches[1].str();

    dbEntry record;
    const bool found = db_->get(label_tag, record);
    if (!found) {
        res.status = 404;
        res.set_content("Metadata not found for label:tag: " + label_tag, "text/plain");
        return;
    }

    try {
        const auto body = record.metadata.dump();
        res.set_content(body, "application/json");
    } catch (const nlohmann::json::exception& e) {
        std::cerr << "Error serializing metadata for " << label_tag << ": " << e.what() << std::endl;
        res.status = 500;
        res.set_content("Internal server error: Failed to serialize metadata", "text/plain");
    }
}
// Returns true when `token` compares equal to one of the entries in
// config_.write_tokens, false otherwise (including when the list is empty).
bool Server::validate_write_token(const std::string& token) const {
    for (const auto& allowed : config_.write_tokens) {
        if (allowed == token) {
            return true;
        }
    }
    return false;
}
@@ -403,4 +331,24 @@ std::pair<std::string, std::string> Server::parse_label_tag(const std::string& l
return {label_tag.substr(0, colon_pos), label_tag.substr(colon_pos + 1)};
}
// Builds the metadata JSON object for the file at `file_path`:
//   "file_size"              - size reported by std::filesystem::file_size
//   "file_modification_time" - last write time converted to a time_t
//   "tgz_content_hash"       - result of get_hash_from_tgz for the file
// Filesystem errors from file_size / last_write_time propagate as exceptions.
nlohmann::json Server::get_metadata(const std::string &file_path) const
{
    namespace fs = std::filesystem;
    using std::chrono::system_clock;

    nlohmann::json result;

    result["file_size"] = fs::file_size(file_path);

    // file_time_type uses its own clock; re-base the timestamp onto
    // system_clock by measuring its offset from "now" on both clocks.
    const auto write_time = fs::last_write_time(file_path);
    const auto offset_from_now = write_time - fs::file_time_type::clock::now();
    const auto system_time = std::chrono::time_point_cast<system_clock::duration>(
        offset_from_now + system_clock::now());
    result["file_modification_time"] = system_clock::to_time_t(system_time);

    result["tgz_content_hash"] = get_hash_from_tgz(file_path);

    return result;
}
} // namespace simple_object_storage

View File

@@ -3,13 +3,13 @@
#include "config.hpp"
#include "httplib.hpp"
// #include "litecask.hpp" // Removed litecask
#include "database.hpp"
#include <string>
#include <memory>
#include <thread>
#include <atomic>
#include <filesystem>
#include <sqlite3.h> // Include SQLite header
#include <json.hpp>
namespace simple_object_storage {
@@ -25,20 +25,19 @@ private:
void setup_routes();
void handle_get_object(const httplib::Request& req, httplib::Response& res);
void handle_get_hash(const httplib::Request& req, httplib::Response& res);
void handle_get_directory(const httplib::Request& req, httplib::Response& res); // Re-add directory handler
void handle_get_directory(const httplib::Request& req, httplib::Response& res);
void handle_put_object(const httplib::Request& req, httplib::Response& res);
void handle_get_metadata(const httplib::Request& req, httplib::Response& res);
bool validate_write_token(const std::string& token) const;
std::pair<std::string, std::string> parse_label_tag(const std::string& label_tag) const;
nlohmann::json get_metadata(const std::string& file_path) const;
bool init_db(); // Helper for DB initialization
bool init_db();
const ServerConfig& config_;
httplib::Server server_;
// Removed litecask members
sqlite3* db_ = nullptr; // SQLite database connection
std::filesystem::path db_path_;
std::unique_ptr<Database> db_;
std::atomic<bool> running_;
// Removed _isInitialized - will rely on db_ pointer
};
} // namespace simple_object_storage

63
src/temp_directory.cpp Normal file
View File

@@ -0,0 +1,63 @@
#include "temp_directory.hpp"
#include <filesystem>
#include <string>
#include <random>
#include <chrono>
#include <stdexcept>
#include <iostream> // For error reporting in destructor
namespace simple_object_storage {
// Creates a new directory named `<prefix><random number>` inside the system
// temporary directory and stores its path in path_.
//
// Throws std::runtime_error if the directory cannot be created, or if every
// attempted name already exists. Errors from temp_directory_path() propagate
// unchanged.
TempDirectory::TempDirectory(const std::string& prefix) {
    const auto temp_dir_base = std::filesystem::temp_directory_path();

    // Seed from std::random_device as well as the clock, so two objects
    // constructed within the same clock tick do not generate the same names.
    std::random_device entropy;
    const uint64_t seed =
        (static_cast<uint64_t>(entropy()) << 32) ^ static_cast<uint64_t>(entropy()) ^
        static_cast<uint64_t>(std::chrono::high_resolution_clock::now().time_since_epoch().count());
    std::mt19937_64 rng(seed);
    std::uniform_int_distribution<uint64_t> dist;

    constexpr int kMaxAttempts = 5; // Avoid infinite loop in edge cases
    for (int attempt = 0; attempt < kMaxAttempts; ++attempt) {
        const std::filesystem::path candidate = temp_dir_base / (prefix + std::to_string(dist(rng)));

        // create_directory itself is the uniqueness test: it returns true only
        // if this call created the directory. A separate exists() check first
        // would leave a window for another process to create the same path,
        // and the destructor must only ever remove a directory made here.
        std::error_code ec;
        if (std::filesystem::create_directory(candidate, ec)) {
            path_ = candidate;
            return;
        }
        if (ec) {
            throw std::runtime_error("Filesystem error creating temporary directory: " +
                                     candidate.string() + " - " + ec.message());
        }
        // No error but nothing created: the name is already taken, try another.
    }

    throw std::runtime_error("Failed to find unique temporary directory path after multiple retries.");
}
// Removes the directory at path_ and everything beneath it, if it still
// exists. Never throws: failures are reported on stderr instead.
TempDirectory::~TempDirectory() {
    try {
        if (!std::filesystem::exists(path_)) {
            return; // nothing left to clean up
        }

        // error_code overload so a failed removal cannot throw out of the destructor
        std::error_code remove_error;
        std::filesystem::remove_all(path_, remove_error);
        if (remove_error) {
            std::cerr << "Error removing temporary directory " << path_.string() << ": " << remove_error.message() << std::endl;
        }
    } catch (const std::exception& e) {
        // exists() uses the throwing overload; unlikely, but must not escape
        std::cerr << "Error during temporary directory cleanup for " << path_.string() << ": " << e.what() << std::endl;
    } catch (...) {
        std::cerr << "Unknown error during temporary directory cleanup for " << path_.string() << std::endl;
    }
}
const std::filesystem::path& TempDirectory::path() const {
return path_;
}
// Returns the stored directory path converted to a std::string (by value).
std::string TempDirectory::string() const {
    std::string as_text = path_.string();
    return as_text;
}
} // namespace simple_object_storage

30
src/temp_directory.hpp Normal file
View File

@@ -0,0 +1,30 @@
#ifndef TEMP_DIRECTORY_HPP
#define TEMP_DIRECTORY_HPP
#include <filesystem>
#include <string>
namespace simple_object_storage {
// RAII helper for temporary directory cleanup.
// The constructor creates a directory; the destructor removes it together with
// its contents.
class TempDirectory {
public:
    // Creates the directory; `prefix` is the leading part of its name.
    // Marked explicit so a std::string is never implicitly converted into a
    // TempDirectory (which would create a directory on disk as a side effect).
    explicit TempDirectory(const std::string& prefix = "temp_");
    ~TempDirectory();

    // Disable copy/move semantics for simplicity
    TempDirectory(const TempDirectory&) = delete;
    TempDirectory& operator=(const TempDirectory&) = delete;
    TempDirectory(TempDirectory&&) = delete;
    TempDirectory& operator=(TempDirectory&&) = delete;

    // Path of the managed directory.
    const std::filesystem::path& path() const;
    // Same path, converted to a std::string.
    std::string string() const;

private:
    std::filesystem::path path_;
};
} // namespace simple_object_storage
#endif // TEMP_DIRECTORY_HPP