diff --git a/CMakeLists.txt b/CMakeLists.txt index 9cbd042..46c46d6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,19 +5,17 @@ set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) # Find required packages -find_package(Threads REQUIRED) find_package(SQLite3 REQUIRED) +find_package(ZLIB REQUIRED) # Find all source files in src directory file(GLOB_RECURSE SOURCES "src/*.cpp" - "hash.cpp" ) # Find all header files in src directory file(GLOB_RECURSE HEADERS "src/*.hpp" - "hash.hpp" ) # Add include directories @@ -31,8 +29,8 @@ add_executable(simple_object_storage ${SOURCES}) # Link libraries target_link_libraries(simple_object_storage - Threads::Threads SQLite::SQLite3 + ZLIB::ZLIB ) # Install target diff --git a/README.md b/README.md index 6e50cb0..0a335bf 100644 --- a/README.md +++ b/README.md @@ -10,13 +10,15 @@ Read access is public. Write access is controlled by tokens. - Objects are access via a label and tag, or via their hash. For example: - - `wget http://dtr.jde.nz/object/squashkiwi:latest` - - `wget http://dtr.jde.nz/object/4528400792837739857` + - `wget http://localhost:8123/object/squashkiwi:latest` + - `wget http://localhost:8123/object/4528400792837739857` - The hash is calculated using `uint64_t hash_file(const std::string &path);` in hash.hpp. - You can retrieve the hash for a given labvel and tag with, e.g.: - - `curl http://dtr.jde.nz/hash/squashkiwi:latest` + - `curl http://localhost:8123/hash/squashkiwi:latest` - you can get a full list of {label:tag,hash} entries (one tag per entry) with: - - `curl http://dtr.jde.nz/dir` + - `curl http://localhost:8123/dir` +- get all metadata for a tag: + - `curl http://localhost:8123/meta/squashkiwi:latest` - a simple welcome page is served at `/index.html` for those browsing to the site. - to upload a file (via http put) - `curl -T object_file http://dtr.jde.nz/WRITE_TOKEN/LABEL:TAG` diff --git a/src/compress.cpp b/src/compress.cpp new file mode 100644 index 0000000..1fb704a --- /dev/null +++ b/src/compress.cpp @@ -0,0 +1,83 @@ +#include +#include +#include +#include +#include +#include +#include +#include // For error reporting in TempDirectory destructor + +#include "compress.hpp" +#include "hash.hpp" +#include "temp_directory.hpp" + + +namespace simple_object_storage { + +std::string decompress_gzip(const std::string& file_path) { + std::ifstream file(file_path, std::ios::binary); + if (!file) return {}; + + std::vector compressed((std::istreambuf_iterator(file)), std::istreambuf_iterator()); + if (compressed.size() < 2) return {}; + + // Skip gzip header (10 bytes) + size_t pos = 10; + if (compressed.size() <= pos) return {}; + + // Prepare zlib stream + z_stream strm = {}; + strm.next_in = reinterpret_cast(compressed.data() + pos); + strm.avail_in = compressed.size() - pos; + + if (inflateInit2(&strm, 16 + MAX_WBITS) != Z_OK) return {}; + + std::string out; + char buffer[4096]; + int ret; + do { + strm.next_out = reinterpret_cast(buffer); + strm.avail_out = sizeof(buffer); + ret = inflate(&strm, Z_NO_FLUSH); + if (ret == Z_STREAM_ERROR || ret == Z_DATA_ERROR || ret == Z_MEM_ERROR) { + inflateEnd(&strm); + return {}; + } + out.append(buffer, sizeof(buffer) - strm.avail_out); + } while (ret != Z_STREAM_END); + + inflateEnd(&strm); + return out; +} + +// if the file is a tgz file (we can't rely on the extension), then unpack on disk and has the contents +// with hash_directory_recursive in hash.hpp +uint64_t get_hash_from_tgz(const std::string &file_path) +{ + // check if it's a gzip file + std::ifstream file(file_path, std::ios::binary); + if (!file) return 0; + + char buffer[2]; + file.read(buffer, 2); + if (buffer[0] != 0x1F || buffer[1] != 0x8B) return 0; + + // gunzip the file to a new temporary directory + TempDirectory temp_dir_manager("tgz_unpack_"); // Creates dir and schedules cleanup + std::string temp_dir = temp_dir_manager.string(); // Get the path string to use + + std::string decompressed = decompress_gzip(file_path); + + // unpack the file on disk + std::string command = "tar -xzf " + file_path + " -C " + temp_dir; + int result = system(command.c_str()); // Basic tar extraction - requires 'tar' command + if (result != 0) { + std::cerr << "Error unpacking tgz file: " << file_path << std::endl; + return 0; + } + + // hash the contents + return hash_directory_recursive(temp_dir); +} + +} // namespace simple_object_storage \ No newline at end of file diff --git a/src/compress.hpp b/src/compress.hpp new file mode 100644 index 0000000..67fcc8f --- /dev/null +++ b/src/compress.hpp @@ -0,0 +1,11 @@ +#include +#include +#include + +namespace simple_object_storage { + +std::string decompress_gzip(const std::string& file_path); + +uint64_t get_hash_from_tgz(const std::string& file_path); + +} // namespace simple_object_storage \ No newline at end of file diff --git a/src/server.cpp b/src/server.cpp index 3e5367a..154181d 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -1,5 +1,3 @@ -#include "server.hpp" -#include "hash.hpp" #include #include #include @@ -11,6 +9,12 @@ #include // Include SQLite #include // For std::runtime_error + +#include "server.hpp" +#include "hash.hpp" +#include "compress.hpp" + + namespace simple_object_storage { // Simple RAII helper for file deletion @@ -46,36 +50,18 @@ void execute_sql(sqlite3* db, const char* sql, const std::string& error_msg_pref } bool Server::init_db() { - db_path_ = config_.object_store_path / "index.db"; - int rc = sqlite3_open(db_path_.c_str(), &db_); - if (rc != SQLITE_OK) { - std::cerr << "Failed to open/create SQLite database '" << db_path_ << "': " << sqlite3_errmsg(db_) << std::endl; - db_ = nullptr; // Ensure db_ is null if open failed - return false; - } - try { - // Enable WAL mode for better concurrency - execute_sql(db_, "PRAGMA journal_mode=WAL;", "Failed to set WAL mode"); - - // Create table if it doesn't exist - const char* create_table_sql = - "CREATE TABLE IF NOT EXISTS objects (" - "label_tag TEXT PRIMARY KEY UNIQUE NOT NULL, " - "hash TEXT NOT NULL);"; - execute_sql(db_, create_table_sql, "Failed to create objects table"); - + std::filesystem::path db_path = config_.object_store_path / "index.db"; + db_ = std::make_unique(db_path); + return true; } catch (const std::runtime_error& e) { std::cerr << "Database initialization error: " << e.what() << std::endl; - sqlite3_close(db_); - db_ = nullptr; return false; } - return true; } Server::Server(const ServerConfig& config) - : config_(config), running_(false), db_(nullptr) { + : config_(config), running_(false) { // Ensure object store directory exists try { std::filesystem::create_directories(config_.object_store_path); @@ -93,10 +79,6 @@ Server::Server(const ServerConfig& config) Server::~Server() { stop(); - if (db_) { - sqlite3_close(db_); // Close the database connection - db_ = nullptr; - } } bool Server::start() { @@ -151,9 +133,14 @@ void Server::setup_routes() { }); // Upload object - server_.Put("/([^/]+)/(.*)", [this](const httplib::Request& req, httplib::Response& res) { // Adjusted regex slightly for label:tag + server_.Put("/([^/]+)/(.*)", [this](const httplib::Request& req, httplib::Response& res) { handle_put_object(req, res); }); + + // Get metadata for label:tag + server_.Get("/meta/(.*)", [this](const httplib::Request& req, httplib::Response& res) { + handle_get_metadata(req, res); + }); } void Server::handle_get_object(const httplib::Request& req, httplib::Response& res) { @@ -170,51 +157,23 @@ void Server::handle_get_object(const httplib::Request& req, httplib::Response& r } if (!is_hash_lookup) { - // Lookup by label:tag in the SQLite database - sqlite3_stmt* stmt = nullptr; - const char* sql = "SELECT hash FROM objects WHERE label_tag = ?;"; - int rc = sqlite3_prepare_v2(db_, sql, -1, &stmt, nullptr); - - if (rc != SQLITE_OK) { - std::cerr << "Failed to prepare statement (get hash): " << sqlite3_errmsg(db_) << std::endl; - res.status = 500; - res.set_content("Database error preparing statement", "text/plain"); - return; - } - - sqlite3_bind_text(stmt, 1, key.c_str(), -1, SQLITE_STATIC); - - rc = sqlite3_step(stmt); - if (rc == SQLITE_ROW) { - const unsigned char* text = sqlite3_column_text(stmt, 0); - if (text) { - hash_str = reinterpret_cast(text); - } - } else if (rc == SQLITE_DONE) { - // Not found - sqlite3_finalize(stmt); + // Lookup by label:tag in the database + dbEntry entry; + if (!db_->get(key, entry)) { res.status = 404; res.set_content("Object not found (label:tag)", "text/plain"); return; - } else { - std::cerr << "Failed to execute statement (get hash): " << sqlite3_errmsg(db_) << std::endl; - sqlite3_finalize(stmt); - res.status = 500; - res.set_content("Database error executing statement", "text/plain"); - return; } - sqlite3_finalize(stmt); - + hash_str = entry.hash; } else { // Lookup directly by hash hash_str = key; } if (hash_str.empty()) { - // Should have been caught earlier if not found, but as a safeguard - res.status = 404; - res.set_content("Object hash could not be determined", "text/plain"); - return; + res.status = 404; + res.set_content("Object hash could not be determined", "text/plain"); + return; } // Construct the file path using the hash string @@ -233,68 +192,30 @@ void Server::handle_get_object(const httplib::Request& req, httplib::Response& r void Server::handle_get_hash(const httplib::Request& req, httplib::Response& res) { const auto& label_tag = req.matches[1].str(); - sqlite3_stmt* stmt = nullptr; - const char* sql = "SELECT hash FROM objects WHERE label_tag = ?;"; - int rc = sqlite3_prepare_v2(db_, sql, -1, &stmt, nullptr); - - if (rc != SQLITE_OK) { - std::cerr << "Failed to prepare statement (get hash direct): " << sqlite3_errmsg(db_) << std::endl; - res.status = 500; - res.set_content("Database error preparing statement", "text/plain"); + dbEntry entry; + if (!db_->get(label_tag, entry)) { + res.status = 404; + res.set_content("Label:tag not found", "text/plain"); return; } - sqlite3_bind_text(stmt, 1, label_tag.c_str(), -1, SQLITE_STATIC); - - rc = sqlite3_step(stmt); - if (rc == SQLITE_ROW) { - const unsigned char* text = sqlite3_column_text(stmt, 0); - if (text) { - res.set_content(reinterpret_cast(text), "text/plain"); - } - } else if (rc == SQLITE_DONE) { - res.status = 404; - res.set_content("Label:tag not found", "text/plain"); - } else { - std::cerr << "Failed to execute statement (get hash direct): " << sqlite3_errmsg(db_) << std::endl; - res.status = 500; - res.set_content("Database error executing statement", "text/plain"); - } - sqlite3_finalize(stmt); + res.set_content(entry.hash, "text/plain"); } void Server::handle_get_directory(const httplib::Request& /*req*/, httplib::Response& res) { std::stringstream ss; - sqlite3_stmt* stmt = nullptr; - const char* sql = "SELECT label_tag, hash FROM objects;"; - int rc = sqlite3_prepare_v2(db_, sql, -1, &stmt, nullptr); - - if (rc != SQLITE_OK) { - std::cerr << "Failed to prepare statement (get dir): " << sqlite3_errmsg(db_) << std::endl; + std::vector entries; + + if (!db_->list(entries)) { res.status = 500; - res.set_content("Database error preparing statement", "text/plain"); + res.set_content("Database error retrieving directory", "text/plain"); return; } - while ((rc = sqlite3_step(stmt)) == SQLITE_ROW) { - const unsigned char* label_tag_text = sqlite3_column_text(stmt, 0); - const unsigned char* hash_text = sqlite3_column_text(stmt, 1); - if (label_tag_text && hash_text) { - ss << reinterpret_cast(label_tag_text) << "," - << reinterpret_cast(hash_text) << "\n"; - } + for (const auto& entry : entries) { + ss << entry.label_tag << "," << entry.hash << "\n"; } - if (rc != SQLITE_DONE) { - std::cerr << "Failed to execute/iterate statement (get dir): " << sqlite3_errmsg(db_) << std::endl; - // Don't overwrite potential results, but log error - if (ss.str().empty()) { // Only send error if no data was retrieved - res.status = 500; - res.set_content("Database error executing statement", "text/plain"); - } - } - - sqlite3_finalize(stmt); res.set_content(ss.str(), "text/plain"); } @@ -345,6 +266,8 @@ void Server::handle_put_object(const httplib::Request& req, httplib::Response& r return; } + nlohmann::json metadata = get_metadata(temp_path.string()); + // Move file to final location std::string hash_str = std::to_string(hash); std::filesystem::path final_path = config_.object_store_path / hash_str; @@ -360,27 +283,13 @@ void Server::handle_put_object(const httplib::Request& req, httplib::Response& r } } - // Update SQLite index (INSERT OR REPLACE) - sqlite3_stmt* stmt = nullptr; - const char* sql = "INSERT OR REPLACE INTO objects (label_tag, hash) VALUES (?, ?);"; - int rc = sqlite3_prepare_v2(db_, sql, -1, &stmt, nullptr); - if (rc != SQLITE_OK) { - std::cerr << "Failed to prepare statement (put object): " << sqlite3_errmsg(db_) << std::endl; - res.status = 500; - res.set_content("Database error preparing statement", "text/plain"); - // Attempt to clean up the moved file if index fails - try { if (std::filesystem::exists(final_path)) std::filesystem::remove(final_path); } catch(...) {}; - return; - } + // Update database index + dbEntry entry; + entry.label_tag = label_tag; + entry.hash = hash_str; + entry.metadata = nlohmann::json::object(); // Empty metadata for now - sqlite3_bind_text(stmt, 1, label_tag.c_str(), -1, SQLITE_STATIC); - sqlite3_bind_text(stmt, 2, hash_str.c_str(), -1, SQLITE_STATIC); - - rc = sqlite3_step(stmt); - sqlite3_finalize(stmt); - - if (rc != SQLITE_DONE) { - std::cerr << "Failed to execute statement (put object): " << sqlite3_errmsg(db_) << std::endl; + if (!db_->insert(entry)) { res.status = 500; res.set_content("Failed to update database index", "text/plain"); // Attempt to clean up the moved file if index fails @@ -391,6 +300,25 @@ void Server::handle_put_object(const httplib::Request& req, httplib::Response& r res.set_content(hash_str, "text/plain"); } +void Server::handle_get_metadata(const httplib::Request& req, httplib::Response& res) { + const auto& label_tag = req.matches[1].str(); + + dbEntry entry; + if (!db_->get(label_tag, entry)) { + res.status = 404; + res.set_content("Metadata not found for label:tag: " + label_tag, "text/plain"); + return; + } + + try { + res.set_content(entry.metadata.dump(), "application/json"); + } catch (const nlohmann::json::exception& e) { + std::cerr << "Error serializing metadata for " << label_tag << ": " << e.what() << std::endl; + res.status = 500; + res.set_content("Internal server error: Failed to serialize metadata", "text/plain"); + } +} + bool Server::validate_write_token(const std::string& token) const { return std::find(config_.write_tokens.begin(), config_.write_tokens.end(), token) != config_.write_tokens.end(); } @@ -403,4 +331,24 @@ std::pair Server::parse_label_tag(const std::string& l return {label_tag.substr(0, colon_pos), label_tag.substr(colon_pos + 1)}; } +nlohmann::json Server::get_metadata(const std::string &file_path) const +{ + nlohmann::json metadata; + + // get the file size + metadata["file_size"] = std::filesystem::file_size(file_path); + + // get the file modification time + auto ftime = std::filesystem::last_write_time(file_path); + auto sctp = std::chrono::time_point_cast( + ftime - std::filesystem::file_time_type::clock::now() + + std::chrono::system_clock::now() + ); + metadata["file_modification_time"] = std::chrono::system_clock::to_time_t(sctp); + + metadata["tgz_content_hash"] = get_hash_from_tgz(file_path); + + return metadata; +} + } // namespace simple_object_storage \ No newline at end of file diff --git a/src/server.hpp b/src/server.hpp index cede583..e68b0fe 100644 --- a/src/server.hpp +++ b/src/server.hpp @@ -3,13 +3,13 @@ #include "config.hpp" #include "httplib.hpp" -// #include "litecask.hpp" // Removed litecask +#include "database.hpp" #include #include #include #include #include -#include // Include SQLite header +#include namespace simple_object_storage { @@ -25,20 +25,19 @@ private: void setup_routes(); void handle_get_object(const httplib::Request& req, httplib::Response& res); void handle_get_hash(const httplib::Request& req, httplib::Response& res); - void handle_get_directory(const httplib::Request& req, httplib::Response& res); // Re-add directory handler + void handle_get_directory(const httplib::Request& req, httplib::Response& res); void handle_put_object(const httplib::Request& req, httplib::Response& res); + void handle_get_metadata(const httplib::Request& req, httplib::Response& res); bool validate_write_token(const std::string& token) const; std::pair parse_label_tag(const std::string& label_tag) const; + nlohmann::json get_metadata(const std::string& file_path) const; - bool init_db(); // Helper for DB initialization + bool init_db(); const ServerConfig& config_; httplib::Server server_; - // Removed litecask members - sqlite3* db_ = nullptr; // SQLite database connection - std::filesystem::path db_path_; + std::unique_ptr db_; std::atomic running_; - // Removed _isInitialized - will rely on db_ pointer }; } // namespace simple_object_storage diff --git a/src/temp_directory.cpp b/src/temp_directory.cpp new file mode 100644 index 0000000..7c1465d --- /dev/null +++ b/src/temp_directory.cpp @@ -0,0 +1,63 @@ +#include "temp_directory.hpp" + +#include +#include +#include +#include +#include +#include // For error reporting in destructor + +namespace simple_object_storage { + +TempDirectory::TempDirectory(const std::string& prefix) { + auto temp_dir_base = std::filesystem::temp_directory_path(); + std::mt19937_64 rng(std::chrono::high_resolution_clock::now().time_since_epoch().count()); + std::uniform_int_distribution dist; + + int retries = 5; // Avoid infinite loop in edge cases + while (retries-- > 0) { + std::string random_suffix = std::to_string(dist(rng)); + path_ = temp_dir_base / (prefix + random_suffix); + if (!std::filesystem::exists(path_)) { + break; // Found a unique path + } + } + + if (std::filesystem::exists(path_)) { + throw std::runtime_error("Failed to find unique temporary directory path after multiple retries."); + } + + try { + if (!std::filesystem::create_directory(path_)) { + throw std::runtime_error("Failed to create temporary directory: " + path_.string()); + } + } catch (const std::filesystem::filesystem_error& e) { + throw std::runtime_error("Filesystem error creating temporary directory: " + path_.string() + " - " + e.what()); + } +} + +TempDirectory::~TempDirectory() { + try { + if (std::filesystem::exists(path_)) { + std::error_code ec; // Use error code to avoid exceptions in destructor + std::filesystem::remove_all(path_, ec); + if (ec) { + std::cerr << "Error removing temporary directory " << path_.string() << ": " << ec.message() << std::endl; + } + } + } catch (const std::exception& e) { // Catch potential exceptions from exists() though unlikely + std::cerr << "Error during temporary directory cleanup for " << path_.string() << ": " << e.what() << std::endl; + } catch (...) { + std::cerr << "Unknown error during temporary directory cleanup for " << path_.string() << std::endl; + } +} + +const std::filesystem::path& TempDirectory::path() const { + return path_; +} + +std::string TempDirectory::string() const { + return path_.string(); +} + +} // namespace simple_object_storage \ No newline at end of file diff --git a/src/temp_directory.hpp b/src/temp_directory.hpp new file mode 100644 index 0000000..0487472 --- /dev/null +++ b/src/temp_directory.hpp @@ -0,0 +1,30 @@ +#ifndef TEMP_DIRECTORY_HPP +#define TEMP_DIRECTORY_HPP + +#include +#include + +namespace simple_object_storage { + +// RAII helper for temporary directory cleanup +class TempDirectory { +public: + TempDirectory(const std::string& prefix = "temp_"); + ~TempDirectory(); + + // Disable copy/move semantics for simplicity + TempDirectory(const TempDirectory&) = delete; + TempDirectory& operator=(const TempDirectory&) = delete; + TempDirectory(TempDirectory&&) = delete; + TempDirectory& operator=(TempDirectory&&) = delete; + + const std::filesystem::path& path() const; + std::string string() const; + +private: + std::filesystem::path path_; +}; + +} // namespace simple_object_storage + +#endif // TEMP_DIRECTORY_HPP \ No newline at end of file