From 477d06d3bffb27b72fec2d90b2d56116ca333358 Mon Sep 17 00:00:00 2001 From: Your Name Date: Sun, 25 May 2025 12:32:06 +1200 Subject: [PATCH] Bug fixing --- README.md | 193 ++++++++++++++++++++++++---------------- src/database.cpp | 191 +++++++++++++++++++++++---------------- src/database.hpp | 14 +-- src/put_handler.cpp | 33 ++----- test.sh | 35 +++----- test_1GB_file_upload.sh | 2 +- 6 files changed, 261 insertions(+), 207 deletions(-) diff --git a/README.md b/README.md index abccf61..6bb1ef7 100644 --- a/README.md +++ b/README.md @@ -1,95 +1,138 @@ # Simple Object Storage -## Introduction +A simple object storage system that stores files with metadata and provides a REST API for access. -Simple Object Storage is a very simple C++ webserver -which provides a store of tagged binary objects (the objects can be large), -which are available over http. +## Features -Read access is public. -Write access is controlled by tokens. +- Store files with metadata (labels, tags, and custom fields) +- Retrieve files by hash or label:tag combination +- Check if a file exists by hash or label:tag +- Delete files by hash +- List all stored objects +- Automatic file deduplication using content hashing +- Support for large file uploads +- Configurable storage location and server settings +- Token-based authentication for write operations -Public read actions: +## Building -### Retrieve Object -- Objects are accessed via a label and tag, or via their hash. 
For example: - - `wget http://localhost:8123/object/squashkiwi:latest` - - `wget http://localhost:8123/object/4528400792837739857` - -### Check Object Existence -- Check if an object exists by label:tag or hash: - - `curl http://localhost:8123/exists/squashkiwi:latest` - - `curl http://localhost:8123/exists/4528400792837739857` - -### Retrieve Hash -- Get the hash for a given label and tag: - - `curl http://localhost:8123/hash/squashkiwi:latest` - - Response format: `{"result":"success","hash":"4528400792837739857"}` - -### List Store Contents -- Get a full list of {label:tag,hash} entries: - - `curl http://localhost:8123/dir` - - Response format: `{"result":"success","entries":[{"label_tag":"example:latest","hash":"4528400792837739857"}]}` - -### Retrieve Metadata -- Get all metadata for a tag: - - `curl http://localhost:8123/meta/squashkiwi:latest` - - `curl http://localhost:8123/meta/4528400792837739857` - - Response format: `{"result":"success","metadata":{"description":"Example file","tags":["test","example"],"custom_field":"custom value"}}` - -### Service Status Check -- Quick status check: - - `curl http://localhost:8123/status` - - Response format: `{"result":"success","status":"ok"}` - -## Write actions (require authentication): - -### Upload Object -- Upload a file with metadata (via HTTP PUT): ```bash -curl -X PUT \ - -H "Authorization: Bearer YOUR_TOKEN" \ - -F "file=@/path/to/your/file.txt" \ - -F 'metadata={"label":"example","tags":["latest","test","example"],"description":"Example file","custom_field":"custom value"}' \ - "http://localhost:8123/upload" +mkdir build +cd build +cmake .. +make ``` - - The object file is uploaded, hashed, added to the registry (if that hash doesn't already exist), and {label:tag,hash} entries are added to the directory index. - - Matching tags on older versions are removed. 
- Response format: `{"result":"success","hash":"4528400792837739857"}` - -### Delete Object -- Delete an object and all its tags: - - `curl -H "Authorization: Bearer YOUR_TOKEN" http://localhost:8123/deleteobject?hash=4528400792837739857` - - Response format: `{"result":"success"}` ## Configuration -- The server is configured via `~/.config/simple_object_storage/config.json` which allows setting: - - `write_tokens`: List of valid write access tokens - - `object_store_path`: Location for the object store (path on disk) - - `host`: Server host (default: "0.0.0.0") - - `port`: Server port (default: 8123) -Example config.json: +The server can be configured by creating a JSON configuration file at `~/.config/simple_object_storage/config.json`. Here's an example configuration: + ```json { - "write_tokens": ["your-secret-token-1", "your-secret-token-2"], - "object_store_path": "/data/storage", - "host": "0.0.0.0", - "port": 8123 + "host": "localhost", + "port": 8080, + "object_store_path": "/path/to/storage", + "write_tokens": ["your-secret-token"] } ``` -## Signal Handling +## API Endpoints -The server handles the following signals: +### Upload a File -- `SIGTERM`/`SIGINT`: Gracefully shuts down the server when received (e.g. from Ctrl+C or system shutdown) -- `SIGHUP`: Reloads the server configuration without restarting the service +``` +PUT /upload +``` -The server ensures proper cleanup of resources during shutdown, including: -- Closing all database connections -- Stopping the HTTP server -- Cleaning up any open file handles -- Properly terminating worker threads +Parameters: +- `file`: The file to upload +- `metadata`: JSON object containing: + - `labels`: Array of strings (required) + - `tags`: Array of strings (required) + - Additional custom fields (optional) -Dockcross is used to cross-build for both 64-bit x86 and arm64 (combining both into one docker container image). 
+Example: +```bash +curl -X PUT \ + -H "Authorization: Bearer your-token" \ + -F "file=@example.txt" \ + -F 'metadata={"labels":["test"],"tags":["latest"],"description":"Example file"}' \ + http://localhost:8080/upload +``` + +### Get a File + +``` +GET /object/{hash} +GET /object/{label}:{tag} +``` + +Example: +```bash +curl http://localhost:8080/object/abc123 +curl http://localhost:8080/object/test:latest +``` + +### Check if a File Exists + +``` +GET /exists/{hash} +GET /exists/{label}:{tag} +``` + +Example: +```bash +curl http://localhost:8080/exists/abc123 +curl http://localhost:8080/exists/test:latest +``` + +### Delete a File + +``` +GET /deleteobject?hash={hash} +``` + +Example: +```bash +curl -H "Authorization: Bearer your-token" http://localhost:8080/deleteobject?hash=abc123 +``` + +### List All Objects + +``` +GET /list +``` + +Example: +```bash +curl http://localhost:8080/list +``` + +## Database Schema + +The system uses SQLite to store metadata about uploaded files. The database schema is as follows: + +```sql +CREATE TABLE objects ( + hash TEXT PRIMARY KEY, + labels TEXT NOT NULL, -- JSON array of labels + tags TEXT NOT NULL, -- JSON array of tags + metadata TEXT NOT NULL -- JSON object with additional metadata +); +``` + +## Testing + +The repository includes two test scripts: +- `test.sh`: Basic functionality tests +- `test_1GB_file_upload.sh`: Tests uploading and downloading a 1GB file + +To run the tests: +```bash +./test.sh +./test_1GB_file_upload.sh +``` + +## License + +This project is licensed under the MIT License - see the LICENSE file for details. 
diff --git a/src/database.cpp b/src/database.cpp index 9f5bd29..d6deb4a 100644 --- a/src/database.cpp +++ b/src/database.cpp @@ -1,11 +1,31 @@ #include #include +#include #include "database.hpp" #include "sqlite3/sqlite3.h" namespace simple_object_storage { +bool Database::createObjectsTable() { + const char* create_table_sql = + "CREATE TABLE IF NOT EXISTS objects (" + "hash TEXT PRIMARY KEY," + "labels TEXT NOT NULL," // JSON array of labels + "tags TEXT NOT NULL," // JSON array of tags + "metadata TEXT NOT NULL" + ");"; + + char* err_msg = nullptr; + int rc = sqlite3_exec(db_, create_table_sql, nullptr, nullptr, &err_msg); + if (rc != SQLITE_OK) { + std::string error = err_msg; + sqlite3_free(err_msg); + return false; + } + return true; +} + bool Database::createVersionTable() { const char* sql = "CREATE TABLE IF NOT EXISTS version_info (" @@ -80,9 +100,21 @@ bool Database::setVersion(int version) { } bool Database::migrate(int from_version, int to_version) { - // Currently only one version, so no migrations needed - // This method will be expanded when we need to add new versions - return true; + if (from_version == 1 && to_version == 2) { + // Drop old table + const char* drop_sql = "DROP TABLE IF EXISTS objects;"; + char* err_msg = nullptr; + int rc = sqlite3_exec(db_, drop_sql, nullptr, nullptr, &err_msg); + if (rc != SQLITE_OK) { + std::string error = err_msg; + sqlite3_free(err_msg); + return false; + } + + // Create new table with updated schema + return createObjectsTable(); + } + return false; } Database::Database(const std::filesystem::path& path) : path_(path) { @@ -112,19 +144,8 @@ Database::Database(const std::filesystem::path& path) : path_(path) { } // Create objects table if it doesn't exist - const char* create_table_sql = - "CREATE TABLE IF NOT EXISTS objects (" - "label_tag TEXT PRIMARY KEY," - "hash TEXT NOT NULL," - "metadata TEXT NOT NULL" - ");"; - - char* err_msg = nullptr; - rc = sqlite3_exec(db_, create_table_sql, nullptr, nullptr, 
&err_msg); - if (rc != SQLITE_OK) { - std::string error = err_msg; - sqlite3_free(err_msg); - throw std::runtime_error("Failed to create table: " + error); + if (!createObjectsTable()) { + throw std::runtime_error("Failed to create objects table"); } } @@ -134,24 +155,6 @@ Database::~Database() { } } -bool Database::insert(const dbEntry& entry) { - std::string sql = "INSERT INTO objects (label_tag, hash, metadata) VALUES (?, ?, ?);"; - sqlite3_stmt* stmt; - - if (sqlite3_prepare_v2(db_, sql.c_str(), -1, &stmt, nullptr) != SQLITE_OK) { - return false; - } - - sqlite3_bind_text(stmt, 1, entry.label_tag.c_str(), -1, SQLITE_STATIC); - sqlite3_bind_text(stmt, 2, entry.hash.c_str(), -1, SQLITE_STATIC); - std::string metadata_str = entry.metadata.dump(); - sqlite3_bind_text(stmt, 3, metadata_str.c_str(), -1, SQLITE_STATIC); - - bool success = sqlite3_step(stmt) == SQLITE_DONE; - sqlite3_finalize(stmt); - return success; -} - bool Database::remove(const std::string& label_tag) { std::string sql = "DELETE FROM objects WHERE label_tag = ?;"; sqlite3_stmt* stmt; @@ -180,50 +183,36 @@ bool Database::remove_by_hash(const std::string& hash) { return success; } -bool Database::get(const std::string& label_tag, dbEntry& entry) { - std::string sql = "SELECT hash, metadata FROM objects WHERE label_tag = ?;"; +bool Database::get(const std::string& hash, dbEntry& entry) { + std::string sql = "SELECT labels, tags, metadata FROM objects WHERE hash = ?;"; sqlite3_stmt* stmt; if (sqlite3_prepare_v2(db_, sql.c_str(), -1, &stmt, nullptr) != SQLITE_OK) { return false; } - sqlite3_bind_text(stmt, 1, label_tag.c_str(), -1, SQLITE_STATIC); + sqlite3_bind_text(stmt, 1, hash.c_str(), -1, SQLITE_STATIC); if (sqlite3_step(stmt) != SQLITE_ROW) { sqlite3_finalize(stmt); return false; } - entry.label_tag = label_tag; - entry.hash = reinterpret_cast(sqlite3_column_text(stmt, 0)); - std::string metadata_str = reinterpret_cast(sqlite3_column_text(stmt, 1)); + entry.hash = hash; + std::string labels_str = 
reinterpret_cast(sqlite3_column_text(stmt, 0)); + std::string tags_str = reinterpret_cast(sqlite3_column_text(stmt, 1)); + std::string metadata_str = reinterpret_cast(sqlite3_column_text(stmt, 2)); + + entry.labels = nlohmann::json::parse(labels_str).get>(); + entry.tags = nlohmann::json::parse(tags_str).get>(); entry.metadata = nlohmann::json::parse(metadata_str); sqlite3_finalize(stmt); return true; } -bool Database::update(const std::string& label_tag, const dbEntry& entry) { - std::string sql = "UPDATE objects SET hash = ?, metadata = ? WHERE label_tag = ?;"; - sqlite3_stmt* stmt; - - if (sqlite3_prepare_v2(db_, sql.c_str(), -1, &stmt, nullptr) != SQLITE_OK) { - return false; - } - - sqlite3_bind_text(stmt, 1, entry.hash.c_str(), -1, SQLITE_STATIC); - std::string metadata_str = entry.metadata.dump(); - sqlite3_bind_text(stmt, 2, metadata_str.c_str(), -1, SQLITE_STATIC); - sqlite3_bind_text(stmt, 3, label_tag.c_str(), -1, SQLITE_STATIC); - - bool success = sqlite3_step(stmt) == SQLITE_DONE; - sqlite3_finalize(stmt); - return success; -} - bool Database::list(std::vector& entries) { - std::string sql = "SELECT label_tag, hash, metadata FROM objects;"; + std::string sql = "SELECT hash, labels, tags, metadata FROM objects;"; sqlite3_stmt* stmt; if (sqlite3_prepare_v2(db_, sql.c_str(), -1, &stmt, nullptr) != SQLITE_OK) { @@ -233,9 +222,13 @@ bool Database::list(std::vector& entries) { entries.clear(); while (sqlite3_step(stmt) == SQLITE_ROW) { dbEntry entry; - entry.label_tag = reinterpret_cast(sqlite3_column_text(stmt, 0)); - entry.hash = reinterpret_cast(sqlite3_column_text(stmt, 1)); - std::string metadata_str = reinterpret_cast(sqlite3_column_text(stmt, 2)); + entry.hash = reinterpret_cast(sqlite3_column_text(stmt, 0)); + std::string labels_str = reinterpret_cast(sqlite3_column_text(stmt, 1)); + std::string tags_str = reinterpret_cast(sqlite3_column_text(stmt, 2)); + std::string metadata_str = reinterpret_cast(sqlite3_column_text(stmt, 3)); + + entry.labels = 
nlohmann::json::parse(labels_str).get>(); + entry.tags = nlohmann::json::parse(tags_str).get>(); entry.metadata = nlohmann::json::parse(metadata_str); entries.push_back(entry); } @@ -245,21 +238,69 @@ bool Database::list(std::vector& entries) { } bool Database::update_or_insert(const dbEntry& entry) { - std::string sql = "INSERT OR REPLACE INTO objects (label_tag, hash, metadata) VALUES (?, ?, ?);"; - sqlite3_stmt* stmt; + // First try to get existing entry + dbEntry existing; + bool exists = get(entry.hash, existing); - if (sqlite3_prepare_v2(db_, sql.c_str(), -1, &stmt, nullptr) != SQLITE_OK) { - return false; + if (exists) { + // Merge labels and tags + std::set merged_labels(existing.labels.begin(), existing.labels.end()); + merged_labels.insert(entry.labels.begin(), entry.labels.end()); + std::set merged_tags(existing.tags.begin(), existing.tags.end()); + merged_tags.insert(entry.tags.begin(), entry.tags.end()); + + // Create new entry with merged data + dbEntry merged = entry; + merged.labels = std::vector(merged_labels.begin(), merged_labels.end()); + merged.tags = std::vector(merged_tags.begin(), merged_tags.end()); + + // Merge metadata + for (const auto& [key, value] : entry.metadata.items()) { + merged.metadata[key] = value; + } + + // Update database + std::string sql = "UPDATE objects SET labels = ?, tags = ?, metadata = ? 
WHERE hash = ?;"; + sqlite3_stmt* stmt; + + if (sqlite3_prepare_v2(db_, sql.c_str(), -1, &stmt, nullptr) != SQLITE_OK) { + return false; + } + + std::string labels_str = nlohmann::json(merged.labels).dump(); + std::string tags_str = nlohmann::json(merged.tags).dump(); + std::string metadata_str = merged.metadata.dump(); + + sqlite3_bind_text(stmt, 1, labels_str.c_str(), -1, SQLITE_STATIC); + sqlite3_bind_text(stmt, 2, tags_str.c_str(), -1, SQLITE_STATIC); + sqlite3_bind_text(stmt, 3, metadata_str.c_str(), -1, SQLITE_STATIC); + sqlite3_bind_text(stmt, 4, merged.hash.c_str(), -1, SQLITE_STATIC); + + bool success = sqlite3_step(stmt) == SQLITE_DONE; + sqlite3_finalize(stmt); + return success; + } else { + // Insert new entry + std::string sql = "INSERT INTO objects (hash, labels, tags, metadata) VALUES (?, ?, ?, ?);"; + sqlite3_stmt* stmt; + + if (sqlite3_prepare_v2(db_, sql.c_str(), -1, &stmt, nullptr) != SQLITE_OK) { + return false; + } + + std::string labels_str = nlohmann::json(entry.labels).dump(); + std::string tags_str = nlohmann::json(entry.tags).dump(); + std::string metadata_str = entry.metadata.dump(); + + sqlite3_bind_text(stmt, 1, entry.hash.c_str(), -1, SQLITE_STATIC); + sqlite3_bind_text(stmt, 2, labels_str.c_str(), -1, SQLITE_STATIC); + sqlite3_bind_text(stmt, 3, tags_str.c_str(), -1, SQLITE_STATIC); + sqlite3_bind_text(stmt, 4, metadata_str.c_str(), -1, SQLITE_STATIC); + + bool success = sqlite3_step(stmt) == SQLITE_DONE; + sqlite3_finalize(stmt); + return success; } - - sqlite3_bind_text(stmt, 1, entry.label_tag.c_str(), -1, SQLITE_STATIC); - sqlite3_bind_text(stmt, 2, entry.hash.c_str(), -1, SQLITE_STATIC); - std::string metadata_str = entry.metadata.dump(); - sqlite3_bind_text(stmt, 3, metadata_str.c_str(), -1, SQLITE_STATIC); - - bool success = sqlite3_step(stmt) == SQLITE_DONE; - sqlite3_finalize(stmt); - return success; } } // namespace simple_object_storage diff --git a/src/database.hpp b/src/database.hpp index 4648067..554728b 100644 --- 
a/src/database.hpp +++ b/src/database.hpp @@ -11,22 +11,21 @@ namespace simple_object_storage { class dbEntry { public: - std::string label_tag; // unique identifier for the object - std::string hash; // hash of the object - not unique + std::string hash; // unique primary key + std::vector labels; // multiple labels + std::vector tags; // multiple tags nlohmann::json metadata; }; class Database { public: - static const int CURRENT_VERSION = 1; + static const int CURRENT_VERSION = 2; Database(const std::filesystem::path& path); ~Database(); - bool insert(const dbEntry& entry); - bool remove(const std::string& label_tag); + bool remove(const std::string& hash); bool remove_by_hash(const std::string& hash); - bool get(const std::string& label_tag, dbEntry& entry); - bool update(const std::string& label_tag, const dbEntry& entry); + bool get(const std::string& hash, dbEntry& entry); bool list(std::vector& entries); bool update_or_insert(const dbEntry& entry); private: @@ -37,6 +36,7 @@ class Database { bool getVersion(int& version); bool setVersion(int version); bool migrate(int from_version, int to_version); + bool createObjectsTable(); }; } // namespace simple_object_storage diff --git a/src/put_handler.cpp b/src/put_handler.cpp index 68dbb39..cb78d4c 100644 --- a/src/put_handler.cpp +++ b/src/put_handler.cpp @@ -67,9 +67,9 @@ void PutHandler::handle_put_object(const httplib::Request& req, httplib::Respons } // Validate required metadata fields - if (!metadata.contains("label")) { + if (!metadata.contains("labels") || !metadata["labels"].is_array() || metadata["labels"].empty()) { res.status = 400; - nlohmann::json response = {{"result", "error"}, {"error", "Missing required metadata field: label"}}; + nlohmann::json response = {{"result", "error"}, {"error", "Missing or invalid required metadata field: labels (must be non-empty array)"}}; res.set_content(response.dump(), "application/json"); return; } @@ -81,15 +81,6 @@ void PutHandler::handle_put_object(const 
httplib::Request& req, httplib::Respons return; } - // Extract label and tags - std::string label = metadata["label"]; - if (label.empty()) { - res.status = 400; - nlohmann::json response = {{"result", "error"}, {"error", "Label cannot be empty"}}; - res.set_content(response.dump(), "application/json"); - return; - } - // Add filename to metadata if not provided if (!metadata.contains("filename")) { metadata["filename"] = file.filename; @@ -147,6 +138,7 @@ void PutHandler::handle_put_object(const httplib::Request& req, httplib::Respons // Move file to final location std::filesystem::path final_path = server_.config_.object_store_path / std::to_string(hash); + if (!std::filesystem::exists(final_path)) { try { std::filesystem::rename(temp_path, final_path); @@ -163,22 +155,11 @@ void PutHandler::handle_put_object(const httplib::Request& req, httplib::Respons // Update database index dbEntry entry; entry.hash = std::to_string(hash); - entry.metadata = metadata; // Store the complete metadata + entry.labels = metadata["labels"].get>(); + entry.tags = metadata["tags"].get>(); + entry.metadata = metadata; - // For each tag, create a label:tag entry - bool success = true; - for (const auto& tag : metadata["tags"]) { - std::string tag_str = tag.get(); - if (tag_str.empty()) continue; // Skip empty tags - - entry.label_tag = label + ":" + tag_str; - if (!server_.db_->update_or_insert(entry)) { - success = false; - break; - } - } - - if (!success) { + if (!server_.db_->update_or_insert(entry)) { res.status = 500; nlohmann::json response = {{"result", "error"}, {"error", "Failed to update database index"}}; res.set_content(response.dump(), "application/json"); diff --git a/test.sh b/test.sh index 72cf2de..f07d562 100755 --- a/test.sh +++ b/test.sh @@ -54,9 +54,9 @@ BASE_TAG="autotest" # Construct metadata JSON METADATA_JSON=$(cat < /dev/null diff --git a/test_1GB_file_upload.sh b/test_1GB_file_upload.sh index 71b8c4a..745ba2c 100755 --- a/test_1GB_file_upload.sh +++ 
b/test_1GB_file_upload.sh @@ -29,7 +29,7 @@ echo "Uploading file..." RESPONSE=$(curl -X PUT \ -H "Authorization: Bearer ${WRITE_TOKEN}" \ -F "file=@test_file.bin" \ - -F 'metadata={"labeltag":"test:latest","description":"Test file","tags":["test","large"]}' \ + -F 'metadata={"labels":["test"],"tags":["latest","large"],"description":"Test file"}' \ "http://${HOST}:${PORT}/upload") echo "Upload response: $RESPONSE"