Switch to base64 encoding (more compact than hex)

This commit is contained in:
Your Name 2025-05-17 11:45:38 +12:00
parent d81f75dab2
commit a5a9df01b4

View File

@ -7,6 +7,7 @@
#include <vector>
#include "xxhash.hpp"
#include <sys/stat.h> // For file permissions
#include <cstring> // For strlen
namespace fs = std::filesystem;
@ -25,9 +26,64 @@ static uint64_t fnv1a_64(const void* data, size_t len) {
return h;
}
// Helper function to output the _recreate_file_ utility function
// Base64 encoding function - no dependencies
//
// Encodes `len` bytes starting at `data` with the standard Base64 alphabet
// (A-Z, a-z, 0-9, '+', '/') and pads the result with '=' so that its length
// is a multiple of four. An empty input produces an empty string.
static std::string base64_encode(const unsigned char* data, size_t len) {
    static constexpr char base64_chars[] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    std::string result;
    result.reserve((len + 2) / 3 * 4); // Reserve space for the full encoded size

    // Bit accumulator. It is deliberately unsigned: bytes are shifted in
    // forever and the stale high bits are simply allowed to fall off the top.
    // With a signed int that wrap-around is undefined behaviour once more
    // than a few bytes have been consumed.
    unsigned int val = 0;
    // Position of the lowest bit of the next 6-bit group inside `val`;
    // negative while fewer than 6 fresh bits are pending.
    int valb = -6;

    for (size_t i = 0; i < len; i++) {
        val = (val << 8) | data[i];
        valb += 8;
        while (valb >= 0) {
            result.push_back(base64_chars[(val >> valb) & 0x3F]);
            valb -= 6;
        }
    }

    // Flush the remaining 2 or 4 bits, left-aligned in a final 6-bit group.
    if (valb > -6) {
        result.push_back(base64_chars[((val << 8) >> (valb + 8)) & 0x3F]);
    }

    // Add padding
    while (result.size() % 4) {
        result.push_back('=');
    }
    return result;
}
// Helper function to output the _recreate_file_ utility function and Base64 decoder
static void output_recreate_file_utility(std::ofstream& cpp) {
cpp << R"cpp(
// Base64 decoding function - only needs strchr from <cstring>
//
// Decodes at most encoded_len characters of encoded_data into output and
// stores the number of bytes written in *output_len. Decoding stops at the
// first '=' (padding); any character outside the Base64 alphabet, including
// an embedded NUL, is skipped. No bounds checking is done on output: it must
// have room for at least (encoded_len * 3) / 4 bytes.
static void base64_decode(const char* encoded_data, size_t encoded_len, unsigned char* output, size_t* output_len) {
    const char* base64_chars =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    size_t out_pos = 0;
    // Unsigned accumulator: 6-bit groups are shifted in indefinitely and the
    // stale high bits are allowed to fall off the top. A signed int would
    // overflow (undefined behaviour) after a handful of characters.
    unsigned int val = 0;
    // Position of the lowest bit of the next output byte inside val;
    // negative while fewer than 8 fresh bits are pending.
    int valb = -8;

    for (size_t i = 0; i < encoded_len; i++) {
        const char c = encoded_data[i];
        if (c == '=') break;

        // strchr also matches the alphabet's terminating NUL, which would be
        // decoded as the bogus value 64, so reject NUL explicitly.
        if (c == '\0') continue;

        // Find position in base64_chars
        const char* pos = strchr(base64_chars, c);
        if (pos == nullptr) continue; // Skip invalid characters

        val = (val << 6) | static_cast<unsigned int>(pos - base64_chars);
        valb += 6;
        if (valb >= 0) {
            output[out_pos++] = static_cast<unsigned char>((val >> valb) & 0xFF);
            valb -= 8;
        }
    }
    *output_len = out_pos;
}
// Utility function to recreate a file with proper permissions
static bool _recreate_file_(const std::filesystem::path& outpath, uint64_t file_hash, std::filesystem::perms file_perms, const unsigned char* filedata, size_t filedata_len) {
namespace fs = std::filesystem;
@ -85,7 +141,7 @@ void generate_file_code(const std::string& source, const std::string& destfolder
#include <filesystem>
#include <string>
#include <iostream>
#include <iomanip>
#include <cstring>
// Tiny dependency-free FNV-1a 64-bit hash
static uint64_t fnv1a_64(const void* data, size_t len) {
@ -108,23 +164,36 @@ bool recreate_file(std::string destination_folder) {
namespace fs = std::filesystem;
fs::path outpath = fs::path(destination_folder) / ")cpp" << src.filename().string() << R"cpp(";
// File data embedded directly in the function
const unsigned char filedata[] = {)cpp";
// File data embedded as Base64
static const char filedata_base64[] = )cpp";
// Embed file data directly
for (size_t i = 0; i < filedata.size(); ++i) {
if (i % 16 == 0) cpp << "\n ";
cpp << "0x" << std::hex << std::setw(2) << std::setfill('0') << (int)(unsigned char)filedata[i];
if (i + 1 != filedata.size()) cpp << ", ";
// Encode the file data to Base64
std::string base64 = base64_encode(reinterpret_cast<const unsigned char*>(filedata.data()), filedata.size());
// Split into 76-character chunks for readability
const size_t line_length = 76;
for (size_t i = 0; i < base64.length(); i += line_length) {
if (i > 0) cpp << "\n ";
cpp << "\"" << base64.substr(i, std::min(line_length, base64.length() - i)) << "\"";
if (i + line_length < base64.length()) cpp << "\\";
}
cpp << ";\n\n";
cpp << std::dec << "\n };\n";
// Decode Base64 at runtime
cpp << " // Decode Base64 data\n";
cpp << " size_t decoded_size = (strlen(filedata_base64) * 3) / 4;\n";
cpp << " unsigned char* decoded_data = new unsigned char[decoded_size];\n";
cpp << " size_t actual_size;\n";
cpp << " base64_decode(filedata_base64, strlen(filedata_base64), decoded_data, &actual_size);\n\n";
// Direct call to _recreate_file_ with hash and permissions defined inline
cpp << " return _recreate_file_(outpath, " << hash << "ULL, "
// Call _recreate_file_ with the decoded data
cpp << " bool result = _recreate_file_(outpath, " << hash << "ULL, "
<< "std::filesystem::perms(" << static_cast<unsigned>(src_perms) << "), "
<< "filedata, " << filedata.size() << ");\n";
<< "decoded_data, actual_size);\n";
// Clean up
cpp << " delete[] decoded_data;\n";
cpp << " return result;\n";
cpp << "}\n";
cpp << "}\n";
@ -182,9 +251,7 @@ namespace )hpp" << ns << R"hpp( {
#include <filesystem>
#include <string>
#include <iostream>
#include <iomanip>
#include <cstring>
/*
@ -237,20 +304,35 @@ bool recreate_tree(std::string destination_folder) {
cpp << " // File: " << rel << "\n";
cpp << " fs::path outpath = fs::path(destination_folder) / \"" << rel << "\";\n";
// Embed file data directly in the function
cpp << " const unsigned char filedata[] = {";
for (size_t i = 0; i < filedata.size(); ++i) {
if (i % 16 == 0) cpp << "\n ";
cpp << "0x" << std::hex << std::setw(2) << std::setfill('0') << (int)(unsigned char)filedata[i];
if (i + 1 != filedata.size()) cpp << ", ";
}
cpp << "\n };\n";
// Reset to decimal format
cpp << std::dec;
// Embed file data as Base64
cpp << " static const char filedata_base64[] = ";
// Direct call to _recreate_file_ with all parameters defined inline
// Encode the file data to Base64
std::string base64 = base64_encode(reinterpret_cast<const unsigned char*>(filedata.data()), filedata.size());
// Split into 76-character chunks for readability
const size_t line_length = 76;
for (size_t i = 0; i < base64.length(); i += line_length) {
if (i > 0) cpp << "\n ";
cpp << "\"" << base64.substr(i, std::min(line_length, base64.length() - i)) << "\"";
if (i + line_length < base64.length()) cpp << "\\";
}
cpp << ";\n\n";
// Decode Base64 at runtime
cpp << " // Decode Base64 data\n";
cpp << " size_t decoded_size = (strlen(filedata_base64) * 3) / 4;\n";
cpp << " unsigned char* decoded_data = new unsigned char[decoded_size];\n";
cpp << " size_t actual_size;\n";
cpp << " base64_decode(filedata_base64, strlen(filedata_base64), decoded_data, &actual_size);\n\n";
// Call _recreate_file_ with the decoded data
cpp << " bool file_written = _recreate_file_(outpath, "
<< hash << "ULL, std::filesystem::perms(" << static_cast<unsigned>(file_perms) << "), filedata, " << filedata.size() << ");\n";
<< hash << "ULL, std::filesystem::perms(" << static_cast<unsigned>(file_perms) << "), "
<< "decoded_data, actual_size);\n";
// Clean up and update flag
cpp << " delete[] decoded_data;\n";
cpp << " any_written = any_written || file_written;\n";
cpp << " }\n"; // Close scope to free memory
}