diff --git a/dbpf-recompress.cpp b/dbpf-recompress.cpp index f148ffa..ff6110b 100644 --- a/dbpf-recompress.cpp +++ b/dbpf-recompress.cpp @@ -1,5 +1,8 @@ #include "dbpf.h" +#include +#include + #include #include #include @@ -9,41 +12,114 @@ using namespace std; -void tryDelete(string fileName) { +void tryDelete(wstring fileName) { try { filesystem::remove(fileName); } catch(filesystem::filesystem_error) {} } -int main(int argc, char *argv[]) { +bool validatePackage(dbpf::Package& oldPackage, dbpf::Package& newPackage, fstream& oldFile, fstream& newFile, wstring displayPath) { + if(!newPackage.unpacked) { + wcout << displayPath << L": Failed to load new package" << endl; + return false; + } + + bytes oldHeader = dbpf::readFile(oldFile, 0, 96); + bytes newHeader = dbpf::readFile(newFile, 0, 96); + + if(bytes(oldHeader.begin(), oldHeader.begin() + 36) != bytes(newHeader.begin(), newHeader.begin() + 36) + || bytes(oldHeader.begin() + 60, oldHeader.end()) != bytes(newHeader.begin() + 60, newHeader.end())) { + wcout << displayPath << L": New header does not match the old header" << endl; + return false; + } + + for(uint i = 48; i < 60; i++) { + if(newHeader[i] != 0) { + wcout << displayPath << L": Hole index info not set to zero" << endl; + return false; + } + } + + if(oldPackage.entries.size() != newPackage.entries.size()) { + wcout << displayPath << L": Number of entries between old package and new package not matching" << endl; + return false; + } + + for(int i = 0; i < oldPackage.entries.size(); i++) { + auto& oldEntry = oldPackage.entries[i]; + auto& newEntry = newPackage.entries[i]; + + if(oldEntry.type != newEntry.type || oldEntry.group != newEntry.group || oldEntry.instance != newEntry.instance || oldEntry.resource != newEntry.resource) { + wcout << displayPath << L": Types, groups, instances, or resources of entries not matching" << endl; + return false; + } + + bytes oldContent = dbpf::readFile(oldFile, oldEntry.location, oldEntry.size); + bytes newContent = dbpf::readFile(newFile, newEntry.location, newEntry.size); + + bool compressed_in_header = newContent[4] == 0x10 && newContent[5] == 0xFB; + bool in_clst = newPackage.compressedEntries.find(dbpf::CompressedEntry{newEntry.type, newEntry.group, newEntry.instance, newEntry.resource}) != newPackage.compressedEntries.end(); + + if(compressed_in_header != in_clst) { + wcout << displayPath << L": Incorrect compression information" << endl; + return false; + } + + if(newEntry.compressed) { + uint tempPos = 0; + uint uncompressedSize = dbpf::getUncompressedSize(newContent); + uint compressedSize = dbpf::getInt32le(newContent, tempPos); + + if(compressedSize > uncompressedSize) { + wcout << displayPath << L": Compressed size is larger than the uncompressed size for one entry" << endl; + return false; + } + } + + oldContent = dbpf::decompressEntry(oldEntry, oldContent); + newContent = dbpf::decompressEntry(newEntry, newContent); + + if(oldContent != newContent) { + wcout << displayPath << L": Mismatch between old entry and new entry" << endl; + return false; + } + } + + return true; +} + +//using wide chars and wide strings to support UTF-16 file names +int wmain(int argc, wchar_t *argv[]) { + _setmode(_fileno(stdout), _O_U16TEXT); //fix for wcout + if(argc == 1) { - cout << "No arguments provided" << endl; + wcout << L"No arguments provided" << endl; return 0; } //parse args - string arg = argv[1]; + wstring arg = argv[1]; - if(arg == "help") { - cout << "dbpf-recompress.exe -args package_file_or_folder" << endl; - cout << " -d decompress" << endl; - cout << endl; + if(arg == L"help") { + wcout << L"dbpf-recompress.exe -args package_file_or_folder" << endl; + wcout << L" -d decompress" << endl; + wcout << endl; return 0; } - dbpf::Mode mode = dbpf::COMPRESS; + dbpf::Mode mode = dbpf::RECOMPRESS; int fileArgIndex = 1; - if(arg == "-d") { + if(arg == L"-d") { mode = dbpf::DECOMPRESS; fileArgIndex = 2; } if(fileArgIndex > argc - 1) { - cout << "No file path provided" << endl; + wcout << L"No file path provided" << endl; return 0; } - string pathName = argv[fileArgIndex]; + wstring pathName = argv[fileArgIndex]; auto files = vector(); bool is_dir = false; @@ -51,7 +127,7 @@ int main(int argc, char *argv[]) { if(filesystem::is_regular_file(pathName)) { auto file_entry = filesystem::directory_entry(pathName); if(file_entry.path().extension() != ".package") { - cout << "Not a package file" << endl; + wcout << L"Not a package file" << endl; return 0; } @@ -66,21 +142,21 @@ int main(int argc, char *argv[]) { } } else { - cout << "File not found" << endl; + wcout << L"File not found" << endl; return 0; } for(auto& dir_entry: files) { //open file - string fileName = dir_entry.path().string(); - string tempFileName = fileName + ".new"; + wstring fileName = dir_entry.path().wstring(); + wstring tempFileName = fileName + L".new"; float current_size = dir_entry.file_size() / 1024.0; - string displayPath; //for cout + wstring displayPath; //for cout if(is_dir) { - displayPath = filesystem::relative(fileName, pathName).string(); + displayPath = filesystem::relative(fileName, pathName).wstring(); } else { displayPath = fileName; } @@ -88,24 +164,22 @@ int main(int argc, char *argv[]) { fstream file = fstream(fileName, ios::in | ios::binary); if(!file.is_open()) { - cout << displayPath << ": Failed to open file" << endl; + wcout << displayPath << L": Failed to open file" << endl; continue; } //get package dbpf::Package oldPackage = dbpf::getPackage(file, displayPath); - dbpf::Package package = dbpf::Package{oldPackage.indexVersion, oldPackage.entries}; + dbpf::Package package = oldPackage; //copy //error unpacking package - if(package.indexVersion == -1) { + if(!package.unpacked) { file.close(); continue; } - //find proper compression mode - if(mode != dbpf::DECOMPRESS) { + if(mode != dbpf::DECOMPRESS && mode != dbpf::RECOMPRESS) { bool all_entries_compressed = true; - bool compression_can_improve = false; for(auto& entry: package.entries) { if(!entry.compressed) { @@ -114,28 +188,8 @@ int main(int argc, char *argv[]) { } } - //try to recompress one large entry that's already compressed and find out if we're gonna get a smaller size - for(auto entry: package.entries) { - if(entry.compressed && entry.uncompressedSize >= 100000) { - bytes content = dbpf::readFile(file, entry.location, entry.size); - content = dbpf::recompressEntry(entry, content); - - if(content.size() < entry.size) { - compression_can_improve = true; - } - - break; - } - } - - if(compression_can_improve) { - mode = dbpf::RECOMPRESS; - } else { - if(all_entries_compressed) { - mode = dbpf::SKIP; - } else { - mode = dbpf::COMPRESS; - } + if(all_entries_compressed) { + mode = dbpf::SKIP; } } @@ -147,56 +201,20 @@ int main(int argc, char *argv[]) { dbpf::putPackage(tempFile, file, package, mode); } else { - cout << displayPath << ": Failed to create temp file" << endl; + wcout << displayPath << L": Failed to create temp file" << endl; file.close(); continue; } //validate new file tempFile.seekg(0, ios::beg); - dbpf::Package newPackage = dbpf::getPackage(tempFile, displayPath + ".new"); - - bool validationFailed = false; - - if(!validationFailed && newPackage.indexVersion == -1) { - cout << displayPath << ": Failed to load new package" << endl; - validationFailed = true; - } - - if(!validationFailed && (oldPackage.entries.size() != newPackage.entries.size())) { - cout << displayPath << ": Number of entries between old package and new package not matching" << endl; - validationFailed = true; - } - - if(!validationFailed) { - for(int i = 0; i < oldPackage.entries.size(); i++) { - auto& oldEntry = oldPackage.entries[i]; - auto& newEntry = newPackage.entries[i]; - - if(oldEntry.type != newEntry.type || oldEntry.group != newEntry.group || oldEntry.instance != newEntry.instance || oldEntry.resource != newEntry.resource) { - cout << displayPath << ": Types, groups, instances, or resources of entries not matching" << endl; - validationFailed = true; - break; - } - - bytes oldContent = dbpf::readFile(file, oldEntry.location, oldEntry.size); - bytes newContent = dbpf::readFile(tempFile, newEntry.location, newEntry.size); - - oldContent = dbpf::decompressEntry(oldEntry, oldContent); - newContent = dbpf::decompressEntry(newEntry, newContent); - - if(oldContent != newContent) { - cout << displayPath << ": Mismatch between old entry and new entry" << endl; - validationFailed = true; - break; - } - } - } + dbpf::Package newPackage = dbpf::getPackage(tempFile, tempFileName); + bool is_valid = validatePackage(oldPackage, newPackage, file, tempFile, displayPath); file.close(); tempFile.close(); - if(validationFailed) { + if(!is_valid) { tryDelete(tempFileName); continue; } @@ -210,7 +228,7 @@ int main(int argc, char *argv[]) { } catch(filesystem::filesystem_error) { - cout << displayPath << ": Failed to overwrite file" << endl; + wcout << displayPath << L": Failed to overwrite file" << endl; tryDelete(tempFileName); continue; } @@ -227,25 +245,25 @@ int main(int argc, char *argv[]) { float new_size = filesystem::file_size(fileName) / 1024.0; //output file size to console - cout << displayPath << " " << fixed << setprecision(2); + wcout << displayPath << L" " << fixed << setprecision(2); if(current_size >= 1000) { - cout << current_size / 1024.0 << " MB"; + wcout << current_size / 1024.0 << L" MB"; } else { - cout << current_size << " KB"; + wcout << current_size << L" KB"; } - cout << " -> "; + wcout << " -> "; if(new_size >= 1000) { - cout << new_size / 1024.0 << " MB"; + wcout << new_size / 1024.0 << L" MB"; } else { - cout << new_size << " KB"; + wcout << new_size << L" KB"; } - cout << endl; + wcout << endl; } - cout << endl; + wcout << endl; return 0; -} +} \ No newline at end of file diff --git a/dbpf.h b/dbpf.h index 44f5a66..b6a7d7e 100644 --- a/dbpf.h +++ b/dbpf.h @@ -29,7 +29,7 @@ namespace dbpf { file.write(reinterpret_cast(buf.data()), buf.size()); } - //convert 4 bytes from buf at pos to integer and increment pos (little endian) + //convert 4 bytes from buf at pos to an integer and increment pos (little endian) uint getInt32le(bytes& buf, uint& pos) { return ((uint) buf[pos++]) + ((uint) buf[pos++] << 8) + ((uint) buf[pos++] << 16) + ((uint) buf[pos++] << 24); } @@ -42,21 +42,34 @@ namespace dbpf { buf[pos++] = n >> 24; } - //convert 3 bytes from buf at pos to integer and increment pos (big endian) - uint getInt24bg(bytes& buf, uint& pos) { - return ((uint) buf[pos++] << 16) + ((uint) buf[pos++] << 8) + ((uint) buf[pos++]); - } - - //put integer in buf at pos and increment pos (big endian) - void putInt24bg(bytes& buf, uint& pos, uint n) { - buf[pos++] = n >> 16; - buf[pos++] = n >> 8; - buf[pos++] = n; + //get the uncompressed size from the compression header (3 bytes big endian integer) + uint getUncompressedSize(bytes& buf) { + return ((uint) buf[6] << 16) + ((uint) buf[7] << 8) + ((uint) buf[8]); } //compression mode enum Mode { COMPRESS, DECOMPRESS, RECOMPRESS, SKIP }; - + + //representing the header of a package file + struct Header { + uint majorVersion; + uint minorVersion; + uint majorUserVersion; + uint minorUserVersion; + uint flags; + uint createdDate; + uint modifiedDate; + uint indexMajorVersion; + uint indexEntryCount; + uint indexLocation; + uint indexSize; + uint holeIndexEntryCount; + uint holdIndexLocation; + uint holeIndexSize; + uint indexMinorVersion; + bytes remainder; + }; + //representing one entry (file) inside the package struct Entry { uint type; @@ -79,12 +92,6 @@ namespace dbpf { uint uncompressedSize; }; - //representing one package file - struct Package { - int indexVersion; - vector entries; - }; - //for use by sets and maps struct hashFunction { template @@ -100,14 +107,22 @@ namespace dbpf { } }; + //representing one package file + struct Package { + bool unpacked = true; + Header header; + vector entries; + unordered_set compressedEntries; //directory of compressed files + }; + bytes compressEntry(Entry& entry, bytes& content) { if(!entry.compressed && !entry.repeated) { - bytes newContent = bytes((content.size() - 1)); //must be smaller than the original, otherwise there is no benefit - int length = qfs_compress(&content[0], content.size(), &newContent[0]); + bytes newContent = bytes(content.size() - 1); //must be smaller than the original, otherwise there is no benefit + int length = qfs_compress(content.data(), content.size(), newContent.data()); if(length > 0) { - entry.compressed = true; newContent.resize(length); + entry.compressed = true; return newContent; } } @@ -117,97 +132,114 @@ namespace dbpf { bytes decompressEntry(Entry& entry, bytes& content) { if(entry.compressed) { - uint tempPos = 6; - bytes newContent = bytes((getInt24bg(content, tempPos))); //uncompressed - bool success = qfs_decompress(&content[0], content.size(), &newContent[0], newContent.size(), false); + bytes newContent = bytes(getUncompressedSize(content)); + bool success = qfs_decompress(content.data(), content.size(), newContent.data(), newContent.size(), false); if(success) { entry.compressed = false; return newContent; - } else { - cout << "Failed to decompress entry" << endl; + wcout << L"Failed to decompress entry" << endl; } } return content; } - + bytes recompressEntry(Entry& entry, bytes& content) { bool wasCompressed = entry.compressed; + bytes newContent = decompressEntry(entry, content); newContent = compressEntry(entry, newContent); + //only return the new entry if there is a reduction in size if(newContent.size() < content.size()) { return newContent; } else { - //decompression/compression failed, or new compressed entry is larger or equal to old compressed entry entry.compressed = wasCompressed; return content; } } - + //get package infromation from file - Package getPackage(fstream& file, string displayPath) { + Package getPackage(fstream& file, wstring displayPath) { file.seekg(0, ios::end); uint fileSize = file.tellg(); file.seekg(0, ios::beg); if(fileSize < 64) { - cout << displayPath << ": Header not found" << endl; - return Package{-1, vector()}; + wcout << displayPath << L": Header not found" << endl; + return Package{false}; } + Package package = Package(); + //header - bytes buffer = readFile(file, 0, 64); - - uint pos = 36; - uint entryCount = getInt32le(buffer, pos); - uint indexLocation = getInt32le(buffer, pos); - uint indexSize = getInt32le(buffer, pos); - - pos += 12; - int indexVersion = getInt32le(buffer, pos); + bytes buffer = readFile(file, 0, 96); + uint pos = 4; - Package package = Package{indexVersion, vector()}; - package.entries.reserve(entryCount + 1); + if(buffer[0] != 'D' || buffer[1] != 'B' || buffer[2] != 'P' || buffer[3] != 'F') { + wcout << displayPath << L": Magic header not found" << endl; + return Package{false}; + } + + package.header.majorVersion = getInt32le(buffer, pos); + package.header.minorVersion = getInt32le(buffer, pos); + package.header.majorUserVersion = getInt32le(buffer, pos); + package.header.minorUserVersion = getInt32le(buffer, pos); + package.header.flags = getInt32le(buffer, pos); + package.header.createdDate = getInt32le(buffer, pos); + package.header.modifiedDate = getInt32le(buffer, pos); + package.header.indexMajorVersion = getInt32le(buffer, pos); + package.header.indexEntryCount = getInt32le(buffer, pos); + package.header.indexLocation = getInt32le(buffer, pos); + package.header.indexSize = getInt32le(buffer, pos); + pos += 12; //skip hole index info + package.header.indexMinorVersion = getInt32le(buffer, pos); + package.header.remainder = bytes(buffer.begin() + 64, buffer.end()); + + if(package.header.majorVersion != 1 || (package.header.minorVersion != 0 && package.header.minorVersion != 1 && package.header.minorVersion != 2) || package.header.indexMajorVersion != 7) { + wcout << displayPath << L": Not a Sims 2 package file" << endl; + return Package{false}; + } + + package.entries.reserve(package.header.indexEntryCount + 1); bytes clstContent; - //error checking - if(indexVersion > 2) { - cout << displayPath << ": Unrecognized index version" << endl; - return Package{-1, vector()}; + if(package.header.indexMinorVersion > 2) { + wcout << displayPath << L": Unrecognized index version" << endl; + return Package{false}; } - if(indexLocation > fileSize || indexLocation + indexSize > fileSize) { - cout << displayPath << ": File index outside of bounds" << endl; - return Package{-1, vector()}; + if(package.header.indexLocation > fileSize || package.header.indexLocation + package.header.indexSize > fileSize) { + wcout << displayPath << L": File index outside of bounds" << endl; + return Package{false}; } - uint entryCountToIndexSize = 0; - if(indexVersion == 2) { - entryCountToIndexSize = entryCount * 4 * 6; + uint indexEntryCountToIndexSize = 0; + if(package.header.indexMinorVersion == 2) { + indexEntryCountToIndexSize = package.header.indexEntryCount * 4 * 6; } else { - entryCountToIndexSize = entryCount * 4 * 5; + indexEntryCountToIndexSize = package.header.indexEntryCount * 4 * 5; } - if(entryCountToIndexSize > indexSize) { - cout << displayPath << ": Entry count larger than index" << endl; - return Package{-1, vector()}; + if(indexEntryCountToIndexSize > package.header.indexSize) { + wcout << displayPath << L": Entry count larger than index" << endl; + return Package{false}; } //index - buffer = readFile(file, indexLocation, indexSize); + buffer = readFile(file, package.header.indexLocation, package.header.indexSize); pos = 0; - for(uint i = 0; i < entryCount; i++) { + for(uint i = 0; i < package.header.indexEntryCount; i++) { uint type = getInt32le(buffer, pos); uint group = getInt32le(buffer, pos); uint instance = getInt32le(buffer, pos); uint resource = 0; - if(indexVersion == 2) { + if(package.header.indexMinorVersion == 2) { resource = getInt32le(buffer, pos); } @@ -215,8 +247,8 @@ namespace dbpf { uint size = getInt32le(buffer, pos); if(location > fileSize || location + size > fileSize) { - cout << displayPath << ": Entry location outside of bounds" << endl; - return Package{-1, vector()}; + wcout << displayPath << L": Entry location outside of bounds" << endl; + return Package{false}; } if(type == 0xE86B1EEF) { @@ -230,11 +262,10 @@ namespace dbpf { //directory of compressed files if(clstContent.size() > 0) { - unordered_set compressedEntries; - if(indexVersion == 2) { - compressedEntries.reserve(clstContent.size() / (4 * 5)); + if(package.header.indexMinorVersion == 2) { + package.compressedEntries.reserve(clstContent.size() / (4 * 5)); } else { - compressedEntries.reserve(clstContent.size() / (4 * 4)); + package.compressedEntries.reserve(clstContent.size() / (4 * 4)); } pos = 0; @@ -244,23 +275,23 @@ namespace dbpf { uint instance = getInt32le(clstContent, pos); uint resource = 0; - if(indexVersion == 2) { + if(package.header.indexMinorVersion == 2) { resource = getInt32le(clstContent, pos); } uint uncompressedSize = getInt32le(clstContent, pos); - compressedEntries.insert(CompressedEntry{type, group, instance, resource, uncompressedSize}); + package.compressedEntries.insert(CompressedEntry{type, group, instance, resource, uncompressedSize}); } //check if entries are compressed for(auto& entry: package.entries) { - auto iter = compressedEntries.find(CompressedEntry{entry.type, entry.group, entry.instance, entry.resource}); - if(iter != compressedEntries.end()) { + auto iter = package.compressedEntries.find(CompressedEntry{entry.type, entry.group, entry.instance, entry.resource}); + if(entry.size > 9 && iter != package.compressedEntries.end()) { + bytes header = readFile(file, entry.location, 9); - CompressedEntry compressedEntry = *iter; - if(entry.size != compressedEntry.uncompressedSize) { + if(header[4] == 0x10 && header[5] == 0xFB) { entry.compressed = true; - entry.uncompressedSize = compressedEntry.uncompressedSize; + entry.uncompressedSize = getUncompressedSize(header); } } } @@ -293,21 +324,23 @@ namespace dbpf { uint pos = 0; putInt32le(buffer, pos, 0x46504244); - putInt32le(buffer, pos, 1); - putInt32le(buffer, pos, 1); - putInt32le(buffer, pos, 0); + putInt32le(buffer, pos, package.header.majorVersion); + putInt32le(buffer, pos, package.header.minorVersion); + putInt32le(buffer, pos, package.header.majorUserVersion); + putInt32le(buffer, pos, package.header.minorUserVersion); + putInt32le(buffer, pos, package.header.flags); + putInt32le(buffer, pos, package.header.createdDate); + putInt32le(buffer, pos, package.header.modifiedDate); + putInt32le(buffer, pos, package.header.indexMajorVersion); + pos += 12; //skip index info, update later putInt32le(buffer, pos, 0); putInt32le(buffer, pos, 0); putInt32le(buffer, pos, 0); - putInt32le(buffer, pos, 0); - putInt32le(buffer, pos, 7); - putInt32le(buffer, pos, 0); - putInt32le(buffer, pos, 0); - putInt32le(buffer, pos, 0); - putInt32le(buffer, pos, 0); - putInt32le(buffer, pos, 0); - putInt32le(buffer, pos, 0); - putInt32le(buffer, pos, package.indexVersion); + putInt32le(buffer, pos, package.header.indexMinorVersion); + + for(uint i = 0; i < package.header.remainder.size(); i++) { + buffer[pos++] = package.header.remainder[i]; + } writeFile(newFile, buffer); @@ -317,29 +350,30 @@ namespace dbpf { #pragma omp parallel for for(int i = 0; i < package.entries.size(); i++) { + auto& entry = package.entries[i]; + omp_set_lock(&lock); - bytes content = readFile(oldFile, package.entries[i].location, package.entries[i].size); + bytes content = readFile(oldFile, entry.location, entry.size); omp_unset_lock(&lock); if(mode == DECOMPRESS) { - content = decompressEntry(package.entries[i], content); + content = decompressEntry(entry, content); } else if(mode == RECOMPRESS) { - content = recompressEntry(package.entries[i], content); + content = recompressEntry(entry, content); } else { - content = compressEntry(package.entries[i], content); + content = compressEntry(entry, content); } - package.entries[i].size = content.size(); + entry.size = content.size(); //we only care about the uncompressed size if the file is compressed - if(package.entries[i].compressed) { - uint tempPos = 6; - package.entries[i].uncompressedSize = getInt24bg(content, tempPos); + if(entry.compressed) { + entry.uncompressedSize = getUncompressedSize(content); } omp_set_lock(&lock); - package.entries[i].location = newFile.tellp(); + entry.location = newFile.tellp(); writeFile(newFile, content); omp_unset_lock(&lock); @@ -351,7 +385,7 @@ namespace dbpf { bytes clstContent; pos = 0; - if(package.indexVersion == 2) { + if(package.header.indexMinorVersion == 2) { clstContent = bytes(package.entries.size() * 4 * 5); } else { clstContent = bytes(package.entries.size() * 4 * 4); @@ -365,7 +399,7 @@ namespace dbpf { putInt32le(clstContent, pos, entry.group); putInt32le(clstContent, pos, entry.instance); - if(package.indexVersion == 2) { + if(package.header.indexMinorVersion == 2) { putInt32le(clstContent, pos, entry.resource); } @@ -384,7 +418,7 @@ namespace dbpf { //write the index uint indexStart = newFile.tellp(); - if(package.indexVersion == 2) { + if(package.header.indexMinorVersion == 2) { buffer = bytes(package.entries.size() * 4 * 6); } else { buffer = bytes(package.entries.size() * 4 * 5); @@ -397,7 +431,7 @@ namespace dbpf { putInt32le(buffer, pos, entry.group); putInt32le(buffer, pos, entry.instance); - if(package.indexVersion == 2) { + if(package.header.indexMinorVersion == 2) { putInt32le(buffer, pos, entry.resource); }