Custom zip parsing (#140)
## Summary

* Use custom zip central directory parsing to lower memory usage when loading zipped epub content
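For context on the approach named in the summary: instead of letting a zip library build a full in-memory index of the archive, a central-directory parser seeks to the end-of-central-directory (EOCD) record, reads the entry count and directory offset, and then streams one entry at a time, holding only the fields it needs. A minimal sketch of that pattern (portable C++ for clarity; `forEachZipEntry` and the read helpers are illustrative, not this project's `ZipFile` API, and it assumes an archive without a trailing comment, where a robust parser would scan backwards for the EOCD signature):

```cpp
#include <cstdint>
#include <cstdio>
#include <string>

static uint16_t rd16(const uint8_t* p) { return static_cast<uint16_t>(p[0] | (p[1] << 8)); }
static uint32_t rd32(const uint8_t* p) {
  return p[0] | (p[1] << 8) | (p[2] << 16) | (static_cast<uint32_t>(p[3]) << 24);
}

// Walk every central directory entry, invoking onEntry(name, uncompressedSize,
// localHeaderOffset). Only one entry's fixed header and name are held in memory at a time.
template <typename Fn>
bool forEachZipEntry(const char* zipPath, Fn onEntry) {
  FILE* f = fopen(zipPath, "rb");
  if (!f) return false;

  // EOCD record: 22 bytes when the archive has no trailing comment.
  uint8_t eocd[22];
  if (fseek(f, -22, SEEK_END) != 0 || fread(eocd, 1, 22, f) != 22 ||
      rd32(eocd) != 0x06054b50) {  // EOCD signature
    fclose(f);
    return false;
  }
  const uint16_t entryCount = rd16(eocd + 10);  // total entry count
  const uint32_t cdOffset = rd32(eocd + 16);    // central directory start

  fseek(f, static_cast<long>(cdOffset), SEEK_SET);
  for (uint16_t i = 0; i < entryCount; i++) {
    uint8_t hdr[46];  // fixed part of a central directory file header
    if (fread(hdr, 1, 46, f) != 46 || rd32(hdr) != 0x02014b50) break;

    const uint16_t nameLen = rd16(hdr + 28);
    const uint16_t extraLen = rd16(hdr + 30);
    const uint16_t commentLen = rd16(hdr + 32);
    std::string name(nameLen, '\0');
    if (nameLen != 0 && fread(&name[0], 1, nameLen, f) != nameLen) break;
    fseek(f, extraLen + commentLen, SEEK_CUR);  // skip variable-length tail

    onEntry(name, rd32(hdr + 24), rd32(hdr + 42));  // uncompressed size, local header offset
  }
  fclose(f);
  return true;
}
```

On a constrained target this keeps peak memory proportional to a single entry rather than the whole directory, which appears to be the saving the summary describes.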
```diff
@@ -60,9 +60,6 @@ bool Epub::parseContentOpf(BookMetadataCache::BookMetadata& bookMetadata) {
   }
 
   ContentOpfParser opfParser(getCachePath(), getBasePath(), contentOpfSize, bookMetadataCache.get());
-  Serial.printf("[%lu] [MEM] Free: %d bytes, Total: %d bytes, Min Free: %d bytes\n", millis(), ESP.getFreeHeap(),
-                ESP.getHeapSize(), ESP.getMinFreeHeap());
-
   if (!opfParser.setup()) {
     Serial.printf("[%lu] [EBP] Could not setup content.opf parser\n", millis());
     return false;
```
```diff
@@ -321,10 +318,9 @@ bool Epub::generateCoverBmp() const {
 }
 
 uint8_t* Epub::readItemContentsToBytes(const std::string& itemHref, size_t* size, const bool trailingNullByte) const {
-  const ZipFile zip("/sd" + filepath);
   const std::string path = FsHelpers::normalisePath(itemHref);
 
-  const auto content = zip.readFileToMemory(path.c_str(), size, trailingNullByte);
+  const auto content = ZipFile(filepath).readFileToMemory(path.c_str(), size, trailingNullByte);
   if (!content) {
     Serial.printf("[%lu] [EBP] Failed to read item %s\n", millis(), path.c_str());
     return nullptr;
```
```diff
@@ -334,20 +330,13 @@ uint8_t* Epub::readItemContentsToBytes(const std::string& itemHref, size_t* size
 }
 
 bool Epub::readItemContentsToStream(const std::string& itemHref, Print& out, const size_t chunkSize) const {
-  const ZipFile zip("/sd" + filepath);
   const std::string path = FsHelpers::normalisePath(itemHref);
 
-  return zip.readFileToStream(path.c_str(), out, chunkSize);
+  return ZipFile(filepath).readFileToStream(path.c_str(), out, chunkSize);
 }
 
 bool Epub::getItemSize(const std::string& itemHref, size_t* size) const {
-  const ZipFile zip("/sd" + filepath);
-  return getItemSize(zip, itemHref, size);
-}
-
-bool Epub::getItemSize(const ZipFile& zip, const std::string& itemHref, size_t* size) {
   const std::string path = FsHelpers::normalisePath(itemHref);
-  return zip.getInflatedFileSize(path.c_str(), size);
+  return ZipFile(filepath).getInflatedFileSize(path.c_str(), size);
 }
 
 int Epub::getSpineItemsCount() const {
```
```diff
@@ -24,7 +24,6 @@ class Epub {
   bool findContentOpfFile(std::string* contentOpfFile) const;
   bool parseContentOpf(BookMetadataCache::BookMetadata& bookMetadata);
   bool parseTocNcxFile() const;
-  static bool getItemSize(const ZipFile& zip, const std::string& itemHref, size_t* size);
 
  public:
   explicit Epub(std::string filepath, const std::string& cacheDir) : filepath(std::move(filepath)) {
```
```diff
@@ -54,5 +53,5 @@ class Epub {
   size_t getCumulativeSpineItemSize(int spineIndex) const;
 
   size_t getBookSize() const;
-  uint8_t calculateProgress(const int currentSpineIndex, const float currentSpineRead) const;
+  uint8_t calculateProgress(int currentSpineIndex, float currentSpineRead) const;
 };
```
```diff
@@ -122,7 +122,26 @@ bool BookMetadataCache::buildBookBin(const std::string& epubPath, const BookMeta
   // LUTs complete
   // Loop through spines from spine file matching up TOC indexes, calculating cumulative size and writing to book.bin
 
-  const ZipFile zip("/sd" + epubPath);
+  ZipFile zip(epubPath);
+  // Pre-open zip file to speed up size calculations
+  if (!zip.open()) {
+    Serial.printf("[%lu] [BMC] Could not open EPUB zip for size calculations\n", millis());
+    bookFile.close();
+    spineFile.close();
+    tocFile.close();
+    return false;
+  }
+  // TODO: For large ZIPs, loading all the localHeaderOffsets will crash.
+  // However, not having them loaded is extremely slow. Need a better solution here.
+  // Perhaps only a cache of spine items, or a better way to speed up lookups?
+  if (!zip.loadAllFileStatSlims()) {
+    Serial.printf("[%lu] [BMC] Could not load zip local header offsets for size calculations\n", millis());
+    bookFile.close();
+    spineFile.close();
+    tocFile.close();
+    zip.close();
+    return false;
+  }
   size_t cumSize = 0;
   spineFile.seek(0);
   for (int i = 0; i < spineCount; i++) {
```
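One possible shape for the new TODO above (a sketch, not this repo's API): cache local header offsets only for the spine hrefs, reusing the `forEachZipEntry` helper sketched under the summary, so memory scales with the spine length rather than the archive's entry count. `buildSpineOffsetCache` is a hypothetical name.

```cpp
#include <cstdint>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

// Hypothetical: map only the spine items' names to their local header offsets,
// skipping every other archive member. forEachZipEntry is the helper sketched
// in the summary section above.
std::unordered_map<std::string, uint32_t> buildSpineOffsetCache(
    const char* zipPath, const std::vector<std::string>& spineHrefs) {
  const std::unordered_set<std::string> wanted(spineHrefs.begin(), spineHrefs.end());
  std::unordered_map<std::string, uint32_t> offsets;
  forEachZipEntry(zipPath, [&](const std::string& name, uint32_t /*uncompressedSize*/,
                               uint32_t localHeaderOffset) {
    if (wanted.count(name) != 0) offsets.emplace(name, localHeaderOffset);
  });
  return offsets;
}
```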
```diff
@@ -157,6 +176,8 @@ bool BookMetadataCache::buildBookBin(const std::string& epubPath, const BookMeta
     // Write out spine data to book.bin
     writeSpineEntry(bookFile, spineEntry);
   }
+  // Close opened zip file
+  zip.close();
 
   // Loop through toc entries from toc file writing to book.bin
   tocFile.seek(0);
```
```diff
@@ -223,6 +244,8 @@ void BookMetadataCache::createTocEntry(const std::string& title, const std::stri
 
   int spineIndex = -1;
   // find spine index
+  // TODO: This lookup is slow as we need to scan through all items each time. We can't hold it all in memory due to size.
+  // But perhaps we could load just the hrefs into a vector/list to do an index lookup?
   spineFile.seek(0);
   for (int i = 0; i < spineCount; i++) {
     auto spineEntry = readSpineEntry(spineFile);
```
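A sketch of the vector idea from that TODO (hypothetical names; assumes each spine href is seen once while the spine file is written): keep a 32-bit hash per href in RAM and scan those instead of re-reading spine entries from storage.

```cpp
#include <cstdint>
#include <string>
#include <vector>

// FNV-1a: a small, dependency-free string hash.
static uint32_t fnv1a(const std::string& s) {
  uint32_t h = 2166136261u;
  for (const char c : s) h = (h ^ static_cast<uint8_t>(c)) * 16777619u;
  return h;
}

// Hypothetical: filled in spine order as entries are written, so the vector
// index equals the spine index. Costs 4 bytes per spine item.
std::vector<uint32_t> spineHrefHashes;

int findSpineIndex(const std::string& href) {
  const uint32_t h = fnv1a(href);
  for (size_t i = 0; i < spineHrefHashes.size(); i++) {
    if (spineHrefHashes[i] == h) {
      // A 32-bit hash can collide; a real version would confirm the hit
      // against the spine file before trusting it.
      return static_cast<int>(i);
    }
  }
  return -1;
}
```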
```diff
@@ -3,7 +3,6 @@
 #include <FsHelpers.h>
 #include <HardwareSerial.h>
 #include <Serialization.h>
-#include <ZipFile.h>
 
 #include "../BookMetadataCache.h"
 
```
```diff
@@ -183,6 +182,8 @@ void XMLCALL ContentOpfParser::startElement(void* userData, const XML_Char* name
       if (strcmp(atts[i], "idref") == 0) {
         const std::string idref = atts[i + 1];
         // Resolve the idref to href using items map
+        // TODO: This lookup is slow as we need to scan through all items each time.
+        // It can take up to 200ms per item when getting to 1500 items.
        self->tempItemStore.seek(0);
        std::string itemId;
        std::string href;
```
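For that idref scan, one direction (a sketch, assuming each item's byte offset in `tempItemStore` is known when it is first written): index id-hash to record offset once, so each spine idref costs a hash lookup plus a single seek instead of a pass over all items — which the TODO measures at up to 200ms per item around 1500 items. `idOffsets` is a hypothetical name.

```cpp
#include <cstdint>
#include <string>
#include <unordered_map>

// FNV-1a string hash, as in the earlier sketches.
static uint32_t fnv1a(const std::string& s) {
  uint32_t h = 2166136261u;
  for (const char c : s) h = (h ^ static_cast<uint8_t>(c)) * 16777619u;
  return h;
}

// Hypothetical: populated while items are first serialised to tempItemStore,
// mapping fnv1a(itemId) -> byte offset of that item's record in the store.
std::unordered_map<uint32_t, uint32_t> idOffsets;

// Resolving an idref then needs one lookup and one seek:
//   const auto it = idOffsets.find(fnv1a(idref));
//   if (it != idOffsets.end()) {
//     self->tempItemStore.seek(it->second);
//     // read the itemId/href record; verify itemId == idref in case of collision
//   }
```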