Stream inflated EPUB HTMLs down to disk instead of inflating in memory (#4)
* Downgrade miniz for stability * Stream HTML from the ZIP down to disk instead of loading it all into memory
This commit is contained in:
@@ -9,7 +9,7 @@
|
||||
bool Epub::findContentOpfFile(const ZipFile& zip, std::string& contentOpfFile) {
|
||||
// open up the meta data to find where the content.opf file lives
|
||||
size_t s;
|
||||
const auto metaInfo = zip.readTextFileToMemory("META-INF/container.xml", &s);
|
||||
const auto metaInfo = reinterpret_cast<char*>(zip.readFileToMemory("META-INF/container.xml", &s, true));
|
||||
if (!metaInfo) {
|
||||
Serial.println("Could not find META-INF/container.xml");
|
||||
return false;
|
||||
@@ -57,7 +57,7 @@ bool Epub::findContentOpfFile(const ZipFile& zip, std::string& contentOpfFile) {
|
||||
|
||||
bool Epub::parseContentOpf(ZipFile& zip, std::string& content_opf_file) {
|
||||
// read in the content.opf file and parse it
|
||||
auto contents = zip.readTextFileToMemory(content_opf_file.c_str());
|
||||
auto contents = reinterpret_cast<char*>(zip.readFileToMemory(content_opf_file.c_str(), nullptr, true));
|
||||
|
||||
// parse the contents
|
||||
tinyxml2::XMLDocument doc;
|
||||
@@ -168,7 +168,7 @@ bool Epub::parseTocNcxFile(const ZipFile& zip) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto ncxData = zip.readTextFileToMemory(tocNcxItem.c_str());
|
||||
const auto ncxData = reinterpret_cast<char*>(zip.readFileToMemory(tocNcxItem.c_str(), nullptr, true));
|
||||
if (!ncxData) {
|
||||
Serial.printf("Could not find %s\n", tocNcxItem.c_str());
|
||||
return false;
|
||||
@@ -308,11 +308,11 @@ std::string normalisePath(const std::string& path) {
|
||||
return result;
|
||||
}
|
||||
|
||||
uint8_t* Epub::getItemContents(const std::string& itemHref, size_t* size) const {
|
||||
uint8_t* Epub::readItemContentsToBytes(const std::string& itemHref, size_t* size, bool trailingNullByte) const {
|
||||
const ZipFile zip("/sd" + filepath);
|
||||
const std::string path = normalisePath(itemHref);
|
||||
|
||||
const auto content = zip.readFileToMemory(path.c_str(), size);
|
||||
const auto content = zip.readFileToMemory(path.c_str(), size, trailingNullByte);
|
||||
if (!content) {
|
||||
Serial.printf("Failed to read item %s\n", path.c_str());
|
||||
return nullptr;
|
||||
@@ -321,17 +321,11 @@ uint8_t* Epub::getItemContents(const std::string& itemHref, size_t* size) const
|
||||
return content;
|
||||
}
|
||||
|
||||
char* Epub::getTextItemContents(const std::string& itemHref, size_t* size) const {
|
||||
bool Epub::readItemContentsToStream(const std::string& itemHref, Print& out, const size_t chunkSize) const {
|
||||
const ZipFile zip("/sd" + filepath);
|
||||
const std::string path = normalisePath(itemHref);
|
||||
|
||||
const auto content = zip.readTextFileToMemory(path.c_str(), size);
|
||||
if (!content) {
|
||||
Serial.printf("Failed to read item %s\n", path.c_str());
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return content;
|
||||
return zip.readFileToStream(path.c_str(), out, chunkSize);
|
||||
}
|
||||
|
||||
// Number of entries currently held in `spine`, narrowed to int for callers.
int Epub::getSpineItemsCount() const {
  const auto itemCount = spine.size();
  return static_cast<int>(itemCount);
}
|
||||
|
||||
@@ -56,8 +56,9 @@ class Epub {
|
||||
const std::string& getPath() const;
|
||||
const std::string& getTitle() const;
|
||||
const std::string& getCoverImageItem() const;
|
||||
uint8_t* getItemContents(const std::string& itemHref, size_t* size = nullptr) const;
|
||||
char* getTextItemContents(const std::string& itemHref, size_t* size = nullptr) const;
|
||||
uint8_t* readItemContentsToBytes(const std::string& itemHref, size_t* size = nullptr,
|
||||
bool trailingNullByte = false) const;
|
||||
bool readItemContentsToStream(const std::string& itemHref, Print& out, size_t chunkSize) const;
|
||||
std::string& getSpineItem(int spineIndex);
|
||||
int getSpineItemsCount() const;
|
||||
EpubTocEntry& getTocItem(int tocTndex);
|
||||
|
||||
@@ -199,6 +199,11 @@ bool EpubHtmlParserSlim::parseAndBuildPages() {
|
||||
XML_SetCharacterDataHandler(parser, characterData);
|
||||
|
||||
FILE* file = fopen(filepath, "r");
|
||||
if (!file) {
|
||||
Serial.printf("Couldn't open file %s\n", filepath);
|
||||
XML_ParserFree(parser);
|
||||
return false;
|
||||
}
|
||||
|
||||
do {
|
||||
void* const buf = XML_GetBuffer(parser, 1024);
|
||||
|
||||
@@ -64,35 +64,28 @@ void Section::setupCacheDir() const {
|
||||
// Ask the SD filesystem to remove this section's cache directory.
// The result of rmdir is not inspected.
// NOTE(review): presumably rmdir only succeeds on an empty directory — confirm
// that cached page files are removed elsewhere before this is called.
void Section::clearCache() const {
  const char* const cacheDir = cachePath.c_str();
  SD.rmdir(cacheDir);
}
|
||||
|
||||
bool Section::persistPageDataToSD() {
|
||||
size_t size = 0;
|
||||
auto localPath = epub->getSpineItem(spineIndex);
|
||||
const auto localPath = epub->getSpineItem(spineIndex);
|
||||
|
||||
const auto html = epub->getItemContents(epub->getSpineItem(spineIndex), &size);
|
||||
if (!html) {
|
||||
Serial.println("Failed to read item contents");
|
||||
return false;
|
||||
}
|
||||
|
||||
// TODO: Would love to stream this through an XML visitor
|
||||
// TODO: Should we get rid of this file altogether?
|
||||
// It currently saves us a bit of memory by allowing for all the inflation bits to be released
|
||||
// before loading the XML parser
|
||||
const auto tmpHtmlPath = epub->getCachePath() + "/.tmp_" + std::to_string(spineIndex) + ".html";
|
||||
File f = SD.open(tmpHtmlPath.c_str(), FILE_WRITE);
|
||||
const auto written = f.write(html, size);
|
||||
File f = SD.open(tmpHtmlPath.c_str(), FILE_WRITE, true);
|
||||
bool success = epub->readItemContentsToStream(localPath, f, 1024);
|
||||
f.close();
|
||||
free(html);
|
||||
|
||||
Serial.printf("Wrote %d bytes to %s\n", written, tmpHtmlPath.c_str());
|
||||
|
||||
if (size != written) {
|
||||
Serial.println("Failed to inflate section contents to SD");
|
||||
SD.remove(tmpHtmlPath.c_str());
|
||||
if (!success) {
|
||||
Serial.println("Failed to stream item contents");
|
||||
return false;
|
||||
}
|
||||
|
||||
Serial.printf("Streamed HTML to %s\n", tmpHtmlPath.c_str());
|
||||
|
||||
const auto sdTmpHtmlPath = "/sd" + tmpHtmlPath;
|
||||
|
||||
auto visitor =
|
||||
EpubHtmlParserSlim(sdTmpHtmlPath.c_str(), renderer, [this](const Page* page) { this->onPageComplete(page); });
|
||||
const bool success = visitor.parseAndBuildPages();
|
||||
success = visitor.parseAndBuildPages();
|
||||
|
||||
SD.remove(tmpHtmlPath.c_str());
|
||||
if (!success) {
|
||||
|
||||
Reference in New Issue
Block a user