Extract EPUB TOC into temp file before parsing (#85)

## Summary * Extract EPUB TOC into temp file before parsing * Streaming ZIP -> XML parser uses up a lot of memory as we're allocating inflation buffers while also holding a few copies of the buffer in different forms * Instead, by streaming the inflated file down to the SD card (like we do for HTML parsing), we can lower memory usage ## Additional Context * This should help with https://github.com/daveallie/crosspoint-reader/issues/60 and https://github.com/daveallie/crosspoint-reader/issues/10. It won't remove that class of issues completely, but will allow for many more books to be opened.
2025-12-21 17:08:34 +11:00
parent 0d32d21d75
commit f264efdb12
4 changed files with 31 additions and 16 deletions
--- a/lib/Epub/Epub.cpp
+++ b/lib/Epub/Epub.cpp
@@ -93,24 +93,42 @@ bool Epub::parseTocNcxFile() {

  Serial.printf("[%lu] [EBP] Parsing toc ncx file: %s\n", millis(), tocNcxItem.c_str());

-  size_t tocSize;
-  if (!getItemSize(tocNcxItem, &tocSize)) {
-    Serial.printf("[%lu] [EBP] Could not get size of toc ncx\n", millis());
-    return false;
-  }
+  const auto tmpNcxPath = getCachePath() + "/toc.ncx";
+  File tempNcxFile = SD.open(tmpNcxPath.c_str(), FILE_WRITE);
+  readItemContentsToStream(tocNcxItem, tempNcxFile, 1024);
+  tempNcxFile.close();
+  tempNcxFile = SD.open(tmpNcxPath.c_str(), FILE_READ);
+  const auto ncxSize = tempNcxFile.size();

-  TocNcxParser ncxParser(contentBasePath, tocSize);
+  TocNcxParser ncxParser(contentBasePath, ncxSize);

  if (!ncxParser.setup()) {
    Serial.printf("[%lu] [EBP] Could not setup toc ncx parser\n", millis());
    return false;
  }

-  if (!readItemContentsToStream(tocNcxItem, ncxParser, 1024)) {
-    Serial.printf("[%lu] [EBP] Could not read toc ncx stream\n", millis());
+  const auto ncxBuffer = static_cast<uint8_t*>(malloc(1024));
+  if (!ncxBuffer) {
+    Serial.printf("[%lu] [EBP] Could not allocate memory for toc ncx parser\n", millis());
    return false;
  }

+  while (tempNcxFile.available()) {
+    const auto readSize = tempNcxFile.read(ncxBuffer, 1024);
+    const auto processedSize = ncxParser.write(ncxBuffer, readSize);
+
+    if (processedSize != readSize) {
+      Serial.printf("[%lu] [EBP] Could not process all toc ncx data\n", millis());
+      free(ncxBuffer);
+      tempNcxFile.close();
+      return false;
+    }
+  }
+
+  free(ncxBuffer);
+  tempNcxFile.close();
+  SD.remove(tmpNcxPath.c_str());
+
  this->toc = std::move(ncxParser.toc);

  Serial.printf("[%lu] [EBP] Parsed %d TOC items\n", millis(), this->toc.size());
@@ -293,7 +311,7 @@ std::string& Epub::getSpineItem(const int spineIndex) {
 }

 EpubTocEntry& Epub::getTocItem(const int tocTndex) {
-  static EpubTocEntry emptyEntry("", "", "", 0);
+  static EpubTocEntry emptyEntry = {};
  if (toc.empty()) {
    Serial.printf("[%lu] [EBP] getTocItem called but toc is empty\n", millis());
    return emptyEntry;
--- a/lib/Epub/Epub/EpubTocEntry.h
+++ b/lib/Epub/Epub/EpubTocEntry.h
@@ -2,12 +2,9 @@

 #include <string>

-class EpubTocEntry {
- public:
+struct EpubTocEntry {
  std::string title;
  std::string href;
  std::string anchor;
-  int level;
-  EpubTocEntry(std::string title, std::string href, std::string anchor, const int level)
-      : title(std::move(title)), href(std::move(href)), anchor(std::move(anchor)), level(level) {}
+  uint8_t level;
 };
--- a/lib/Epub/Epub/parsers/TocNcxParser.cpp
+++ b/lib/Epub/Epub/parsers/TocNcxParser.cpp
@@ -155,7 +155,7 @@ void XMLCALL TocNcxParser::endElement(void* userData, const XML_Char* name) {
      }

      // Push to vector
-      self->toc.emplace_back(self->currentLabel, href, anchor, self->currentDepth);
+      self->toc.push_back({std::move(self->currentLabel), std::move(href), std::move(anchor), self->currentDepth});

      // Clear them so we don't re-add them if there are weird XML structures
      self->currentLabel.clear();
--- a/lib/Epub/Epub/parsers/TocNcxParser.h
+++ b/lib/Epub/Epub/parsers/TocNcxParser.h
@@ -17,7 +17,7 @@ class TocNcxParser final : public Print {

  std::string currentLabel;
  std::string currentSrc;
-  size_t currentDepth = 0;
+  uint8_t currentDepth = 0;

  static void startElement(void* userData, const XML_Char* name, const XML_Char** atts);
  static void characterData(void* userData, const XML_Char* s, int len);