#include "ContentOpfParser.h"

// NOTE(review): the four library header names below were not visible in the reviewed copy of this
// file. They are inferred from the identifiers used here (FsHelpers::, Serial, SD, serialization::);
// confirm them against the project tree before merging.
#include <FsHelpers.h>
#include <HardwareSerial.h>
#include <SD.h>
#include <Serialization.h>

#include <cstring>
#include <string>

#include "../BookMetadataCache.h"

namespace {
// Manifest media type that identifies the NCX table-of-contents document.
constexpr char MEDIA_TYPE_NCX[] = "application/x-dtbncx+xml";
// Name (relative to the cache path) of the temporary file holding manifest id/href pairs.
constexpr char itemCacheFile[] = "/.items.bin";
// Maximum number of bytes handed to Expat per XML_GetBuffer/XML_ParseBuffer round trip.
constexpr size_t PARSE_CHUNK_SIZE = 1024;
// Optional namespace prefix accepted on OPF element names.
constexpr char OPF_PREFIX[] = "opf:";

// Returns true when `name` is `localName` either bare or with a single "opf:" prefix
// (e.g. "manifest" or "opf:manifest").
bool isOpfTag(const XML_Char* name, const char* localName) {
  constexpr size_t prefixLen = sizeof(OPF_PREFIX) - 1;
  if (strncmp(name, OPF_PREFIX, prefixLen) == 0) {
    name += prefixLen;
  }
  return strcmp(name, localName) == 0;
}

// Stops the parser, detaches every callback, frees it and nulls the handle so that later calls
// see "no parser". Must only be called with a non-null handle.
void destroyParser(XML_Parser& parser) {
  XML_StopParser(parser, XML_FALSE);                // Stop any pending processing
  XML_SetElementHandler(parser, nullptr, nullptr);  // Clear callbacks
  XML_SetCharacterDataHandler(parser, nullptr);
  XML_ParserFree(parser);
  parser = nullptr;
}
}  // namespace

// Creates the Expat parser and registers this object's static callbacks on it.
// Returns false (after logging) when the parser cannot be allocated.
bool ContentOpfParser::setup() {
  parser = XML_ParserCreate(nullptr);
  if (!parser) {
    Serial.printf("[%lu] [COF] Couldn't allocate memory for parser\n", millis());
    return false;
  }

  XML_SetUserData(parser, this);
  XML_SetElementHandler(parser, startElement, endElement);
  XML_SetCharacterDataHandler(parser, characterData);
  return true;
}

// Releases the parser, closes the temporary item store if it is still open, and deletes the
// temporary item file from the SD card if it exists.
ContentOpfParser::~ContentOpfParser() {
  if (parser) {
    destroyParser(parser);
  }
  if (tempItemStore) {
    tempItemStore.close();
  }

  const auto itemStorePath = cachePath + itemCacheFile;
  if (SD.exists(itemStorePath.c_str())) {
    SD.remove(itemStorePath.c_str());
  }
}

// Single-byte overload; forwards to the buffer overload.
size_t ContentOpfParser::write(const uint8_t data) { return write(&data, 1); }

// Feeds `size` bytes from `buffer` to the XML parser in chunks of at most PARSE_CHUNK_SIZE bytes.
//
// A chunk is flagged as the final one when it consumes exactly the bytes still expected according
// to `remainingSize`. Returns `size` on success. Returns 0 when there is no parser, or when buffer
// allocation or parsing fails; in the failure cases the parser is destroyed, so every subsequent
// call also returns 0.
size_t ContentOpfParser::write(const uint8_t* buffer, const size_t size) {
  if (!parser) return 0;

  const uint8_t* currentBufferPos = buffer;
  auto remainingInBuffer = size;

  while (remainingInBuffer > 0) {
    void* const buf = XML_GetBuffer(parser, static_cast<int>(PARSE_CHUNK_SIZE));
    if (!buf) {
      Serial.printf("[%lu] [COF] Couldn't allocate memory for buffer\n", millis());
      destroyParser(parser);
      return 0;
    }

    const size_t toRead = remainingInBuffer < PARSE_CHUNK_SIZE ? remainingInBuffer : PARSE_CHUNK_SIZE;
    memcpy(buf, currentBufferPos, toRead);

    if (XML_ParseBuffer(parser, static_cast<int>(toRead), remainingSize == toRead) == XML_STATUS_ERROR) {
      // XML_Size may be wider than unsigned long on some builds; cast so it matches %lu.
      Serial.printf("[%lu] [COF] Parse error at line %lu: %s\n", millis(),
                    static_cast<unsigned long>(XML_GetCurrentLineNumber(parser)),
                    XML_ErrorString(XML_GetErrorCode(parser)));
      destroyParser(parser);
      return 0;
    }

    currentBufferPos += toRead;
    remainingInBuffer -= toRead;
    remainingSize -= toRead;
  }

  return size;
}

// Expat start-tag callback. Drives the parser state machine
// (START -> IN_PACKAGE -> IN_METADATA / IN_MANIFEST / IN_SPINE, IN_METADATA -> IN_BOOK_TITLE)
// and collects the cover id, manifest items, NCX path and spine entries as they are encountered.
void XMLCALL ContentOpfParser::startElement(void* userData, const XML_Char* name, const XML_Char** atts) {
  auto* self = static_cast<ContentOpfParser*>(userData);

  if (self->state == START && isOpfTag(name, "package")) {
    self->state = IN_PACKAGE;
    return;
  }

  if (self->state == IN_PACKAGE && isOpfTag(name, "metadata")) {
    self->state = IN_METADATA;
    return;
  }

  if (self->state == IN_METADATA && strcmp(name, "dc:title") == 0) {
    self->state = IN_BOOK_TITLE;
    return;
  }

  if (self->state == IN_PACKAGE && isOpfTag(name, "manifest")) {
    self->state = IN_MANIFEST;
    // Manifest items are spooled to a temp file rather than held in memory.
    if (!FsHelpers::openFileForWrite("COF", self->cachePath + itemCacheFile, self->tempItemStore)) {
      Serial.printf(
          "[%lu] [COF] Couldn't open temp items file for writing. This is probably going to be a fatal error.\n",
          millis());
    }
    return;
  }

  if (self->state == IN_PACKAGE && isOpfTag(name, "spine")) {
    self->state = IN_SPINE;
    // Re-open the spooled manifest items so that itemref ids can be resolved to hrefs.
    if (!FsHelpers::openFileForRead("COF", self->cachePath + itemCacheFile, self->tempItemStore)) {
      Serial.printf(
          "[%lu] [COF] Couldn't open temp items file for reading. This is probably going to be a fatal error.\n",
          millis());
    }
    return;
  }

  if (self->state == IN_METADATA && isOpfTag(name, "meta")) {
    // <meta name="cover" content="ID"/>: attributes may come in any order, so remember the
    // content value and only commit it once the whole attribute list has been scanned.
    bool isCover = false;
    std::string contentValue;
    for (int i = 0; atts[i]; i += 2) {
      if (strcmp(atts[i], "name") == 0 && strcmp(atts[i + 1], "cover") == 0) {
        isCover = true;
      } else if (strcmp(atts[i], "content") == 0) {
        contentValue = atts[i + 1];
      }
    }

    if (isCover) {
      self->coverItemId = contentValue;
    }
    return;
  }

  if (self->state == IN_MANIFEST && isOpfTag(name, "item")) {
    std::string itemId;
    std::string href;
    std::string mediaType;

    for (int i = 0; atts[i]; i += 2) {
      if (strcmp(atts[i], "id") == 0) {
        itemId = atts[i + 1];
      } else if (strcmp(atts[i], "href") == 0) {
        href = self->baseContentPath + atts[i + 1];
      } else if (strcmp(atts[i], "media-type") == 0) {
        mediaType = atts[i + 1];
      }
    }

    // Write items down to SD card
    serialization::writeString(self->tempItemStore, itemId);
    serialization::writeString(self->tempItemStore, href);

    // Only match when a cover id was actually declared; otherwise an item without an id
    // attribute would compare equal to the empty cover id and be mistaken for the cover.
    if (!self->coverItemId.empty() && itemId == self->coverItemId) {
      self->coverItemHref = href;
    }

    if (mediaType == MEDIA_TYPE_NCX) {
      if (self->tocNcxPath.empty()) {
        self->tocNcxPath = href;
      } else {
        Serial.printf("[%lu] [COF] Warning: Multiple NCX files found in manifest. Ignoring duplicate: %s\n", millis(),
                      href.c_str());
      }
    }
    return;
  }

  // NOTE: This relies on spine appearing after item manifest (which is pretty safe as it's part of the EPUB spec)
  // Only run the spine parsing if there's a cache to add it to
  if (self->cache) {
    if (self->state == IN_SPINE && isOpfTag(name, "itemref")) {
      for (int i = 0; atts[i]; i += 2) {
        if (strcmp(atts[i], "idref") != 0) {
          continue;
        }

        const XML_Char* const idref = atts[i + 1];
        // Resolve the idref to an href by scanning the spooled manifest items from the start.
        self->tempItemStore.seek(0);
        std::string itemId;
        std::string href;
        while (self->tempItemStore.available()) {
          serialization::readString(self->tempItemStore, itemId);
          serialization::readString(self->tempItemStore, href);
          if (itemId == idref) {
            self->cache->createSpineEntry(href);
            break;
          }
        }
      }
      return;
    }
  }
}

// Expat character-data callback. Text is only collected while inside <dc:title>; it may arrive
// in several pieces, so each piece is appended to the title.
void XMLCALL ContentOpfParser::characterData(void* userData, const XML_Char* s, const int len) {
  auto* self = static_cast<ContentOpfParser*>(userData);

  if (self->state == IN_BOOK_TITLE) {
    self->title.append(s, len);
    return;
  }
}

// Expat end-tag callback. Unwinds the state machine one level and closes the temporary item
// store when the manifest (write phase) or spine (read phase) ends.
void XMLCALL ContentOpfParser::endElement(void* userData, const XML_Char* name) {
  auto* self = static_cast<ContentOpfParser*>(userData);

  if (self->state == IN_SPINE && isOpfTag(name, "spine")) {
    self->state = IN_PACKAGE;
    self->tempItemStore.close();
    return;
  }

  if (self->state == IN_MANIFEST && isOpfTag(name, "manifest")) {
    self->state = IN_PACKAGE;
    self->tempItemStore.close();
    return;
  }

  if (self->state == IN_BOOK_TITLE && strcmp(name, "dc:title") == 0) {
    self->state = IN_METADATA;
    return;
  }

  if (self->state == IN_METADATA && isOpfTag(name, "metadata")) {
    self->state = IN_PACKAGE;
    return;
  }

  if (self->state == IN_PACKAGE && isOpfTag(name, "package")) {
    self->state = START;
    return;
  }
}