Consolidate chapter page data into single file (#144)

## Summary

* Consolidate chapter page data into single file
* Header structure of the file stays the same, following the page count,
we now put a LUT offset
   * The page data is all then appended to this file
* Finally the LUT is appended to the end of the file, and the page count
is updated
* This will also significantly improve the duration of cache cleanup
which takes a while to scan the directory and cleanup content
* Remove page file version as it's all tied up into the section file now
* Bumped section file version to 7
* Moved section content into sub directory
* Updated docs

## Additional Context

* Benchmarks:
  * Generating 74 pages of content from a chapter in Jade Legacy took:
    * master: 6,229ms
    * this PR: 1,305ms
    * Speedup of 79%
  * Generating 207 pages of content from Livesuit book:
    * With progress bar UI updates:
      * master: 24,250ms
      * this PR: 8,063ms
      * Speedup of 67%
    * Without progress bar UI updates:
      * master: 13,055ms
      * this PR: 3,600ms
      * Speedup of 72%
This commit is contained in:
Dave Allie
2025-12-29 12:19:54 +10:00
committed by GitHub
parent b1763821b5
commit 534504cf7a
11 changed files with 162 additions and 143 deletions

View File

@@ -3,20 +3,16 @@
#include <HardwareSerial.h>
#include <Serialization.h>
namespace {
constexpr uint8_t PAGE_FILE_VERSION = 3;
}
void PageLine::render(GfxRenderer& renderer, const int fontId, const int xOffset, const int yOffset) {
block->render(renderer, fontId, xPos + xOffset, yPos + yOffset);
}
void PageLine::serialize(File& file) {
bool PageLine::serialize(File& file) {
serialization::writePod(file, xPos);
serialization::writePod(file, yPos);
// serialize TextBlock pointed to by PageLine
block->serialize(file);
return block->serialize(file);
}
std::unique_ptr<PageLine> PageLine::deserialize(File& file) {
@@ -35,27 +31,22 @@ void Page::render(GfxRenderer& renderer, const int fontId, const int xOffset, co
}
}
void Page::serialize(File& file) const {
serialization::writePod(file, PAGE_FILE_VERSION);
bool Page::serialize(File& file) const {
const uint32_t count = elements.size();
serialization::writePod(file, count);
for (const auto& el : elements) {
// Only PageLine exists currently
serialization::writePod(file, static_cast<uint8_t>(TAG_PageLine));
el->serialize(file);
if (!el->serialize(file)) {
return false;
}
}
return true;
}
std::unique_ptr<Page> Page::deserialize(File& file) {
uint8_t version;
serialization::readPod(file, version);
if (version != PAGE_FILE_VERSION) {
Serial.printf("[%lu] [PGE] Deserialization failed: Unknown version %u\n", millis(), version);
return nullptr;
}
auto page = std::unique_ptr<Page>(new Page());
uint32_t count;

View File

@@ -18,7 +18,7 @@ class PageElement {
explicit PageElement(const int16_t xPos, const int16_t yPos) : xPos(xPos), yPos(yPos) {}
virtual ~PageElement() = default;
virtual void render(GfxRenderer& renderer, int fontId, int xOffset, int yOffset) = 0;
virtual void serialize(File& file) = 0;
virtual bool serialize(File& file) = 0;
};
// a line from a block element
@@ -29,7 +29,7 @@ class PageLine final : public PageElement {
PageLine(std::shared_ptr<TextBlock> block, const int16_t xPos, const int16_t yPos)
: PageElement(xPos, yPos), block(std::move(block)) {}
void render(GfxRenderer& renderer, int fontId, int xOffset, int yOffset) override;
void serialize(File& file) override;
bool serialize(File& file) override;
static std::unique_ptr<PageLine> deserialize(File& file);
};
@@ -38,6 +38,6 @@ class Page {
// the list of block index and line numbers on this page
std::vector<std::shared_ptr<PageElement>> elements;
void render(GfxRenderer& renderer, int fontId, int xOffset, int yOffset) const;
void serialize(File& file) const;
bool serialize(File& file) const;
static std::unique_ptr<Page> deserialize(File& file);
};

View File

@@ -8,54 +8,60 @@
#include "parsers/ChapterHtmlSlimParser.h"
namespace {
constexpr uint8_t SECTION_FILE_VERSION = 6;
constexpr uint8_t SECTION_FILE_VERSION = 7;
constexpr size_t HEADER_SIZE = sizeof(uint8_t) + sizeof(int) + sizeof(float) + sizeof(bool) + sizeof(int) +
sizeof(int) + sizeof(int) + sizeof(size_t);
} // namespace
void Section::onPageComplete(std::unique_ptr<Page> page) {
const auto filePath = cachePath + "/page_" + std::to_string(pageCount) + ".bin";
File outputFile;
if (!FsHelpers::openFileForWrite("SCT", filePath, outputFile)) {
return;
size_t Section::onPageComplete(std::unique_ptr<Page> page) {
if (!file) {
Serial.printf("[%lu] [SCT] File not open for writing page %d\n", millis(), pageCount);
return 0;
}
page->serialize(outputFile);
outputFile.close();
const auto position = file.position();
if (!page->serialize(file)) {
Serial.printf("[%lu] [SCT] Failed to serialize page %d\n", millis(), pageCount);
return 0;
}
Serial.printf("[%lu] [SCT] Page %d processed\n", millis(), pageCount);
pageCount++;
return position;
}
void Section::writeCacheMetadata(const int fontId, const float lineCompression, const bool extraParagraphSpacing,
const int viewportWidth, const int viewportHeight) const {
File outputFile;
if (!FsHelpers::openFileForWrite("SCT", cachePath + "/section.bin", outputFile)) {
void Section::writeSectionFileHeader(const int fontId, const float lineCompression, const bool extraParagraphSpacing,
const int viewportWidth, const int viewportHeight) {
if (!file) {
Serial.printf("[%lu] [SCT] File not open for writing header\n", millis());
return;
}
serialization::writePod(outputFile, SECTION_FILE_VERSION);
serialization::writePod(outputFile, fontId);
serialization::writePod(outputFile, lineCompression);
serialization::writePod(outputFile, extraParagraphSpacing);
serialization::writePod(outputFile, viewportWidth);
serialization::writePod(outputFile, viewportHeight);
serialization::writePod(outputFile, pageCount);
outputFile.close();
static_assert(HEADER_SIZE == sizeof(SECTION_FILE_VERSION) + sizeof(fontId) + sizeof(lineCompression) +
sizeof(extraParagraphSpacing) + sizeof(viewportWidth) + sizeof(viewportHeight) +
sizeof(pageCount) + sizeof(size_t),
"Header size mismatch");
serialization::writePod(file, SECTION_FILE_VERSION);
serialization::writePod(file, fontId);
serialization::writePod(file, lineCompression);
serialization::writePod(file, extraParagraphSpacing);
serialization::writePod(file, viewportWidth);
serialization::writePod(file, viewportHeight);
serialization::writePod(file, pageCount); // Placeholder for page count (will be initially 0 when written)
serialization::writePod(file, static_cast<size_t>(0)); // Placeholder for LUT offset
}
bool Section::loadCacheMetadata(const int fontId, const float lineCompression, const bool extraParagraphSpacing,
const int viewportWidth, const int viewportHeight) {
const auto sectionFilePath = cachePath + "/section.bin";
File inputFile;
if (!FsHelpers::openFileForRead("SCT", sectionFilePath, inputFile)) {
bool Section::loadSectionFile(const int fontId, const float lineCompression, const bool extraParagraphSpacing,
const int viewportWidth, const int viewportHeight) {
if (!FsHelpers::openFileForRead("SCT", filePath, file)) {
return false;
}
// Match parameters
{
uint8_t version;
serialization::readPod(inputFile, version);
serialization::readPod(file, version);
if (version != SECTION_FILE_VERSION) {
inputFile.close();
file.close();
Serial.printf("[%lu] [SCT] Deserialization failed: Unknown version %u\n", millis(), version);
clearCache();
return false;
@@ -64,41 +70,36 @@ bool Section::loadCacheMetadata(const int fontId, const float lineCompression, c
int fileFontId, fileViewportWidth, fileViewportHeight;
float fileLineCompression;
bool fileExtraParagraphSpacing;
serialization::readPod(inputFile, fileFontId);
serialization::readPod(inputFile, fileLineCompression);
serialization::readPod(inputFile, fileExtraParagraphSpacing);
serialization::readPod(inputFile, fileViewportWidth);
serialization::readPod(inputFile, fileViewportHeight);
serialization::readPod(file, fileFontId);
serialization::readPod(file, fileLineCompression);
serialization::readPod(file, fileExtraParagraphSpacing);
serialization::readPod(file, fileViewportWidth);
serialization::readPod(file, fileViewportHeight);
if (fontId != fileFontId || lineCompression != fileLineCompression ||
extraParagraphSpacing != fileExtraParagraphSpacing || viewportWidth != fileViewportWidth ||
viewportHeight != fileViewportHeight) {
inputFile.close();
file.close();
Serial.printf("[%lu] [SCT] Deserialization failed: Parameters do not match\n", millis());
clearCache();
return false;
}
}
serialization::readPod(inputFile, pageCount);
inputFile.close();
serialization::readPod(file, pageCount);
file.close();
Serial.printf("[%lu] [SCT] Deserialization succeeded: %d pages\n", millis(), pageCount);
return true;
}
void Section::setupCacheDir() const {
epub->setupCacheDir();
SD.mkdir(cachePath.c_str());
}
// Your updated class method (assuming you are using the 'SD' object, which is a wrapper for a specific filesystem)
bool Section::clearCache() const {
if (!SD.exists(cachePath.c_str())) {
if (!SD.exists(filePath.c_str())) {
Serial.printf("[%lu] [SCT] Cache does not exist, no action needed\n", millis());
return true;
}
if (!FsHelpers::removeDir(cachePath.c_str())) {
if (!SD.remove(filePath.c_str())) {
Serial.printf("[%lu] [SCT] Failed to clear cache\n", millis());
return false;
}
@@ -107,10 +108,10 @@ bool Section::clearCache() const {
return true;
}
bool Section::persistPageDataToSD(const int fontId, const float lineCompression, const bool extraParagraphSpacing,
const int viewportWidth, const int viewportHeight,
const std::function<void()>& progressSetupFn,
const std::function<void(int)>& progressFn) {
bool Section::createSectionFile(const int fontId, const float lineCompression, const bool extraParagraphSpacing,
const int viewportWidth, const int viewportHeight,
const std::function<void()>& progressSetupFn,
const std::function<void(int)>& progressFn) {
constexpr size_t MIN_SIZE_FOR_PROGRESS = 50 * 1024; // 50KB
const auto localPath = epub->getSpineItem(spineIndex).href;
const auto tmpHtmlPath = epub->getCachePath() + "/.tmp_" + std::to_string(spineIndex) + ".html";
@@ -156,30 +157,66 @@ bool Section::persistPageDataToSD(const int fontId, const float lineCompression,
progressSetupFn();
}
if (!FsHelpers::openFileForWrite("SCT", filePath, file)) {
return false;
}
writeSectionFileHeader(fontId, lineCompression, extraParagraphSpacing, viewportWidth, viewportHeight);
std::vector<size_t> lut = {};
ChapterHtmlSlimParser visitor(
tmpHtmlPath, renderer, fontId, lineCompression, extraParagraphSpacing, viewportWidth, viewportHeight,
[this](std::unique_ptr<Page> page) { this->onPageComplete(std::move(page)); }, progressFn);
[this, &lut](std::unique_ptr<Page> page) { lut.emplace_back(this->onPageComplete(std::move(page))); },
progressFn);
success = visitor.parseAndBuildPages();
SD.remove(tmpHtmlPath.c_str());
if (!success) {
Serial.printf("[%lu] [SCT] Failed to parse XML and build pages\n", millis());
file.close();
SD.remove(filePath.c_str());
return false;
}
writeCacheMetadata(fontId, lineCompression, extraParagraphSpacing, viewportWidth, viewportHeight);
const auto lutOffset = file.position();
bool hasFailedLutRecords = false;
// Write LUT
for (const auto& pos : lut) {
if (pos == 0) {
hasFailedLutRecords = true;
break;
}
serialization::writePod(file, pos);
}
if (hasFailedLutRecords) {
Serial.printf("[%lu] [SCT] Failed to write LUT due to invalid page positions\n", millis());
file.close();
SD.remove(filePath.c_str());
return false;
}
// Go back and write LUT offset
file.seek(HEADER_SIZE - sizeof(size_t) - sizeof(pageCount));
serialization::writePod(file, pageCount);
serialization::writePod(file, lutOffset);
file.close();
return true;
}
std::unique_ptr<Page> Section::loadPageFromSD() const {
const auto filePath = cachePath + "/page_" + std::to_string(currentPage) + ".bin";
File inputFile;
if (!FsHelpers::openFileForRead("SCT", filePath, inputFile)) {
std::unique_ptr<Page> Section::loadPageFromSectionFile() {
if (!FsHelpers::openFileForRead("SCT", filePath, file)) {
return nullptr;
}
auto page = Page::deserialize(inputFile);
inputFile.close();
file.seek(HEADER_SIZE - sizeof(size_t));
size_t lutOffset;
serialization::readPod(file, lutOffset);
file.seek(lutOffset + sizeof(size_t) * currentPage);
size_t pagePos;
serialization::readPod(file, pagePos);
file.seek(pagePos);
auto page = Page::deserialize(file);
file.close();
return page;
}

View File

@@ -11,11 +11,12 @@ class Section {
std::shared_ptr<Epub> epub;
const int spineIndex;
GfxRenderer& renderer;
std::string cachePath;
std::string filePath;
File file;
void writeCacheMetadata(int fontId, float lineCompression, bool extraParagraphSpacing, int viewportWidth,
int viewportHeight) const;
void onPageComplete(std::unique_ptr<Page> page);
void writeSectionFileHeader(int fontId, float lineCompression, bool extraParagraphSpacing, int viewportWidth,
int viewportHeight);
size_t onPageComplete(std::unique_ptr<Page> page);
public:
int pageCount = 0;
@@ -25,14 +26,13 @@ class Section {
: epub(epub),
spineIndex(spineIndex),
renderer(renderer),
cachePath(epub->getCachePath() + "/" + std::to_string(spineIndex)) {}
filePath(epub->getCachePath() + "/sections/" + std::to_string(spineIndex) + ".bin") {}
~Section() = default;
bool loadCacheMetadata(int fontId, float lineCompression, bool extraParagraphSpacing, int viewportWidth,
int viewportHeight);
void setupCacheDir() const;
bool loadSectionFile(int fontId, float lineCompression, bool extraParagraphSpacing, int viewportWidth,
int viewportHeight);
bool clearCache() const;
bool persistPageDataToSD(int fontId, float lineCompression, bool extraParagraphSpacing, int viewportWidth,
int viewportHeight, const std::function<void()>& progressSetupFn = nullptr,
const std::function<void(int)>& progressFn = nullptr);
std::unique_ptr<Page> loadPageFromSD() const;
bool createSectionFile(int fontId, float lineCompression, bool extraParagraphSpacing, int viewportWidth,
int viewportHeight, const std::function<void()>& progressSetupFn = nullptr,
const std::function<void(int)>& progressFn = nullptr);
std::unique_ptr<Page> loadPageFromSectionFile();
};

View File

@@ -24,34 +24,33 @@ void TextBlock::render(const GfxRenderer& renderer, const int fontId, const int
}
}
void TextBlock::serialize(File& file) const {
// words
const uint32_t wc = words.size();
serialization::writePod(file, wc);
bool TextBlock::serialize(File& file) const {
if (words.size() != wordXpos.size() || words.size() != wordStyles.size()) {
Serial.printf("[%lu] [TXB] Serialization failed: size mismatch (words=%u, xpos=%u, styles=%u)\n", millis(),
words.size(), wordXpos.size(), wordStyles.size());
return false;
}
// Word data
serialization::writePod(file, static_cast<uint32_t>(words.size()));
for (const auto& w : words) serialization::writeString(file, w);
// wordXpos
const uint32_t xc = wordXpos.size();
serialization::writePod(file, xc);
for (auto x : wordXpos) serialization::writePod(file, x);
// wordStyles
const uint32_t sc = wordStyles.size();
serialization::writePod(file, sc);
for (auto s : wordStyles) serialization::writePod(file, s);
// style
// Block style
serialization::writePod(file, style);
return true;
}
std::unique_ptr<TextBlock> TextBlock::deserialize(File& file) {
uint32_t wc, xc, sc;
uint32_t wc;
std::list<std::string> words;
std::list<uint16_t> wordXpos;
std::list<EpdFontStyle> wordStyles;
BLOCK_STYLE style;
// words
// Word count
serialization::readPod(file, wc);
// Sanity check: prevent allocation of unreasonably large lists (max 10000 words per block)
@@ -60,27 +59,15 @@ std::unique_ptr<TextBlock> TextBlock::deserialize(File& file) {
return nullptr;
}
// Word data
words.resize(wc);
wordXpos.resize(wc);
wordStyles.resize(wc);
for (auto& w : words) serialization::readString(file, w);
// wordXpos
serialization::readPod(file, xc);
wordXpos.resize(xc);
for (auto& x : wordXpos) serialization::readPod(file, x);
// wordStyles
serialization::readPod(file, sc);
wordStyles.resize(sc);
for (auto& s : wordStyles) serialization::readPod(file, s);
// Validate data consistency: all three lists must have the same size
if (wc != xc || wc != sc) {
Serial.printf("[%lu] [TXB] Deserialization failed: size mismatch (words=%u, xpos=%u, styles=%u)\n", millis(), wc,
xc, sc);
return nullptr;
}
// style
// Block style
serialization::readPod(file, style);
return std::unique_ptr<TextBlock>(new TextBlock(std::move(words), std::move(wordXpos), std::move(wordStyles), style));

View File

@@ -36,6 +36,6 @@ class TextBlock final : public Block {
// given a renderer works out where to break the words into lines
void render(const GfxRenderer& renderer, int fontId, int x, int y) const;
BlockType getType() override { return TEXT_BLOCK; }
void serialize(File& file) const;
bool serialize(File& file) const;
static std::unique_ptr<TextBlock> deserialize(File& file);
};