Files
crosspoint-reader/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp
Tannay dd280bdc97 Rotation Support (#77)
•  What is the goal of this PR?  
Implement a horizontal EPUB reading mode so books can be read in
landscape orientation (both 90° and 270°), while keeping the rest of the
UI in portrait.

•  What changes are included?
◦  Rendering / Display
▪ Added an orientation model to GfxRenderer (Portrait, LandscapeNormal,
LandscapeFlipped) and made:
▪ drawPixel, drawImage, displayWindow map logical coordinates
differently depending on orientation.
▪ getScreenWidth() / getScreenHeight() return orientation‑aware logical
dimensions (480×800 in portrait, 800×480 in landscape).
◦  Settings / Configuration
▪  Extended CrossPointSettings with:
▪  landscapeReading (toggle for portrait vs. landscape EPUB reading).
▪ landscapeFlipped (toggle to flip landscape 180° so both horizontal
holding directions are supported).
▪ Updated settings serialization/deserialization to persist these fields
while remaining backward‑compatible with existing settings files.
▪  Updated SettingsActivity to expose two new toggles:
▪  “Landscape Reading”
▪  “Flip Landscape (swap top/bottom)”
◦  EPUB Reader
▪  In EpubReaderActivity:
▪ On onEnter, set GfxRenderer orientation based on the new settings
(Portrait, LandscapeNormal, or LandscapeFlipped).
▪ On onExit, reset orientation back to Portrait so Home, WiFi, Settings,
etc. continue to render as before.
▪ Adjusted renderStatusBar to position the status bar and battery
indicator relative to GfxRenderer::getScreenHeight() instead of
hard‑coded Y coordinates, so it stays correctly at the bottom in both
portrait and landscape.
◦  EPUB Caching / Layout
▪ Extended Section cache metadata (section.bin) to include the logical
screenWidth and screenHeight used when pages were generated; bumped
SECTION_FILE_VERSION.
▪  Updated loadCacheMetadata to compare:
▪ font/margins/line compression/extraParagraphSpacing and screen
dimensions; mismatches now invalidate and clear the cache.
▪ Updated persistPageDataToSD and all call sites in EpubReaderActivity
to pass the current GfxRenderer::getScreenWidth() / getScreenHeight() so
portrait and landscape caches are kept separate and correctly sized.



Additional Context

•  Cache behavior / migration
◦ Existing section.bin files (old SECTION_FILE_VERSION) will be detected
as incompatible and their caches cleared and rebuilt once per chapter
when first opened after this change.
◦ Within a given orientation, caches will be reused as before. Switching
orientation (portrait ↔ landscape) will cause a one‑time re‑index of
each chapter in the new orientation.
•  Scope and risks
◦ Orientation changes are scoped to the EPUB reader; the Home screen,
Settings, WiFi selection, sleep screens, and web server UI continue to
assume portrait orientation.
◦ The renderer’s orientation is a static/global setting; if future code
uses GfxRenderer outside the reader while a reader instance is active,
it should be aware that orientation is no longer implicitly fixed.
◦ All drawing primitives now go through orientation‑aware coordinate
transforms; any code that previously relied on edge‑case behavior or
out‑of‑bounds writes might surface as logged “Outside range” warnings
instead.
•  Testing suggestions / areas to focus on
◦  Verify in hardware:
▪ Portrait mode still renders correctly (boot, home, settings, WiFi,
reader).
▪  Landscape reading in both directions:
▪  Landscape Reading = ON, Flip Landscape = OFF.
▪  Landscape Reading = ON, Flip Landscape = ON.
▪ Status bar (page X/Y, % progress, battery icon) is fully visible and
aligned at the bottom in all three combinations.
◦  Open the same book:
▪  In portrait first, then switch to landscape and reopen it.
▪  Confirm that:
▪ Old portrait caches are rebuilt once for landscape (you should see the
“Indexing…” page).
▪ Progress save/restore still works (resume opens to the correct page in
the current orientation).
◦ Ensure grayscale rendering (the secondary pass in
EpubReaderActivity::renderContents) still looks correct in both
orientations.

---------

Co-authored-by: Dave Allie <dave@daveallie.com>
2025-12-28 21:33:20 +11:00

335 lines
11 KiB
C++

#include "ChapterHtmlSlimParser.h"
#include <FsHelpers.h>
#include <GfxRenderer.h>
#include <HardwareSerial.h>
#include <expat.h>
#include "../Page.h"
#include "../htmlEntities.h"
const char* HEADER_TAGS[] = {"h1", "h2", "h3", "h4", "h5", "h6"};
constexpr int NUM_HEADER_TAGS = sizeof(HEADER_TAGS) / sizeof(HEADER_TAGS[0]);
// Minimum file size (in bytes) to show progress bar - smaller chapters don't benefit from it
constexpr size_t MIN_SIZE_FOR_PROGRESS = 50 * 1024; // 50KB
const char* BLOCK_TAGS[] = {"p", "li", "div", "br", "blockquote"};
constexpr int NUM_BLOCK_TAGS = sizeof(BLOCK_TAGS) / sizeof(BLOCK_TAGS[0]);
const char* BOLD_TAGS[] = {"b", "strong"};
constexpr int NUM_BOLD_TAGS = sizeof(BOLD_TAGS) / sizeof(BOLD_TAGS[0]);
const char* ITALIC_TAGS[] = {"i", "em"};
constexpr int NUM_ITALIC_TAGS = sizeof(ITALIC_TAGS) / sizeof(ITALIC_TAGS[0]);
const char* IMAGE_TAGS[] = {"img"};
constexpr int NUM_IMAGE_TAGS = sizeof(IMAGE_TAGS) / sizeof(IMAGE_TAGS[0]);
const char* SKIP_TAGS[] = {"head", "table"};
constexpr int NUM_SKIP_TAGS = sizeof(SKIP_TAGS) / sizeof(SKIP_TAGS[0]);
bool isWhitespace(const char c) { return c == ' ' || c == '\r' || c == '\n' || c == '\t'; }
// given the start and end of a tag, check to see if it matches a known tag
bool matches(const char* tag_name, const char* possible_tags[], const int possible_tag_count) {
for (int i = 0; i < possible_tag_count; i++) {
if (strcmp(tag_name, possible_tags[i]) == 0) {
return true;
}
}
return false;
}
// start a new text block if needed
void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::BLOCK_STYLE style) {
if (currentTextBlock) {
// already have a text block running and it is empty - just reuse it
if (currentTextBlock->isEmpty()) {
currentTextBlock->setStyle(style);
return;
}
makePages();
}
currentTextBlock.reset(new ParsedText(style, extraParagraphSpacing));
}
void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* name, const XML_Char** atts) {
auto* self = static_cast<ChapterHtmlSlimParser*>(userData);
(void)atts;
// Middle of skip
if (self->skipUntilDepth < self->depth) {
self->depth += 1;
return;
}
if (matches(name, IMAGE_TAGS, NUM_IMAGE_TAGS)) {
// TODO: Start processing image tags
self->skipUntilDepth = self->depth;
self->depth += 1;
return;
}
if (matches(name, SKIP_TAGS, NUM_SKIP_TAGS)) {
// start skip
self->skipUntilDepth = self->depth;
self->depth += 1;
return;
}
// Skip blocks with role="doc-pagebreak" and epub:type="pagebreak"
if (atts != nullptr) {
for (int i = 0; atts[i]; i += 2) {
if (strcmp(atts[i], "role") == 0 && strcmp(atts[i + 1], "doc-pagebreak") == 0 ||
strcmp(atts[i], "epub:type") == 0 && strcmp(atts[i + 1], "pagebreak") == 0) {
self->skipUntilDepth = self->depth;
self->depth += 1;
return;
}
}
}
if (matches(name, HEADER_TAGS, NUM_HEADER_TAGS)) {
self->startNewTextBlock(TextBlock::CENTER_ALIGN);
self->boldUntilDepth = min(self->boldUntilDepth, self->depth);
} else if (matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS)) {
if (strcmp(name, "br") == 0) {
self->startNewTextBlock(self->currentTextBlock->getStyle());
} else {
self->startNewTextBlock(TextBlock::JUSTIFIED);
}
} else if (matches(name, BOLD_TAGS, NUM_BOLD_TAGS)) {
self->boldUntilDepth = min(self->boldUntilDepth, self->depth);
} else if (matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS)) {
self->italicUntilDepth = min(self->italicUntilDepth, self->depth);
}
self->depth += 1;
}
void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char* s, const int len) {
auto* self = static_cast<ChapterHtmlSlimParser*>(userData);
// Middle of skip
if (self->skipUntilDepth < self->depth) {
return;
}
EpdFontStyle fontStyle = REGULAR;
if (self->boldUntilDepth < self->depth && self->italicUntilDepth < self->depth) {
fontStyle = BOLD_ITALIC;
} else if (self->boldUntilDepth < self->depth) {
fontStyle = BOLD;
} else if (self->italicUntilDepth < self->depth) {
fontStyle = ITALIC;
}
for (int i = 0; i < len; i++) {
if (isWhitespace(s[i])) {
// Currently looking at whitespace, if there's anything in the partWordBuffer, flush it
if (self->partWordBufferIndex > 0) {
self->partWordBuffer[self->partWordBufferIndex] = '\0';
self->currentTextBlock->addWord(std::move(replaceHtmlEntities(self->partWordBuffer)), fontStyle);
self->partWordBufferIndex = 0;
}
// Skip the whitespace char
continue;
}
// If we're about to run out of space, then cut the word off and start a new one
if (self->partWordBufferIndex >= MAX_WORD_SIZE) {
self->partWordBuffer[self->partWordBufferIndex] = '\0';
self->currentTextBlock->addWord(std::move(replaceHtmlEntities(self->partWordBuffer)), fontStyle);
self->partWordBufferIndex = 0;
}
self->partWordBuffer[self->partWordBufferIndex++] = s[i];
}
// If we have > 750 words buffered up, perform the layout and consume out all but the last line
// There should be enough here to build out 1-2 full pages and doing this will free up a lot of
// memory.
// Spotted when reading Intermezzo, there are some really long text blocks in there.
if (self->currentTextBlock->size() > 750) {
Serial.printf("[%lu] [EHP] Text block too long, splitting into multiple pages\n", millis());
self->currentTextBlock->layoutAndExtractLines(
self->renderer, self->fontId, self->viewportWidth,
[self](const std::shared_ptr<TextBlock>& textBlock) { self->addLineToPage(textBlock); }, false);
}
}
void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* name) {
auto* self = static_cast<ChapterHtmlSlimParser*>(userData);
(void)name;
if (self->partWordBufferIndex > 0) {
// Only flush out part word buffer if we're closing a block tag or are at the top of the HTML file.
// We don't want to flush out content when closing inline tags like <span>.
// Currently this also flushes out on closing <b> and <i> tags, but they are line tags so that shouldn't happen,
// text styling needs to be overhauled to fix it.
const bool shouldBreakText =
matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS) || matches(name, HEADER_TAGS, NUM_HEADER_TAGS) ||
matches(name, BOLD_TAGS, NUM_BOLD_TAGS) || matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS) || self->depth == 1;
if (shouldBreakText) {
EpdFontStyle fontStyle = REGULAR;
if (self->boldUntilDepth < self->depth && self->italicUntilDepth < self->depth) {
fontStyle = BOLD_ITALIC;
} else if (self->boldUntilDepth < self->depth) {
fontStyle = BOLD;
} else if (self->italicUntilDepth < self->depth) {
fontStyle = ITALIC;
}
self->partWordBuffer[self->partWordBufferIndex] = '\0';
self->currentTextBlock->addWord(std::move(replaceHtmlEntities(self->partWordBuffer)), fontStyle);
self->partWordBufferIndex = 0;
}
}
self->depth -= 1;
// Leaving skip
if (self->skipUntilDepth == self->depth) {
self->skipUntilDepth = INT_MAX;
}
// Leaving bold
if (self->boldUntilDepth == self->depth) {
self->boldUntilDepth = INT_MAX;
}
// Leaving italic
if (self->italicUntilDepth == self->depth) {
self->italicUntilDepth = INT_MAX;
}
}
bool ChapterHtmlSlimParser::parseAndBuildPages() {
startNewTextBlock(TextBlock::JUSTIFIED);
const XML_Parser parser = XML_ParserCreate(nullptr);
int done;
if (!parser) {
Serial.printf("[%lu] [EHP] Couldn't allocate memory for parser\n", millis());
return false;
}
File file;
if (!FsHelpers::openFileForRead("EHP", filepath, file)) {
XML_ParserFree(parser);
return false;
}
// Get file size for progress calculation
const size_t totalSize = file.size();
size_t bytesRead = 0;
int lastProgress = -1;
XML_SetUserData(parser, this);
XML_SetElementHandler(parser, startElement, endElement);
XML_SetCharacterDataHandler(parser, characterData);
do {
void* const buf = XML_GetBuffer(parser, 1024);
if (!buf) {
Serial.printf("[%lu] [EHP] Couldn't allocate memory for buffer\n", millis());
XML_StopParser(parser, XML_FALSE); // Stop any pending processing
XML_SetElementHandler(parser, nullptr, nullptr); // Clear callbacks
XML_SetCharacterDataHandler(parser, nullptr);
XML_ParserFree(parser);
file.close();
return false;
}
const size_t len = file.read(static_cast<uint8_t*>(buf), 1024);
if (len == 0) {
Serial.printf("[%lu] [EHP] File read error\n", millis());
XML_StopParser(parser, XML_FALSE); // Stop any pending processing
XML_SetElementHandler(parser, nullptr, nullptr); // Clear callbacks
XML_SetCharacterDataHandler(parser, nullptr);
XML_ParserFree(parser);
file.close();
return false;
}
// Update progress (call every 10% change to avoid too frequent updates)
// Only show progress for larger chapters where rendering overhead is worth it
bytesRead += len;
if (progressFn && totalSize >= MIN_SIZE_FOR_PROGRESS) {
const int progress = static_cast<int>((bytesRead * 100) / totalSize);
if (lastProgress / 10 != progress / 10) {
lastProgress = progress;
progressFn(progress);
}
}
done = file.available() == 0;
if (XML_ParseBuffer(parser, static_cast<int>(len), done) == XML_STATUS_ERROR) {
Serial.printf("[%lu] [EHP] Parse error at line %lu:\n%s\n", millis(), XML_GetCurrentLineNumber(parser),
XML_ErrorString(XML_GetErrorCode(parser)));
XML_StopParser(parser, XML_FALSE); // Stop any pending processing
XML_SetElementHandler(parser, nullptr, nullptr); // Clear callbacks
XML_SetCharacterDataHandler(parser, nullptr);
XML_ParserFree(parser);
file.close();
return false;
}
} while (!done);
XML_StopParser(parser, XML_FALSE); // Stop any pending processing
XML_SetElementHandler(parser, nullptr, nullptr); // Clear callbacks
XML_SetCharacterDataHandler(parser, nullptr);
XML_ParserFree(parser);
file.close();
// Process last page if there is still text
if (currentTextBlock) {
makePages();
completePageFn(std::move(currentPage));
currentPage.reset();
currentTextBlock.reset();
}
return true;
}
void ChapterHtmlSlimParser::addLineToPage(std::shared_ptr<TextBlock> line) {
const int lineHeight = renderer.getLineHeight(fontId) * lineCompression;
if (currentPageNextY + lineHeight > viewportHeight) {
completePageFn(std::move(currentPage));
currentPage.reset(new Page());
currentPageNextY = 0;
}
currentPage->elements.push_back(std::make_shared<PageLine>(line, 0, currentPageNextY));
currentPageNextY += lineHeight;
}
void ChapterHtmlSlimParser::makePages() {
if (!currentTextBlock) {
Serial.printf("[%lu] [EHP] !! No text block to make pages for !!\n", millis());
return;
}
if (!currentPage) {
currentPage.reset(new Page());
currentPageNextY = 0;
}
const int lineHeight = renderer.getLineHeight(fontId) * lineCompression;
currentTextBlock->layoutAndExtractLines(
renderer, fontId, viewportWidth,
[this](const std::shared_ptr<TextBlock>& textBlock) { addLineToPage(textBlock); });
// Extra paragraph spacing if enabled
if (extraParagraphSpacing) {
currentPageNextY += lineHeight / 2;
}
}