diff --git a/lib/Epub/Epub/htmlEntities.cpp b/lib/Epub/Epub/htmlEntities.cpp
deleted file mode 100644
index f44a158..0000000
--- a/lib/Epub/Epub/htmlEntities.cpp
+++ /dev/null
@@ -1,163 +0,0 @@
-// from
-// https://github.com/atomic14/diy-esp32-epub-reader/blob/2c2f57fdd7e2a788d14a0bcb26b9e845a47aac42/lib/Epub/RubbishHtmlParser/htmlEntities.cpp
-
-#include "htmlEntities.h"
-
-#include
-#include
-
-const int MAX_ENTITY_LENGTH = 10;
-
-// Use book: entities_ww2.epub to test this (Page 7: Entities parser test)
-// Note the supported keys are only in lowercase
-// Store the mappings in a unordered hash map
-static std::unordered_map entity_lookup(
- {{""", "\""}, {"⁄", "⁄"}, {"&", "&"}, {"<", "<"}, {">", ">"},
- {"À", "À"}, {"Á", "Á"}, {"Â", "Â"}, {"Ã", "Ã"}, {"Ä", "Ä"},
- {"Å", "Å"}, {"Æ", "Æ"}, {"Ç", "Ç"}, {"È", "È"}, {"É", "É"},
- {"Ê", "Ê"}, {"Ë", "Ë"}, {"Ì", "Ì"}, {"Í", "Í"}, {"Î", "Î"},
- {"Ï", "Ï"}, {"Ð", "Ð"}, {"Ñ", "Ñ"}, {"Ò", "Ò"}, {"Ó", "Ó"},
- {"Ô", "Ô"}, {"Õ", "Õ"}, {"Ö", "Ö"}, {"Ø", "Ø"}, {"Ù", "Ù"},
- {"Ú", "Ú"}, {"Û", "Û"}, {"Ü", "Ü"}, {"Ý", "Ý"}, {"Þ", "Þ"},
- {"ß", "ß"}, {"à", "à"}, {"á", "á"}, {"â", "â"}, {"ã", "ã"},
- {"ä", "ä"}, {"å", "å"}, {"æ", "æ"}, {"ç", "ç"}, {"è", "è"},
- {"é", "é"}, {"ê", "ê"}, {"ë", "ë"}, {"ì", "ì"}, {"í", "í"},
- {"î", "î"}, {"ï", "ï"}, {"ð", "ð"}, {"ñ", "ñ"}, {"ò", "ò"},
- {"ó", "ó"}, {"ô", "ô"}, {"õ", "õ"}, {"ö", "ö"}, {"ø", "ø"},
- {"ù", "ù"}, {"ú", "ú"}, {"û", "û"}, {"ü", "ü"}, {"ý", "ý"},
- {"þ", "þ"}, {"ÿ", "ÿ"}, {" ", " "}, {"¡", "¡"}, {"¢", "¢"},
- {"£", "£"}, {"¤", "¤"}, {"¥", "¥"}, {"¦", "¦"}, {"§", "§"},
- {"¨", "¨"}, {"©", "©"}, {"ª", "ª"}, {"«", "«"}, {"¬", "¬"},
- {"", ""}, {"®", "®"}, {"¯", "¯"}, {"°", "°"}, {"±", "±"},
- {"²", "²"}, {"³", "³"}, {"´", "´"}, {"µ", "µ"}, {"¶", "¶"},
- {"¸", "¸"}, {"¹", "¹"}, {"º", "º"}, {"»", "»"}, {"¼", "¼"},
- {"½", "½"}, {"¾", "¾"}, {"¿", "¿"}, {"×", "×"}, {"÷", "÷"},
- {"∀", "∀"}, {"∂", "∂"}, {"∃", "∃"}, {"∅", "∅"}, {"∇", "∇"},
- {"∈", "∈"}, {"∉", "∉"}, {"∋", "∋"}, {"∏", "∏"}, {"∑", "∑"},
- {"−", "−"}, {"∗", "∗"}, {"√", "√"}, {"∝", "∝"}, {"∞", "∞"},
- {"∠", "∠"}, {"∧", "∧"}, {"∨", "∨"}, {"∩", "∩"}, {"∪", "∪"},
- {"∫", "∫"}, {"∴", "∴"}, {"∼", "∼"}, {"≅", "≅"}, {"≈", "≈"},
- {"≠", "≠"}, {"≡", "≡"}, {"≤", "≤"}, {"≥", "≥"}, {"⊂", "⊂"},
- {"⊃", "⊃"}, {"⊄", "⊄"}, {"⊆", "⊆"}, {"⊇", "⊇"}, {"⊕", "⊕"},
- {"⊗", "⊗"}, {"⊥", "⊥"}, {"⋅", "⋅"}, {"Α", "Α"}, {"Β", "Β"},
- {"Γ", "Γ"}, {"Δ", "Δ"}, {"Ε", "Ε"}, {"Ζ", "Ζ"}, {"Η", "Η"},
- {"Θ", "Θ"}, {"Ι", "Ι"}, {"Κ", "Κ"}, {"Λ", "Λ"}, {"Μ", "Μ"},
- {"Ν", "Ν"}, {"Ξ", "Ξ"}, {"Ο", "Ο"}, {"Π", "Π"}, {"Ρ", "Ρ"},
- {"Σ", "Σ"}, {"Τ", "Τ"}, {"Υ", "Υ"}, {"Φ", "Φ"}, {"Χ", "Χ"},
- {"Ψ", "Ψ"}, {"Ω", "Ω"}, {"α", "α"}, {"β", "β"}, {"γ", "γ"},
- {"δ", "δ"}, {"ε", "ε"}, {"ζ", "ζ"}, {"η", "η"}, {"θ", "θ"},
- {"ι", "ι"}, {"κ", "κ"}, {"λ", "λ"}, {"μ", "μ"}, {"ν", "ν"},
- {"ξ", "ξ"}, {"ο", "ο"}, {"π", "π"}, {"ρ", "ρ"}, {"ς", "ς"},
- {"σ", "σ"}, {"τ", "τ"}, {"υ", "υ"}, {"φ", "φ"}, {"χ", "χ"},
- {"ψ", "ψ"}, {"ω", "ω"}, {"ϑ", "ϑ"}, {"ϒ", "ϒ"}, {"ϖ", "ϖ"},
- {"Œ", "Œ"}, {"œ", "œ"}, {"Š", "Š"}, {"š", "š"}, {"Ÿ", "Ÿ"},
- {"ƒ", "ƒ"}, {"ˆ", "ˆ"}, {"˜", "˜"}, {" ", ""}, {" ", ""},
- {" ", ""}, {"", ""}, {"", ""}, {"", ""}, {"", ""},
- {"–", "–"}, {"—", "—"}, {"‘", "‘"}, {"’", "’"}, {"‚", "‚"},
- {"“", "“"}, {"”", "”"}, {"„", "„"}, {"†", "†"}, {"‡", "‡"},
- {"•", "•"}, {"…", "…"}, {"‰", "‰"}, {"′", "′"}, {"″", "″"},
- {"‹", "‹"}, {"›", "›"}, {"‾", "‾"}, {"€", "€"}, {"™", "™"},
- {"←", "←"}, {"↑", "↑"}, {"→", "→"}, {"↓", "↓"}, {"↔", "↔"},
- {"↵", "↵"}, {"⌈", "⌈"}, {"⌉", "⌉"}, {"⌊", "⌊"}, {"⌋", "⌋"},
- {"◊", "◊"}, {"♠", "♠"}, {"♣", "♣"}, {"♥", "♥"}, {"♦", "♦"}});
-
-// converts from a unicode code point to the utf8 equivalent
-void convert_to_utf8(const int code, std::string& res) {
- // convert to a utf8 sequence
- if (code < 0x80) {
- res += static_cast(code);
- } else if (code < 0x800) {
- res += static_cast(0xc0 | (code >> 6));
- res += static_cast(0x80 | (code & 0x3f));
- } else if (code < 0x10000) {
- res += static_cast(0xe0 | (code >> 12));
- res += static_cast(0x80 | ((code >> 6) & 0x3f));
- res += static_cast(0x80 | (code & 0x3f));
- } else if (code < 0x200000) {
- res += static_cast(0xf0 | (code >> 18));
- res += static_cast(0x80 | ((code >> 12) & 0x3f));
- res += static_cast(0x80 | ((code >> 6) & 0x3f));
- res += static_cast(0x80 | (code & 0x3f));
- } else if (code < 0x4000000) {
- res += static_cast(0xf8 | (code >> 24));
- res += static_cast(0x80 | ((code >> 18) & 0x3f));
- res += static_cast(0x80 | ((code >> 12) & 0x3f));
- res += static_cast(0x80 | ((code >> 6) & 0x3f));
- res += static_cast(0x80 | (code & 0x3f));
- } else if (code < 0x80000000) {
- res += static_cast(0xfc | (code >> 30));
- res += static_cast(0x80 | ((code >> 24) & 0x3f));
- res += static_cast(0x80 | ((code >> 18) & 0x3f));
- res += static_cast(0x80 | ((code >> 12) & 0x3f));
- res += static_cast(0x80 | ((code >> 6) & 0x3f));
- }
-}
-
-// handles numeric entities - e.g. Ӓ or ሴ
-bool process_numeric_entity(const std::string& entity, std::string& res) {
- int code = 0;
- // is it hex?
- if (entity[2] == 'x' || entity[2] == 'X') {
- // parse the hex code
- code = strtol(entity.substr(3, entity.size() - 3).c_str(), nullptr, 16);
- } else {
- code = strtol(entity.substr(2, entity.size() - 3).c_str(), nullptr, 10);
- }
- if (code != 0) {
- // special handling for nbsp
- if (code == 0xA0) {
- res += " ";
- } else {
- convert_to_utf8(code, res);
- }
- return true;
- }
- return false;
-}
-
-// handles named entities - e.g. &
-bool process_string_entity(const std::string& entity, std::string& res) {
- // it's a named entity - find it in the lookup table
- // find it in the map
- const auto it = entity_lookup.find(entity);
- if (it != entity_lookup.end()) {
- res += it->second;
- return true;
- }
- return false;
-}
-
-// replace all the entities in the string
-std::string replaceHtmlEntities(const char* text) {
- std::string res;
- res.reserve(strlen(text));
- for (int i = 0; i < strlen(text); ++i) {
- bool flag = false;
- // do we have a potential entity?
- if (text[i] == '&') {
- // find the end of the entity
- int j = i + 1;
- while (j < strlen(text) && text[j] != ';' && j - i < MAX_ENTITY_LENGTH) {
- j++;
- }
- if (j - i > 2) {
- char entity[j - i + 1];
- strncpy(entity, text + i, j - i);
- // is it a numeric code?
- if (entity[1] == '#') {
- flag = process_numeric_entity(entity, res);
- } else {
- flag = process_string_entity(entity, res);
- }
- // skip past the entity if we successfully decoded it
- if (flag) {
- i = j;
- }
- }
- }
- if (!flag) {
- res += text[i];
- }
- }
- return res;
-}
diff --git a/lib/Epub/Epub/htmlEntities.h b/lib/Epub/Epub/htmlEntities.h
deleted file mode 100644
index 109f717..0000000
--- a/lib/Epub/Epub/htmlEntities.h
+++ /dev/null
@@ -1,7 +0,0 @@
-// from
-// https://github.com/atomic14/diy-esp32-epub-reader/blob/2c2f57fdd7e2a788d14a0bcb26b9e845a47aac42/lib/Epub/RubbishHtmlParser/htmlEntities.cpp
-
-#pragma once
-#include
-
-std::string replaceHtmlEntities(const char* text);
diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp
index e5eb4d1..b96d28f 100644
--- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp
+++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp
@@ -6,7 +6,6 @@
#include
#include "../Page.h"
-#include "../htmlEntities.h"
const char* HEADER_TAGS[] = {"h1", "h2", "h3", "h4", "h5", "h6"};
constexpr int NUM_HEADER_TAGS = sizeof(HEADER_TAGS) / sizeof(HEADER_TAGS[0]);
@@ -130,7 +129,7 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char
// Currently looking at whitespace, if there's anything in the partWordBuffer, flush it
if (self->partWordBufferIndex > 0) {
self->partWordBuffer[self->partWordBufferIndex] = '\0';
- self->currentTextBlock->addWord(std::move(replaceHtmlEntities(self->partWordBuffer)), fontStyle);
+ self->currentTextBlock->addWord(self->partWordBuffer, fontStyle);
self->partWordBufferIndex = 0;
}
// Skip the whitespace char
@@ -155,7 +154,7 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char
// If we're about to run out of space, then cut the word off and start a new one
if (self->partWordBufferIndex >= MAX_WORD_SIZE) {
self->partWordBuffer[self->partWordBufferIndex] = '\0';
- self->currentTextBlock->addWord(std::move(replaceHtmlEntities(self->partWordBuffer)), fontStyle);
+ self->currentTextBlock->addWord(self->partWordBuffer, fontStyle);
self->partWordBufferIndex = 0;
}
@@ -197,7 +196,7 @@ void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* n
}
self->partWordBuffer[self->partWordBufferIndex] = '\0';
- self->currentTextBlock->addWord(std::move(replaceHtmlEntities(self->partWordBuffer)), fontStyle);
+ self->currentTextBlock->addWord(self->partWordBuffer, fontStyle);
self->partWordBufferIndex = 0;
}
}