Commit c529cf7d authored by Fred Chasen's avatar Fred Chasen
Browse files

Initial epub support

parent b6d4ef67
Pipeline #509 passed with stages
in 1 minute and 37 seconds
......@@ -25,6 +25,7 @@ program
// .option("-e, --encoding [type]", "Set the encoding of the input html, defaults to "utf-8"")
.option("-t, --timeout [ms]", "Set a max timeout of [ms]")
.option("-x, --html", "output html file")
.option("-e, --epub", "output epub file")
.option("-b, --blockLocal", "Disallow access to filesystem for local files")
.option("-r, --blockRemote", "Disallow requests to remote servers")
.option("--allowedPath [allowedPaths]", "Only allow access to given filesystem paths, repeatable.", collect, [])
......@@ -142,6 +143,9 @@ if (typeof input === "string") {
if (program.html) {
file = await printer.html(input, options);
output = replaceExt(output, ".html");
} else if (program.epub) {
file = await printer.epub(input, options);
output = replaceExt(output, ".epub");
} else {
options.outlineTags = !program.outlineTags ? [] : program.outlineTags.split(",");
file = await printer.pdf(input, options);
......
{
"name": "pagedjs-cli",
"version": "0.0.10",
"version": "0.1.0",
"lockfileVersion": 1,
"requires": true,
"dependencies": {
......@@ -74,6 +74,11 @@
"resolved": "https://registry.npmjs.org/@types/mime-types/-/mime-types-2.1.0.tgz",
"integrity": "sha1-nKUs2jY/aZxpRmwqbM2q2RPqenM="
},
"@types/node": {
"version": "14.0.5",
"resolved": "https://registry.npmjs.org/@types/node/-/node-14.0.5.tgz",
"integrity": "sha512-90hiq6/VqtQgX8Sp0EzeIsv3r+ellbGj4URKj5j30tLlZvRUpnAe9YbYnjl3pJM93GyXU0tghHhvXHq+5rnCKA=="
},
"accepts": {
"version": "1.3.7",
"resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.7.tgz",
......@@ -194,6 +199,11 @@
"type-is": "~1.6.17"
}
},
"boolbase": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz",
"integrity": "sha1-aN/1++YMUes3cl6p4+0xDcwed24="
},
"brace-expansion": {
"version": "1.1.11",
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
......@@ -240,6 +250,19 @@
"integrity": "sha512-mT8iDcrh03qDGRRmoA2hmBJnxpllMR+0/0qlzjqZES6NdiWDcZkCNAk4rPFZ9Q85r27unkiNNg8ZOiwZXBHwcA==",
"dev": true
},
"cheerio": {
"version": "1.0.0-rc.3",
"resolved": "https://registry.npmjs.org/cheerio/-/cheerio-1.0.0-rc.3.tgz",
"integrity": "sha512-0td5ijfUPuubwLUu0OBoe98gZj8C/AA+RW3v67GPlGOrvxWjZmBXiBCRU+I8VEiNyJzjth40POfHiz2RB3gImA==",
"requires": {
"css-select": "~1.2.0",
"dom-serializer": "~0.1.1",
"entities": "~1.1.1",
"htmlparser2": "^3.9.1",
"lodash": "^4.15.0",
"parse5": "^3.0.1"
}
},
"cli-cursor": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/cli-cursor/-/cli-cursor-3.1.0.tgz",
......@@ -352,6 +375,17 @@
}
}
},
"css-select": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/css-select/-/css-select-1.2.0.tgz",
"integrity": "sha1-KzoRBTnFNV8c2NMUYj6HCxIeyFg=",
"requires": {
"boolbase": "~1.0.0",
"css-what": "2.1",
"domutils": "1.5.1",
"nth-check": "~1.0.1"
}
},
"css-tree": {
"version": "1.0.0-alpha.39",
"resolved": "https://registry.npmjs.org/css-tree/-/css-tree-1.0.0-alpha.39.tgz",
......@@ -361,6 +395,11 @@
"source-map": "^0.6.1"
}
},
"css-what": {
"version": "2.1.3",
"resolved": "https://registry.npmjs.org/css-what/-/css-what-2.1.3.tgz",
"integrity": "sha512-a+EPoD+uZiNfh+5fxw2nO9QwFa6nJe2Or35fGY6Ipw1R3R4AGz1d1TEZrCegvw2YTmZ0jXirGYlzxxpYSHwpEg=="
},
"d": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/d/-/d-1.0.1.tgz",
......@@ -411,6 +450,37 @@
"esutils": "^2.0.2"
}
},
"dom-serializer": {
"version": "0.1.1",
"resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-0.1.1.tgz",
"integrity": "sha512-l0IU0pPzLWSHBcieZbpOKgkIn3ts3vAh7ZuFyXNwJxJXk/c4Gwj9xaTJwIDVQCXawWD0qb3IzMGH5rglQaO0XA==",
"requires": {
"domelementtype": "^1.3.0",
"entities": "^1.1.1"
}
},
"domelementtype": {
"version": "1.3.1",
"resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-1.3.1.tgz",
"integrity": "sha512-BSKB+TSpMpFI/HOxCNr1O8aMOTZ8hT3pM3GQ0w/mWRmkhEDSFJkkyzz4XQsBV44BChwGkrDfMyjVD0eA2aFV3w=="
},
"domhandler": {
"version": "2.4.2",
"resolved": "https://registry.npmjs.org/domhandler/-/domhandler-2.4.2.tgz",
"integrity": "sha512-JiK04h0Ht5u/80fdLMCEmV4zkNh2BcoMFBmZ/91WtYZ8qVXSKjiw7fXMgFPnHcSZgOo3XdinHvmnDUeMf5R4wA==",
"requires": {
"domelementtype": "1"
}
},
"domutils": {
"version": "1.5.1",
"resolved": "https://registry.npmjs.org/domutils/-/domutils-1.5.1.tgz",
"integrity": "sha1-3NhIiib1Y9YQeeSMn3t+Mjc2gs8=",
"requires": {
"dom-serializer": "0",
"domelementtype": "1"
}
},
"ee-first": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz",
......@@ -427,6 +497,11 @@
"resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-1.0.2.tgz",
"integrity": "sha1-rT/0yG7C0CkyL1oCw6mmBslbP1k="
},
"entities": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/entities/-/entities-1.1.2.tgz",
"integrity": "sha512-f2LZMYl1Fzu7YSBKg+RoROelpOaNrcGmE9AZubeDfrCEia483oW4MI4VyFd5VNHIgQ/7qm1I0wUHK1eJnn2y2w=="
},
"es5-ext": {
"version": "0.10.53",
"resolved": "https://registry.npmjs.org/es5-ext/-/es5-ext-0.10.53.tgz",
......@@ -854,6 +929,36 @@
"resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz",
"integrity": "sha1-tdRU3CGZriJWmfNGfloH87lVuv0="
},
"html-entities": {
"version": "1.3.1",
"resolved": "https://registry.npmjs.org/html-entities/-/html-entities-1.3.1.tgz",
"integrity": "sha512-rhE/4Z3hIhzHAUKbW8jVcCyuT5oJCXXqhN/6mXXVCpzTmvJnoH2HL/bt3EZ6p55jbFJBeAe1ZNpL5BugLujxNA=="
},
"htmlparser2": {
"version": "3.10.1",
"resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-3.10.1.tgz",
"integrity": "sha512-IgieNijUMbkDovyoKObU1DUhm1iwNYE/fuifEoEHfd1oZKZDaONBSkal7Y01shxsM49R4XaMdGez3WnF9UfiCQ==",
"requires": {
"domelementtype": "^1.3.1",
"domhandler": "^2.3.0",
"domutils": "^1.5.1",
"entities": "^1.1.1",
"inherits": "^2.0.1",
"readable-stream": "^3.1.1"
},
"dependencies": {
"readable-stream": {
"version": "3.6.0",
"resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.0.tgz",
"integrity": "sha512-BViHy7LKeTz4oNnkcLJ+lVSL6vpiFeX6/d3oSH8zCW7UxP2onchk+vTGB143xuFjHS3deTgkKoXXymXqymiIdA==",
"requires": {
"inherits": "^2.0.3",
"string_decoder": "^1.1.1",
"util-deprecate": "^1.0.1"
}
}
}
},
"http-errors": {
"version": "1.7.2",
"resolved": "https://registry.npmjs.org/http-errors/-/http-errors-1.7.2.tgz",
......@@ -909,6 +1014,11 @@
"integrity": "sha512-cyFDKrqc/YdcWFniJhzI42+AzS+gNwmUzOSFcRCQYwySuBBBy/KjuxWLZ/FHEH6Moq1NizMOBWyTcv8O4OZIMg==",
"dev": true
},
"immediate": {
"version": "3.0.6",
"resolved": "https://registry.npmjs.org/immediate/-/immediate-3.0.6.tgz",
"integrity": "sha1-nbHb0Pr43m++D13V5Wu2BigN5ps="
},
"import-fresh": {
"version": "3.2.1",
"resolved": "https://registry.npmjs.org/import-fresh/-/import-fresh-3.2.1.tgz",
......@@ -1097,6 +1207,17 @@
"integrity": "sha1-nbe1lJatPzz+8wp1FC0tkwrXJlE=",
"dev": true
},
"jszip": {
"version": "3.4.0",
"resolved": "https://registry.npmjs.org/jszip/-/jszip-3.4.0.tgz",
"integrity": "sha512-gZAOYuPl4EhPTXT0GjhI3o+ZAz3su6EhLrKUoAivcKqyqC7laS5JEv4XWZND9BgcDcF83vI85yGbDmDR6UhrIg==",
"requires": {
"lie": "~3.3.0",
"pako": "~1.0.2",
"readable-stream": "~2.3.6",
"set-immediate-shim": "~1.0.1"
}
},
"katex": {
"version": "0.11.1",
"resolved": "https://registry.npmjs.org/katex/-/katex-0.11.1.tgz",
......@@ -1122,6 +1243,14 @@
"type-check": "~0.3.2"
}
},
"lie": {
"version": "3.3.0",
"resolved": "https://registry.npmjs.org/lie/-/lie-3.3.0.tgz",
"integrity": "sha512-UaiMJzeWRlEujzAuw5LokY1L5ecNQYZKfmyZ9L7wDHb/p5etKaxXhohBcrw0EYby+G/NA52vRSN4N39dxHAIwQ==",
"requires": {
"immediate": "~3.0.5"
}
},
"lodash": {
"version": "4.17.15",
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.15.tgz",
......@@ -1178,6 +1307,11 @@
"mime-db": "1.40.0"
}
},
"mimetype": {
"version": "0.0.8",
"resolved": "https://registry.npmjs.org/mimetype/-/mimetype-0.0.8.tgz",
"integrity": "sha1-+zACJ5S793Jct7Rt+CDofdkf0IY="
},
"mimic-fn": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/mimic-fn/-/mimic-fn-2.1.0.tgz",
......@@ -1243,6 +1377,14 @@
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.0.tgz",
"integrity": "sha512-8dG4H5ujfvFiqDmVu9fQ5bOHUC15JMjMY/Zumv26oOvvVJjM67KF8koCWIabKQ1GJIa9r2mMZscBq/TbdOcmNA=="
},
"nth-check": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/nth-check/-/nth-check-1.0.2.tgz",
"integrity": "sha512-WeBOdju8SnzPN5vTUJYxYUxLeXpCaVP5i5e0LF8fg7WORF2Wd7wFX/pk0tYZk7s8T+J7VLy0Da6J1+wCT0AtHg==",
"requires": {
"boolbase": "~1.0.0"
}
},
"on-finished": {
"version": "2.3.0",
"resolved": "https://registry.npmjs.org/on-finished/-/on-finished-2.3.0.tgz",
......@@ -1381,6 +1523,14 @@
"callsites": "^3.0.0"
}
},
"parse5": {
"version": "3.0.3",
"resolved": "https://registry.npmjs.org/parse5/-/parse5-3.0.3.tgz",
"integrity": "sha512-rgO9Zg5LLLkfJF9E6CCmXlSE4UVceloys8JrFqCcHloC3usd/kJCyPDwH2SOlzix2j3xaP9sUX3e8+kvkuleAA==",
"requires": {
"@types/node": "*"
}
},
"parseurl": {
"version": "1.3.3",
"resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz",
......@@ -1665,6 +1815,11 @@
"send": "0.17.1"
}
},
"set-immediate-shim": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/set-immediate-shim/-/set-immediate-shim-1.0.1.tgz",
"integrity": "sha1-SysbJ+uAip+NzEgaWOXlb1mfP2E="
},
"setprototypeof": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.1.1.tgz",
......
......@@ -16,12 +16,16 @@
},
"main": "index.js",
"dependencies": {
"cheerio": "^1.0.0-rc.3",
"commander": "^5.0.0",
"express": "^4.17.1",
"html-entities": "^1.3.1",
"hyphenopoly": "^4.2.1",
"jszip": "^3.4.0",
"katex": "^0.11.1",
"lodash": "^4.17.15",
"mathjax": "^3.0.1",
"mimetype": "0.0.8",
"node-fetch": "^2.6.0",
"ora": "^4.0.3",
"pagedjs": "0.1.40",
......
const fs = require("fs");
const JSZip = require("jszip");
const cheerio = require("cheerio");
const EventEmitter = require("events");
const path = require("path");
const mimetype = require('mimetype');
const Entities = require('html-entities').AllHtmlEntities;
/**
 * Packages HTML that has already been paginated by Paged.js into an EPUB 3
 * archive (declared as `rendition:layout` = pre-paginated).
 *
 * Every `.pagedjs_page` element found under `.pagedjs_pages` becomes its own
 * `OEBPS/content/section_N.xhtml` document; inline `<style>` contents are
 * merged into one stylesheet and the supplied assets are copied into
 * `OEBPS/assets`.
 *
 * NOTE: the constructor returns the promise produced by `output()`, so
 * `new EpubProcesser(...)` evaluates to a Promise of the archive rather than
 * to the instance. Callers rely on this, so it is kept as is.
 */
class EpubProcesser extends EventEmitter {
	/**
	 * @param {Buffer|string} html paginated HTML document
	 * @param {Array<{url: string, filename: string, buffer: *}>} assets files
	 *        to embed; entries whose `url` is a `data:` URI are skipped
	 * @param {{width: *, height: *}} size page size, written into each page's
	 *        viewport meta tag
	 * @returns {Promise<Buffer>} resolves with the generated archive
	 * @throws {Error} when `html` is missing
	 */
	constructor(html, assets, size) {
		super();

		if (!html) {
			throw new Error("Must pass a HTML Buffer to EpubProcesser");
		}

		// Tolerate a missing asset list instead of failing later in for...of.
		this.assets = assets || [];
		this.html = html;

		// xmlMode keeps the serialised pages well-formed XHTML; entities are
		// left untouched so they are written back exactly as they came in.
		this.$ = cheerio.load(html, {
			xmlMode: true,
			decodeEntities: false
		});

		this.size = size;
		this.zip = this.createZip();
		this.metadata = {};
		this.stylesheet = "styles/main.css";

		this.assemble(this.$);

		return this.output();
	}

	/**
	 * Tells whether a URL-ish value is a `data:` URI.
	 * Never throws, so relative paths and missing values simply yield false.
	 *
	 * @param {*} value
	 * @returns {boolean}
	 */
	static isDataUri(value) {
		return /^\s*data:/i.test(String(value));
	}

	/**
	 * Walks the parsed document and fills the zip: container file, assets,
	 * page documents, stylesheet, navigation document and package document.
	 *
	 * @param {Function} $ cheerio instance loaded with the paginated HTML
	 */
	assemble($) {
		this.addContainer();

		// Metadata: the <title> plus every <meta name="…" content="…">.
		const metadata = {
			title: $("title").text()
		};
		$("meta").each(function () {
			const name = $(this).attr("name");
			const content = $(this).attr("content");
			if (name && content) {
				metadata[name] = content;
			}
		});
		this.metadata = metadata;

		this.addAssets(this.assets);

		// Point images at the copies stored in OEBPS/assets. Inline data:
		// images and <img> elements without a src are left untouched.
		$("img").each(function () {
			const src = $(this).attr("src");
			if (!src || EpubProcesser.isDataUri(src)) {
				return;
			}
			$(this).attr("src", `../assets/${path.basename(src)}`);
		});

		// In-document links (#id) must name the section file that now holds
		// their target, because every page becomes a separate document.
		$("a").each(function () {
			const href = $(this).attr("href");
			if (!href || href[0] !== "#" || href.length < 2) {
				return;
			}

			let pageNumber;
			try {
				pageNumber = $(href).closest(".pagedjs_page").attr("data-page-number");
			} catch (error) {
				// The fragment is not usable as a selector: keep the link as is.
				return;
			}

			// Only rewrite when the target really sits inside a page; `href`
			// already carries its leading "#".
			if (pageNumber) {
				$(this).attr("href", `section_${pageNumber}.xhtml${href}`);
			}
		});

		// One serialised HTML string per paginated page.
		const pages = [];
		$(".pagedjs_pages .pagedjs_page").each(function (i, elem) {
			pages.push($.html(elem));
		});
		this.pages = pages;
		this.addContent(pages);

		// Merge all inline <style> contents into the single stylesheet.
		const styles = [];
		$("style").each(function () {
			styles.push($(this).html());
		});
		this.addStyles(styles);

		this.manifest = this.createManifestItems();
		this.spine = this.createSpineItems();

		this.addToc();
		this.addOPF();
	}

	/**
	 * Creates the archive and adds the `mimetype` entry first, uncompressed.
	 *
	 * @returns {JSZip}
	 */
	createZip() {
		const zip = new JSZip();
		zip.file("mimetype", "application/epub+zip", {
			compression: "store"
		});
		return zip;
	}

	/**
	 * Writes `META-INF/container.xml` pointing at the package document.
	 *
	 * @param {string} [path="OEBPS/content.opf"] archive path of the package
	 *        document (this parameter shadows the `path` module in here)
	 */
	addContainer(path="OEBPS/content.opf") {
		const containerXML = `<?xml version="1.0" encoding="UTF-8"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
<rootfiles>
<rootfile full-path="${path}" media-type="application/oebps-package+xml"/>
</rootfiles>
</container>`;

		const metaInf = this.zip.folder("META-INF");
		metaInf.file("container.xml", containerXML);
	}

	/**
	 * Stores the given CSS chunks, joined by newlines, as the stylesheet.
	 *
	 * @param {string[]} styles
	 * @returns {JSZip} result of `zip.file(...)`
	 */
	addStyles(styles) {
		return this.zip.file("OEBPS/" + this.stylesheet, styles.join("\n"), { createFolders: true });
	}

	/**
	 * Copies every asset that is not a `data:` URI into `OEBPS/assets`.
	 *
	 * @param {Array<{url: string, filename: string, buffer: *}>} assets
	 * @returns {JSZip} the assets folder
	 */
	addAssets(assets) {
		const assetsFolder = this.zip.folder("OEBPS/assets");

		for (const asset of assets) {
			if (EpubProcesser.isDataUri(asset.url)) {
				continue;
			}
			assetsFolder.file(asset.filename, asset.buffer, {
				binary: true,
				base64: false
			});
		}

		return assetsFolder;
	}

	/**
	 * Wraps one page's markup in a standalone XHTML document.
	 *
	 * @param {string} content serialised page markup
	 * @param {number} index value the CSS `page` counter is reset to
	 * @returns {string} XHTML document source
	 */
	createPage(content, index) {
		const pageTemplate = `<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=${this.size.width}, height=${this.size.height}" />
<title>${this.metadata.title}</title>
<link href="../${this.stylesheet}" type="text/css" rel="stylesheet" />
</head>
<body>
<div style="counter-reset: page ${index}">
${content}
</div>
</body>
</html>
`;
		return pageTemplate;
	}

	/**
	 * Writes each page as `OEBPS/content/section_N.xhtml`, N starting at 1.
	 *
	 * @param {string[]} pages
	 * @returns {JSZip} the content folder
	 */
	addContent(pages) {
		const contentFolder = this.zip.folder("OEBPS/content");

		pages.forEach((page, position) => {
			const number = position + 1;
			contentFolder.file(`section_${number}.xhtml`, this.createPage(page, number));
		});

		return contentFolder;
	}

	/**
	 * Builds the manifest `<item>` lines for every page and every stored
	 * asset. The asset filter matches `addAssets`, so the manifest only lists
	 * files that were actually written.
	 *
	 * @returns {string}
	 */
	createManifestItems() {
		let manifest = "";

		for (let index = 1; index <= this.pages.length; index++) {
			manifest += `<item href="content/section_${index}.xhtml" id="section_${index}" media-type="application/xhtml+xml" />\n`;
		}

		this.assets.forEach((asset, position) => {
			if (EpubProcesser.isDataUri(asset.url)) {
				return;
			}
			// An unknown extension would otherwise be written as "false".
			const mime = mimetype.lookup(asset.filename) || "application/octet-stream";
			manifest += `<item href="assets/${asset.filename}" id="asset_${position + 1}" media-type="${mime}" />\n`;
		});

		return manifest;
	}

	/**
	 * Builds the spine `<itemref>` lines, one per page, in page order.
	 *
	 * @returns {string}
	 */
	createSpineItems() {
		let spine = "";
		for (let index = 1; index <= this.pages.length; index++) {
			spine += `<itemref idref="section_${index}" />\n`;
		}
		return spine;
	}

	/**
	 * Renders the package document (content.opf).
	 *
	 * Optional metadata is left out when absent instead of being written as
	 * the text "undefined"; a missing modification date falls back to the
	 * current time in `CCYY-MM-DDThh:mm:ssZ` form.
	 *
	 * @param {string} [ident="12345"] publication identifier
	 * @returns {string} OPF XML source
	 */
	createOPF(ident = "12345") {
		const meta = this.metadata;
		const title = meta.title || "";
		// Mapping kept from the original: meta "author" -> dc:creator,
		// meta "creator" -> dc:publisher.
		const creator = meta.author ? `<dc:creator>${meta.author}</dc:creator>` : "";
		const publisher = meta.creator ? `<dc:publisher>${meta.creator}</dc:publisher>` : "";
		const modified = meta.modified || new Date().toISOString().replace(/\.\d+Z$/, "Z");

		const opf = `<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<package xmlns="http://www.idpf.org/2007/opf" prefix="rendition: http://www.idpf.org/vocab/rendition/# ibooks: http://vocabulary.itunes.apple.com/rdf/ibooks/vocabulary-extensions-1.0/" unique-identifier="ident" version="3.0" xml:lang="${meta.lang || "en"}">
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">
<dc:identifier id="ident">${ident}</dc:identifier>
<dc:title>${title}</dc:title>
${creator}
${publisher}
<meta property="dcterms:modified">${modified}</meta>
<meta property="ibooks:version">3.0</meta>
<meta property="rendition:layout">pre-paginated</meta>
<meta property="rendition:spread">auto</meta>
<meta property="rendition:orientation">auto</meta>
<meta property="ibooks:specified-fonts">true</meta>
<!-- <meta name="Cover" content="cover-image" /> -->
</metadata>
<manifest>
${this.manifest}
<item href="${this.stylesheet}" id="css" media-type="text/css" />
<!-- <item href="Content/Cover.xhtml" id="Cover" media-type="application/xhtml+xml" /> -->
<!-- <item href="assets/cover.jpg" id="cover-image" media-type="image/jpeg" properties="cover-image"/> -->
<item href="content/nav.xhtml" id="nav" media-type="application/xhtml+xml" properties="nav" />
</manifest>
<spine>
${this.spine}
</spine>
</package>`;
		return opf;
	}

	/**
	 * Writes the package document to `OEBPS/content.opf`.
	 *
	 * @param {string} [ident] forwarded to `createOPF`
	 * @returns {JSZip} result of `zip.file(...)`
	 */
	addOPF(ident) {
		const opf = this.createOPF(ident);
		return this.zip.file("OEBPS/content.opf", opf, { createFolders: true });
	}

	/**
	 * Builds the table-of-contents entries ("Page N"), one per page.
	 *
	 * @returns {string}
	 */
	createTocItems() {
		let toc = "";
		for (let index = 1; index <= this.pages.length; index++) {
			toc += `<li><a href="section_${index}.xhtml">Page ${index}</a></li>\n`;
		}
		return toc;
	}

	/**
	 * Builds the page-list entries (bare page numbers), one per page.
	 *
	 * @returns {string}
	 */
	createPageList() {
		let toc = "";
		for (let index = 1; index <= this.pages.length; index++) {
			toc += `<li><a href="section_${index}.xhtml">${index}</a></li>\n`;
		}
		return toc;
	}

	/**
	 * Renders the navigation document (toc and page-list). Its language
	 * follows the same `metadata.lang || "en"` rule as the package document.
	 *
	 * @returns {string} XHTML source of nav.xhtml
	 */
	createToc() {
		const sections = this.createTocItems();
		const pagelist = this.createPageList();
		const lang = this.metadata.lang || "en";

		const tocHtml = `<?xml version="1.0" encoding="UTF-8"?>
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" lang="${lang}" xml:lang="${lang}">
<head>
<title>${this.metadata.title}</title>
<meta charset="utf-8" />
</head>
<body>
<section class="frontmatter TableOfContents" epub:type="frontmatter toc">
<header>
<h1>Table of Contents</h1>
</header>
<nav xmlns:epub="http://www.idpf.org/2007/ops" epub:type="toc" id="toc">
<ol>
${sections}
</ol>
</nav>
<nav epub:type="page-list">
<ol>
${pagelist}
</ol>
</nav>
</section>
</body>
</html>`;
		return tocHtml;
	}

	/**
	 * Writes the navigation document to `OEBPS/content/nav.xhtml`.
	 *
	 * @returns {JSZip} result of `zip.file(...)`
	 */
	addToc() {
		const tocHTML = this.createToc();
		return this.zip.file("OEBPS/content/nav.xhtml", tocHTML, { createFolders: true });
	}

	/**
	 * Serialises the archive.
	 *
	 * @returns {Promise<Buffer>} DEFLATE-compressed archive as a Node buffer
	 */
	output() {
		return this.zip.generateAsync({
			type: "nodebuffer",
			streamFiles: true,
			mimeType: "application/epub+zip",
			compression: "DEFLATE"
		});
	}
}
module.exports = EpubProcesser;
......@@ -12,6 +12,7 @@ let paths = pagedjsLocation.split("node_modules");