Commit aee3fc48 authored by Fred Chasen

Add post processing, update CLI, remove local server

parent 7ccd8672
#!/usr/bin/env node
const program = require('commander');
const ora = require('ora');
const Printer = require("../");
const path = require('path');
const fs = require('fs');
......@@ -24,13 +25,25 @@ program
// .option('-e, --encoding [type]', 'Set the encoding of the input html, defaults to "utf-8"')
.option('-t, --timeout [ms]', 'Set a max timeout of [ms]')
.option('-x, --html', 'output html file')
.option('-b, --blockLocal', 'Disallow access to filesystem for local files')
.parse(process.argv);
// Input comes from program.inputs, falling back to the first positional argument.
let input = program.inputs || program.args[0];
let dir = process.cwd();
// NOTE(review): relativePath is declared twice in this view (here and two
// lines below); this looks like old and new diff lines interleaved — confirm
// which declaration the real file keeps.
let relativePath = path.resolve(dir, input);
let url;
let relativePath;
let allowLocal;
// Classify the input: when it parses as a URL, allowLocal is set to false;
// otherwise it is resolved against the working directory and allowLocal is
// true unless program.blockLocal is set.
try {
url = new URL(input);
allowLocal = false;
} catch {
relativePath = path.resolve(dir, input);
allowLocal = !program.blockLocal;
}
let output;
let tmpFile, tmpPath;
......@@ -44,16 +57,19 @@ if (!input) {
return process.exit(1);
}
if (['.html', '.xhtml'].indexOf(path.extname(relativePath)) === -1) {
console.error("Must pass a html or xhtml file as input");
return process.exit(1);
}
if (relativePath) {
try {
fs.accessSync(relativePath, fs.F_OK);
} catch (e) {
console.error("Input cannot be found", e);
if (['.html', '.xhtml'].indexOf(path.extname(relativePath)) === -1) {
console.error("Must pass a html or xhtml file as input");
return process.exit(1);
}
try {
fs.accessSync(relativePath, fs.F_OK);
} catch (e) {
console.error("Input cannot be found", e);
return process.exit(1);
}
}
if (typeof(program.output) === "string") {
......@@ -64,34 +80,40 @@ if (typeof(program.output) === "string") {
output = "output.pdf";
}
/*
if (program.hyphenate) {
hyphenateOptions = {
ignore: program.hypher_ignore || undefined,
only: program.hypher_only || undefined,
encoding: program.encoding || undefined
}
tmpPath = replaceExt(relativePath, ".hyphenated.html");
const spinner = ora({
spinner: "circleQuarters"
})
// tmpFile = temp.openSync({suffix: '.html'});
// tmpPath = tmpFile.path;
// Create a new Hyphenator, with passed language
hyphenator = new Hyphenator(program.hyphenate);
hyphenator.process(relativePath, tmpPath, hyphenateOptions);
console.log("Hyphenated for", typeof(program.hyphenate) === "string" ? program.hyphenate : "en-us");
if (typeof input === "string") {
spinner.start("Loading: " + input);
} else {
spinner.start("Loading");
}
if (program.debug && tmpPath) {
console.log("Hyphenated file at:", tmpPath);
}
(async () => {
let printer = new Printer(headless, allowLocal);
}
*/
// Progress feedback per paginated page: the first page (position 0) closes the
// "Loading" step and opens a new spinner line; later pages only update the text.
printer.on("page", (page) => {
  const label = "Rendering: Page " + (page.position + 1);
  if (page.position !== 0) {
    spinner.text = label;
    return;
  }
  spinner.succeed("Loaded");
  spinner.start(label);
});
(async () => {
let printer = new Printer(headless);
// When the printer emits "rendered", show its message as the finished step and
// start the "Generating" step.
printer.on("rendered", (summary) => {
  spinner.succeed(summary);
  spinner.start("Generating");
});
// When the printer emits "postprocessing", close the "Generating" step and
// start the "Processing" step. The event payload is not used.
printer.on("postprocessing", () => {
  spinner.succeed("Generated");
  spinner.start("Processing");
});
let file;
if (headless) {
......@@ -106,10 +128,12 @@ if (program.hyphenate) {
printer.preview(input);
}
spinner.succeed("Processed");
if (file) {
fs.writeFile(output, file, (err) => {
if (err) throw err;
console.log('Saved to', output);
spinner.succeed("Saved to " + output);
process.exit(0);
});
}
......
const Paged = require('pagedjs');
const EventEmitter = require('events');
const puppeteer = require('puppeteer');
// const temp = require("temp").track();
const path = require('path');
const fs = require('fs');
const express = require('express');
const app = express();
const PORT = 9999;
let dir = process.cwd();
let scriptPath = path.resolve(dir, "./node_modules/pagedjs/dist/");
// PDF export options: backgrounds are printed, no header/footer is drawn,
// preferCSSPageSize is enabled, and all four margins are zero.
const PDF_SETTINGS = {
printBackground: true,
displayHeaderFooter: false,
preferCSSPageSize: true,
margin: {
top: 0,
right: 0,
bottom: 0,
left: 0,
}
};
/**
 * Paginates an HTML file with the Paged.js polyfill inside a Puppeteer
 * browser and exports the result as a PDF, as HTML, or as a live page.
 *
 * The input's directory and the polyfill directory are served from a
 * temporary local HTTP server for the duration of each render.
 *
 * Events: whatever event name the page passes as `msg` to the exposed
 * `PuppeteerLogger` / `onPagesRendered` hooks is re-emitted on this object.
 */
class Printer extends EventEmitter {
  /**
   * @param {boolean} [headless] - Pass `false` to run the browser with a
   *   visible window; any other value runs it headless.
   */
  constructor(headless) {
    super();
    this.headless = headless !== false;
  }

  /**
   * Launch the browser.
   * @returns {Promise<object>} The launched Puppeteer browser.
   */
  async setup() {
    const browser = await puppeteer.launch({
      headless: this.headless
    });
    return browser;
  }

  /**
   * Start a local HTTP server exposing the input's directory under `/print`
   * and the Paged.js dist directory under `/polyfill`.
   *
   * @param {string} input - Path to the input file, resolved against the
   *   working directory.
   * @returns {Promise<object>} The listening server; rejects if it cannot
   *   start listening.
   */
  async serve(input) {
    const relativePath = path.resolve(dir, input);
    const dirname = path.dirname(relativePath);
    const scriptPath = path.resolve(dir, "./node_modules/pagedjs/dist/");

    // A fresh app per call: mounting on a shared app would stack static
    // handlers, so a later render could be served files from the directory
    // of an earlier one.
    const app = express();
    app.use("/print", express.static(dirname));
    app.use("/polyfill", express.static(scriptPath));
    app.set('port', process.env.PORT || 0);

    return new Promise((resolve, reject) => {
      const server = app.listen(app.get('port'), () => {
        resolve(server);
      });
      // Without this a failed bind (e.g. port in use) would never settle.
      server.on("error", reject);
    });
  }

  /**
   * Open the input in a new page, inject the Paged.js polyfill and wait until
   * the page calls `onPagesRendered` and `.pagedjs_pages` exists.
   *
   * @param {string} input - Path to the input file.
   * @returns {Promise<object>} The Puppeteer page holding the paginated
   *   document. The caller is responsible for closing it.
   */
  async render(input) {
    let resolver;
    const rendered = new Promise((resolve) => {
      resolver = resolve;
    });

    if (!this.browser) {
      this.browser = await this.setup();
    }

    const page = await this.browser.newPage();
    const server = await this.serve(input);

    try {
      const port = server.address().port;
      const basename = path.basename(path.resolve(dir, input));

      // Encode the file name so spaces, '#', '?' etc. survive as a path segment.
      // Navigation errors are logged and rendering is still attempted.
      await page.goto(`http://localhost:${port}/print/${encodeURIComponent(basename)}`)
        .catch((e) => {
          console.error(e);
        });

      await page.exposeFunction('PuppeteerLogger', (msg, counter) => {
        console.log(msg, counter);
        this.emit(msg, counter);
      });

      await page.exposeFunction('onPagesRendered', async (msg, width, height, orientation) => {
        console.log('onPagesRendered', msg, width, height, orientation);
        this.emit(msg, width, height, orientation);
        resolver({msg, width, height, orientation});
      });

      await page.addScriptTag({
        url: `http://localhost:${port}/polyfill/paged.polyfill.js`
      });

      await rendered;
      await page.waitForSelector(".pagedjs_pages");
    } finally {
      // Always release the temporary server, even when rendering throws.
      server.close();
    }

    return page;
  }

  /**
   * Render the input and export it as a PDF.
   *
   * @param {string} input - Path to the input file.
   * @param {object} [options]
   * @param {string|number} [options.width] - Explicit paper width; when set,
   *   the CSS page size is no longer preferred.
   * @param {string|number} [options.height] - Explicit paper height.
   * @param {string} [options.orientation] - Forwarded unchanged.
   * @returns {Promise<Buffer|undefined>} The PDF data, or `undefined` when
   *   PDF generation failed (the error is logged).
   */
  async pdf(input, options={}) {
    const page = await this.render(input);

    // NOTE(review): `orientation` is forwarded as before; Puppeteer's page.pdf
    // documents a `landscape` boolean instead — confirm whether it should be mapped.
    const settings = {
      printBackground: true,
      displayHeaderFooter: false,
      preferCSSPageSize: options.width ? false : true,
      width: options.width,
      height: options.height,
      orientation: options.orientation,
      margin: {
        top: 0,
        right: 0,
        bottom: 0,
        left: 0,
      }
    };

    // Use the per-call settings so the caller's width/height take effect.
    const pdf = await page.pdf(settings)
      .catch((e) => {
        console.error(e);
      });

    await page.close();
    return pdf;
  }

  /**
   * Render the input and return the paginated document's HTML.
   *
   * @param {string} input - Path to the input file.
   * @param {*} [stayopen] - Currently unused; kept for compatibility.
   * @returns {Promise<string|undefined>} The page markup, or `undefined` when
   *   reading it failed (the error is logged).
   */
  async html(input, stayopen) {
    const page = await this.render(input);
    const content = await page.content()
      .catch((e) => {
        console.error(e);
      });
    await page.close();
    return content;
  }

  /**
   * Render the input and leave the page open.
   *
   * @param {string} input - Path to the input file.
   * @returns {Promise<object>} The open Puppeteer page.
   */
  async preview(input) {
    const page = await this.render(input);
    return page;
  }
}
const Printer = require('./src/printer');
module.exports = Printer;
This diff is collapsed.
......@@ -15,16 +15,19 @@
},
"main": "index.js",
"dependencies": {
"commander": "^2.19.0",
"commander": "^2.20.0",
"express": "^4.16.4",
"hyphenopoly": "^2.5.1",
"katex": "^0.10.0",
"hyphenopoly": "^3.0.1",
"katex": "^0.10.1",
"lodash": "^4.17.11",
"mathjax": "^2.7.5",
"pagedjs": "^0.1.26",
"puppeteer": "^1.10.0",
"ora": "^3.4.0",
"pagedjs": "^0.1.34",
"pdf-lib": "^0.6.1",
"puppeteer": "^1.15.0",
"replace-ext": "^1.0.0"
},
"devDependencies": {
"eslint": "^5.9.0"
"eslint": "^5.16.0"
}
}
const PDFLib = require("pdf-lib");
const EventEmitter = require('events');
const PDFDocumentWriter = require('./writer');
/**
 * Escape a value for use as XML text content.
 * `null` / `undefined` become an empty string instead of the text "undefined".
 */
function escapeXml(value) {
  if (value === undefined || value === null) {
    return "";
  }
  return String(value)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;");
}

/** ISO-8601 string for a Date, or "" for anything else. */
function toIsoString(value) {
  return value instanceof Date ? value.toISOString() : "";
}

/** Look up the document's Info dictionary. */
function lookupInfoDict(pdfDoc) {
  // Info Reference in Skia pdfs is always 1st
  const ref = PDFLib.PDFIndirectReference.forNumbers(1, 0);
  return pdfDoc.index.lookup(ref);
}

/**
 * Post-processes a generated PDF with pdf-lib: writes document metadata
 * (Info dictionary and an XMP packet) and per-page Trim/Crop boxes, then
 * serializes the document again.
 */
class PostProcesser extends EventEmitter {
  /**
   * @param {Buffer|Uint8Array} pdf - The PDF data to load.
   * @throws {Error} When no PDF data is given.
   */
  constructor(pdf) {
    super();

    if (!pdf) {
      throw new Error("Must pass a PDF Buffer to PostProcesser");
    }
    this.pdf = pdf;
    this.pdfDoc = PDFLib.PDFDocumentFactory.load(pdf);
  }

  /**
   * Normalize `meta` in place and write it to the document as both an XMP
   * packet and Info dictionary entries.
   *
   * @param {object} [meta] - Metadata such as `title`, `author`, `subject`,
   *   `keywords` (array or comma-separated string), `creator`, `producer`,
   *   `creationDate`. Dates other than a `Date` creationDate are overwritten
   *   with the current time.
   */
  metadata(meta = {}) {
    if (typeof meta.keywords === "string") {
      // Trim so "a, b" yields ["a", "b"] rather than ["a", " b"].
      meta.keywords = meta.keywords
        .split(",")
        .map((keyword) => keyword.trim())
        .filter(Boolean);
    }

    if (!Array.isArray(meta.keywords)) {
      meta.keywords = [];
    }

    // Overwrite Dates
    if (!(meta.creationDate instanceof Date)) {
      meta.creationDate = new Date();
    }
    meta.modDate = new Date();
    meta.metadataDate = new Date();

    // Get the existing Info
    const info = this.getInfoDict();

    if (!meta.creator) {
      // Avoid producing "undefined + Paged.js" when the PDF has no Creator.
      meta.creator = info.creator ? info.creator + " + Paged.js" : "Paged.js";
    }

    if (!meta.producer) {
      meta.producer = info.producer;
    }

    // Add meta
    this.addXmpMetadata(meta);
    this.updateInfoDict(meta);
  }

  /**
   * Read the string entries of the Info dictionary.
   * @returns {object} title, subject, keywords, author, creationDate,
   *   modDate, creator and producer; each is falsy when the entry is absent.
   */
  getInfoDict(){
    const info = lookupInfoDict(this.pdfDoc);
    const read = (key) => {
      const entry = info.getMaybe(key);
      return entry && entry.string;
    };

    return {
      title: read("Title"),
      subject: read("Subject"),
      keywords: read("Keywords"),
      author: read("Author"),
      creationDate: read("CreationDate"),
      modDate: read("ModDate"),
      creator: read("Creator"),
      producer: read("Producer")
    };
  }

  /**
   * Write the non-empty fields of `meta` into the Info dictionary.
   * @param {object} meta - See `metadata()`.
   */
  updateInfoDict(meta) {
    const info = lookupInfoDict(this.pdfDoc);

    // NOTE(review): dates are written as ISO-8601 strings as before; the PDF
    // date format is "D:YYYYMMDDHHmmSS..." — confirm whether readers accept this.
    const entries = [
      ["Title", meta.title],
      ["Subject", meta.subject],
      ["Keywords", Array.isArray(meta.keywords) ? meta.keywords.join(", ") : ""],
      ["Author", meta.author],
      ["CreationDate", toIsoString(meta.creationDate)],
      ["ModDate", toIsoString(meta.modDate)],
      ["Creator", meta.creator],
      ["Producer", meta.producer]
    ];

    entries.forEach(([key, value]) => {
      if (value) {
        info.set(key, PDFLib.PDFString.fromString(value));
      }
    });
  }

  /**
   * Build an XMP packet from `meta` and attach it to the document catalog as
   * the `Metadata` stream. All values are XML-escaped and the packet is
   * encoded as UTF-8.
   *
   * @param {object} meta - See `metadata()`. `creatorTool` falls back to
   *   `creator` when not given.
   */
  addXmpMetadata(meta) {
    const whitespacePadding = new Array(20).fill(' '.repeat(100)).join('\n');
    const keywords = Array.isArray(meta.keywords) ? meta.keywords : [];
    const creatorTool = meta.creatorTool || meta.creator;

    const metadataXML = `
      <?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>
        <x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="Adobe XMP Core 5.2-c001 63.139439, 2010/09/27-13:37:26">
          <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
            <rdf:Description rdf:about="" xmlns:dc="http://purl.org/dc/elements/1.1/">
              <dc:format>application/pdf</dc:format>
              <dc:creator>
                <rdf:Seq>
                  <rdf:li>${escapeXml(meta.author)}</rdf:li>
                </rdf:Seq>
              </dc:creator>
              <dc:title>
                <rdf:Alt>
                  <rdf:li xml:lang="x-default">${escapeXml(meta.title)}</rdf:li>
                </rdf:Alt>
              </dc:title>
              <dc:subject>
                <rdf:Bag>
                  ${keywords
                    .map((keyword) => `<rdf:li>${escapeXml(keyword)}</rdf:li>`)
                    .join('\n')}
                </rdf:Bag>
              </dc:subject>
            </rdf:Description>
            <rdf:Description rdf:about="" xmlns:xmp="http://ns.adobe.com/xap/1.0/">
              <xmp:CreatorTool>${escapeXml(creatorTool)}</xmp:CreatorTool>
              <xmp:CreateDate>${toIsoString(meta.creationDate)}</xmp:CreateDate>
              <xmp:ModifyDate>${toIsoString(meta.modDate)}</xmp:ModifyDate>
              <xmp:MetadataDate>${toIsoString(meta.metadataDate)}</xmp:MetadataDate>
            </rdf:Description>
            <rdf:Description rdf:about="" xmlns:pdf="http://ns.adobe.com/pdf/1.3/">
              <pdf:Subject>${escapeXml(meta.subject)}</pdf:Subject>
              <pdf:Producer>${escapeXml(meta.producer)}</pdf:Producer>
            </rdf:Description>
          </rdf:RDF>
        </x:xmpmeta>
        ${whitespacePadding}
      <?xpacket end="w"?>
    `.trim();

    // Encode as UTF-8: truncating UTF-16 code units to bytes corrupts any
    // character above U+00FF, and Length must count bytes, not code units.
    const metadataBytes = new Uint8Array(Buffer.from(metadataXML, "utf8"));

    const metadataStreamDict = PDFLib.PDFDictionary.from(
      {
        Type: PDFLib.PDFName.from('Metadata'),
        Subtype: PDFLib.PDFName.from('XML'),
        Length: PDFLib.PDFNumber.fromNumber(metadataBytes.length),
      },
      this.pdfDoc.index,
    );

    const metadataStream = PDFLib.PDFRawStream.from(
      metadataStreamDict,
      metadataBytes,
    );

    const metadataStreamRef = this.pdfDoc.register(metadataStream);

    this.pdfDoc.catalog.set('Metadata', metadataStreamRef);
  }

  /**
   * Set TrimBox and CropBox on every PDF page whose rendered page record has
   * a crop box that differs from its media box.
   *
   * @param {Array<object>} [pages] - Page records, matched to PDF pages by
   *   index; each is expected to carry `boxes.media` and `boxes.crop` with
   *   `x`, `y`, `width`, `height`.
   */
  boxes(pages = []) {
    const pdfPages = this.pdfDoc.getPages();

    pdfPages.forEach((pdfPage, index) => {
      const page = pages[index];

      if (!page || !page.boxes) {
        return; // page was not rendered
      }

      const { media, crop } = page.boxes;

      if (!media || !crop) {
        return; // nothing to compare
      }

      // Compare by value: the two boxes are separate objects, so an identity
      // check never detects the "no bleed" case.
      const sameBox = ["x", "y", "width", "height"]
        .every((key) => media[key] === crop[key]);

      if (sameBox) {
        return; // No bleed set
      }

      // NOTE(review): PDF rectangles are normally [llx lly urx ury]; this keeps
      // the existing [2x, 2y, width, height] values — confirm the intended geometry.
      const rectangle = PDFLib.PDFArray.fromArray(
        [
          PDFLib.PDFNumber.fromNumber(crop.x * 2),
          PDFLib.PDFNumber.fromNumber(crop.y * 2),
          PDFLib.PDFNumber.fromNumber(crop.width),
          PDFLib.PDFNumber.fromNumber(crop.height),
        ],
        pdfPage.index,
      );

      // pdfPage.set("ArtBox", rectangle);
      pdfPage.set("TrimBox", rectangle);
      pdfPage.set("CropBox", rectangle);
    });
  }

  /** Debug helper: logs the given page record. */
  updatePageBoxes(page) {
    console.log(page);
  }

  /**
   * Serialize the modified document.
   * @returns {Uint8Array} The PDF bytes, also stored on `this.pdf`.
   */
  save() {
    const writer = new PDFDocumentWriter();
    const pdfBytes = writer.saveToBytesWithXRefTable(this.pdfDoc);
    this.pdf = pdfBytes;
    return this.pdf;
  }
}
module.exports = PostProcesser;
const Paged = require('pagedjs');
const EventEmitter = require('events');
const puppeteer = require('puppeteer');
const util = require('util');
const fs = require('fs');
const readFile = util.promisify(fs.readFile);
const path = require('path');
let dir = process.cwd();
let scriptPath = path.resolve(dir, "./node_modules/pagedjs/dist/");
const PostProcesser = require('./postprocesser');
// Options object passed to page.pdf() in Printer.pdf(): backgrounds are
// printed, no header/footer is drawn, preferCSSPageSize is enabled, and all
// four margins are zero.
const PDF_SETTINGS = {
printBackground: true,
displayHeaderFooter: false,
preferCSSPageSize: true,
margin: {
top: 0,
right: 0,
bottom: 0,
left: 0,
}
};
class Printer extends EventEmitter {
constructor(headless, allowLocal) {
super();
this.headless = headless !== false;
this.allowLocal = allowLocal;
this.pages = [];
}
async setup() {
const browser = await puppeteer.launch({
headless: this.headless,
args: this.allowLocal ? ['--allow-file-access-from-files'] : []
});
return browser;
}
async render(input) {
let resolver;
let rendered = new Promise(function(resolve, reject) {
resolver = resolve;
});
if (!this.browser) {
this.browser = await this.setup();
}
const page = await this.browser.newPage();
let url, html;
if (typeof input === "string") {
try {
url = new URL(input);
} catch {
let relativePath = path.resolve(dir, input);
url = "file://" + relativePath;
}
} else {
url = input.url;
html = input.html;
}
if (html) {
await page.setContent(html)
.catch((e) => {
console.error(e);
});
} else {
await page.goto(url)
.catch((e) => {
console.error(e);
});
}
await page.evaluate(() => {
window.PagedConfig = {
auto: false
}
});
await page.addScriptTag({
path: path.resolve(__dirname, "../node_modules/pagedjs/dist/paged.polyfill.js")
});
// await page.exposeFunction('PuppeteerLogger', (msg) => {
// console.log(msg);
// });
await page.exposeFunction('onSize', (size) => {
this.emit("size", size);
});
await page.exposeFunction('onPage', (page) => {
// console.log("page", page.position + 1);
this.pages.push(page);
this.emit("page", page);
});
await page.exposeFunction('onRendered', (msg, width, height, orientation) => {
this.emit("rendered", msg, width, height, orientation);
resolver({msg, width, height, orientation});
});
await page.evaluate(() => {
window.PagedPolyfill.on("page", (page) => {
const { id, width, height, startToken, endToken, breakAfter, breakBefore, position } = page;
const mediabox = page.element.getBoundingClientRect();
const cropbox = page.pagebox.getBoundingClientRect();
function getPointsValue(value) {
return (Math.round(CSS.px(value).to("pt").value * 100) / 100);
}
let boxes = {
media: {
width: getPointsValue(mediabox.width),
height: getPointsValue(mediabox.height),
x: 0,
y: 0
},
crop: {
width: getPointsValue(cropbox.width),
height: getPointsValue(cropbox.height),
x: getPointsValue(cropbox.x) - getPointsValue(mediabox.x),
y: getPointsValue(cropbox.y) - getPointsValue(mediabox.y)
}
}
window.onPage({ id, width, height, startToken, endToken, breakAfter, breakBefore, position, boxes });
});
window.PagedPolyfill.on("size", (size) => {
window.onSize(size);
});
window.PagedPolyfill.on("rendered", (flow) => {
let msg = "Rendering " + flow.total + " pages took " + flow.performance + " milliseconds.";
window.onRendered(msg, flow.width, flow.height, flow.orientation);
});
window.PagedPolyfill.preview();
});
await rendered;
await page.waitForSelector(".pagedjs_pages");
return page;
}
async pdf(input, options={}) {
let page = await this.render(input);
// Get metatags
const meta = await page.evaluate(() => {
let meta = {};
let title = document.querySelector("title");
meta.title = title.textContent.trim();
let metaTags = document.querySelectorAll("meta");
[...metaTags].forEach((tag) => {
if (tag.name) {
meta[tag.name] = tag.content;
}
})
return meta;
});
let settings = {
printBackground: true,
displayHeaderFooter: false,
preferCSSPageSize: options.width ? false : true,
width: options.width,
height: options.height,
orientation: options.orientation,
margin: {
top: 0,
right: 0,
bottom: 0,
left: 0,
}
}
let pdf = await page.pdf(PDF_SETTINGS)
.catch((e) => {
console.error(e);
});
await page.close();