Commit 0d70e3b5 authored by Guillaume's avatar Guillaume
Browse files

resolves #176 whitespaces that can be ignored are: Line Feed, Carriage Return, Tab and Space

parent 0504c329
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Whitespaces</title>
</head>
<script src="../../dist/paged.polyfill.js"></script>
<body>
<main>
<section>
<h2>Section 1</h2>
<p class="whitespaces-a">&nbsp;</p>
<p class="whitespaces-b">&nbsp;&nbsp;</p>
<p class="whitespaces-c">&nbsp;&nbsp;c</p>
<p class="whitespaces-d">&nbsp;&nbsp;&nbsp;</p>
<p class="whitespaces-e"></p><!-- empty -->
<p class="whitespaces-f"> </p><!-- one space -->
<p class="whitespaces-g"> </p><!-- two spaces -->
<p class="whitespaces-h">&thinsp;&thinsp;</p><!-- thin spaces! -->
<p class="whitespaces-i"> </p><!-- two tabs -->
<p class="whitespaces-j">
</p><!-- two tabs and new line -->
<p class="whitespaces-k">
</p><!-- spaces, tab and new line -->
<p class="whitespaces-l">&nbsp; </p><!-- non breaking space and spaces -->
<p class="whitespaces-m"> &nbsp;</p><!-- spaces and non breaking space -->
</section>
</main>
</body>
</html>
// Upper bound in milliseconds for loading and rendering the fixture.
// Some books might take longer than this to render.
const TIMEOUT = 10000;

/**
 * Checks which whitespace-only text nodes survive rendering of the
 * "whitespaces/whitespaces.html" fixture: runs made only of space, tab and
 * newline are expected to be removed, while non-breaking spaces and thin
 * spaces are expected to be kept.
 */
describe("whitespaces", () => {
	let page;

	beforeAll(async () => {
		page = await loadPage("whitespaces/whitespaces.html");
		return page.rendered;
	}, TIMEOUT);

	afterAll(async () => {
		// Keep the page open when debugging so it can be inspected.
		if (DEBUG) {
			return;
		}
		await page.close();
	});

	it("should properly ignore white space characters", async () => {
		// UTF-16 code units that the fixture paragraphs are expected to contain.
		const NBSP = 160;
		const SPACE = 32;
		const THIN_SPACE = 8201;
		const LETTER_C = 99;

		// Reads the rendered text of the first element matching `selector`
		// and returns it as an array of UTF-16 code units.
		const readCharCodes = (selector) =>
			page.$eval(selector, (el) => el.textContent.split("").map((ch) => ch.charCodeAt(0)));

		// [selector, expected code units], checked in document order.
		const expectations = [
			[".whitespaces-a", [NBSP]],
			[".whitespaces-b", [NBSP, NBSP]],
			[".whitespaces-c", [NBSP, NBSP, LETTER_C]],
			[".whitespaces-d", [NBSP, NBSP, NBSP]],
			[".whitespaces-e", []], // empty
			[".whitespaces-f", []], // one space
			[".whitespaces-g", []], // two spaces
			[".whitespaces-h", [THIN_SPACE, THIN_SPACE]],
			[".whitespaces-i", []], // two tabs
			[".whitespaces-j", []], // two tabs and new line
			[".whitespaces-k", []], // spaces, tab and new line
			[".whitespaces-l", [NBSP, SPACE, SPACE, SPACE, SPACE]],
			[".whitespaces-m", [SPACE, SPACE, SPACE, NBSP]],
		];

		for (const [selector, expected] of expectations) {
			expect(await readCharCodes(selector)).toEqual(expected);
		}
	});
});
import { UUID } from "../utils/utils";
import { isElement } from "../utils/dom";
import {UUID} from "../utils/utils";
import {isElement} from "../utils/dom";
/**
* Render a flow of text offscreen
......@@ -71,20 +71,39 @@ class ContentParser {
}
removeEmpty(content) {
var treeWalker = document.createTreeWalker(
const self = this;
const treeWalker = document.createTreeWalker(
content,
NodeFilter.SHOW_TEXT,
{ acceptNode: function(node) {
// Only remove more than a single space
if (node.textContent.length > 1 && !node.textContent.trim()) {
if (self.isIgnorable(node)) {
// Don't touch whitespace if text is preformated
// Don't touch whitespace if text is pre-formatted
let parent = node.parentNode;
let pre = isElement(parent) && parent.closest("pre");
if (pre) {
return NodeFilter.FILTER_REJECT;
}
// TODO: we also need to ignore spaces when the parent has white-space rule:
// pre
// Sequences of white space are preserved. Lines are only broken at newline characters in the source and at <br> elements.
//
// pre-wrap
// Sequences of white space are preserved. Lines are broken at newline characters, at <br>, and as necessary to fill line boxes.
//
// pre-line
// Sequences of white space are collapsed. Lines are broken at newline characters, at <br>, and as necessary to fill line boxes.
//
// break-spaces
// The behavior is identical to that of pre-wrap, except that:
// - Any sequence of preserved white space always takes up space, including at the end of the line.
// - A line breaking opportunity exists after every preserved white space character, including between white space characters.
// - Such preserved spaces take up space and do not hang, and thus affect the box’s intrinsic sizes (min-content size and max-content size).
//
// See: https://developer.mozilla.org/en-US/docs/Web/CSS/white-space#Values
return NodeFilter.FILTER_ACCEPT;
} else {
return NodeFilter.FILTER_REJECT;
......@@ -99,12 +118,46 @@ class ContentParser {
while(node) {
current = node;
node = treeWalker.nextNode();
// if (!current.nextSibling || (current.nextSibling && current.nextSibling.nodeType === 1)) {
current.parentNode.removeChild(current);
// }
}
}
/**
* Throughout, whitespace is defined as one of the characters
* "\t" TAB \u0009
* "\n" LF \u000A
* "\r" CR \u000D
* " " SPC \u0020
*
* This does not use JavaScript's "\s" because that also matches non-breaking
* spaces (and some other characters), which must be preserved.
*/
/**
* Determine if a node should be ignored by the iterator functions.
* taken from https://developer.mozilla.org/en-US/docs/Web/API/Document_Object_Model/Whitespace#Whitespace_helper_functions
*
* @param {Node} node An object implementing the DOM1 |Node| interface.
* @return {boolean} true if the node is:
* 1) A |Text| node that is all whitespace
* 2) A |Comment| node
* and otherwise false.
*/
isIgnorable(node) {
return (node.nodeType === 8) || // A comment node
((node.nodeType === 3) && this.isAllWhitespace(node)); // a text node, all whitespace
}
/**
* Determine whether a node's text content is entirely whitespace.
*
* @param {Node} node A node implementing the |CharacterData| interface (i.e., a |Text|, |Comment|, or |CDATASection| node
* @return {boolean} true if all of the text content of |nod| is whitespace, otherwise false.
*/
isAllWhitespace(node) {
return !(/[^\t\n\r ]/.test(node.textContent));
}
find(ref) {
return this.refs[ref];
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment