first commit

This commit is contained in:
monjack
2025-06-20 18:01:48 +08:00
commit 6daa6d65c1
24611 changed files with 2512443 additions and 0 deletions

View File

@ -0,0 +1,19 @@
Copyright (c) 2013-2019 Ivan Nikulin (ifaaan@gmail.com, https://github.com/inikulin)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

View File

@ -0,0 +1,34 @@
<p align="center">
<a href="https://github.com/inikulin/parse5">
<img src="https://raw.github.com/inikulin/parse5/master/media/logo.png" alt="parse5" />
</a>
</p>
<div align="center">
<h1>parse5-htmlparser2-tree-adapter</h1>
<i><b><a href="https://github.com/fb55/htmlparser2">htmlparser2</a> tree adapter for <a href="https://github.com/inikulin/parse5">parse5</a>.</b></i>
</div>
<br>
<div align="center">
<code>npm install --save parse5-htmlparser2-tree-adapter</code>
</div>
<br>
<p align="center">
📖 <a href="https://github.com/inikulin/parse5/tree/master/packages/parse5-htmlparser2-tree-adapter/docs/index.md"><b>Documentation</b></a> 📖
</p>
---
<p align="center">
<a href="https://github.com/inikulin/parse5/tree/master/docs/list-of-packages.md">List of parse5 toolset packages</a>
</p>
<p align="center">
<a href="https://github.com/inikulin/parse5">GitHub</a>
</p>
<p align="center">
<a href="https://github.com/inikulin/parse5/tree/master/docs/version-history.md">Version history</a>
</p>

View File

@ -0,0 +1,348 @@
'use strict';
const doctype = require('parse5/lib/common/doctype');
const { DOCUMENT_MODE } = require('parse5/lib/common/html');
//Conversion tables for DOM Level1 structure emulation
//Maps a node's `type` to the numeric value reported by the `nodeType` getter.
const nodeTypes = {
    element: 1,
    text: 3,
    cdata: 4,
    comment: 8
};

//Maps each DOM Level1 style property name to the htmlparser2 style property that backs it.
const nodePropertyShorthands = {
    tagName: 'name',
    childNodes: 'children',
    parentNode: 'parent',
    previousSibling: 'prev',
    nextSibling: 'next',
    nodeValue: 'data'
};

//Node
//Tree node: every own key of `props` is copied onto the instance, and the prototype
//adds read-only child/type getters plus the DOM Level1 aliases installed below.
class Node {
    constructor(props) {
        for (const key of Object.keys(props)) {
            this[key] = props[key];
        }
    }

    //First entry of `children`, or null when there is no such entry.
    get firstChild() {
        const children = this.children;

        return (children && children[0]) || null;
    }

    //Last entry of `children`, or null when there is no such entry.
    get lastChild() {
        const children = this.children;

        return (children && children[children.length - 1]) || null;
    }

    //Numeric node type; any `type` missing from the conversion table is reported as an element.
    get nodeType() {
        return nodeTypes[this.type] || nodeTypes.element;
    }
}

//Install a getter/setter pair on the prototype for every DOM Level1 alias.
Object.keys(nodePropertyShorthands).forEach(key => {
    const shorthand = nodePropertyShorthands[key];

    Object.defineProperty(Node.prototype, key, {
        get: function() {
            const value = this[shorthand];

            //NOTE: only a missing value is reported as null. A present but falsy value
            //(e.g. the empty `data` of an empty text node) has to be returned unchanged,
            //otherwise `nodeValue` of an empty text node would be null instead of ''.
            return value === undefined || value === null ? null : value;
        },
        set: function(val) {
            this[shorthand] = val;
            return val;
        }
    });
});
//Node construction
exports.createDocument = function() {
return new Node({
type: 'root',
name: 'root',
parent: null,
prev: null,
next: null,
children: [],
'x-mode': DOCUMENT_MODE.NO_QUIRKS
});
};
exports.createDocumentFragment = function() {
return new Node({
type: 'root',
name: 'root',
parent: null,
prev: null,
next: null,
children: []
});
};
exports.createElement = function(tagName, namespaceURI, attrs) {
const attribs = Object.create(null);
const attribsNamespace = Object.create(null);
const attribsPrefix = Object.create(null);
for (let i = 0; i < attrs.length; i++) {
const attrName = attrs[i].name;
attribs[attrName] = attrs[i].value;
attribsNamespace[attrName] = attrs[i].namespace;
attribsPrefix[attrName] = attrs[i].prefix;
}
return new Node({
type: tagName === 'script' || tagName === 'style' ? tagName : 'tag',
name: tagName,
namespace: namespaceURI,
attribs: attribs,
'x-attribsNamespace': attribsNamespace,
'x-attribsPrefix': attribsPrefix,
children: [],
parent: null,
prev: null,
next: null
});
};
exports.createCommentNode = function(data) {
return new Node({
type: 'comment',
data: data,
parent: null,
prev: null,
next: null
});
};
//Creates an unlinked text node holding `value` (module-private helper).
const createTextNode = function(value) {
    const textProps = {
        type: 'text',
        data: value,
        parent: null,
        prev: null,
        next: null
    };

    return new Node(textProps);
};
//Tree mutation
//Appends `newNode` to the end of `parentNode.children` and links it with the former last child.
const appendChild = (exports.appendChild = function(parentNode, newNode) {
    const siblings = parentNode.children;
    const formerLast = siblings[siblings.length - 1];

    if (formerLast) {
        newNode.prev = formerLast;
        formerLast.next = newNode;
    }

    newNode.parent = parentNode;
    siblings.push(newNode);
});
//Inserts `newNode` into `parentNode.children` right before `referenceNode`
//and rewires the prev/next links of the nodes around the insertion point.
const insertBefore = (exports.insertBefore = function(parentNode, newNode, referenceNode) {
    const siblings = parentNode.children;
    const position = siblings.indexOf(referenceNode);
    const preceding = referenceNode.prev;

    if (preceding) {
        newNode.prev = preceding;
        preceding.next = newNode;
    }

    newNode.next = referenceNode;
    referenceNode.prev = newNode;
    newNode.parent = parentNode;
    siblings.splice(position, 0, newNode);
});
exports.setTemplateContent = function(templateElement, contentElement) {
appendChild(templateElement, contentElement);
};
exports.getTemplateContent = function(templateElement) {
return templateElement.children[0];
};
exports.setDocumentType = function(document, name, publicId, systemId) {
const data = doctype.serializeContent(name, publicId, systemId);
let doctypeNode = null;
for (let i = 0; i < document.children.length; i++) {
if (document.children[i].type === 'directive' && document.children[i].name === '!doctype') {
doctypeNode = document.children[i];
break;
}
}
if (doctypeNode) {
doctypeNode.data = data;
doctypeNode['x-name'] = name;
doctypeNode['x-publicId'] = publicId;
doctypeNode['x-systemId'] = systemId;
} else {
appendChild(
document,
new Node({
type: 'directive',
name: '!doctype',
data: data,
'x-name': name,
'x-publicId': publicId,
'x-systemId': systemId
})
);
}
};
exports.setDocumentMode = function(document, mode) {
document['x-mode'] = mode;
};
exports.getDocumentMode = function(document) {
return document['x-mode'];
};
exports.detachNode = function(node) {
if (node.parent) {
const idx = node.parent.children.indexOf(node);
const prev = node.prev;
const next = node.next;
node.prev = null;
node.next = null;
if (prev) {
prev.next = next;
}
if (next) {
next.prev = prev;
}
node.parent.children.splice(idx, 1);
node.parent = null;
}
};
exports.insertText = function(parentNode, text) {
const lastChild = parentNode.children[parentNode.children.length - 1];
if (lastChild && lastChild.type === 'text') {
lastChild.data += text;
} else {
appendChild(parentNode, createTextNode(text));
}
};
exports.insertTextBefore = function(parentNode, text, referenceNode) {
const prevNode = parentNode.children[parentNode.children.indexOf(referenceNode) - 1];
if (prevNode && prevNode.type === 'text') {
prevNode.data += text;
} else {
insertBefore(parentNode, createTextNode(text), referenceNode);
}
};
exports.adoptAttributes = function(recipient, attrs) {
for (let i = 0; i < attrs.length; i++) {
const attrName = attrs[i].name;
if (typeof recipient.attribs[attrName] === 'undefined') {
recipient.attribs[attrName] = attrs[i].value;
recipient['x-attribsNamespace'][attrName] = attrs[i].namespace;
recipient['x-attribsPrefix'][attrName] = attrs[i].prefix;
}
}
};
//Tree traversing
//First entry of the node's children (undefined when the list is empty).
exports.getFirstChild = node => node.children[0];

//The node's children list itself, not a copy.
exports.getChildNodes = node => node.children;

//The node's parent link.
exports.getParentNode = node => node.parent;
exports.getAttrList = function(element) {
const attrList = [];
for (const name in element.attribs) {
attrList.push({
name: name,
value: element.attribs[name],
namespace: element['x-attribsNamespace'][name],
prefix: element['x-attribsPrefix'][name]
});
}
return attrList;
};
//Node data
//Each accessor below returns one stored field of the given node as is.
exports.getTagName = element => element.name;

exports.getNamespaceURI = element => element.namespace;

exports.getTextNodeContent = textNode => textNode.data;

exports.getCommentNodeContent = commentNode => commentNode.data;

//Doctype details live in the 'x-' prefixed fields written by setDocumentType().
exports.getDocumentTypeNodeName = doctypeNode => doctypeNode['x-name'];

exports.getDocumentTypeNodePublicId = doctypeNode => doctypeNode['x-publicId'];

exports.getDocumentTypeNodeSystemId = doctypeNode => doctypeNode['x-systemId'];
//Node types
exports.isTextNode = node => node.type === 'text';

exports.isCommentNode = node => node.type === 'comment';

//A doctype is stored as a 'directive' node named '!doctype'.
exports.isDocumentTypeNode = node => node.type === 'directive' && node.name === '!doctype';

//Any node carrying a truthy `attribs` value counts as an element.
exports.isElementNode = node => Boolean(node.attribs);
// Source code location
exports.setNodeSourceCodeLocation = function(node, location) {
node.sourceCodeLocation = location;
};
exports.getNodeSourceCodeLocation = function(node) {
return node.sourceCodeLocation;
};
exports.updateNodeSourceCodeLocation = function(node, endLocation) {
node.sourceCodeLocation = Object.assign(node.sourceCodeLocation, endLocation);
};

View File

@ -0,0 +1,19 @@
Copyright (c) 2013-2019 Ivan Nikulin (ifaaan@gmail.com, https://github.com/inikulin)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

View File

@ -0,0 +1,38 @@
<p align="center">
<a href="https://github.com/inikulin/parse5">
<img src="https://raw.github.com/inikulin/parse5/master/media/logo.png" alt="parse5" />
</a>
</p>
<div align="center">
<h1>parse5</h1>
<i><b>HTML parser and serializer.</b></i>
</div>
<br>
<div align="center">
<code>npm install --save parse5</code>
</div>
<br>
<p align="center">
📖 <a href="https://github.com/inikulin/parse5/tree/master/packages/parse5/docs/index.md"><b>Documentation</b></a> 📖
</p>
---
<p align="center">
<a href="https://github.com/inikulin/parse5/tree/master/docs/list-of-packages.md">List of parse5 toolset packages</a>
</p>
<p align="center">
<a href="https://github.com/inikulin/parse5">GitHub</a>
</p>
<p align="center">
<a href="http://astexplorer.net/#/1CHlCXc4n4">Online playground</a>
</p>
<p align="center">
<a href="https://github.com/inikulin/parse5/tree/master/docs/version-history.md">Version history</a>
</p>

View File

@ -0,0 +1,162 @@
'use strict';
const { DOCUMENT_MODE } = require('./html');
//Constants
//NOTE: getDocumentMode() lowercases the public and system IDs before comparing them
//with the values below, which is why every entry is written in lowercase.

//Doctype name and the only non-null system ID accepted by isConforming()
const VALID_DOCTYPE_NAME = 'html';
const VALID_SYSTEM_ID = 'about:legacy-compat';
//System ID that selects quirks mode on its own
const QUIRKS_MODE_SYSTEM_ID = 'http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd';
//Public ID prefixes that select quirks mode (used as is when the doctype has a system ID)
const QUIRKS_MODE_PUBLIC_ID_PREFIXES = [
'+//silmaril//dtd html pro v0r11 19970101//',
'-//as//dtd html 3.0 aswedit + extensions//',
'-//advasoft ltd//dtd html 3.0 aswedit + extensions//',
'-//ietf//dtd html 2.0 level 1//',
'-//ietf//dtd html 2.0 level 2//',
'-//ietf//dtd html 2.0 strict level 1//',
'-//ietf//dtd html 2.0 strict level 2//',
'-//ietf//dtd html 2.0 strict//',
'-//ietf//dtd html 2.0//',
'-//ietf//dtd html 2.1e//',
'-//ietf//dtd html 3.0//',
'-//ietf//dtd html 3.2 final//',
'-//ietf//dtd html 3.2//',
'-//ietf//dtd html 3//',
'-//ietf//dtd html level 0//',
'-//ietf//dtd html level 1//',
'-//ietf//dtd html level 2//',
'-//ietf//dtd html level 3//',
'-//ietf//dtd html strict level 0//',
'-//ietf//dtd html strict level 1//',
'-//ietf//dtd html strict level 2//',
'-//ietf//dtd html strict level 3//',
'-//ietf//dtd html strict//',
'-//ietf//dtd html//',
'-//metrius//dtd metrius presentational//',
'-//microsoft//dtd internet explorer 2.0 html strict//',
'-//microsoft//dtd internet explorer 2.0 html//',
'-//microsoft//dtd internet explorer 2.0 tables//',
'-//microsoft//dtd internet explorer 3.0 html strict//',
'-//microsoft//dtd internet explorer 3.0 html//',
'-//microsoft//dtd internet explorer 3.0 tables//',
'-//netscape comm. corp.//dtd html//',
'-//netscape comm. corp.//dtd strict html//',
"-//o'reilly and associates//dtd html 2.0//",
"-//o'reilly and associates//dtd html extended 1.0//",
"-//o'reilly and associates//dtd html extended relaxed 1.0//",
'-//sq//dtd html 2.0 hotmetal + extensions//',
'-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//',
'-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//',
'-//spyglass//dtd html 2.0 extended//',
'-//sun microsystems corp.//dtd hotjava html//',
'-//sun microsystems corp.//dtd hotjava strict html//',
'-//w3c//dtd html 3 1995-03-24//',
'-//w3c//dtd html 3.2 draft//',
'-//w3c//dtd html 3.2 final//',
'-//w3c//dtd html 3.2//',
'-//w3c//dtd html 3.2s draft//',
'-//w3c//dtd html 4.0 frameset//',
'-//w3c//dtd html 4.0 transitional//',
'-//w3c//dtd html experimental 19960712//',
'-//w3c//dtd html experimental 970421//',
'-//w3c//dtd w3 html//',
'-//w3o//dtd w3 html 3.0//',
'-//webtechs//dtd mozilla html 2.0//',
'-//webtechs//dtd mozilla html//'
];
//Quirks mode prefixes used when the doctype has no system ID (systemId === null):
//the list above plus the two HTML 4.01 prefixes
const QUIRKS_MODE_NO_SYSTEM_ID_PUBLIC_ID_PREFIXES = QUIRKS_MODE_PUBLIC_ID_PREFIXES.concat([
'-//w3c//dtd html 4.01 frameset//',
'-//w3c//dtd html 4.01 transitional//'
]);
//Public IDs that select quirks mode on an exact match
const QUIRKS_MODE_PUBLIC_IDS = ['-//w3o//dtd w3 html strict 3.0//en//', '-/w3c/dtd html 4.0 transitional/en', 'html'];
//Public ID prefixes that select limited quirks mode (used as is when the doctype has no system ID)
const LIMITED_QUIRKS_PUBLIC_ID_PREFIXES = ['-//w3c//dtd xhtml 1.0 frameset//', '-//w3c//dtd xhtml 1.0 transitional//'];
//Limited quirks prefixes used when the doctype has a system ID (systemId !== null):
//the list above plus the two HTML 4.01 prefixes
const LIMITED_QUIRKS_WITH_SYSTEM_ID_PUBLIC_ID_PREFIXES = LIMITED_QUIRKS_PUBLIC_ID_PREFIXES.concat([
'-//w3c//dtd html 4.01 frameset//',
'-//w3c//dtd html 4.01 transitional//'
]);
//Utils
//Wraps a doctype identifier in double quotes, or in single quotes
//when the identifier itself contains a double quote.
function enquoteDoctypeId(id) {
    const delimiter = id.includes('"') ? "'" : '"';

    return `${delimiter}${id}${delimiter}`;
}
//Tells whether `publicId` begins with at least one of the given `prefixes`.
function hasPrefix(publicId, prefixes) {
    return prefixes.some(prefix => publicId.startsWith(prefix));
}
//API
exports.isConforming = function(token) {
return (
token.name === VALID_DOCTYPE_NAME &&
token.publicId === null &&
(token.systemId === null || token.systemId === VALID_SYSTEM_ID)
);
};
exports.getDocumentMode = function(token) {
if (token.name !== VALID_DOCTYPE_NAME) {
return DOCUMENT_MODE.QUIRKS;
}
const systemId = token.systemId;
if (systemId && systemId.toLowerCase() === QUIRKS_MODE_SYSTEM_ID) {
return DOCUMENT_MODE.QUIRKS;
}
let publicId = token.publicId;
if (publicId !== null) {
publicId = publicId.toLowerCase();
if (QUIRKS_MODE_PUBLIC_IDS.indexOf(publicId) > -1) {
return DOCUMENT_MODE.QUIRKS;
}
let prefixes = systemId === null ? QUIRKS_MODE_NO_SYSTEM_ID_PUBLIC_ID_PREFIXES : QUIRKS_MODE_PUBLIC_ID_PREFIXES;
if (hasPrefix(publicId, prefixes)) {
return DOCUMENT_MODE.QUIRKS;
}
prefixes =
systemId === null ? LIMITED_QUIRKS_PUBLIC_ID_PREFIXES : LIMITED_QUIRKS_WITH_SYSTEM_ID_PUBLIC_ID_PREFIXES;
if (hasPrefix(publicId, prefixes)) {
return DOCUMENT_MODE.LIMITED_QUIRKS;
}
}
return DOCUMENT_MODE.NO_QUIRKS;
};
exports.serializeContent = function(name, publicId, systemId) {
let str = '!DOCTYPE ';
if (name) {
str += name;
}
if (publicId) {
str += ' PUBLIC ' + enquoteDoctypeId(publicId);
} else if (systemId) {
str += ' SYSTEM';
}
if (systemId !== null) {
str += ' ' + enquoteDoctypeId(systemId);
}
return str;
};

View File

@ -0,0 +1,65 @@
'use strict';
//Parse error codes.
//Maps a camelCase identifier to its kebab-case error code string.
module.exports = {
controlCharacterInInputStream: 'control-character-in-input-stream',
noncharacterInInputStream: 'noncharacter-in-input-stream',
surrogateInInputStream: 'surrogate-in-input-stream',
nonVoidHtmlElementStartTagWithTrailingSolidus: 'non-void-html-element-start-tag-with-trailing-solidus',
endTagWithAttributes: 'end-tag-with-attributes',
endTagWithTrailingSolidus: 'end-tag-with-trailing-solidus',
unexpectedSolidusInTag: 'unexpected-solidus-in-tag',
unexpectedNullCharacter: 'unexpected-null-character',
unexpectedQuestionMarkInsteadOfTagName: 'unexpected-question-mark-instead-of-tag-name',
invalidFirstCharacterOfTagName: 'invalid-first-character-of-tag-name',
unexpectedEqualsSignBeforeAttributeName: 'unexpected-equals-sign-before-attribute-name',
missingEndTagName: 'missing-end-tag-name',
unexpectedCharacterInAttributeName: 'unexpected-character-in-attribute-name',
unknownNamedCharacterReference: 'unknown-named-character-reference',
missingSemicolonAfterCharacterReference: 'missing-semicolon-after-character-reference',
unexpectedCharacterAfterDoctypeSystemIdentifier: 'unexpected-character-after-doctype-system-identifier',
unexpectedCharacterInUnquotedAttributeValue: 'unexpected-character-in-unquoted-attribute-value',
eofBeforeTagName: 'eof-before-tag-name',
eofInTag: 'eof-in-tag',
missingAttributeValue: 'missing-attribute-value',
missingWhitespaceBetweenAttributes: 'missing-whitespace-between-attributes',
missingWhitespaceAfterDoctypePublicKeyword: 'missing-whitespace-after-doctype-public-keyword',
missingWhitespaceBetweenDoctypePublicAndSystemIdentifiers:
'missing-whitespace-between-doctype-public-and-system-identifiers',
missingWhitespaceAfterDoctypeSystemKeyword: 'missing-whitespace-after-doctype-system-keyword',
missingQuoteBeforeDoctypePublicIdentifier: 'missing-quote-before-doctype-public-identifier',
missingQuoteBeforeDoctypeSystemIdentifier: 'missing-quote-before-doctype-system-identifier',
missingDoctypePublicIdentifier: 'missing-doctype-public-identifier',
missingDoctypeSystemIdentifier: 'missing-doctype-system-identifier',
abruptDoctypePublicIdentifier: 'abrupt-doctype-public-identifier',
abruptDoctypeSystemIdentifier: 'abrupt-doctype-system-identifier',
cdataInHtmlContent: 'cdata-in-html-content',
incorrectlyOpenedComment: 'incorrectly-opened-comment',
eofInScriptHtmlCommentLikeText: 'eof-in-script-html-comment-like-text',
eofInDoctype: 'eof-in-doctype',
nestedComment: 'nested-comment',
abruptClosingOfEmptyComment: 'abrupt-closing-of-empty-comment',
eofInComment: 'eof-in-comment',
incorrectlyClosedComment: 'incorrectly-closed-comment',
eofInCdata: 'eof-in-cdata',
absenceOfDigitsInNumericCharacterReference: 'absence-of-digits-in-numeric-character-reference',
nullCharacterReference: 'null-character-reference',
surrogateCharacterReference: 'surrogate-character-reference',
characterReferenceOutsideUnicodeRange: 'character-reference-outside-unicode-range',
controlCharacterReference: 'control-character-reference',
noncharacterCharacterReference: 'noncharacter-character-reference',
missingWhitespaceBeforeDoctypeName: 'missing-whitespace-before-doctype-name',
missingDoctypeName: 'missing-doctype-name',
invalidCharacterSequenceAfterDoctypeName: 'invalid-character-sequence-after-doctype-name',
duplicateAttribute: 'duplicate-attribute',
nonConformingDoctype: 'non-conforming-doctype',
missingDoctype: 'missing-doctype',
misplacedDoctype: 'misplaced-doctype',
endTagWithoutMatchingOpenElement: 'end-tag-without-matching-open-element',
closingOfElementWithOpenChildElements: 'closing-of-element-with-open-child-elements',
disallowedContentInNoscriptInHead: 'disallowed-content-in-noscript-in-head',
openElementsLeftAfterEof: 'open-elements-left-after-eof',
abandonedHeadElementChild: 'abandoned-head-element-child',
misplacedStartTagForHeadElement: 'misplaced-start-tag-for-head-element',
nestedNoscriptInHead: 'nested-noscript-in-head',
eofInElementThatCanContainOnlyText: 'eof-in-element-that-can-contain-only-text'
};

View File

@ -0,0 +1,265 @@
'use strict';
const Tokenizer = require('../tokenizer');
const HTML = require('./html');
//Aliases
//Short names for the tag name, namespace and attribute name tables of ./html
const $ = HTML.TAG_NAMES;
const NS = HTML.NAMESPACES;
const ATTRS = HTML.ATTRS;
//MIME types
//Values of the `encoding` attribute that isHtmlIntegrationPoint() accepts (compared lowercased)
const MIME_TYPES = {
TEXT_HTML: 'text/html',
APPLICATION_XML: 'application/xhtml+xml'
};
//Attributes
//Attribute name that adjustTokenMathMLAttrs() looks for, and the name it is replaced with
const DEFINITION_URL_ATTR = 'definitionurl';
const ADJUSTED_DEFINITION_URL_ATTR = 'definitionURL';
//SVG attribute names adjustment map: all-lowercase attribute name -> camelCase spelling.
//Used by adjustTokenSVGAttrs() to rename token attributes.
const SVG_ATTRS_ADJUSTMENT_MAP = {
attributename: 'attributeName',
attributetype: 'attributeType',
basefrequency: 'baseFrequency',
baseprofile: 'baseProfile',
calcmode: 'calcMode',
clippathunits: 'clipPathUnits',
diffuseconstant: 'diffuseConstant',
edgemode: 'edgeMode',
filterunits: 'filterUnits',
glyphref: 'glyphRef',
gradienttransform: 'gradientTransform',
gradientunits: 'gradientUnits',
kernelmatrix: 'kernelMatrix',
kernelunitlength: 'kernelUnitLength',
keypoints: 'keyPoints',
keysplines: 'keySplines',
keytimes: 'keyTimes',
lengthadjust: 'lengthAdjust',
limitingconeangle: 'limitingConeAngle',
markerheight: 'markerHeight',
markerunits: 'markerUnits',
markerwidth: 'markerWidth',
maskcontentunits: 'maskContentUnits',
maskunits: 'maskUnits',
numoctaves: 'numOctaves',
pathlength: 'pathLength',
patterncontentunits: 'patternContentUnits',
patterntransform: 'patternTransform',
patternunits: 'patternUnits',
pointsatx: 'pointsAtX',
pointsaty: 'pointsAtY',
pointsatz: 'pointsAtZ',
preservealpha: 'preserveAlpha',
preserveaspectratio: 'preserveAspectRatio',
primitiveunits: 'primitiveUnits',
refx: 'refX',
refy: 'refY',
repeatcount: 'repeatCount',
repeatdur: 'repeatDur',
requiredextensions: 'requiredExtensions',
requiredfeatures: 'requiredFeatures',
specularconstant: 'specularConstant',
specularexponent: 'specularExponent',
spreadmethod: 'spreadMethod',
startoffset: 'startOffset',
stddeviation: 'stdDeviation',
stitchtiles: 'stitchTiles',
surfacescale: 'surfaceScale',
systemlanguage: 'systemLanguage',
tablevalues: 'tableValues',
targetx: 'targetX',
targety: 'targetY',
textlength: 'textLength',
viewbox: 'viewBox',
viewtarget: 'viewTarget',
xchannelselector: 'xChannelSelector',
ychannelselector: 'yChannelSelector',
zoomandpan: 'zoomAndPan'
};
//XML attributes adjustment map: attribute name as written -> the prefix, local name
//and namespace that adjustTokenXMLAttrs() assigns to the token attribute.
const XML_ATTRS_ADJUSTMENT_MAP = {
'xlink:actuate': { prefix: 'xlink', name: 'actuate', namespace: NS.XLINK },
'xlink:arcrole': { prefix: 'xlink', name: 'arcrole', namespace: NS.XLINK },
'xlink:href': { prefix: 'xlink', name: 'href', namespace: NS.XLINK },
'xlink:role': { prefix: 'xlink', name: 'role', namespace: NS.XLINK },
'xlink:show': { prefix: 'xlink', name: 'show', namespace: NS.XLINK },
'xlink:title': { prefix: 'xlink', name: 'title', namespace: NS.XLINK },
'xlink:type': { prefix: 'xlink', name: 'type', namespace: NS.XLINK },
'xml:base': { prefix: 'xml', name: 'base', namespace: NS.XML },
'xml:lang': { prefix: 'xml', name: 'lang', namespace: NS.XML },
'xml:space': { prefix: 'xml', name: 'space', namespace: NS.XML },
//A bare `xmlns` keeps its name and gets an empty prefix
xmlns: { prefix: '', name: 'xmlns', namespace: NS.XMLNS },
'xmlns:xlink': { prefix: 'xmlns', name: 'xlink', namespace: NS.XMLNS }
};
//SVG tag names adjustment map: all-lowercase tag name -> camelCase spelling.
//Used by adjustTokenSVGTagName() and also exported from this module.
const SVG_TAG_NAMES_ADJUSTMENT_MAP = (exports.SVG_TAG_NAMES_ADJUSTMENT_MAP = {
altglyph: 'altGlyph',
altglyphdef: 'altGlyphDef',
altglyphitem: 'altGlyphItem',
animatecolor: 'animateColor',
animatemotion: 'animateMotion',
animatetransform: 'animateTransform',
clippath: 'clipPath',
feblend: 'feBlend',
fecolormatrix: 'feColorMatrix',
fecomponenttransfer: 'feComponentTransfer',
fecomposite: 'feComposite',
feconvolvematrix: 'feConvolveMatrix',
fediffuselighting: 'feDiffuseLighting',
fedisplacementmap: 'feDisplacementMap',
fedistantlight: 'feDistantLight',
feflood: 'feFlood',
fefunca: 'feFuncA',
fefuncb: 'feFuncB',
fefuncg: 'feFuncG',
fefuncr: 'feFuncR',
fegaussianblur: 'feGaussianBlur',
feimage: 'feImage',
femerge: 'feMerge',
femergenode: 'feMergeNode',
femorphology: 'feMorphology',
feoffset: 'feOffset',
fepointlight: 'fePointLight',
fespecularlighting: 'feSpecularLighting',
fespotlight: 'feSpotLight',
fetile: 'feTile',
feturbulence: 'feTurbulence',
foreignobject: 'foreignObject',
glyphref: 'glyphRef',
lineargradient: 'linearGradient',
radialgradient: 'radialGradient',
textpath: 'textPath'
});
//Tags that cause an exit from foreign content.
//Lookup table keyed by tag name and consulted by causesExit(). `font` is deliberately
//not listed: causesExit() handles it separately, based on its attributes.
const EXITS_FOREIGN_CONTENT = {
[$.B]: true,
[$.BIG]: true,
[$.BLOCKQUOTE]: true,
[$.BODY]: true,
[$.BR]: true,
[$.CENTER]: true,
[$.CODE]: true,
[$.DD]: true,
[$.DIV]: true,
[$.DL]: true,
[$.DT]: true,
[$.EM]: true,
[$.EMBED]: true,
[$.H1]: true,
[$.H2]: true,
[$.H3]: true,
[$.H4]: true,
[$.H5]: true,
[$.H6]: true,
[$.HEAD]: true,
[$.HR]: true,
[$.I]: true,
[$.IMG]: true,
[$.LI]: true,
[$.LISTING]: true,
[$.MENU]: true,
[$.META]: true,
[$.NOBR]: true,
[$.OL]: true,
[$.P]: true,
[$.PRE]: true,
[$.RUBY]: true,
[$.S]: true,
[$.SMALL]: true,
[$.SPAN]: true,
[$.STRONG]: true,
[$.STRIKE]: true,
[$.SUB]: true,
[$.SUP]: true,
[$.TABLE]: true,
[$.TT]: true,
[$.U]: true,
[$.UL]: true,
[$.VAR]: true
};
//Check exit from foreign content
exports.causesExit = function(startTagToken) {
const tn = startTagToken.tagName;
const isFontWithAttrs =
tn === $.FONT &&
(Tokenizer.getTokenAttr(startTagToken, ATTRS.COLOR) !== null ||
Tokenizer.getTokenAttr(startTagToken, ATTRS.SIZE) !== null ||
Tokenizer.getTokenAttr(startTagToken, ATTRS.FACE) !== null);
return isFontWithAttrs ? true : EXITS_FOREIGN_CONTENT[tn];
};
//Token adjustments
exports.adjustTokenMathMLAttrs = function(token) {
for (let i = 0; i < token.attrs.length; i++) {
if (token.attrs[i].name === DEFINITION_URL_ATTR) {
token.attrs[i].name = ADJUSTED_DEFINITION_URL_ATTR;
break;
}
}
};
exports.adjustTokenSVGAttrs = function(token) {
for (let i = 0; i < token.attrs.length; i++) {
const adjustedAttrName = SVG_ATTRS_ADJUSTMENT_MAP[token.attrs[i].name];
if (adjustedAttrName) {
token.attrs[i].name = adjustedAttrName;
}
}
};
exports.adjustTokenXMLAttrs = function(token) {
for (let i = 0; i < token.attrs.length; i++) {
const adjustedAttrEntry = XML_ATTRS_ADJUSTMENT_MAP[token.attrs[i].name];
if (adjustedAttrEntry) {
token.attrs[i].prefix = adjustedAttrEntry.prefix;
token.attrs[i].name = adjustedAttrEntry.name;
token.attrs[i].namespace = adjustedAttrEntry.namespace;
}
}
};
exports.adjustTokenSVGTagName = function(token) {
const adjustedTagName = SVG_TAG_NAMES_ADJUSTMENT_MAP[token.tagName];
if (adjustedTagName) {
token.tagName = adjustedTagName;
}
};
//Integration points
//True for the MathML elements mi, mo, mn, ms and mtext.
function isMathMLTextIntegrationPoint(tn, ns) {
    if (ns !== NS.MATHML) {
        return false;
    }

    switch (tn) {
        case $.MI:
        case $.MO:
        case $.MN:
        case $.MS:
        case $.MTEXT:
            return true;

        default:
            return false;
    }
}
//True for the SVG elements foreignObject, desc and title, and for a MathML annotation-xml
//element whose first `encoding` attribute is (case-insensitively) one of MIME_TYPES.
function isHtmlIntegrationPoint(tn, ns, attrs) {
    if (ns === NS.SVG) {
        return tn === $.FOREIGN_OBJECT || tn === $.DESC || tn === $.TITLE;
    }

    if (ns !== NS.MATHML || tn !== $.ANNOTATION_XML) {
        return false;
    }

    const encodingAttr = attrs.find(attr => attr.name === ATTRS.ENCODING);

    if (!encodingAttr) {
        return false;
    }

    const encoding = encodingAttr.value.toLowerCase();

    return encoding === MIME_TYPES.TEXT_HTML || encoding === MIME_TYPES.APPLICATION_XML;
}
exports.isIntegrationPoint = function(tn, ns, attrs, foreignNS) {
if ((!foreignNS || foreignNS === NS.HTML) && isHtmlIntegrationPoint(tn, ns, attrs)) {
return true;
}
if ((!foreignNS || foreignNS === NS.MATHML) && isMathMLTextIntegrationPoint(tn, ns)) {
return true;
}
return false;
};

View File

@ -0,0 +1,272 @@
'use strict';
//Namespace URIs (also kept in the local `NS` alias, which keys SPECIAL_ELEMENTS below)
const NS = (exports.NAMESPACES = {
HTML: 'http://www.w3.org/1999/xhtml',
MATHML: 'http://www.w3.org/1998/Math/MathML',
SVG: 'http://www.w3.org/2000/svg',
XLINK: 'http://www.w3.org/1999/xlink',
XML: 'http://www.w3.org/XML/1998/namespace',
XMLNS: 'http://www.w3.org/2000/xmlns/'
});
//Attribute names, in the lowercase form used for lookups
exports.ATTRS = {
TYPE: 'type',
ACTION: 'action',
ENCODING: 'encoding',
PROMPT: 'prompt',
NAME: 'name',
COLOR: 'color',
FACE: 'face',
SIZE: 'size'
};
//Document modes
exports.DOCUMENT_MODE = {
NO_QUIRKS: 'no-quirks',
QUIRKS: 'quirks',
LIMITED_QUIRKS: 'limited-quirks'
};
//Tag names (also kept in the local `$` alias, which keys SPECIAL_ELEMENTS below).
//Every value is lowercase except FOREIGN_OBJECT, which uses the camelCase SVG spelling.
const $ = (exports.TAG_NAMES = {
A: 'a',
ADDRESS: 'address',
ANNOTATION_XML: 'annotation-xml',
APPLET: 'applet',
AREA: 'area',
ARTICLE: 'article',
ASIDE: 'aside',
B: 'b',
BASE: 'base',
BASEFONT: 'basefont',
BGSOUND: 'bgsound',
BIG: 'big',
BLOCKQUOTE: 'blockquote',
BODY: 'body',
BR: 'br',
BUTTON: 'button',
CAPTION: 'caption',
CENTER: 'center',
CODE: 'code',
COL: 'col',
COLGROUP: 'colgroup',
DD: 'dd',
DESC: 'desc',
DETAILS: 'details',
DIALOG: 'dialog',
DIR: 'dir',
DIV: 'div',
DL: 'dl',
DT: 'dt',
EM: 'em',
EMBED: 'embed',
FIELDSET: 'fieldset',
FIGCAPTION: 'figcaption',
FIGURE: 'figure',
FONT: 'font',
FOOTER: 'footer',
FOREIGN_OBJECT: 'foreignObject',
FORM: 'form',
FRAME: 'frame',
FRAMESET: 'frameset',
H1: 'h1',
H2: 'h2',
H3: 'h3',
H4: 'h4',
H5: 'h5',
H6: 'h6',
HEAD: 'head',
HEADER: 'header',
HGROUP: 'hgroup',
HR: 'hr',
HTML: 'html',
I: 'i',
IMG: 'img',
IMAGE: 'image',
INPUT: 'input',
IFRAME: 'iframe',
KEYGEN: 'keygen',
LABEL: 'label',
LI: 'li',
LINK: 'link',
LISTING: 'listing',
MAIN: 'main',
MALIGNMARK: 'malignmark',
MARQUEE: 'marquee',
MATH: 'math',
MENU: 'menu',
META: 'meta',
MGLYPH: 'mglyph',
MI: 'mi',
MO: 'mo',
MN: 'mn',
MS: 'ms',
MTEXT: 'mtext',
NAV: 'nav',
NOBR: 'nobr',
NOFRAMES: 'noframes',
NOEMBED: 'noembed',
NOSCRIPT: 'noscript',
OBJECT: 'object',
OL: 'ol',
OPTGROUP: 'optgroup',
OPTION: 'option',
P: 'p',
PARAM: 'param',
PLAINTEXT: 'plaintext',
PRE: 'pre',
RB: 'rb',
RP: 'rp',
RT: 'rt',
RTC: 'rtc',
RUBY: 'ruby',
S: 's',
SCRIPT: 'script',
SECTION: 'section',
SELECT: 'select',
SOURCE: 'source',
SMALL: 'small',
SPAN: 'span',
STRIKE: 'strike',
STRONG: 'strong',
STYLE: 'style',
SUB: 'sub',
SUMMARY: 'summary',
SUP: 'sup',
TABLE: 'table',
TBODY: 'tbody',
TEMPLATE: 'template',
TEXTAREA: 'textarea',
TFOOT: 'tfoot',
TD: 'td',
TH: 'th',
THEAD: 'thead',
TITLE: 'title',
TR: 'tr',
TRACK: 'track',
TT: 'tt',
U: 'u',
UL: 'ul',
SVG: 'svg',
VAR: 'var',
WBR: 'wbr',
XMP: 'xmp'
});
//Special elements.
//Two-level lookup table: namespace URI -> tag name -> true.
exports.SPECIAL_ELEMENTS = {
[NS.HTML]: {
[$.ADDRESS]: true,
[$.APPLET]: true,
[$.AREA]: true,
[$.ARTICLE]: true,
[$.ASIDE]: true,
[$.BASE]: true,
[$.BASEFONT]: true,
[$.BGSOUND]: true,
[$.BLOCKQUOTE]: true,
[$.BODY]: true,
[$.BR]: true,
[$.BUTTON]: true,
[$.CAPTION]: true,
[$.CENTER]: true,
[$.COL]: true,
[$.COLGROUP]: true,
[$.DD]: true,
[$.DETAILS]: true,
[$.DIR]: true,
[$.DIV]: true,
[$.DL]: true,
[$.DT]: true,
[$.EMBED]: true,
[$.FIELDSET]: true,
[$.FIGCAPTION]: true,
[$.FIGURE]: true,
[$.FOOTER]: true,
[$.FORM]: true,
[$.FRAME]: true,
[$.FRAMESET]: true,
[$.H1]: true,
[$.H2]: true,
[$.H3]: true,
[$.H4]: true,
[$.H5]: true,
[$.H6]: true,
[$.HEAD]: true,
[$.HEADER]: true,
[$.HGROUP]: true,
[$.HR]: true,
[$.HTML]: true,
[$.IFRAME]: true,
[$.IMG]: true,
[$.INPUT]: true,
[$.LI]: true,
[$.LINK]: true,
[$.LISTING]: true,
[$.MAIN]: true,
[$.MARQUEE]: true,
[$.MENU]: true,
[$.META]: true,
[$.NAV]: true,
[$.NOEMBED]: true,
[$.NOFRAMES]: true,
[$.NOSCRIPT]: true,
[$.OBJECT]: true,
[$.OL]: true,
[$.P]: true,
[$.PARAM]: true,
[$.PLAINTEXT]: true,
[$.PRE]: true,
[$.SCRIPT]: true,
[$.SECTION]: true,
[$.SELECT]: true,
[$.SOURCE]: true,
[$.STYLE]: true,
[$.SUMMARY]: true,
[$.TABLE]: true,
[$.TBODY]: true,
[$.TD]: true,
[$.TEMPLATE]: true,
[$.TEXTAREA]: true,
[$.TFOOT]: true,
[$.TH]: true,
[$.THEAD]: true,
[$.TITLE]: true,
[$.TR]: true,
[$.TRACK]: true,
[$.UL]: true,
[$.WBR]: true,
[$.XMP]: true
},
[NS.MATHML]: {
[$.MI]: true,
[$.MO]: true,
[$.MN]: true,
[$.MS]: true,
[$.MTEXT]: true,
[$.ANNOTATION_XML]: true
},
[NS.SVG]: {
[$.TITLE]: true,
[$.FOREIGN_OBJECT]: true,
[$.DESC]: true
}
};

View File

@ -0,0 +1,109 @@
'use strict';
//NOTE: the last two code points (U+xxFFFE and U+xxFFFF) of every plane,
//from plane 0 (0xfffe, 0xffff) up to plane 16 (0x10fffe, 0x10ffff).
//Built in ascending order, two values per plane, 34 values in total.
const UNDEFINED_CODE_POINTS = [];

for (let plane = 0x00; plane <= 0x10; plane++) {
    const planeStart = plane * 0x10000;

    UNDEFINED_CODE_POINTS.push(planeStart + 0xfffe, planeStart + 0xffff);
}
//The replacement character (U+FFFD) as a one-character string.
exports.REPLACEMENT_CHARACTER = '\uFFFD';
//Named numeric code points. EOF is the only negative value (-1), so it can never
//collide with a real code point; REPLACEMENT_CHARACTER here is the numeric form (0xfffd)
//of the string constant exported above.
exports.CODE_POINTS = {
EOF: -1,
NULL: 0x00,
TABULATION: 0x09,
CARRIAGE_RETURN: 0x0d,
LINE_FEED: 0x0a,
FORM_FEED: 0x0c,
SPACE: 0x20,
EXCLAMATION_MARK: 0x21,
QUOTATION_MARK: 0x22,
NUMBER_SIGN: 0x23,
AMPERSAND: 0x26,
APOSTROPHE: 0x27,
HYPHEN_MINUS: 0x2d,
SOLIDUS: 0x2f,
DIGIT_0: 0x30,
DIGIT_9: 0x39,
SEMICOLON: 0x3b,
LESS_THAN_SIGN: 0x3c,
EQUALS_SIGN: 0x3d,
GREATER_THAN_SIGN: 0x3e,
QUESTION_MARK: 0x3f,
LATIN_CAPITAL_A: 0x41,
LATIN_CAPITAL_F: 0x46,
LATIN_CAPITAL_X: 0x58,
LATIN_CAPITAL_Z: 0x5a,
RIGHT_SQUARE_BRACKET: 0x5d,
GRAVE_ACCENT: 0x60,
LATIN_SMALL_A: 0x61,
LATIN_SMALL_F: 0x66,
LATIN_SMALL_X: 0x78,
LATIN_SMALL_Z: 0x7a,
REPLACEMENT_CHARACTER: 0xfffd
};
//Fixed strings expressed as arrays of code points; the trailing comment on each
//line shows the text the array spells. Note that SCRIPT_STRING is the only
//lower-case sequence, all the others are upper-case.
exports.CODE_POINT_SEQUENCES = {
DASH_DASH_STRING: [0x2d, 0x2d], //--
DOCTYPE_STRING: [0x44, 0x4f, 0x43, 0x54, 0x59, 0x50, 0x45], //DOCTYPE
CDATA_START_STRING: [0x5b, 0x43, 0x44, 0x41, 0x54, 0x41, 0x5b], //[CDATA[
SCRIPT_STRING: [0x73, 0x63, 0x72, 0x69, 0x70, 0x74], //script
PUBLIC_STRING: [0x50, 0x55, 0x42, 0x4c, 0x49, 0x43], //PUBLIC
SYSTEM_STRING: [0x53, 0x59, 0x53, 0x54, 0x45, 0x4d] //SYSTEM
};
//Surrogates
exports.isSurrogate = function(cp) {
return cp >= 0xd800 && cp <= 0xdfff;
};
exports.isSurrogatePair = function(cp) {
return cp >= 0xdc00 && cp <= 0xdfff;
};
exports.getSurrogatePairCodePoint = function(cp1, cp2) {
return (cp1 - 0xd800) * 0x400 + 0x2400 + cp2;
};
//NOTE: excluding NULL and ASCII whitespace
exports.isControlCodePoint = function(cp) {
return (
(cp !== 0x20 && cp !== 0x0a && cp !== 0x0d && cp !== 0x09 && cp !== 0x0c && cp >= 0x01 && cp <= 0x1f) ||
(cp >= 0x7f && cp <= 0x9f)
);
};
exports.isUndefinedCodePoint = function(cp) {
return (cp >= 0xfdd0 && cp <= 0xfdef) || UNDEFINED_CODE_POINTS.indexOf(cp) > -1;
};

View File

@ -0,0 +1,43 @@
'use strict';
const Mixin = require('../../utils/mixin');
//Shared base for the error reporting mixins: builds an error record, stamps it with
//a location and hands it to the `onParseError` callback taken from the options.
class ErrorReportingMixinBase extends Mixin {
    constructor(host, opts) {
        super(host);

        //NOTE: left as null here; subclasses in this package assign it after calling super()
        this.posTracker = null;
        this.onParseError = opts.onParseError;
    }

    //Default location: a zero-width range at the position tracker's current line/col/offset.
    _setErrorLocation(err) {
        const tracker = this.posTracker;

        err.endLine = tracker.line;
        err.startLine = err.endLine;

        err.endCol = tracker.col;
        err.startCol = err.endCol;

        err.endOffset = tracker.offset;
        err.startOffset = err.endOffset;
    }

    //Creates the error record (every location field preset to -1), lets
    //_setErrorLocation fill it in and then invokes the callback.
    _reportError(code) {
        const UNSET = -1;
        const err = {
            code,
            startLine: UNSET,
            startCol: UNSET,
            startOffset: UNSET,
            endLine: UNSET,
            endCol: UNSET,
            endOffset: UNSET
        };

        this._setErrorLocation(err);
        this.onParseError(err);
    }

    //Routes the host's `_err(code)` to this mixin's _reportError.
    _getOverriddenMethods(mxn) {
        const overrides = {
            _err(code) {
                mxn._reportError(code);
            }
        };

        return overrides;
    }
}
module.exports = ErrorReportingMixinBase;

View File

@ -0,0 +1,52 @@
'use strict';
const ErrorReportingMixinBase = require('./mixin-base');
const ErrorReportingTokenizerMixin = require('./tokenizer-mixin');
const LocationInfoTokenizerMixin = require('../location-info/tokenizer-mixin');
const Mixin = require('../../utils/mixin');
//Parser-level error reporting: errors are located relative to the token that is
//currently being processed rather than to the preprocessor position.
class ErrorReportingParserMixin extends ErrorReportingMixinBase {
    constructor(parser, opts) {
        super(parser, opts);

        this.opts = opts;
        //Location of the token last passed to _processInputToken
        this.ctLoc = null;
        //When truthy the error range collapses onto the start of the current token
        this.locBeforeToken = false;
    }

    //Copies the current token's location into `err`; does nothing while no token
    //location has been recorded yet.
    _setErrorLocation(err) {
        const tokenLoc = this.ctLoc;

        if (!tokenLoc) {
            return;
        }

        err.startLine = tokenLoc.startLine;
        err.startCol = tokenLoc.startCol;
        err.startOffset = tokenLoc.startOffset;

        if (this.locBeforeToken) {
            err.endLine = tokenLoc.startLine;
            err.endCol = tokenLoc.startCol;
            err.endOffset = tokenLoc.startOffset;
        } else {
            err.endLine = tokenLoc.endLine;
            err.endCol = tokenLoc.endCol;
            err.endOffset = tokenLoc.endOffset;
        }
    }

    _getOverriddenMethods(mxn, orig) {
        const overrides = {
            //After the original bootstrap, equip the tokenizer with error reporting
            //and location info mixins.
            _bootstrap(document, fragmentContext) {
                orig._bootstrap.call(this, document, fragmentContext);

                const tokenizer = this.tokenizer;

                Mixin.install(tokenizer, ErrorReportingTokenizerMixin, mxn.opts);
                Mixin.install(tokenizer, LocationInfoTokenizerMixin);
            },

            //Remember the token's location before the token is processed.
            _processInputToken(token) {
                mxn.ctLoc = token.location;
                orig._processInputToken.call(this, token);
            },

            _err(code, options) {
                const beforeToken = options && options.beforeToken;

                mxn.locBeforeToken = beforeToken;
                mxn._reportError(code);
            }
        };

        return overrides;
    }
}
module.exports = ErrorReportingParserMixin;

View File

@ -0,0 +1,24 @@
'use strict';
const ErrorReportingMixinBase = require('./mixin-base');
const PositionTrackingPreprocessorMixin = require('../position-tracking/preprocessor-mixin');
const Mixin = require('../../utils/mixin');
//Preprocessor-level error reporting that reports at most one error in a row
//for the same offset.
class ErrorReportingPreprocessorMixin extends ErrorReportingMixinBase {
    constructor(preprocessor, opts) {
        super(preprocessor, opts);

        const tracker = Mixin.install(preprocessor, PositionTrackingPreprocessorMixin);

        this.posTracker = tracker;
        //Offset of the most recently reported error (-1 until the first report)
        this.lastErrOffset = -1;
    }

    _reportError(code) {
        const currentOffset = this.posTracker.offset;

        //NOTE: avoid reporting error twice on advance/retreat
        if (this.lastErrOffset === currentOffset) {
            return;
        }

        this.lastErrOffset = this.posTracker.offset;
        super._reportError(code);
    }
}
module.exports = ErrorReportingPreprocessorMixin;

View File

@ -0,0 +1,17 @@
'use strict';
const ErrorReportingMixinBase = require('./mixin-base');
const ErrorReportingPreprocessorMixin = require('./preprocessor-mixin');
const Mixin = require('../../utils/mixin');
//Tokenizer-level error reporting: installs the preprocessor error mixin on the
//tokenizer's preprocessor and shares its position tracker.
class ErrorReportingTokenizerMixin extends ErrorReportingMixinBase {
    constructor(tokenizer, opts) {
        super(tokenizer, opts);

        const installed = Mixin.install(tokenizer.preprocessor, ErrorReportingPreprocessorMixin, opts);

        this.posTracker = installed.posTracker;
    }
}
module.exports = ErrorReportingTokenizerMixin;

View File

@ -0,0 +1,35 @@
'use strict';
const Mixin = require('../../utils/mixin');
//Open element stack mixin that notifies `opts.onItemPop(element)` about every
//element leaving the stack through pop(), popAllUpToHtmlElement() or remove().
class LocationInfoOpenElementStackMixin extends Mixin {
    constructor(stack, opts) {
        super(stack);

        this.onItemPop = opts.onItemPop;
    }

    _getOverriddenMethods(mxn, orig) {
        return {
            //Report the current (topmost) element, then pop it.
            pop() {
                mxn.onItemPop(this.current);
                orig.pop.call(this);
            },

            //Report every element above index 0, top first, then let the original
            //implementation drop them in one step.
            popAllUpToHtmlElement() {
                for (let i = this.stackTop; i > 0; i--) {
                    mxn.onItemPop(this.items[i]);
                }

                orig.popAllUpToHtmlElement.call(this);
            },

            //Report the element that is actually being removed. It may sit anywhere
            //in the stack, so reporting `this.current` would notify about an element
            //that stays open and would leave the removed one without a notification.
            //Nothing is reported when the element is not on the stack, because the
            //original remove() is a no-op in that case.
            remove(element) {
                let isOnStack = false;

                for (let i = this.stackTop; i >= 0; i--) {
                    if (this.items[i] === element) {
                        isOnStack = true;
                        break;
                    }
                }

                if (isOnStack) {
                    mxn.onItemPop(element);
                }

                orig.remove.call(this, element);
            }
        };
    }
}
module.exports = LocationInfoOpenElementStackMixin;

View File

@ -0,0 +1,223 @@
'use strict';
const Mixin = require('../../utils/mixin');
const Tokenizer = require('../../tokenizer');
const LocationInfoTokenizerMixin = require('./tokenizer-mixin');
const LocationInfoOpenElementStackMixin = require('./open-element-stack-mixin');
const HTML = require('../../common/html');
//Aliases
const $ = HTML.TAG_NAMES;
//Parser mixin that records source code locations on the nodes the parser creates.
//During _bootstrap it installs LocationInfoTokenizerMixin on the tokenizer and
//LocationInfoOpenElementStackMixin on the open element stack; locations are written
//through the tree adapter's set/update/getNodeSourceCodeLocation methods.
class LocationInfoParserMixin extends Mixin {
constructor(parser) {
super(parser);
this.parser = parser;
this.treeAdapter = this.parser.treeAdapter;
this.posTracker = null;
//Start tag token stored by _appendElement/_insertElement/_insertTemplate and
//consumed (then reset to null) by _attachElementToTree.
this.lastStartTagToken = null;
//Result of the most recent _findFosterParentingLocation call.
this.lastFosterParentingLocation = null;
//Token most recently passed to _processToken/_processTokenInForeignContent.
this.currentToken = null;
}
//Sets the element's location to a shallow copy of the remembered start tag's location,
//with the original location object additionally exposed as `startTag`.
//When no start tag token is remembered the location is set to null.
_setStartLocation(element) {
let loc = null;
if (this.lastStartTagToken) {
loc = Object.assign({}, this.lastStartTagToken.location);
loc.startTag = this.lastStartTagToken.location;
}
this.treeAdapter.setNodeSourceCodeLocation(element, loc);
}
//Updates the end position of an element that already has a location.
//If `closingToken` is an end tag with the element's tag name, the element ends where
//that token ends and a copy of the token location is stored as `endTag`;
//otherwise the element ends where `closingToken` starts.
_setEndLocation(element, closingToken) {
const loc = this.treeAdapter.getNodeSourceCodeLocation(element);
if (loc) {
if (closingToken.location) {
const ctLoc = closingToken.location;
const tn = this.treeAdapter.getTagName(element);
// NOTE: For cases like <p> <p> </p> - First 'p' closes without a closing
// tag and for cases like <td> <p> </td> - 'p' closes without a closing tag.
const isClosingEndTag = closingToken.type === Tokenizer.END_TAG_TOKEN && tn === closingToken.tagName;
const endLoc = {};
if (isClosingEndTag) {
endLoc.endTag = Object.assign({}, ctLoc);
endLoc.endLine = ctLoc.endLine;
endLoc.endCol = ctLoc.endCol;
endLoc.endOffset = ctLoc.endOffset;
} else {
endLoc.endLine = ctLoc.startLine;
endLoc.endCol = ctLoc.startCol;
endLoc.endOffset = ctLoc.startOffset;
}
this.treeAdapter.updateNodeSourceCodeLocation(element, endLoc);
}
}
}
//Returns the parser methods to override. Inside these methods `this` is the parser,
//`mxn` is this mixin instance and `orig` holds the original implementations.
_getOverriddenMethods(mxn, orig) {
return {
//Resets the mixin state and installs the tokenizer and open element stack mixins.
_bootstrap(document, fragmentContext) {
orig._bootstrap.call(this, document, fragmentContext);
mxn.lastStartTagToken = null;
mxn.lastFosterParentingLocation = null;
mxn.currentToken = null;
const tokenizerMixin = Mixin.install(this.tokenizer, LocationInfoTokenizerMixin);
mxn.posTracker = tokenizerMixin.posTracker;
Mixin.install(this.openElements, LocationInfoOpenElementStackMixin, {
//Every element leaving the stack gets its end location from the current token.
onItemPop: function(element) {
mxn._setEndLocation(element, mxn.currentToken);
}
});
},
_runParsingLoop(scriptHandler) {
orig._runParsingLoop.call(this, scriptHandler);
// NOTE: generate location info for elements
// that remain on the open element stack
for (let i = this.openElements.stackTop; i >= 0; i--) {
mxn._setEndLocation(this.openElements.items[i], mxn.currentToken);
}
},
//Token processing
_processTokenInForeignContent(token) {
mxn.currentToken = token;
orig._processTokenInForeignContent.call(this, token);
},
_processToken(token) {
mxn.currentToken = token;
orig._processToken.call(this, token);
//NOTE: <body> and <html> are never popped from the stack, so we need to update
//their end location explicitly.
const requireExplicitUpdate =
token.type === Tokenizer.END_TAG_TOKEN &&
(token.tagName === $.HTML || (token.tagName === $.BODY && this.openElements.hasInScope($.BODY)));
if (requireExplicitUpdate) {
//Search from the top of the stack for the first element with the token's tag name.
for (let i = this.openElements.stackTop; i >= 0; i--) {
const element = this.openElements.items[i];
if (this.treeAdapter.getTagName(element) === token.tagName) {
mxn._setEndLocation(element, token);
break;
}
}
}
},
//Doctype
//Assigns the token location to the first document type node among the document's children.
_setDocumentType(token) {
orig._setDocumentType.call(this, token);
const documentChildren = this.treeAdapter.getChildNodes(this.document);
const cnLength = documentChildren.length;
for (let i = 0; i < cnLength; i++) {
const node = documentChildren[i];
if (this.treeAdapter.isDocumentTypeNode(node)) {
this.treeAdapter.setNodeSourceCodeLocation(node, token.location);
break;
}
}
},
//Elements
_attachElementToTree(element) {
//NOTE: _attachElementToTree is called from _appendElement, _insertElement and _insertTemplate methods.
//So we will use the token location stored in these methods for the element.
mxn._setStartLocation(element);
mxn.lastStartTagToken = null;
orig._attachElementToTree.call(this, element);
},
_appendElement(token, namespaceURI) {
mxn.lastStartTagToken = token;
orig._appendElement.call(this, token, namespaceURI);
},
_insertElement(token, namespaceURI) {
mxn.lastStartTagToken = token;
orig._insertElement.call(this, token, namespaceURI);
},
//The template content node itself gets an explicit null location.
_insertTemplate(token) {
mxn.lastStartTagToken = token;
orig._insertTemplate.call(this, token);
const tmplContent = this.treeAdapter.getTemplateContent(this.openElements.current);
this.treeAdapter.setNodeSourceCodeLocation(tmplContent, null);
},
//The fake root element gets an explicit null location.
_insertFakeRootElement() {
orig._insertFakeRootElement.call(this);
this.treeAdapter.setNodeSourceCodeLocation(this.openElements.current, null);
},
//Comments
//Assumes the original implementation appended the comment as the last child of `parent`.
_appendCommentNode(token, parent) {
orig._appendCommentNode.call(this, token, parent);
const children = this.treeAdapter.getChildNodes(parent);
const commentNode = children[children.length - 1];
this.treeAdapter.setNodeSourceCodeLocation(commentNode, token.location);
},
//Text
_findFosterParentingLocation() {
//NOTE: store last foster parenting location, so we will be able to find inserted text
//in case of foster parenting
mxn.lastFosterParentingLocation = orig._findFosterParentingLocation.call(this);
return mxn.lastFosterParentingLocation;
},
//Locates the text node that received the characters and sets or extends its location.
_insertCharacters(token) {
orig._insertCharacters.call(this, token);
const hasFosterParent = this._shouldFosterParentOnInsertion();
//Parent lookup order: foster parent (if any), current template content, current element.
const parent =
(hasFosterParent && mxn.lastFosterParentingLocation.parent) ||
this.openElements.currentTmplContent ||
this.openElements.current;
const siblings = this.treeAdapter.getChildNodes(parent);
//With a foster parenting `beforeElement` the text node is the sibling right before it,
//otherwise it is the last child.
const textNodeIdx =
hasFosterParent && mxn.lastFosterParentingLocation.beforeElement
? siblings.indexOf(mxn.lastFosterParentingLocation.beforeElement) - 1
: siblings.length - 1;
const textNode = siblings[textNodeIdx];
//NOTE: if we have location assigned by another token, then just update end position
const tnLoc = this.treeAdapter.getNodeSourceCodeLocation(textNode);
if (tnLoc) {
const { endLine, endCol, endOffset } = token.location;
this.treeAdapter.updateNodeSourceCodeLocation(textNode, { endLine, endCol, endOffset });
} else {
this.treeAdapter.setNodeSourceCodeLocation(textNode, token.location);
}
}
};
}
}
module.exports = LocationInfoParserMixin;

View File

@ -0,0 +1,146 @@
'use strict';
const Mixin = require('../../utils/mixin');
const Tokenizer = require('../../tokenizer');
const PositionTrackingPreprocessorMixin = require('../position-tracking/preprocessor-mixin');
//Tokenizer mixin that attaches a `location` object (start/end line, column and offset)
//to every token the tokenizer creates, plus per-attribute locations under `location.attrs`.
//Positions come from PositionTrackingPreprocessorMixin installed on the tokenizer's preprocessor.
class LocationInfoTokenizerMixin extends Mixin {
constructor(tokenizer) {
super(tokenizer);
this.tokenizer = tokenizer;
this.posTracker = Mixin.install(tokenizer.preprocessor, PositionTrackingPreprocessorMixin);
//Location of the attribute currently being tokenized (set in _createAttr).
this.currentAttrLocation = null;
//Start location captured when one of the patched Tokenizer.MODE states is entered
//(see the bottom of _getOverriddenMethods).
this.ctLoc = null;
}
//Returns a location whose start is the tracker's current position and whose
//end fields are all -1, meaning "not known yet".
_getCurrentLocation() {
return {
startLine: this.posTracker.line,
startCol: this.posTracker.col,
startOffset: this.posTracker.offset,
endLine: -1,
endCol: -1,
endOffset: -1
};
}
//Closes the current attribute location at the tracker's position and stores it
//on the current token under `location.attrs[<attribute name>]`.
_attachCurrentAttrLocationInfo() {
this.currentAttrLocation.endLine = this.posTracker.line;
this.currentAttrLocation.endCol = this.posTracker.col;
this.currentAttrLocation.endOffset = this.posTracker.offset;
const currentToken = this.tokenizer.currentToken;
const currentAttr = this.tokenizer.currentAttr;
if (!currentToken.location.attrs) {
//Prototype-less object, so attribute names cannot clash with Object.prototype members.
currentToken.location.attrs = Object.create(null);
}
currentToken.location.attrs[currentAttr.name] = this.currentAttrLocation;
}
//Returns the tokenizer methods to override. Inside these methods `this` is the tokenizer,
//`mxn` is this mixin instance and `orig` holds the original implementations.
_getOverriddenMethods(mxn, orig) {
const methods = {
_createStartTagToken() {
orig._createStartTagToken.call(this);
this.currentToken.location = mxn.ctLoc;
},
_createEndTagToken() {
orig._createEndTagToken.call(this);
this.currentToken.location = mxn.ctLoc;
},
_createCommentToken() {
orig._createCommentToken.call(this);
this.currentToken.location = mxn.ctLoc;
},
_createDoctypeToken(initialName) {
orig._createDoctypeToken.call(this, initialName);
this.currentToken.location = mxn.ctLoc;
},
_createCharacterToken(type, ch) {
orig._createCharacterToken.call(this, type, ch);
this.currentCharacterToken.location = mxn.ctLoc;
},
//The EOF token takes a fresh location at the current position instead of mxn.ctLoc.
_createEOFToken() {
orig._createEOFToken.call(this);
this.currentToken.location = mxn._getCurrentLocation();
},
_createAttr(attrNameFirstCh) {
orig._createAttr.call(this, attrNameFirstCh);
mxn.currentAttrLocation = mxn._getCurrentLocation();
},
//The attribute location is (re)attached both when the name ends and when the value ends,
//so the later call overwrites the end position recorded by the earlier one.
_leaveAttrName(toState) {
orig._leaveAttrName.call(this, toState);
mxn._attachCurrentAttrLocationInfo();
},
_leaveAttrValue(toState) {
orig._leaveAttrValue.call(this, toState);
mxn._attachCurrentAttrLocationInfo();
},
_emitCurrentToken() {
const ctLoc = this.currentToken.location;
//NOTE: if we have a pending character token, make its end location equal to the
//current token's start location.
if (this.currentCharacterToken) {
this.currentCharacterToken.location.endLine = ctLoc.startLine;
this.currentCharacterToken.location.endCol = ctLoc.startCol;
this.currentCharacterToken.location.endOffset = ctLoc.startOffset;
}
if (this.currentToken.type === Tokenizer.EOF_TOKEN) {
//The EOF token is zero-width: its end equals its start.
ctLoc.endLine = ctLoc.startLine;
ctLoc.endCol = ctLoc.startCol;
ctLoc.endOffset = ctLoc.startOffset;
} else {
//Column and offset are taken one past the tracker's current position.
ctLoc.endLine = mxn.posTracker.line;
ctLoc.endCol = mxn.posTracker.col + 1;
ctLoc.endOffset = mxn.posTracker.offset + 1;
}
orig._emitCurrentToken.call(this);
},
_emitCurrentCharacterToken() {
const ctLoc = this.currentCharacterToken && this.currentCharacterToken.location;
//NOTE: if we have a character token and its location wasn't set in _emitCurrentToken(),
//then set its location at the current preprocessor position.
//We don't need to increment the preprocessor position, since character token
//emission is always forced by the start of the next character token here.
//So, we already have an advanced position.
if (ctLoc && ctLoc.endOffset === -1) {
ctLoc.endLine = mxn.posTracker.line;
ctLoc.endCol = mxn.posTracker.col;
ctLoc.endOffset = mxn.posTracker.offset;
}
orig._emitCurrentCharacterToken.call(this);
}
};
//NOTE: patch initial states for each mode to obtain token start position
Object.keys(Tokenizer.MODE).forEach(modeName => {
const state = Tokenizer.MODE[modeName];
methods[state] = function(cp) {
mxn.ctLoc = mxn._getCurrentLocation();
orig[state].call(this, cp);
};
});
return methods;
}
}
module.exports = LocationInfoTokenizerMixin;

View File

@ -0,0 +1,64 @@
'use strict';
const Mixin = require('../../utils/mixin');
//Preprocessor mixin that keeps `line`, `col` and `offset` in sync with the
//preprocessor's position while it advances, retreats and drops parsed chunks.
class PositionTrackingPreprocessorMixin extends Mixin {
    constructor(preprocessor) {
        super(preprocessor);

        this.preprocessor = preprocessor;
        //True when the character at the current position ends a line
        this.isEol = false;
        //Buffer position at which the current line starts
        this.lineStartPos = 0;
        //Total size of the buffer parts removed by dropParsedChunk so far
        this.droppedBufferSize = 0;

        this.offset = 0;
        this.col = 0;
        this.line = 1;
    }

    _getOverriddenMethods(mxn, orig) {
        const overrides = {
            advance() {
                const nextPos = this.pos + 1;
                const nextCh = this.html[nextPos];

                //NOTE: LF should be in the last column of the line
                if (mxn.isEol) {
                    mxn.isEol = false;
                    mxn.line++;
                    mxn.lineStartPos = nextPos;
                }

                //A lone CR ends the line; a CR followed by LF lets the LF end it
                const endsLine = nextCh === '\n' || (nextCh === '\r' && this.html[nextPos + 1] !== '\n');

                if (endsLine) {
                    mxn.isEol = true;
                }

                mxn.col = nextPos - mxn.lineStartPos + 1;
                mxn.offset = mxn.droppedBufferSize + nextPos;

                return orig.advance.call(this);
            },

            retreat() {
                orig.retreat.call(this);

                mxn.isEol = false;
                mxn.col = this.pos - mxn.lineStartPos + 1;
            },

            dropParsedChunk() {
                const posBeforeDrop = this.pos;

                orig.dropParsedChunk.call(this);

                //The original implementation moves `pos` back by the amount it dropped
                const droppedSize = posBeforeDrop - this.pos;

                mxn.lineStartPos -= droppedSize;
                mxn.droppedBufferSize += droppedSize;
                mxn.offset = mxn.droppedBufferSize + this.pos;
            }
        };

        return overrides;
    }
}
module.exports = PositionTrackingPreprocessorMixin;

View File

@ -0,0 +1,29 @@
'use strict';
const Parser = require('./parser');
const Serializer = require('./serializer');
// Shorthands
exports.parse = function parse(html, options) {
const parser = new Parser(options);
return parser.parse(html);
};
exports.parseFragment = function parseFragment(fragmentContext, html, options) {
if (typeof fragmentContext === 'string') {
options = html;
html = fragmentContext;
fragmentContext = null;
}
const parser = new Parser(options);
return parser.parseFragment(html, fragmentContext);
};
exports.serialize = function(node, options) {
const serializer = new Serializer(node, options);
return serializer.serialize();
};

View File

@ -0,0 +1,181 @@
'use strict';
//Const
//Maximum number of equivalent element entries (same tag name, namespace and attributes)
//kept after the last marker; older duplicates are dropped when a new one is pushed.
const NOAH_ARK_CAPACITY = 3;

//List of formatting elements
//Entries are either markers ({ type: MARKER_ENTRY }) or elements
//({ type: ELEMENT_ENTRY, element, token }). `length` is maintained manually and
//always equals `entries.length`.
class FormattingElementList {
    constructor(treeAdapter) {
        this.length = 0;
        this.entries = [];
        this.treeAdapter = treeAdapter;
        this.bookmark = null;
    }

    //Noah's Ark condition
    //OPTIMIZATION: at first we try to find possible candidates for exclusion using
    //lightweight heuristics without thorough attributes check.
    //Returns `{ idx, attrs }` records for entries after the last marker that share the
    //new element's tag name, namespace and attribute count. Records are ordered from the
    //end of the list towards its start (descending `idx`). An empty array is returned
    //when there are fewer than NOAH_ARK_CAPACITY candidates.
    _getNoahArkConditionCandidates(newElement) {
        const candidates = [];

        if (this.length >= NOAH_ARK_CAPACITY) {
            const neAttrsLength = this.treeAdapter.getAttrList(newElement).length;
            const neTagName = this.treeAdapter.getTagName(newElement);
            const neNamespaceURI = this.treeAdapter.getNamespaceURI(newElement);

            for (let i = this.length - 1; i >= 0; i--) {
                const entry = this.entries[i];

                if (entry.type === FormattingElementList.MARKER_ENTRY) {
                    break;
                }

                const element = entry.element;
                const elementAttrs = this.treeAdapter.getAttrList(element);

                const isCandidate =
                    this.treeAdapter.getTagName(element) === neTagName &&
                    this.treeAdapter.getNamespaceURI(element) === neNamespaceURI &&
                    elementAttrs.length === neAttrsLength;

                if (isCandidate) {
                    candidates.push({ idx: i, attrs: elementAttrs });
                }
            }
        }

        return candidates.length < NOAH_ARK_CAPACITY ? [] : candidates;
    }

    //Removes the oldest equivalent entries so that, once `newElement` is pushed,
    //at most NOAH_ARK_CAPACITY equivalent entries exist after the last marker.
    _ensureNoahArkCondition(newElement) {
        const candidates = this._getNoahArkConditionCandidates(newElement);

        if (candidates.length === 0) {
            return;
        }

        //NOTE: build attrs map for the new element so we can perform fast lookups
        const neAttrsMap = Object.create(null);

        for (const neAttr of this.treeAdapter.getAttrList(newElement)) {
            neAttrsMap[neAttr.name] = neAttr.value;
        }

        //NOTE: candidates already have the same number of attributes as the new element,
        //so it is enough to check that each of their attributes matches by name and value.
        //Filtering into a new array avoids skipping candidates, which happens when
        //an array is spliced while it is being iterated forward.
        const equivalents = candidates.filter(candidate =>
            candidate.attrs.every(cAttr => neAttrsMap[cAttr.name] === cAttr.value)
        );

        //NOTE: keep the NOAH_ARK_CAPACITY - 1 most recent equivalents and remove the rest.
        //`equivalents` is ordered by descending `idx`, so walking it forward removes higher
        //indices first and the remaining, lower indices stay valid after each splice.
        for (let i = NOAH_ARK_CAPACITY - 1; i < equivalents.length; i++) {
            this.entries.splice(equivalents[i].idx, 1);
            this.length--;
        }
    }

    //Mutations
    insertMarker() {
        this.entries.push({ type: FormattingElementList.MARKER_ENTRY });
        this.length++;
    }

    //Appends an element entry after enforcing the Noah's Ark condition.
    pushElement(element, token) {
        this._ensureNoahArkCondition(element);

        this.entries.push({
            type: FormattingElementList.ELEMENT_ENTRY,
            element: element,
            token: token
        });

        this.length++;
    }

    //Inserts an element entry right after the entry referenced by `this.bookmark`.
    //When the bookmark is not found the entry is inserted at the start of the list.
    insertElementAfterBookmark(element, token) {
        let bookmarkIdx = this.length - 1;

        for (; bookmarkIdx >= 0; bookmarkIdx--) {
            if (this.entries[bookmarkIdx] === this.bookmark) {
                break;
            }
        }

        this.entries.splice(bookmarkIdx + 1, 0, {
            type: FormattingElementList.ELEMENT_ENTRY,
            element: element,
            token: token
        });

        this.length++;
    }

    //Removes the given entry object (compared by identity); no-op when it is absent.
    removeEntry(entry) {
        for (let i = this.length - 1; i >= 0; i--) {
            if (this.entries[i] === entry) {
                this.entries.splice(i, 1);
                this.length--;
                break;
            }
        }
    }

    //Pops entries from the end up to and including the last marker
    //(or until the list is empty).
    clearToLastMarker() {
        while (this.length) {
            const entry = this.entries.pop();

            this.length--;

            if (entry.type === FormattingElementList.MARKER_ENTRY) {
                break;
            }
        }
    }

    //Search
    //Returns the most recent element entry with the given tag name that sits after
    //the last marker, or null.
    getElementEntryInScopeWithTagName(tagName) {
        for (let i = this.length - 1; i >= 0; i--) {
            const entry = this.entries[i];

            if (entry.type === FormattingElementList.MARKER_ENTRY) {
                return null;
            }

            if (this.treeAdapter.getTagName(entry.element) === tagName) {
                return entry;
            }
        }

        return null;
    }

    //Returns the entry holding `element` (markers are ignored), or null.
    getElementEntry(element) {
        for (let i = this.length - 1; i >= 0; i--) {
            const entry = this.entries[i];

            if (entry.type === FormattingElementList.ELEMENT_ENTRY && entry.element === element) {
                return entry;
            }
        }

        return null;
    }
}

//Entry types
FormattingElementList.MARKER_ENTRY = 'MARKER_ENTRY';
FormattingElementList.ELEMENT_ENTRY = 'ELEMENT_ENTRY';
module.exports = FormattingElementList;

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,482 @@
'use strict';
const HTML = require('../common/html');
//Aliases
const $ = HTML.TAG_NAMES;
const NS = HTML.NAMESPACES;
//Element utils
//OPTIMIZATION: Integer comparisons are low-cost, so we can use very fast tag name length filters here.
//It's faster than using dictionary.
//True for: p, rb, rp, rt, dd, dt, li, rtc, option, optgroup.
function isImpliedEndTagRequired(tn) {
    const len = tn.length;

    if (len === 1) {
        return tn === $.P;
    }

    if (len === 2) {
        return tn === $.RB || tn === $.RP || tn === $.RT || tn === $.DD || tn === $.DT || tn === $.LI;
    }

    if (len === 3) {
        return tn === $.RTC;
    }

    if (len === 6) {
        return tn === $.OPTION;
    }

    if (len === 8) {
        return tn === $.OPTGROUP;
    }

    return false;
}
//Same length-filtered lookup as isImpliedEndTagRequired, extended with the table tags:
//td, th, tr, tbody, tfoot, thead, caption and colgroup.
function isImpliedEndTagRequiredThoroughly(tn) {
    const len = tn.length;

    if (len === 1) {
        return tn === $.P;
    }

    if (len === 2) {
        const isRubyOrListTag =
            tn === $.RB || tn === $.RP || tn === $.RT || tn === $.DD || tn === $.DT || tn === $.LI;

        return isRubyOrListTag || tn === $.TD || tn === $.TH || tn === $.TR;
    }

    if (len === 3) {
        return tn === $.RTC;
    }

    if (len === 5) {
        return tn === $.TBODY || tn === $.TFOOT || tn === $.THEAD;
    }

    if (len === 6) {
        return tn === $.OPTION;
    }

    if (len === 7) {
        return tn === $.CAPTION;
    }

    if (len === 8) {
        return tn === $.OPTGROUP || tn === $.COLGROUP;
    }

    return false;
}
//True when the (tag name, namespace) pair is one of the scope-terminating elements:
//  HTML:   td, th, html, table, applet, object, caption, marquee, template
//  MathML: mi, mo, mn, ms, mtext, annotation-xml
//  SVG:    desc, title, foreignObject
//Tag names are pre-filtered by length before any string comparison is made.
function isScopingElement(tn, ns) {
    const len = tn.length;

    if (len === 2) {
        if (tn === $.TD || tn === $.TH) {
            return ns === NS.HTML;
        }

        if (tn === $.MI || tn === $.MO || tn === $.MN || tn === $.MS) {
            return ns === NS.MATHML;
        }

        return false;
    }

    if (len === 4) {
        if (tn === $.HTML) {
            return ns === NS.HTML;
        }

        if (tn === $.DESC) {
            return ns === NS.SVG;
        }

        return false;
    }

    if (len === 5) {
        if (tn === $.TABLE) {
            return ns === NS.HTML;
        }

        if (tn === $.MTEXT) {
            return ns === NS.MATHML;
        }

        if (tn === $.TITLE) {
            return ns === NS.SVG;
        }

        return false;
    }

    if (len === 6) {
        return (tn === $.APPLET || tn === $.OBJECT) && ns === NS.HTML;
    }

    if (len === 7) {
        return (tn === $.CAPTION || tn === $.MARQUEE) && ns === NS.HTML;
    }

    if (len === 8) {
        return tn === $.TEMPLATE && ns === NS.HTML;
    }

    if (len === 13) {
        return tn === $.FOREIGN_OBJECT && ns === NS.SVG;
    }

    if (len === 14) {
        return tn === $.ANNOTATION_XML && ns === NS.MATHML;
    }

    return false;
}
//Stack of open elements
//`items[0..stackTop]` is the live part of the stack (slots above `stackTop` may hold
//stale references after a pop). `current`, `currentTagName` and `currentTmplContent`
//cache information about the topmost element; `tmplCount` counts HTML <template>
//elements currently on the stack.
class OpenElementStack {
    constructor(document, treeAdapter) {
        this.stackTop = -1;
        this.items = [];
        this.current = document;
        this.currentTagName = null;
        this.currentTmplContent = null;
        this.tmplCount = 0;
        this.treeAdapter = treeAdapter;
    }

    //Index of element
    //Searches from the top of the stack; returns -1 when the element is absent.
    _indexOf(element) {
        let idx = -1;

        for (let i = this.stackTop; i >= 0; i--) {
            if (this.items[i] === element) {
                idx = i;
                break;
            }
        }
        return idx;
    }

    //Update current element
    //True when the cached current element is an HTML-namespace <template>.
    _isInTemplate() {
        return this.currentTagName === $.TEMPLATE && this.treeAdapter.getNamespaceURI(this.current) === NS.HTML;
    }

    //Refreshes the cached `current*` fields from `items[stackTop]`.
    _updateCurrentElement() {
        this.current = this.items[this.stackTop];
        this.currentTagName = this.current && this.treeAdapter.getTagName(this.current);

        this.currentTmplContent = this._isInTemplate() ? this.treeAdapter.getTemplateContent(this.current) : null;
    }

    //Mutations
    push(element) {
        this.items[++this.stackTop] = element;
        this._updateCurrentElement();

        if (this._isInTemplate()) {
            this.tmplCount++;
        }
    }

    pop() {
        this.stackTop--;

        //NOTE: the cached fields still describe the element being popped at this point
        if (this.tmplCount > 0 && this._isInTemplate()) {
            this.tmplCount--;
        }

        this._updateCurrentElement();
    }

    replace(oldElement, newElement) {
        const idx = this._indexOf(oldElement);

        this.items[idx] = newElement;

        if (idx === this.stackTop) {
            this._updateCurrentElement();
        }
    }

    insertAfter(referenceElement, newElement) {
        const insertionIdx = this._indexOf(referenceElement) + 1;

        this.items.splice(insertionIdx, 0, newElement);

        if (insertionIdx === ++this.stackTop) {
            this._updateCurrentElement();
        }
    }

    //Pops elements until an HTML-namespace element with the given tag name has been popped.
    popUntilTagNamePopped(tagName) {
        while (this.stackTop > -1) {
            const tn = this.currentTagName;
            const ns = this.treeAdapter.getNamespaceURI(this.current);

            this.pop();

            if (tn === tagName && ns === NS.HTML) {
                break;
            }
        }
    }

    //Pops elements until the given element (compared by identity) has been popped.
    popUntilElementPopped(element) {
        while (this.stackTop > -1) {
            const poppedElement = this.current;

            this.pop();

            if (poppedElement === element) {
                break;
            }
        }
    }

    //Pops elements until an HTML-namespace h1-h6 element has been popped.
    popUntilNumberedHeaderPopped() {
        while (this.stackTop > -1) {
            const tn = this.currentTagName;
            const ns = this.treeAdapter.getNamespaceURI(this.current);

            this.pop();

            //NOTE: the namespace check has to apply to every header tag name, not only
            //to the last alternative, hence the parentheses around the `||` chain.
            if (
                (tn === $.H1 || tn === $.H2 || tn === $.H3 || tn === $.H4 || tn === $.H5 || tn === $.H6) &&
                ns === NS.HTML
            ) {
                break;
            }
        }
    }

    //Pops elements until an HTML-namespace td or th element has been popped.
    popUntilTableCellPopped() {
        while (this.stackTop > -1) {
            const tn = this.currentTagName;
            const ns = this.treeAdapter.getNamespaceURI(this.current);

            this.pop();

            //NOTE: the namespace check applies to both tag names
            if ((tn === $.TD || tn === $.TH) && ns === NS.HTML) {
                break;
            }
        }
    }

    popAllUpToHtmlElement() {
        //NOTE: here we assume that root <html> element is always first in the open element stack, so
        //we perform this fast stack clean up.
        this.stackTop = 0;
        //NOTE: only the root <html> element is left, so no template can remain on the stack
        this.tmplCount = 0;
        this._updateCurrentElement();
    }

    clearBackToTableContext() {
        while (
            (this.currentTagName !== $.TABLE && this.currentTagName !== $.TEMPLATE && this.currentTagName !== $.HTML) ||
            this.treeAdapter.getNamespaceURI(this.current) !== NS.HTML
        ) {
            this.pop();
        }
    }

    clearBackToTableBodyContext() {
        while (
            (this.currentTagName !== $.TBODY &&
                this.currentTagName !== $.TFOOT &&
                this.currentTagName !== $.THEAD &&
                this.currentTagName !== $.TEMPLATE &&
                this.currentTagName !== $.HTML) ||
            this.treeAdapter.getNamespaceURI(this.current) !== NS.HTML
        ) {
            this.pop();
        }
    }

    clearBackToTableRowContext() {
        while (
            (this.currentTagName !== $.TR && this.currentTagName !== $.TEMPLATE && this.currentTagName !== $.HTML) ||
            this.treeAdapter.getNamespaceURI(this.current) !== NS.HTML
        ) {
            this.pop();
        }
    }

    //Removes the element from anywhere in the stack; no-op when it is absent.
    remove(element) {
        for (let i = this.stackTop; i >= 0; i--) {
            if (this.items[i] === element) {
                this.items.splice(i, 1);
                this.stackTop--;
                this._updateCurrentElement();
                break;
            }
        }
    }

    //Search
    tryPeekProperlyNestedBodyElement() {
        //Properly nested <body> element (should be second element in stack).
        const element = this.items[1];

        return element && this.treeAdapter.getTagName(element) === $.BODY ? element : null;
    }

    contains(element) {
        return this._indexOf(element) > -1;
    }

    //Returns the element directly below `element` on the stack, or null when
    //`element` is at the bottom or is not on the stack.
    getCommonAncestor(element) {
        let elementIdx = this._indexOf(element);

        return --elementIdx >= 0 ? this.items[elementIdx] : null;
    }

    isRootHtmlElementCurrent() {
        return this.stackTop === 0 && this.currentTagName === $.HTML;
    }

    //Element in scope
    //NOTE: every has*Scope method walks the stack from the top and returns true
    //when the walk runs off the bottom of the stack without hitting a boundary.
    hasInScope(tagName) {
        for (let i = this.stackTop; i >= 0; i--) {
            const tn = this.treeAdapter.getTagName(this.items[i]);
            const ns = this.treeAdapter.getNamespaceURI(this.items[i]);

            if (tn === tagName && ns === NS.HTML) {
                return true;
            }

            if (isScopingElement(tn, ns)) {
                return false;
            }
        }

        return true;
    }

    hasNumberedHeaderInScope() {
        for (let i = this.stackTop; i >= 0; i--) {
            const tn = this.treeAdapter.getTagName(this.items[i]);
            const ns = this.treeAdapter.getNamespaceURI(this.items[i]);

            if (
                (tn === $.H1 || tn === $.H2 || tn === $.H3 || tn === $.H4 || tn === $.H5 || tn === $.H6) &&
                ns === NS.HTML
            ) {
                return true;
            }

            if (isScopingElement(tn, ns)) {
                return false;
            }
        }

        return true;
    }

    //Like hasInScope, with HTML <ul> and <ol> as additional boundaries.
    hasInListItemScope(tagName) {
        for (let i = this.stackTop; i >= 0; i--) {
            const tn = this.treeAdapter.getTagName(this.items[i]);
            const ns = this.treeAdapter.getNamespaceURI(this.items[i]);

            if (tn === tagName && ns === NS.HTML) {
                return true;
            }

            if (((tn === $.UL || tn === $.OL) && ns === NS.HTML) || isScopingElement(tn, ns)) {
                return false;
            }
        }

        return true;
    }

    //Like hasInScope, with HTML <button> as an additional boundary.
    hasInButtonScope(tagName) {
        for (let i = this.stackTop; i >= 0; i--) {
            const tn = this.treeAdapter.getTagName(this.items[i]);
            const ns = this.treeAdapter.getNamespaceURI(this.items[i]);

            if (tn === tagName && ns === NS.HTML) {
                return true;
            }

            if ((tn === $.BUTTON && ns === NS.HTML) || isScopingElement(tn, ns)) {
                return false;
            }
        }

        return true;
    }

    //Only HTML-namespace elements are considered; table, template and html are the boundaries.
    hasInTableScope(tagName) {
        for (let i = this.stackTop; i >= 0; i--) {
            const tn = this.treeAdapter.getTagName(this.items[i]);
            const ns = this.treeAdapter.getNamespaceURI(this.items[i]);

            if (ns !== NS.HTML) {
                continue;
            }

            if (tn === tagName) {
                return true;
            }

            if (tn === $.TABLE || tn === $.TEMPLATE || tn === $.HTML) {
                return false;
            }
        }

        return true;
    }

    //True when tbody, thead or tfoot is found before table or html (HTML namespace only).
    hasTableBodyContextInTableScope() {
        for (let i = this.stackTop; i >= 0; i--) {
            const tn = this.treeAdapter.getTagName(this.items[i]);
            const ns = this.treeAdapter.getNamespaceURI(this.items[i]);

            if (ns !== NS.HTML) {
                continue;
            }

            if (tn === $.TBODY || tn === $.THEAD || tn === $.TFOOT) {
                return true;
            }

            if (tn === $.TABLE || tn === $.HTML) {
                return false;
            }
        }

        return true;
    }

    //Only HTML-namespace elements are considered; anything other than option and
    //optgroup is a boundary.
    hasInSelectScope(tagName) {
        for (let i = this.stackTop; i >= 0; i--) {
            const tn = this.treeAdapter.getTagName(this.items[i]);
            const ns = this.treeAdapter.getNamespaceURI(this.items[i]);

            if (ns !== NS.HTML) {
                continue;
            }

            if (tn === tagName) {
                return true;
            }

            if (tn !== $.OPTION && tn !== $.OPTGROUP) {
                return false;
            }
        }

        return true;
    }

    //Implied end tags
    generateImpliedEndTags() {
        while (isImpliedEndTagRequired(this.currentTagName)) {
            this.pop();
        }
    }

    generateImpliedEndTagsThoroughly() {
        while (isImpliedEndTagRequiredThoroughly(this.currentTagName)) {
            this.pop();
        }
    }

    generateImpliedEndTagsWithExclusion(exclusionTagName) {
        while (isImpliedEndTagRequired(this.currentTagName) && this.currentTagName !== exclusionTagName) {
            this.pop();
        }
    }
}
module.exports = OpenElementStack;

View File

@ -0,0 +1,176 @@
'use strict';
const defaultTreeAdapter = require('../tree-adapters/default');
const mergeOptions = require('../utils/merge-options');
const doctype = require('../common/doctype');
const HTML = require('../common/html');
//Aliases
const $ = HTML.TAG_NAMES;
const NS = HTML.NAMESPACES;
//Default serializer options
//Passed as the first argument to mergeOptions() in the Serializer constructor;
//the only default is the tree adapter.
const DEFAULT_OPTIONS = {
treeAdapter: defaultTreeAdapter
};
//Escaping regexes
//Each pattern matches a single character and carries the global flag:
//'&', U+00A0 (no-break space), '"', '<' and '>' respectively.
const AMP_REGEX = /&/g;
const NBSP_REGEX = /\u00a0/g;
const DOUBLE_QUOTE_REGEX = /"/g;
const LT_REGEX = /</g;
const GT_REGEX = />/g;
//Serializer
class Serializer {
    //NOTE: `node` is the container whose child nodes get serialized (the container itself
    //is not emitted). `options` is merged over DEFAULT_OPTIONS; `options.treeAdapter`
    //is used for every read of the tree.
    constructor(node, options) {
        this.options = mergeOptions(DEFAULT_OPTIONS, options);
        this.treeAdapter = this.options.treeAdapter;
        this.html = '';
        this.startNode = node;
    }

    //API
    //NOTE: appends the markup of the start node's children to `this.html` and returns it.
    serialize() {
        this._serializeChildNodes(this.startNode);
        return this.html;
    }

    //Internals
    //NOTE: dispatches every child to the serializer matching its node type.
    //Nodes that match none of the four adapter predicates are silently skipped.
    _serializeChildNodes(parentNode) {
        const childNodes = this.treeAdapter.getChildNodes(parentNode);

        if (!childNodes) {
            return;
        }

        for (let i = 0, cnLength = childNodes.length; i < cnLength; i++) {
            const currentNode = childNodes[i];

            if (this.treeAdapter.isElementNode(currentNode)) {
                this._serializeElement(currentNode);
            } else if (this.treeAdapter.isTextNode(currentNode)) {
                this._serializeTextNode(currentNode);
            } else if (this.treeAdapter.isCommentNode(currentNode)) {
                this._serializeCommentNode(currentNode);
            } else if (this.treeAdapter.isDocumentTypeNode(currentNode)) {
                this._serializeDocumentTypeNode(currentNode);
            }
        }
    }

    //NOTE: void elements are HTML elements only. A foreign element (SVG, MathML) that merely
    //shares a void tag name can have children, so it must get its content and end tag.
    _isVoidElement(tn, ns) {
        if (ns !== NS.HTML) {
            return false;
        }

        switch (tn) {
            case $.AREA:
            case $.BASE:
            case $.BASEFONT:
            case $.BGSOUND:
            case $.BR:
            case $.COL:
            case $.EMBED:
            case $.FRAME:
            case $.HR:
            case $.IMG:
            case $.INPUT:
            case $.KEYGEN:
            case $.LINK:
            case $.META:
            case $.PARAM:
            case $.SOURCE:
            case $.TRACK:
            case $.WBR:
                return true;
            default:
                return false;
        }
    }

    //NOTE: only HTML elements from this list hold text that is emitted verbatim.
    //Text inside a same-named foreign element (e.g. SVG <style>) is ordinary text
    //and has to be escaped, otherwise the output does not round-trip.
    _hasUnescapedText(tn, ns) {
        if (ns !== NS.HTML) {
            return false;
        }

        switch (tn) {
            case $.STYLE:
            case $.SCRIPT:
            case $.XMP:
            case $.IFRAME:
            case $.NOEMBED:
            case $.NOFRAMES:
            case $.PLAINTEXT:
            case $.NOSCRIPT:
                return true;
            default:
                return false;
        }
    }

    //NOTE: emits the start tag, then (unless the element is void) its children and end tag.
    _serializeElement(node) {
        const tn = this.treeAdapter.getTagName(node);
        const ns = this.treeAdapter.getNamespaceURI(node);

        this.html += '<' + tn;
        this._serializeAttributes(node);
        this.html += '>';

        if (this._isVoidElement(tn, ns)) {
            return;
        }

        //NOTE: children of an HTML <template> live in its content fragment.
        const childNodesHolder =
            tn === $.TEMPLATE && ns === NS.HTML ? this.treeAdapter.getTemplateContent(node) : node;

        this._serializeChildNodes(childNodesHolder);
        this.html += '</' + tn + '>';
    }

    //NOTE: emits ` name="value"` for each attribute. The qualified name is rebuilt from the
    //attribute namespace; values are escaped in attribute mode.
    _serializeAttributes(node) {
        const attrs = this.treeAdapter.getAttrList(node);

        for (let i = 0, attrsLength = attrs.length; i < attrsLength; i++) {
            const attr = attrs[i];
            const value = Serializer.escapeString(attr.value, true);

            this.html += ' ';

            if (!attr.namespace) {
                this.html += attr.name;
            } else if (attr.namespace === NS.XML) {
                this.html += 'xml:' + attr.name;
            } else if (attr.namespace === NS.XMLNS) {
                //NOTE: the bare `xmlns` attribute carries no prefix.
                if (attr.name !== 'xmlns') {
                    this.html += 'xmlns:';
                }

                this.html += attr.name;
            } else if (attr.namespace === NS.XLINK) {
                this.html += 'xlink:' + attr.name;
            } else {
                this.html += attr.prefix + ':' + attr.name;
            }

            this.html += '="' + value + '"';
        }
    }

    //NOTE: text is emitted verbatim inside HTML raw-text containers and escaped everywhere else,
    //including text whose parent is missing or is not an element.
    _serializeTextNode(node) {
        const content = this.treeAdapter.getTextNodeContent(node);
        const parent = this.treeAdapter.getParentNode(node);
        let isVerbatim = false;

        if (parent && this.treeAdapter.isElementNode(parent)) {
            isVerbatim = this._hasUnescapedText(
                this.treeAdapter.getTagName(parent),
                this.treeAdapter.getNamespaceURI(parent)
            );
        }

        this.html += isVerbatim ? content : Serializer.escapeString(content, false);
    }

    _serializeCommentNode(node) {
        this.html += '<!--' + this.treeAdapter.getCommentNodeContent(node) + '-->';
    }

    //NOTE: only the doctype name is passed on; public and system ids are given as null.
    _serializeDocumentTypeNode(node) {
        const name = this.treeAdapter.getDocumentTypeNodeName(node);

        this.html += '<' + doctype.serializeContent(name, null, null) + '>';
    }
}
// NOTE: used in tests and by rewriting stream
// Escapes `&` and U+00A0 always, `"` when attrMode is truthy, `<` and `>` otherwise.
Serializer.escapeString = function(str, attrMode) {
    str = str.replace(AMP_REGEX, '&amp;').replace(NBSP_REGEX, '&nbsp;');

    if (attrMode) {
        str = str.replace(DOUBLE_QUOTE_REGEX, '&quot;');
    } else {
        str = str.replace(LT_REGEX, '&lt;').replace(GT_REGEX, '&gt;');
    }

    return str;
};
module.exports = Serializer;

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,159 @@
'use strict';
const unicode = require('../common/unicode');
const ERR = require('../common/error-codes');
//Aliases
//NOTE: shorthand for the code point table exported by ../common/unicode.
const $ = unicode.CODE_POINTS;
//Const
//NOTE: 1 << 16 === 65536. Used as the initial Preprocessor.bufferWaterline, the position
//that `pos` must exceed before dropParsedChunk discards the already consumed input.
const DEFAULT_BUFFER_WATERLINE = 1 << 16;
//Preprocessor
//NOTE: HTML input preprocessing
//(see: http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#preprocessing-the-input-stream)
class Preprocessor {
    constructor() {
        //NOTE: buffered input; stays null until the first write().
        this.html = null;
        //NOTE: index of the code unit returned by the latest advance(); -1 before any read.
        this.pos = -1;
        //NOTE: most recent position that retreat() has to hop over, plus the older ones.
        this.lastGapPos = -1;
        this.lastCharPos = -1;
        this.gapStack = [];
        this.skipNextNewLine = false;
        this.lastChunkWritten = false;
        this.endOfChunkHit = false;
        this.bufferWaterline = DEFAULT_BUFFER_WATERLINE;
    }

    _err() {
        // NOTE: err reporting is noop by default. Enabled by mixin.
    }

    //NOTE: remembers the previous gap and marks the current position as the newest one.
    _addGap() {
        this.gapStack.push(this.lastGapPos);
        this.lastGapPos = this.pos;
    }

    //NOTE: `cp` is a surrogate code unit at `this.pos`. Returns the combined code point when
    //a pair can be formed, EOF when the decision needs more input, or `cp` itself
    //(after reporting an error) when the surrogate is isolated.
    _processSurrogate(cp) {
        const atBufferEnd = this.pos === this.lastCharPos;

        if (atBufferEnd) {
            //NOTE: we are at the end of a chunk, therefore we can't infer surrogate pair yet.
            if (!this.lastChunkWritten) {
                this.endOfChunkHit = true;
                return $.EOF;
            }
        } else {
            //NOTE: try to peek a surrogate pair
            const trailingCp = this.html.charCodeAt(this.pos + 1);

            if (unicode.isSurrogatePair(trailingCp)) {
                //NOTE: consume the second half of the pair and register it as a gap
                //so that retreat() steps over both code units at once.
                this.pos++;
                this._addGap();
                return unicode.getSurrogatePairCodePoint(cp, trailingCp);
            }
        }

        //NOTE: isolated surrogate
        this._err(ERR.surrogateInInputStream);
        return cp;
    }

    //NOTE: once the cursor has passed the waterline, discards everything before it
    //and rebases the cursor, the last-char index and the gap bookkeeping.
    dropParsedChunk() {
        if (!(this.pos > this.bufferWaterline)) {
            return;
        }

        const consumed = this.pos;

        this.lastCharPos -= consumed;
        this.html = this.html.substring(consumed);
        this.pos = 0;
        this.lastGapPos = -1;
        this.gapStack = [];
    }

    //NOTE: appends `chunk` to the buffer; `isLastChunk` tells whether more input may follow.
    write(chunk, isLastChunk) {
        this.html = this.html ? this.html + chunk : chunk;
        this.lastCharPos = this.html.length - 1;
        this.endOfChunkHit = false;
        this.lastChunkWritten = isLastChunk;
    }

    //NOTE: splices `chunk` into the buffer right after the current position.
    insertHtmlAtCurrentPos(chunk) {
        const splitAt = this.pos + 1;
        const head = this.html.substring(0, splitAt);
        const tail = this.html.substring(splitAt, this.html.length);

        this.html = head + chunk + tail;
        this.lastCharPos = this.html.length - 1;
        this.endOfChunkHit = false;
    }

    //NOTE: moves the cursor one step forward and returns the code point found there,
    //or EOF when the buffered input is exhausted.
    advance() {
        this.pos++;

        if (this.pos > this.lastCharPos) {
            this.endOfChunkHit = !this.lastChunkWritten;
            return $.EOF;
        }

        let cp = this.html.charCodeAt(this.pos);

        //NOTE: any U+000A LINE FEED (LF) characters that immediately follow a U+000D CARRIAGE RETURN (CR) character
        //must be ignored.
        if (cp === $.LINE_FEED && this.skipNextNewLine) {
            this.skipNextNewLine = false;
            this._addGap();
            return this.advance();
        }

        //NOTE: all U+000D CARRIAGE RETURN (CR) characters must be converted to U+000A LINE FEED (LF) characters
        if (cp === $.CARRIAGE_RETURN) {
            this.skipNextNewLine = true;
            return $.LINE_FEED;
        }

        this.skipNextNewLine = false;

        if (unicode.isSurrogate(cp)) {
            cp = this._processSurrogate(cp);
        }

        //OPTIMIZATION: first check if code point is in the common allowed
        //range (ASCII alphanumeric, whitespaces, big chunk of BMP)
        //before going into detailed performance cost validation.
        const isPrintableAscii = cp > 0x1f && cp < 0x7f;
        const isLineBreak = cp === $.LINE_FEED || cp === $.CARRIAGE_RETURN;
        const isCommonBmp = cp > 0x9f && cp < 0xfdd0;

        if (!(isPrintableAscii || isLineBreak || isCommonBmp)) {
            this._checkForProblematicCharacters(cp);
        }

        return cp;
    }

    //NOTE: reports at most one error per code point; the control check wins over the noncharacter check.
    _checkForProblematicCharacters(cp) {
        if (unicode.isControlCodePoint(cp)) {
            this._err(ERR.controlCharacterInInputStream);
            return;
        }

        if (unicode.isUndefinedCodePoint(cp)) {
            this._err(ERR.noncharacterInInputStream);
        }
    }

    //NOTE: steps the cursor back by one, or by two when it currently sits on a recorded gap.
    retreat() {
        const onGap = this.pos === this.lastGapPos;

        if (onGap) {
            this.lastGapPos = this.gapStack.pop();
        }

        this.pos -= onGap ? 2 : 1;
    }
}
module.exports = Preprocessor;

View File

@ -0,0 +1,221 @@
'use strict';
const { DOCUMENT_MODE } = require('../common/html');
//Node construction
exports.createDocument = function() {
return {
nodeName: '#document',
mode: DOCUMENT_MODE.NO_QUIRKS,
childNodes: []
};
};
exports.createDocumentFragment = function() {
return {
nodeName: '#document-fragment',
childNodes: []
};
};
exports.createElement = function(tagName, namespaceURI, attrs) {
return {
nodeName: tagName,
tagName: tagName,
attrs: attrs,
namespaceURI: namespaceURI,
childNodes: [],
parentNode: null
};
};
exports.createCommentNode = function(data) {
return {
nodeName: '#comment',
data: data,
parentNode: null
};
};
//NOTE: module-private factory; the text is kept in `value` and the node starts detached.
const createTextNode = function(value) {
    const textNode = {
        nodeName: '#text',
        value,
        parentNode: null
    };

    return textNode;
};
//Tree mutation
//NOTE: adds `newNode` as the last child of `parentNode` and points it back at its new parent.
const appendChild = (exports.appendChild = function(parentNode, newNode) {
    const siblings = parentNode.childNodes;

    siblings.push(newNode);
    newNode.parentNode = parentNode;
});
//NOTE: places `newNode` at the index currently held by `referenceNode` among the parent's children.
const insertBefore = (exports.insertBefore = function(parentNode, newNode, referenceNode) {
    const siblings = parentNode.childNodes;
    const referenceIdx = siblings.indexOf(referenceNode);

    siblings.splice(referenceIdx, 0, newNode);
    newNode.parentNode = parentNode;
});
exports.setTemplateContent = function(templateElement, contentElement) {
templateElement.content = contentElement;
};
exports.getTemplateContent = function(templateElement) {
return templateElement.content;
};
exports.setDocumentType = function(document, name, publicId, systemId) {
let doctypeNode = null;
for (let i = 0; i < document.childNodes.length; i++) {
if (document.childNodes[i].nodeName === '#documentType') {
doctypeNode = document.childNodes[i];
break;
}
}
if (doctypeNode) {
doctypeNode.name = name;
doctypeNode.publicId = publicId;
doctypeNode.systemId = systemId;
} else {
appendChild(document, {
nodeName: '#documentType',
name: name,
publicId: publicId,
systemId: systemId
});
}
};
exports.setDocumentMode = function(document, mode) {
document.mode = mode;
};
exports.getDocumentMode = function(document) {
return document.mode;
};
exports.detachNode = function(node) {
if (node.parentNode) {
const idx = node.parentNode.childNodes.indexOf(node);
node.parentNode.childNodes.splice(idx, 1);
node.parentNode = null;
}
};
exports.insertText = function(parentNode, text) {
if (parentNode.childNodes.length) {
const prevNode = parentNode.childNodes[parentNode.childNodes.length - 1];
if (prevNode.nodeName === '#text') {
prevNode.value += text;
return;
}
}
appendChild(parentNode, createTextNode(text));
};
exports.insertTextBefore = function(parentNode, text, referenceNode) {
const prevNode = parentNode.childNodes[parentNode.childNodes.indexOf(referenceNode) - 1];
if (prevNode && prevNode.nodeName === '#text') {
prevNode.value += text;
} else {
insertBefore(parentNode, createTextNode(text), referenceNode);
}
};
exports.adoptAttributes = function(recipient, attrs) {
const recipientAttrsMap = [];
for (let i = 0; i < recipient.attrs.length; i++) {
recipientAttrsMap.push(recipient.attrs[i].name);
}
for (let j = 0; j < attrs.length; j++) {
if (recipientAttrsMap.indexOf(attrs[j].name) === -1) {
recipient.attrs.push(attrs[j]);
}
}
};
//Tree traversing
exports.getFirstChild = function(node) {
return node.childNodes[0];
};
exports.getChildNodes = function(node) {
return node.childNodes;
};
exports.getParentNode = function(node) {
return node.parentNode;
};
exports.getAttrList = function(element) {
return element.attrs;
};
//Node data
exports.getTagName = function(element) {
return element.tagName;
};
exports.getNamespaceURI = function(element) {
return element.namespaceURI;
};
exports.getTextNodeContent = function(textNode) {
return textNode.value;
};
exports.getCommentNodeContent = function(commentNode) {
return commentNode.data;
};
exports.getDocumentTypeNodeName = function(doctypeNode) {
return doctypeNode.name;
};
exports.getDocumentTypeNodePublicId = function(doctypeNode) {
return doctypeNode.publicId;
};
exports.getDocumentTypeNodeSystemId = function(doctypeNode) {
return doctypeNode.systemId;
};
//Node types
exports.isTextNode = function(node) {
return node.nodeName === '#text';
};
exports.isCommentNode = function(node) {
return node.nodeName === '#comment';
};
exports.isDocumentTypeNode = function(node) {
return node.nodeName === '#documentType';
};
exports.isElementNode = function(node) {
return !!node.tagName;
};
// Source code location
exports.setNodeSourceCodeLocation = function(node, location) {
node.sourceCodeLocation = location;
};
exports.getNodeSourceCodeLocation = function(node) {
return node.sourceCodeLocation;
};
exports.updateNodeSourceCodeLocation = function(node, endLocation) {
node.sourceCodeLocation = Object.assign(node.sourceCodeLocation, endLocation);
};

View File

@ -0,0 +1,13 @@
'use strict';
module.exports = function mergeOptions(defaults, options) {
options = options || Object.create(null);
return [defaults, options].reduce((merged, optObj) => {
Object.keys(optObj).forEach(key => {
merged[key] = optObj[key];
});
return merged;
}, Object.create(null));
};

View File

@ -0,0 +1,39 @@
'use strict';
class Mixin {
    //NOTE: asks the subclass for its overrides and installs every function-valued one on `host`.
    //The host's previous value for each replaced key is stored in `originalMethods`, the same
    //object that was handed to _getOverriddenMethods, so overrides can call through to it.
    constructor(host) {
        const originalMethods = {};
        const overriddenMethods = this._getOverriddenMethods(this, originalMethods);

        Object.keys(overriddenMethods)
            .filter(methodName => typeof overriddenMethods[methodName] === 'function')
            .forEach(methodName => {
                originalMethods[methodName] = host[methodName];
                host[methodName] = overriddenMethods[methodName];
            });
    }

    //NOTE: subclasses must override this; the base implementation always throws.
    _getOverriddenMethods() {
        throw new Error('Not implemented');
    }
}
//NOTE: installs a `Ctor` mixin on `host` at most once. Installed mixins are tracked in
//`host.__mixins`; when one built by `Ctor` is already there it is returned as is
//and `opts` is ignored.
Mixin.install = function(host, Ctor, opts) {
    if (!host.__mixins) {
        host.__mixins = [];
    }

    const alreadyInstalled = host.__mixins.find(candidate => candidate.constructor === Ctor);

    if (alreadyInstalled) {
        return alreadyInstalled;
    }

    const mixin = new Ctor(host, opts);

    host.__mixins.push(mixin);
    return mixin;
};
module.exports = Mixin;

View File

@ -0,0 +1,35 @@
{
"name": "parse5",
"description": "HTML parser and serializer.",
"version": "6.0.1",
"author": "Ivan Nikulin <ifaaan@gmail.com> (https://github.com/inikulin)",
"contributors": "https://github.com/inikulin/parse5/graphs/contributors",
"homepage": "https://github.com/inikulin/parse5",
"keywords": [
"html",
"parser",
"html5",
"WHATWG",
"specification",
"fast",
"html parser",
"html5 parser",
"htmlparser",
"parse5",
"serializer",
"html serializer",
"htmlserializer",
"parse",
"serialize"
],
"license": "MIT",
"main": "./lib/index.js",
"repository": {
"type": "git",
"url": "git://github.com/inikulin/parse5.git"
},
"files": [
"lib"
],
"gitHead": "37227a3429584903cbd1799dade995266fc2dbe6"
}

View File

@ -0,0 +1,27 @@
{
"name": "parse5-htmlparser2-tree-adapter",
"description": "htmlparser2 tree adapter for parse5.",
"version": "6.0.1",
"author": "Ivan Nikulin <ifaaan@gmail.com> (https://github.com/inikulin)",
"contributors": "https://github.com/inikulin/parse5/graphs/contributors",
"homepage": "https://github.com/inikulin/parse5",
"keywords": [
"parse5",
"parser",
"tree adapter",
"htmlparser2"
],
"license": "MIT",
"main": "./lib/index.js",
"dependencies": {
"parse5": "^6.0.1"
},
"repository": {
"type": "git",
"url": "git://github.com/inikulin/parse5.git"
},
"files": [
"lib"
],
"gitHead": "37227a3429584903cbd1799dade995266fc2dbe6"
}