forked from FINAKON/HelpProject
1. Initial commit: boilerplate code and a proof of concept (POC) that realizes context-sensitive help.
2. Frontend code is written in ReactJS.
3. Backend code is written in Java with the Spring Boot framework.
4. Frontend start. Prerequisites: node, npm. Run `npm run dev` to start the frontend Vite server.
5. Backend start. Prerequisites: java, mvn. Run `mvn spring-boot:run` to start the backend server.
6. Visit http://localhost:5173/ for a basic demo of the help feature; press F1 in the text boxes.
7. Visit http://localhost:5173/editor and enter "admin123" to add or modify texts.
Happy coding! Thank you, Bhargava.
2715 lines
105 KiB
JavaScript
2715 lines
105 KiB
JavaScript
"use strict";
|
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
exports.Tokenizer = exports.TokenizerMode = void 0;
|
|
const preprocessor_js_1 = require("./preprocessor.js");
|
|
const unicode_js_1 = require("../common/unicode.js");
|
|
const token_js_1 = require("../common/token.js");
|
|
const decode_1 = require("entities/decode");
|
|
const error_codes_js_1 = require("../common/error-codes.js");
|
|
const html_js_1 = require("../common/html.js");
|
|
//States
|
|
/**
 * Tokenizer states, encoded as a two-way numeric enum: every name maps to its
 * ordinal (`State.DATA === 0`) and every ordinal maps back to its name
 * (`State[0] === "DATA"`).
 *
 * The ordinals are also exposed through `exports.TokenizerMode`, so they must
 * stay stable.
 */
var State;
(function (State) {
    State[State["DATA"] = 0] = "DATA";
    State[State["RCDATA"] = 1] = "RCDATA";
    State[State["RAWTEXT"] = 2] = "RAWTEXT";
    State[State["SCRIPT_DATA"] = 3] = "SCRIPT_DATA";
    State[State["PLAINTEXT"] = 4] = "PLAINTEXT";
    State[State["TAG_OPEN"] = 5] = "TAG_OPEN";
    State[State["END_TAG_OPEN"] = 6] = "END_TAG_OPEN";
    State[State["TAG_NAME"] = 7] = "TAG_NAME";
    State[State["RCDATA_LESS_THAN_SIGN"] = 8] = "RCDATA_LESS_THAN_SIGN";
    State[State["RCDATA_END_TAG_OPEN"] = 9] = "RCDATA_END_TAG_OPEN";
    State[State["RCDATA_END_TAG_NAME"] = 10] = "RCDATA_END_TAG_NAME";
    State[State["RAWTEXT_LESS_THAN_SIGN"] = 11] = "RAWTEXT_LESS_THAN_SIGN";
    State[State["RAWTEXT_END_TAG_OPEN"] = 12] = "RAWTEXT_END_TAG_OPEN";
    State[State["RAWTEXT_END_TAG_NAME"] = 13] = "RAWTEXT_END_TAG_NAME";
    State[State["SCRIPT_DATA_LESS_THAN_SIGN"] = 14] = "SCRIPT_DATA_LESS_THAN_SIGN";
    State[State["SCRIPT_DATA_END_TAG_OPEN"] = 15] = "SCRIPT_DATA_END_TAG_OPEN";
    State[State["SCRIPT_DATA_END_TAG_NAME"] = 16] = "SCRIPT_DATA_END_TAG_NAME";
    State[State["SCRIPT_DATA_ESCAPE_START"] = 17] = "SCRIPT_DATA_ESCAPE_START";
    State[State["SCRIPT_DATA_ESCAPE_START_DASH"] = 18] = "SCRIPT_DATA_ESCAPE_START_DASH";
    State[State["SCRIPT_DATA_ESCAPED"] = 19] = "SCRIPT_DATA_ESCAPED";
    State[State["SCRIPT_DATA_ESCAPED_DASH"] = 20] = "SCRIPT_DATA_ESCAPED_DASH";
    State[State["SCRIPT_DATA_ESCAPED_DASH_DASH"] = 21] = "SCRIPT_DATA_ESCAPED_DASH_DASH";
    State[State["SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN"] = 22] = "SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN";
    State[State["SCRIPT_DATA_ESCAPED_END_TAG_OPEN"] = 23] = "SCRIPT_DATA_ESCAPED_END_TAG_OPEN";
    State[State["SCRIPT_DATA_ESCAPED_END_TAG_NAME"] = 24] = "SCRIPT_DATA_ESCAPED_END_TAG_NAME";
    State[State["SCRIPT_DATA_DOUBLE_ESCAPE_START"] = 25] = "SCRIPT_DATA_DOUBLE_ESCAPE_START";
    State[State["SCRIPT_DATA_DOUBLE_ESCAPED"] = 26] = "SCRIPT_DATA_DOUBLE_ESCAPED";
    State[State["SCRIPT_DATA_DOUBLE_ESCAPED_DASH"] = 27] = "SCRIPT_DATA_DOUBLE_ESCAPED_DASH";
    State[State["SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH"] = 28] = "SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH";
    State[State["SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN"] = 29] = "SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN";
    State[State["SCRIPT_DATA_DOUBLE_ESCAPE_END"] = 30] = "SCRIPT_DATA_DOUBLE_ESCAPE_END";
    State[State["BEFORE_ATTRIBUTE_NAME"] = 31] = "BEFORE_ATTRIBUTE_NAME";
    State[State["ATTRIBUTE_NAME"] = 32] = "ATTRIBUTE_NAME";
    State[State["AFTER_ATTRIBUTE_NAME"] = 33] = "AFTER_ATTRIBUTE_NAME";
    State[State["BEFORE_ATTRIBUTE_VALUE"] = 34] = "BEFORE_ATTRIBUTE_VALUE";
    State[State["ATTRIBUTE_VALUE_DOUBLE_QUOTED"] = 35] = "ATTRIBUTE_VALUE_DOUBLE_QUOTED";
    State[State["ATTRIBUTE_VALUE_SINGLE_QUOTED"] = 36] = "ATTRIBUTE_VALUE_SINGLE_QUOTED";
    State[State["ATTRIBUTE_VALUE_UNQUOTED"] = 37] = "ATTRIBUTE_VALUE_UNQUOTED";
    State[State["AFTER_ATTRIBUTE_VALUE_QUOTED"] = 38] = "AFTER_ATTRIBUTE_VALUE_QUOTED";
    State[State["SELF_CLOSING_START_TAG"] = 39] = "SELF_CLOSING_START_TAG";
    State[State["BOGUS_COMMENT"] = 40] = "BOGUS_COMMENT";
    State[State["MARKUP_DECLARATION_OPEN"] = 41] = "MARKUP_DECLARATION_OPEN";
    State[State["COMMENT_START"] = 42] = "COMMENT_START";
    State[State["COMMENT_START_DASH"] = 43] = "COMMENT_START_DASH";
    State[State["COMMENT"] = 44] = "COMMENT";
    State[State["COMMENT_LESS_THAN_SIGN"] = 45] = "COMMENT_LESS_THAN_SIGN";
    State[State["COMMENT_LESS_THAN_SIGN_BANG"] = 46] = "COMMENT_LESS_THAN_SIGN_BANG";
    State[State["COMMENT_LESS_THAN_SIGN_BANG_DASH"] = 47] = "COMMENT_LESS_THAN_SIGN_BANG_DASH";
    State[State["COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH"] = 48] = "COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH";
    State[State["COMMENT_END_DASH"] = 49] = "COMMENT_END_DASH";
    State[State["COMMENT_END"] = 50] = "COMMENT_END";
    State[State["COMMENT_END_BANG"] = 51] = "COMMENT_END_BANG";
    State[State["DOCTYPE"] = 52] = "DOCTYPE";
    State[State["BEFORE_DOCTYPE_NAME"] = 53] = "BEFORE_DOCTYPE_NAME";
    State[State["DOCTYPE_NAME"] = 54] = "DOCTYPE_NAME";
    State[State["AFTER_DOCTYPE_NAME"] = 55] = "AFTER_DOCTYPE_NAME";
    State[State["AFTER_DOCTYPE_PUBLIC_KEYWORD"] = 56] = "AFTER_DOCTYPE_PUBLIC_KEYWORD";
    State[State["BEFORE_DOCTYPE_PUBLIC_IDENTIFIER"] = 57] = "BEFORE_DOCTYPE_PUBLIC_IDENTIFIER";
    State[State["DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED"] = 58] = "DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED";
    State[State["DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED"] = 59] = "DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED";
    State[State["AFTER_DOCTYPE_PUBLIC_IDENTIFIER"] = 60] = "AFTER_DOCTYPE_PUBLIC_IDENTIFIER";
    State[State["BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS"] = 61] = "BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS";
    State[State["AFTER_DOCTYPE_SYSTEM_KEYWORD"] = 62] = "AFTER_DOCTYPE_SYSTEM_KEYWORD";
    State[State["BEFORE_DOCTYPE_SYSTEM_IDENTIFIER"] = 63] = "BEFORE_DOCTYPE_SYSTEM_IDENTIFIER";
    State[State["DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED"] = 64] = "DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED";
    State[State["DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED"] = 65] = "DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED";
    State[State["AFTER_DOCTYPE_SYSTEM_IDENTIFIER"] = 66] = "AFTER_DOCTYPE_SYSTEM_IDENTIFIER";
    State[State["BOGUS_DOCTYPE"] = 67] = "BOGUS_DOCTYPE";
    State[State["CDATA_SECTION"] = 68] = "CDATA_SECTION";
    State[State["CDATA_SECTION_BRACKET"] = 69] = "CDATA_SECTION_BRACKET";
    State[State["CDATA_SECTION_END"] = 70] = "CDATA_SECTION_END";
    State[State["CHARACTER_REFERENCE"] = 71] = "CHARACTER_REFERENCE";
    State[State["AMBIGUOUS_AMPERSAND"] = 72] = "AMBIGUOUS_AMPERSAND";
})(State || (State = {}));
|
|
//Tokenizer initial states for different modes
|
|
exports.TokenizerMode = {
|
|
DATA: State.DATA,
|
|
RCDATA: State.RCDATA,
|
|
RAWTEXT: State.RAWTEXT,
|
|
SCRIPT_DATA: State.SCRIPT_DATA,
|
|
PLAINTEXT: State.PLAINTEXT,
|
|
CDATA_SECTION: State.CDATA_SECTION,
|
|
};
|
|
//Utils
|
|
//OPTIMIZATION: these utility functions should not be moved out of this module. V8 Crankshaft will not inline
//these functions if they are located in another module, due to the context switch.
//Always perform an inlining check before modifying these functions ('node --trace-inlining').
|
|
/**
 * Tells whether a code point is an ASCII digit (`0`-`9`).
 *
 * @param {number} cp Code point to test.
 * @returns {boolean} `true` when `cp` lies in the inclusive range DIGIT_0..DIGIT_9.
 */
function isAsciiDigit(cp) {
    return cp >= unicode_js_1.CODE_POINTS.DIGIT_0 && cp <= unicode_js_1.CODE_POINTS.DIGIT_9;
}
|
|
/**
 * Tells whether a code point is an ASCII uppercase letter (`A`-`Z`).
 *
 * @param {number} cp Code point to test.
 * @returns {boolean} `true` when `cp` lies in the inclusive range LATIN_CAPITAL_A..LATIN_CAPITAL_Z.
 */
function isAsciiUpper(cp) {
    return cp >= unicode_js_1.CODE_POINTS.LATIN_CAPITAL_A && cp <= unicode_js_1.CODE_POINTS.LATIN_CAPITAL_Z;
}
|
|
/**
 * Tells whether a code point is an ASCII lowercase letter (`a`-`z`).
 *
 * @param {number} cp Code point to test.
 * @returns {boolean} `true` when `cp` lies in the inclusive range LATIN_SMALL_A..LATIN_SMALL_Z.
 */
function isAsciiLower(cp) {
    return cp >= unicode_js_1.CODE_POINTS.LATIN_SMALL_A && cp <= unicode_js_1.CODE_POINTS.LATIN_SMALL_Z;
}
|
|
/**
 * Tells whether a code point is an ASCII letter of either case.
 *
 * @param {number} cp Code point to test.
 * @returns {boolean} `true` when `cp` is accepted by `isAsciiLower` or `isAsciiUpper`.
 */
function isAsciiLetter(cp) {
    return isAsciiLower(cp) || isAsciiUpper(cp);
}
|
|
/**
 * Tells whether a code point is an ASCII letter or an ASCII digit.
 *
 * @param {number} cp Code point to test.
 * @returns {boolean} `true` when `cp` is accepted by `isAsciiLetter` or `isAsciiDigit`.
 */
function isAsciiAlphaNumeric(cp) {
    return isAsciiLetter(cp) || isAsciiDigit(cp);
}
|
|
/**
 * Converts an ASCII uppercase code point to its lowercase counterpart.
 *
 * The offset is applied unconditionally: the result is only meaningful when
 * `cp` is already known to be an ASCII uppercase letter.
 *
 * @param {number} cp Code point of an ASCII uppercase letter.
 * @returns {number} `cp + 32`.
 */
function toAsciiLower(cp) {
    // 0x20 separates 'A'..'Z' from 'a'..'z' in ASCII.
    return cp + 32;
}
|
|
/**
 * Tells whether a code point is one of the whitespace characters recognised
 * here: SPACE, LINE FEED, TABULATION or FORM FEED.
 *
 * @param {number} cp Code point to test.
 * @returns {boolean} `true` for the four code points listed above, `false` otherwise.
 */
function isWhitespace(cp) {
    return (cp === unicode_js_1.CODE_POINTS.SPACE ||
        cp === unicode_js_1.CODE_POINTS.LINE_FEED ||
        cp === unicode_js_1.CODE_POINTS.TABULATION ||
        cp === unicode_js_1.CODE_POINTS.FORM_FEED);
}
|
|
/**
 * Tells whether a code point ends a script-data double-escape sequence:
 * whitespace, SOLIDUS (`/`) or GREATER-THAN SIGN (`>`).
 *
 * @param {number} cp Code point to test.
 * @returns {boolean} `true` when `cp` is one of the terminating code points.
 */
function isScriptDataDoubleEscapeSequenceEnd(cp) {
    return (isWhitespace(cp) ||
        cp === unicode_js_1.CODE_POINTS.SOLIDUS ||
        cp === unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN);
}
|
|
/**
 * Maps the value of a numeric character reference to the parse error it
 * triggers, if any.
 *
 * The checks run in a fixed order and the first match wins: NULL, above the
 * Unicode range, surrogate, noncharacter, then control code point (CARRIAGE
 * RETURN is treated as a control character here).
 *
 * @param {number} code Numeric value of the character reference.
 * @returns The matching `ERR` code, or `null` when the value raises no error.
 */
function getErrorForNumericCharacterReference(code) {
    if (code === unicode_js_1.CODE_POINTS.NULL) {
        return error_codes_js_1.ERR.nullCharacterReference;
    }
    // 0x10FFFF (1114111) is the largest valid Unicode code point.
    if (code > 0x10ffff) {
        return error_codes_js_1.ERR.characterReferenceOutsideUnicodeRange;
    }
    if ((0, unicode_js_1.isSurrogate)(code)) {
        return error_codes_js_1.ERR.surrogateCharacterReference;
    }
    if ((0, unicode_js_1.isUndefinedCodePoint)(code)) {
        return error_codes_js_1.ERR.noncharacterCharacterReference;
    }
    if ((0, unicode_js_1.isControlCodePoint)(code) || code === unicode_js_1.CODE_POINTS.CARRIAGE_RETURN) {
        return error_codes_js_1.ERR.controlCharacterReference;
    }
    return null;
}
|
|
//Tokenizer
|
|
class Tokenizer {
|
|
constructor(options, handler) {
|
|
this.options = options;
|
|
this.handler = handler;
|
|
this.paused = false;
|
|
/** Ensures that the parsing loop isn't run multiple times at once. */
|
|
this.inLoop = false;
|
|
/**
|
|
* Indicates that the current adjusted node exists, is not an element in the HTML namespace,
|
|
* and that it is not an integration point for either MathML or HTML.
|
|
*
|
|
* @see {@link https://html.spec.whatwg.org/multipage/parsing.html#tree-construction}
|
|
*/
|
|
this.inForeignNode = false;
|
|
this.lastStartTagName = '';
|
|
this.active = false;
|
|
this.state = State.DATA;
|
|
this.returnState = State.DATA;
|
|
this.entityStartPos = 0;
|
|
this.consumedAfterSnapshot = -1;
|
|
this.currentCharacterToken = null;
|
|
this.currentToken = null;
|
|
this.currentAttr = { name: '', value: '' };
|
|
this.preprocessor = new preprocessor_js_1.Preprocessor(handler);
|
|
this.currentLocation = this.getCurrentLocation(-1);
|
|
this.entityDecoder = new decode_1.EntityDecoder(decode_1.htmlDecodeTree, (cp, consumed) => {
|
|
// Note: Set `pos` _before_ flushing, as flushing might drop
|
|
// the current chunk and invalidate `entityStartPos`.
|
|
this.preprocessor.pos = this.entityStartPos + consumed - 1;
|
|
this._flushCodePointConsumedAsCharacterReference(cp);
|
|
}, handler.onParseError
|
|
? {
|
|
missingSemicolonAfterCharacterReference: () => {
|
|
this._err(error_codes_js_1.ERR.missingSemicolonAfterCharacterReference, 1);
|
|
},
|
|
absenceOfDigitsInNumericCharacterReference: (consumed) => {
|
|
this._err(error_codes_js_1.ERR.absenceOfDigitsInNumericCharacterReference, this.entityStartPos - this.preprocessor.pos + consumed);
|
|
},
|
|
validateNumericCharacterReference: (code) => {
|
|
const error = getErrorForNumericCharacterReference(code);
|
|
if (error)
|
|
this._err(error, 1);
|
|
},
|
|
}
|
|
: undefined);
|
|
}
|
|
//Errors
|
|
_err(code, cpOffset = 0) {
|
|
var _a, _b;
|
|
(_b = (_a = this.handler).onParseError) === null || _b === void 0 ? void 0 : _b.call(_a, this.preprocessor.getError(code, cpOffset));
|
|
}
|
|
// NOTE: `offset` may never run across line boundaries.
|
|
getCurrentLocation(offset) {
|
|
if (!this.options.sourceCodeLocationInfo) {
|
|
return null;
|
|
}
|
|
return {
|
|
startLine: this.preprocessor.line,
|
|
startCol: this.preprocessor.col - offset,
|
|
startOffset: this.preprocessor.offset - offset,
|
|
endLine: -1,
|
|
endCol: -1,
|
|
endOffset: -1,
|
|
};
|
|
}
|
|
_runParsingLoop() {
|
|
if (this.inLoop)
|
|
return;
|
|
this.inLoop = true;
|
|
while (this.active && !this.paused) {
|
|
this.consumedAfterSnapshot = 0;
|
|
const cp = this._consume();
|
|
if (!this._ensureHibernation()) {
|
|
this._callState(cp);
|
|
}
|
|
}
|
|
this.inLoop = false;
|
|
}
|
|
//API
|
|
pause() {
|
|
this.paused = true;
|
|
}
|
|
resume(writeCallback) {
|
|
if (!this.paused) {
|
|
throw new Error('Parser was already resumed');
|
|
}
|
|
this.paused = false;
|
|
// Necessary for synchronous resume.
|
|
if (this.inLoop)
|
|
return;
|
|
this._runParsingLoop();
|
|
if (!this.paused) {
|
|
writeCallback === null || writeCallback === void 0 ? void 0 : writeCallback();
|
|
}
|
|
}
|
|
write(chunk, isLastChunk, writeCallback) {
|
|
this.active = true;
|
|
this.preprocessor.write(chunk, isLastChunk);
|
|
this._runParsingLoop();
|
|
if (!this.paused) {
|
|
writeCallback === null || writeCallback === void 0 ? void 0 : writeCallback();
|
|
}
|
|
}
|
|
insertHtmlAtCurrentPos(chunk) {
|
|
this.active = true;
|
|
this.preprocessor.insertHtmlAtCurrentPos(chunk);
|
|
this._runParsingLoop();
|
|
}
|
|
//Hibernation
|
|
_ensureHibernation() {
|
|
if (this.preprocessor.endOfChunkHit) {
|
|
this.preprocessor.retreat(this.consumedAfterSnapshot);
|
|
this.consumedAfterSnapshot = 0;
|
|
this.active = false;
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
//Consumption
|
|
_consume() {
|
|
this.consumedAfterSnapshot++;
|
|
return this.preprocessor.advance();
|
|
}
|
|
_advanceBy(count) {
|
|
this.consumedAfterSnapshot += count;
|
|
for (let i = 0; i < count; i++) {
|
|
this.preprocessor.advance();
|
|
}
|
|
}
|
|
_consumeSequenceIfMatch(pattern, caseSensitive) {
|
|
if (this.preprocessor.startsWith(pattern, caseSensitive)) {
|
|
// We will already have consumed one character before calling this method.
|
|
this._advanceBy(pattern.length - 1);
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
//Token creation
|
|
_createStartTagToken() {
|
|
this.currentToken = {
|
|
type: token_js_1.TokenType.START_TAG,
|
|
tagName: '',
|
|
tagID: html_js_1.TAG_ID.UNKNOWN,
|
|
selfClosing: false,
|
|
ackSelfClosing: false,
|
|
attrs: [],
|
|
location: this.getCurrentLocation(1),
|
|
};
|
|
}
|
|
_createEndTagToken() {
|
|
this.currentToken = {
|
|
type: token_js_1.TokenType.END_TAG,
|
|
tagName: '',
|
|
tagID: html_js_1.TAG_ID.UNKNOWN,
|
|
selfClosing: false,
|
|
ackSelfClosing: false,
|
|
attrs: [],
|
|
location: this.getCurrentLocation(2),
|
|
};
|
|
}
|
|
_createCommentToken(offset) {
|
|
this.currentToken = {
|
|
type: token_js_1.TokenType.COMMENT,
|
|
data: '',
|
|
location: this.getCurrentLocation(offset),
|
|
};
|
|
}
|
|
_createDoctypeToken(initialName) {
|
|
this.currentToken = {
|
|
type: token_js_1.TokenType.DOCTYPE,
|
|
name: initialName,
|
|
forceQuirks: false,
|
|
publicId: null,
|
|
systemId: null,
|
|
location: this.currentLocation,
|
|
};
|
|
}
|
|
_createCharacterToken(type, chars) {
|
|
this.currentCharacterToken = {
|
|
type,
|
|
chars,
|
|
location: this.currentLocation,
|
|
};
|
|
}
|
|
//Tag attributes
|
|
_createAttr(attrNameFirstCh) {
|
|
this.currentAttr = {
|
|
name: attrNameFirstCh,
|
|
value: '',
|
|
};
|
|
this.currentLocation = this.getCurrentLocation(0);
|
|
}
|
|
_leaveAttrName() {
|
|
var _a;
|
|
var _b;
|
|
const token = this.currentToken;
|
|
if ((0, token_js_1.getTokenAttr)(token, this.currentAttr.name) === null) {
|
|
token.attrs.push(this.currentAttr);
|
|
if (token.location && this.currentLocation) {
|
|
const attrLocations = ((_a = (_b = token.location).attrs) !== null && _a !== void 0 ? _a : (_b.attrs = Object.create(null)));
|
|
attrLocations[this.currentAttr.name] = this.currentLocation;
|
|
// Set end location
|
|
this._leaveAttrValue();
|
|
}
|
|
}
|
|
else {
|
|
this._err(error_codes_js_1.ERR.duplicateAttribute);
|
|
}
|
|
}
|
|
_leaveAttrValue() {
|
|
if (this.currentLocation) {
|
|
this.currentLocation.endLine = this.preprocessor.line;
|
|
this.currentLocation.endCol = this.preprocessor.col;
|
|
this.currentLocation.endOffset = this.preprocessor.offset;
|
|
}
|
|
}
|
|
//Token emission
|
|
prepareToken(ct) {
|
|
this._emitCurrentCharacterToken(ct.location);
|
|
this.currentToken = null;
|
|
if (ct.location) {
|
|
ct.location.endLine = this.preprocessor.line;
|
|
ct.location.endCol = this.preprocessor.col + 1;
|
|
ct.location.endOffset = this.preprocessor.offset + 1;
|
|
}
|
|
this.currentLocation = this.getCurrentLocation(-1);
|
|
}
|
|
emitCurrentTagToken() {
|
|
const ct = this.currentToken;
|
|
this.prepareToken(ct);
|
|
ct.tagID = (0, html_js_1.getTagID)(ct.tagName);
|
|
if (ct.type === token_js_1.TokenType.START_TAG) {
|
|
this.lastStartTagName = ct.tagName;
|
|
this.handler.onStartTag(ct);
|
|
}
|
|
else {
|
|
if (ct.attrs.length > 0) {
|
|
this._err(error_codes_js_1.ERR.endTagWithAttributes);
|
|
}
|
|
if (ct.selfClosing) {
|
|
this._err(error_codes_js_1.ERR.endTagWithTrailingSolidus);
|
|
}
|
|
this.handler.onEndTag(ct);
|
|
}
|
|
this.preprocessor.dropParsedChunk();
|
|
}
|
|
emitCurrentComment(ct) {
|
|
this.prepareToken(ct);
|
|
this.handler.onComment(ct);
|
|
this.preprocessor.dropParsedChunk();
|
|
}
|
|
emitCurrentDoctype(ct) {
|
|
this.prepareToken(ct);
|
|
this.handler.onDoctype(ct);
|
|
this.preprocessor.dropParsedChunk();
|
|
}
|
|
_emitCurrentCharacterToken(nextLocation) {
|
|
if (this.currentCharacterToken) {
|
|
//NOTE: if we have a pending character token, make it's end location equal to the
|
|
//current token's start location.
|
|
if (nextLocation && this.currentCharacterToken.location) {
|
|
this.currentCharacterToken.location.endLine = nextLocation.startLine;
|
|
this.currentCharacterToken.location.endCol = nextLocation.startCol;
|
|
this.currentCharacterToken.location.endOffset = nextLocation.startOffset;
|
|
}
|
|
switch (this.currentCharacterToken.type) {
|
|
case token_js_1.TokenType.CHARACTER: {
|
|
this.handler.onCharacter(this.currentCharacterToken);
|
|
break;
|
|
}
|
|
case token_js_1.TokenType.NULL_CHARACTER: {
|
|
this.handler.onNullCharacter(this.currentCharacterToken);
|
|
break;
|
|
}
|
|
case token_js_1.TokenType.WHITESPACE_CHARACTER: {
|
|
this.handler.onWhitespaceCharacter(this.currentCharacterToken);
|
|
break;
|
|
}
|
|
}
|
|
this.currentCharacterToken = null;
|
|
}
|
|
}
|
|
_emitEOFToken() {
|
|
const location = this.getCurrentLocation(0);
|
|
if (location) {
|
|
location.endLine = location.startLine;
|
|
location.endCol = location.startCol;
|
|
location.endOffset = location.startOffset;
|
|
}
|
|
this._emitCurrentCharacterToken(location);
|
|
this.handler.onEof({ type: token_js_1.TokenType.EOF, location });
|
|
this.active = false;
|
|
}
|
|
//Characters emission
|
|
//OPTIMIZATION: The specification uses only one type of character token (one token per character).
//This causes a huge memory overhead and a lot of unnecessary parser loops. parse5 uses 3 groups of characters.
//If we have a sequence of characters that belong to the same group, the parser can process it
//as a single solid character token.
//So, there are 3 types of character tokens in parse5:
//1)TokenType.NULL_CHARACTER - \u0000-character sequences (e.g. '\u0000\u0000\u0000')
//2)TokenType.WHITESPACE_CHARACTER - any whitespace/new-line character sequences (e.g. '\n \r\t \f')
//3)TokenType.CHARACTER - any character sequence that doesn't belong to groups 1 and 2 (e.g. 'abcdef1234@@#$%^')
|
|
_appendCharToCurrentCharacterToken(type, ch) {
|
|
if (this.currentCharacterToken) {
|
|
if (this.currentCharacterToken.type === type) {
|
|
this.currentCharacterToken.chars += ch;
|
|
return;
|
|
}
|
|
else {
|
|
this.currentLocation = this.getCurrentLocation(0);
|
|
this._emitCurrentCharacterToken(this.currentLocation);
|
|
this.preprocessor.dropParsedChunk();
|
|
}
|
|
}
|
|
this._createCharacterToken(type, ch);
|
|
}
|
|
_emitCodePoint(cp) {
|
|
const type = isWhitespace(cp)
|
|
? token_js_1.TokenType.WHITESPACE_CHARACTER
|
|
: cp === unicode_js_1.CODE_POINTS.NULL
|
|
? token_js_1.TokenType.NULL_CHARACTER
|
|
: token_js_1.TokenType.CHARACTER;
|
|
this._appendCharToCurrentCharacterToken(type, String.fromCodePoint(cp));
|
|
}
|
|
//NOTE: used when we emit characters explicitly.
|
|
//This is always for non-whitespace and non-null characters, which allows us to avoid additional checks.
|
|
_emitChars(ch) {
|
|
this._appendCharToCurrentCharacterToken(token_js_1.TokenType.CHARACTER, ch);
|
|
}
|
|
// Character reference helpers
|
|
_startCharacterReference() {
|
|
this.returnState = this.state;
|
|
this.state = State.CHARACTER_REFERENCE;
|
|
this.entityStartPos = this.preprocessor.pos;
|
|
this.entityDecoder.startEntity(this._isCharacterReferenceInAttribute() ? decode_1.DecodingMode.Attribute : decode_1.DecodingMode.Legacy);
|
|
}
|
|
_isCharacterReferenceInAttribute() {
|
|
return (this.returnState === State.ATTRIBUTE_VALUE_DOUBLE_QUOTED ||
|
|
this.returnState === State.ATTRIBUTE_VALUE_SINGLE_QUOTED ||
|
|
this.returnState === State.ATTRIBUTE_VALUE_UNQUOTED);
|
|
}
|
|
_flushCodePointConsumedAsCharacterReference(cp) {
|
|
if (this._isCharacterReferenceInAttribute()) {
|
|
this.currentAttr.value += String.fromCodePoint(cp);
|
|
}
|
|
else {
|
|
this._emitCodePoint(cp);
|
|
}
|
|
}
|
|
// Calling states this way turns out to be much faster than any other approach.
|
|
_callState(cp) {
|
|
switch (this.state) {
|
|
case State.DATA: {
|
|
this._stateData(cp);
|
|
break;
|
|
}
|
|
case State.RCDATA: {
|
|
this._stateRcdata(cp);
|
|
break;
|
|
}
|
|
case State.RAWTEXT: {
|
|
this._stateRawtext(cp);
|
|
break;
|
|
}
|
|
case State.SCRIPT_DATA: {
|
|
this._stateScriptData(cp);
|
|
break;
|
|
}
|
|
case State.PLAINTEXT: {
|
|
this._statePlaintext(cp);
|
|
break;
|
|
}
|
|
case State.TAG_OPEN: {
|
|
this._stateTagOpen(cp);
|
|
break;
|
|
}
|
|
case State.END_TAG_OPEN: {
|
|
this._stateEndTagOpen(cp);
|
|
break;
|
|
}
|
|
case State.TAG_NAME: {
|
|
this._stateTagName(cp);
|
|
break;
|
|
}
|
|
case State.RCDATA_LESS_THAN_SIGN: {
|
|
this._stateRcdataLessThanSign(cp);
|
|
break;
|
|
}
|
|
case State.RCDATA_END_TAG_OPEN: {
|
|
this._stateRcdataEndTagOpen(cp);
|
|
break;
|
|
}
|
|
case State.RCDATA_END_TAG_NAME: {
|
|
this._stateRcdataEndTagName(cp);
|
|
break;
|
|
}
|
|
case State.RAWTEXT_LESS_THAN_SIGN: {
|
|
this._stateRawtextLessThanSign(cp);
|
|
break;
|
|
}
|
|
case State.RAWTEXT_END_TAG_OPEN: {
|
|
this._stateRawtextEndTagOpen(cp);
|
|
break;
|
|
}
|
|
case State.RAWTEXT_END_TAG_NAME: {
|
|
this._stateRawtextEndTagName(cp);
|
|
break;
|
|
}
|
|
case State.SCRIPT_DATA_LESS_THAN_SIGN: {
|
|
this._stateScriptDataLessThanSign(cp);
|
|
break;
|
|
}
|
|
case State.SCRIPT_DATA_END_TAG_OPEN: {
|
|
this._stateScriptDataEndTagOpen(cp);
|
|
break;
|
|
}
|
|
case State.SCRIPT_DATA_END_TAG_NAME: {
|
|
this._stateScriptDataEndTagName(cp);
|
|
break;
|
|
}
|
|
case State.SCRIPT_DATA_ESCAPE_START: {
|
|
this._stateScriptDataEscapeStart(cp);
|
|
break;
|
|
}
|
|
case State.SCRIPT_DATA_ESCAPE_START_DASH: {
|
|
this._stateScriptDataEscapeStartDash(cp);
|
|
break;
|
|
}
|
|
case State.SCRIPT_DATA_ESCAPED: {
|
|
this._stateScriptDataEscaped(cp);
|
|
break;
|
|
}
|
|
case State.SCRIPT_DATA_ESCAPED_DASH: {
|
|
this._stateScriptDataEscapedDash(cp);
|
|
break;
|
|
}
|
|
case State.SCRIPT_DATA_ESCAPED_DASH_DASH: {
|
|
this._stateScriptDataEscapedDashDash(cp);
|
|
break;
|
|
}
|
|
case State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN: {
|
|
this._stateScriptDataEscapedLessThanSign(cp);
|
|
break;
|
|
}
|
|
case State.SCRIPT_DATA_ESCAPED_END_TAG_OPEN: {
|
|
this._stateScriptDataEscapedEndTagOpen(cp);
|
|
break;
|
|
}
|
|
case State.SCRIPT_DATA_ESCAPED_END_TAG_NAME: {
|
|
this._stateScriptDataEscapedEndTagName(cp);
|
|
break;
|
|
}
|
|
case State.SCRIPT_DATA_DOUBLE_ESCAPE_START: {
|
|
this._stateScriptDataDoubleEscapeStart(cp);
|
|
break;
|
|
}
|
|
case State.SCRIPT_DATA_DOUBLE_ESCAPED: {
|
|
this._stateScriptDataDoubleEscaped(cp);
|
|
break;
|
|
}
|
|
case State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH: {
|
|
this._stateScriptDataDoubleEscapedDash(cp);
|
|
break;
|
|
}
|
|
case State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH: {
|
|
this._stateScriptDataDoubleEscapedDashDash(cp);
|
|
break;
|
|
}
|
|
case State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN: {
|
|
this._stateScriptDataDoubleEscapedLessThanSign(cp);
|
|
break;
|
|
}
|
|
case State.SCRIPT_DATA_DOUBLE_ESCAPE_END: {
|
|
this._stateScriptDataDoubleEscapeEnd(cp);
|
|
break;
|
|
}
|
|
case State.BEFORE_ATTRIBUTE_NAME: {
|
|
this._stateBeforeAttributeName(cp);
|
|
break;
|
|
}
|
|
case State.ATTRIBUTE_NAME: {
|
|
this._stateAttributeName(cp);
|
|
break;
|
|
}
|
|
case State.AFTER_ATTRIBUTE_NAME: {
|
|
this._stateAfterAttributeName(cp);
|
|
break;
|
|
}
|
|
case State.BEFORE_ATTRIBUTE_VALUE: {
|
|
this._stateBeforeAttributeValue(cp);
|
|
break;
|
|
}
|
|
case State.ATTRIBUTE_VALUE_DOUBLE_QUOTED: {
|
|
this._stateAttributeValueDoubleQuoted(cp);
|
|
break;
|
|
}
|
|
case State.ATTRIBUTE_VALUE_SINGLE_QUOTED: {
|
|
this._stateAttributeValueSingleQuoted(cp);
|
|
break;
|
|
}
|
|
case State.ATTRIBUTE_VALUE_UNQUOTED: {
|
|
this._stateAttributeValueUnquoted(cp);
|
|
break;
|
|
}
|
|
case State.AFTER_ATTRIBUTE_VALUE_QUOTED: {
|
|
this._stateAfterAttributeValueQuoted(cp);
|
|
break;
|
|
}
|
|
case State.SELF_CLOSING_START_TAG: {
|
|
this._stateSelfClosingStartTag(cp);
|
|
break;
|
|
}
|
|
case State.BOGUS_COMMENT: {
|
|
this._stateBogusComment(cp);
|
|
break;
|
|
}
|
|
case State.MARKUP_DECLARATION_OPEN: {
|
|
this._stateMarkupDeclarationOpen(cp);
|
|
break;
|
|
}
|
|
case State.COMMENT_START: {
|
|
this._stateCommentStart(cp);
|
|
break;
|
|
}
|
|
case State.COMMENT_START_DASH: {
|
|
this._stateCommentStartDash(cp);
|
|
break;
|
|
}
|
|
case State.COMMENT: {
|
|
this._stateComment(cp);
|
|
break;
|
|
}
|
|
case State.COMMENT_LESS_THAN_SIGN: {
|
|
this._stateCommentLessThanSign(cp);
|
|
break;
|
|
}
|
|
case State.COMMENT_LESS_THAN_SIGN_BANG: {
|
|
this._stateCommentLessThanSignBang(cp);
|
|
break;
|
|
}
|
|
case State.COMMENT_LESS_THAN_SIGN_BANG_DASH: {
|
|
this._stateCommentLessThanSignBangDash(cp);
|
|
break;
|
|
}
|
|
case State.COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH: {
|
|
this._stateCommentLessThanSignBangDashDash(cp);
|
|
break;
|
|
}
|
|
case State.COMMENT_END_DASH: {
|
|
this._stateCommentEndDash(cp);
|
|
break;
|
|
}
|
|
case State.COMMENT_END: {
|
|
this._stateCommentEnd(cp);
|
|
break;
|
|
}
|
|
case State.COMMENT_END_BANG: {
|
|
this._stateCommentEndBang(cp);
|
|
break;
|
|
}
|
|
case State.DOCTYPE: {
|
|
this._stateDoctype(cp);
|
|
break;
|
|
}
|
|
case State.BEFORE_DOCTYPE_NAME: {
|
|
this._stateBeforeDoctypeName(cp);
|
|
break;
|
|
}
|
|
case State.DOCTYPE_NAME: {
|
|
this._stateDoctypeName(cp);
|
|
break;
|
|
}
|
|
case State.AFTER_DOCTYPE_NAME: {
|
|
this._stateAfterDoctypeName(cp);
|
|
break;
|
|
}
|
|
case State.AFTER_DOCTYPE_PUBLIC_KEYWORD: {
|
|
this._stateAfterDoctypePublicKeyword(cp);
|
|
break;
|
|
}
|
|
case State.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER: {
|
|
this._stateBeforeDoctypePublicIdentifier(cp);
|
|
break;
|
|
}
|
|
case State.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED: {
|
|
this._stateDoctypePublicIdentifierDoubleQuoted(cp);
|
|
break;
|
|
}
|
|
case State.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED: {
|
|
this._stateDoctypePublicIdentifierSingleQuoted(cp);
|
|
break;
|
|
}
|
|
case State.AFTER_DOCTYPE_PUBLIC_IDENTIFIER: {
|
|
this._stateAfterDoctypePublicIdentifier(cp);
|
|
break;
|
|
}
|
|
case State.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS: {
|
|
this._stateBetweenDoctypePublicAndSystemIdentifiers(cp);
|
|
break;
|
|
}
|
|
case State.AFTER_DOCTYPE_SYSTEM_KEYWORD: {
|
|
this._stateAfterDoctypeSystemKeyword(cp);
|
|
break;
|
|
}
|
|
case State.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER: {
|
|
this._stateBeforeDoctypeSystemIdentifier(cp);
|
|
break;
|
|
}
|
|
case State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED: {
|
|
this._stateDoctypeSystemIdentifierDoubleQuoted(cp);
|
|
break;
|
|
}
|
|
case State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED: {
|
|
this._stateDoctypeSystemIdentifierSingleQuoted(cp);
|
|
break;
|
|
}
|
|
case State.AFTER_DOCTYPE_SYSTEM_IDENTIFIER: {
|
|
this._stateAfterDoctypeSystemIdentifier(cp);
|
|
break;
|
|
}
|
|
case State.BOGUS_DOCTYPE: {
|
|
this._stateBogusDoctype(cp);
|
|
break;
|
|
}
|
|
case State.CDATA_SECTION: {
|
|
this._stateCdataSection(cp);
|
|
break;
|
|
}
|
|
case State.CDATA_SECTION_BRACKET: {
|
|
this._stateCdataSectionBracket(cp);
|
|
break;
|
|
}
|
|
case State.CDATA_SECTION_END: {
|
|
this._stateCdataSectionEnd(cp);
|
|
break;
|
|
}
|
|
case State.CHARACTER_REFERENCE: {
|
|
this._stateCharacterReference();
|
|
break;
|
|
}
|
|
case State.AMBIGUOUS_AMPERSAND: {
|
|
this._stateAmbiguousAmpersand(cp);
|
|
break;
|
|
}
|
|
default: {
|
|
throw new Error('Unknown state');
|
|
}
|
|
}
|
|
}
|
|
// State machine
|
|
// Data state
|
|
//------------------------------------------------------------------
|
|
_stateData(cp) {
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.LESS_THAN_SIGN: {
|
|
this.state = State.TAG_OPEN;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.AMPERSAND: {
|
|
this._startCharacterReference();
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.NULL: {
|
|
this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
|
|
this._emitCodePoint(cp);
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EOF: {
|
|
this._emitEOFToken();
|
|
break;
|
|
}
|
|
default: {
|
|
this._emitCodePoint(cp);
|
|
}
|
|
}
|
|
}
|
|
// RCDATA state
|
|
//------------------------------------------------------------------
|
|
_stateRcdata(cp) {
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.AMPERSAND: {
|
|
this._startCharacterReference();
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.LESS_THAN_SIGN: {
|
|
this.state = State.RCDATA_LESS_THAN_SIGN;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.NULL: {
|
|
this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
|
|
this._emitChars(unicode_js_1.REPLACEMENT_CHARACTER);
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EOF: {
|
|
this._emitEOFToken();
|
|
break;
|
|
}
|
|
default: {
|
|
this._emitCodePoint(cp);
|
|
}
|
|
}
|
|
}
|
|
// RAWTEXT state
|
|
//------------------------------------------------------------------
|
|
_stateRawtext(cp) {
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.LESS_THAN_SIGN: {
|
|
this.state = State.RAWTEXT_LESS_THAN_SIGN;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.NULL: {
|
|
this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
|
|
this._emitChars(unicode_js_1.REPLACEMENT_CHARACTER);
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EOF: {
|
|
this._emitEOFToken();
|
|
break;
|
|
}
|
|
default: {
|
|
this._emitCodePoint(cp);
|
|
}
|
|
}
|
|
}
|
|
// Script data state
|
|
//------------------------------------------------------------------
|
|
_stateScriptData(cp) {
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.LESS_THAN_SIGN: {
|
|
this.state = State.SCRIPT_DATA_LESS_THAN_SIGN;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.NULL: {
|
|
this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
|
|
this._emitChars(unicode_js_1.REPLACEMENT_CHARACTER);
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EOF: {
|
|
this._emitEOFToken();
|
|
break;
|
|
}
|
|
default: {
|
|
this._emitCodePoint(cp);
|
|
}
|
|
}
|
|
}
|
|
// PLAINTEXT state
|
|
//------------------------------------------------------------------
|
|
_statePlaintext(cp) {
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.NULL: {
|
|
this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
|
|
this._emitChars(unicode_js_1.REPLACEMENT_CHARACTER);
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EOF: {
|
|
this._emitEOFToken();
|
|
break;
|
|
}
|
|
default: {
|
|
this._emitCodePoint(cp);
|
|
}
|
|
}
|
|
}
|
|
// Tag open state
|
|
//------------------------------------------------------------------
|
|
_stateTagOpen(cp) {
|
|
if (isAsciiLetter(cp)) {
|
|
this._createStartTagToken();
|
|
this.state = State.TAG_NAME;
|
|
this._stateTagName(cp);
|
|
}
|
|
else
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.EXCLAMATION_MARK: {
|
|
this.state = State.MARKUP_DECLARATION_OPEN;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.SOLIDUS: {
|
|
this.state = State.END_TAG_OPEN;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.QUESTION_MARK: {
|
|
this._err(error_codes_js_1.ERR.unexpectedQuestionMarkInsteadOfTagName);
|
|
this._createCommentToken(1);
|
|
this.state = State.BOGUS_COMMENT;
|
|
this._stateBogusComment(cp);
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EOF: {
|
|
this._err(error_codes_js_1.ERR.eofBeforeTagName);
|
|
this._emitChars('<');
|
|
this._emitEOFToken();
|
|
break;
|
|
}
|
|
default: {
|
|
this._err(error_codes_js_1.ERR.invalidFirstCharacterOfTagName);
|
|
this._emitChars('<');
|
|
this.state = State.DATA;
|
|
this._stateData(cp);
|
|
}
|
|
}
|
|
}
|
|
// End tag open state
|
|
//------------------------------------------------------------------
|
|
_stateEndTagOpen(cp) {
|
|
if (isAsciiLetter(cp)) {
|
|
this._createEndTagToken();
|
|
this.state = State.TAG_NAME;
|
|
this._stateTagName(cp);
|
|
}
|
|
else
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
|
|
this._err(error_codes_js_1.ERR.missingEndTagName);
|
|
this.state = State.DATA;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EOF: {
|
|
this._err(error_codes_js_1.ERR.eofBeforeTagName);
|
|
this._emitChars('</');
|
|
this._emitEOFToken();
|
|
break;
|
|
}
|
|
default: {
|
|
this._err(error_codes_js_1.ERR.invalidFirstCharacterOfTagName);
|
|
this._createCommentToken(2);
|
|
this.state = State.BOGUS_COMMENT;
|
|
this._stateBogusComment(cp);
|
|
}
|
|
}
|
|
}
|
|
// Tag name state
|
|
//------------------------------------------------------------------
|
|
_stateTagName(cp) {
|
|
const token = this.currentToken;
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.SPACE:
|
|
case unicode_js_1.CODE_POINTS.LINE_FEED:
|
|
case unicode_js_1.CODE_POINTS.TABULATION:
|
|
case unicode_js_1.CODE_POINTS.FORM_FEED: {
|
|
this.state = State.BEFORE_ATTRIBUTE_NAME;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.SOLIDUS: {
|
|
this.state = State.SELF_CLOSING_START_TAG;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
|
|
this.state = State.DATA;
|
|
this.emitCurrentTagToken();
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.NULL: {
|
|
this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
|
|
token.tagName += unicode_js_1.REPLACEMENT_CHARACTER;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EOF: {
|
|
this._err(error_codes_js_1.ERR.eofInTag);
|
|
this._emitEOFToken();
|
|
break;
|
|
}
|
|
default: {
|
|
token.tagName += String.fromCodePoint(isAsciiUpper(cp) ? toAsciiLower(cp) : cp);
|
|
}
|
|
}
|
|
}
|
|
// RCDATA less-than sign state
|
|
//------------------------------------------------------------------
|
|
_stateRcdataLessThanSign(cp) {
|
|
if (cp === unicode_js_1.CODE_POINTS.SOLIDUS) {
|
|
this.state = State.RCDATA_END_TAG_OPEN;
|
|
}
|
|
else {
|
|
this._emitChars('<');
|
|
this.state = State.RCDATA;
|
|
this._stateRcdata(cp);
|
|
}
|
|
}
|
|
// RCDATA end tag open state
|
|
//------------------------------------------------------------------
|
|
_stateRcdataEndTagOpen(cp) {
|
|
if (isAsciiLetter(cp)) {
|
|
this.state = State.RCDATA_END_TAG_NAME;
|
|
this._stateRcdataEndTagName(cp);
|
|
}
|
|
else {
|
|
this._emitChars('</');
|
|
this.state = State.RCDATA;
|
|
this._stateRcdata(cp);
|
|
}
|
|
}
|
|
handleSpecialEndTag(_cp) {
|
|
if (!this.preprocessor.startsWith(this.lastStartTagName, false)) {
|
|
return !this._ensureHibernation();
|
|
}
|
|
this._createEndTagToken();
|
|
const token = this.currentToken;
|
|
token.tagName = this.lastStartTagName;
|
|
const cp = this.preprocessor.peek(this.lastStartTagName.length);
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.SPACE:
|
|
case unicode_js_1.CODE_POINTS.LINE_FEED:
|
|
case unicode_js_1.CODE_POINTS.TABULATION:
|
|
case unicode_js_1.CODE_POINTS.FORM_FEED: {
|
|
this._advanceBy(this.lastStartTagName.length);
|
|
this.state = State.BEFORE_ATTRIBUTE_NAME;
|
|
return false;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.SOLIDUS: {
|
|
this._advanceBy(this.lastStartTagName.length);
|
|
this.state = State.SELF_CLOSING_START_TAG;
|
|
return false;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
|
|
this._advanceBy(this.lastStartTagName.length);
|
|
this.emitCurrentTagToken();
|
|
this.state = State.DATA;
|
|
return false;
|
|
}
|
|
default: {
|
|
return !this._ensureHibernation();
|
|
}
|
|
}
|
|
}
|
|
// RCDATA end tag name state
|
|
//------------------------------------------------------------------
|
|
_stateRcdataEndTagName(cp) {
|
|
if (this.handleSpecialEndTag(cp)) {
|
|
this._emitChars('</');
|
|
this.state = State.RCDATA;
|
|
this._stateRcdata(cp);
|
|
}
|
|
}
|
|
// RAWTEXT less-than sign state
|
|
//------------------------------------------------------------------
|
|
_stateRawtextLessThanSign(cp) {
|
|
if (cp === unicode_js_1.CODE_POINTS.SOLIDUS) {
|
|
this.state = State.RAWTEXT_END_TAG_OPEN;
|
|
}
|
|
else {
|
|
this._emitChars('<');
|
|
this.state = State.RAWTEXT;
|
|
this._stateRawtext(cp);
|
|
}
|
|
}
|
|
// RAWTEXT end tag open state
|
|
//------------------------------------------------------------------
|
|
_stateRawtextEndTagOpen(cp) {
|
|
if (isAsciiLetter(cp)) {
|
|
this.state = State.RAWTEXT_END_TAG_NAME;
|
|
this._stateRawtextEndTagName(cp);
|
|
}
|
|
else {
|
|
this._emitChars('</');
|
|
this.state = State.RAWTEXT;
|
|
this._stateRawtext(cp);
|
|
}
|
|
}
|
|
// RAWTEXT end tag name state
|
|
//------------------------------------------------------------------
|
|
_stateRawtextEndTagName(cp) {
|
|
if (this.handleSpecialEndTag(cp)) {
|
|
this._emitChars('</');
|
|
this.state = State.RAWTEXT;
|
|
this._stateRawtext(cp);
|
|
}
|
|
}
|
|
// Script data less-than sign state
|
|
//------------------------------------------------------------------
|
|
_stateScriptDataLessThanSign(cp) {
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.SOLIDUS: {
|
|
this.state = State.SCRIPT_DATA_END_TAG_OPEN;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EXCLAMATION_MARK: {
|
|
this.state = State.SCRIPT_DATA_ESCAPE_START;
|
|
this._emitChars('<!');
|
|
break;
|
|
}
|
|
default: {
|
|
this._emitChars('<');
|
|
this.state = State.SCRIPT_DATA;
|
|
this._stateScriptData(cp);
|
|
}
|
|
}
|
|
}
|
|
// Script data end tag open state
|
|
//------------------------------------------------------------------
|
|
_stateScriptDataEndTagOpen(cp) {
|
|
if (isAsciiLetter(cp)) {
|
|
this.state = State.SCRIPT_DATA_END_TAG_NAME;
|
|
this._stateScriptDataEndTagName(cp);
|
|
}
|
|
else {
|
|
this._emitChars('</');
|
|
this.state = State.SCRIPT_DATA;
|
|
this._stateScriptData(cp);
|
|
}
|
|
}
|
|
// Script data end tag name state
|
|
//------------------------------------------------------------------
|
|
_stateScriptDataEndTagName(cp) {
|
|
if (this.handleSpecialEndTag(cp)) {
|
|
this._emitChars('</');
|
|
this.state = State.SCRIPT_DATA;
|
|
this._stateScriptData(cp);
|
|
}
|
|
}
|
|
// Script data escape start state
|
|
//------------------------------------------------------------------
|
|
_stateScriptDataEscapeStart(cp) {
|
|
if (cp === unicode_js_1.CODE_POINTS.HYPHEN_MINUS) {
|
|
this.state = State.SCRIPT_DATA_ESCAPE_START_DASH;
|
|
this._emitChars('-');
|
|
}
|
|
else {
|
|
this.state = State.SCRIPT_DATA;
|
|
this._stateScriptData(cp);
|
|
}
|
|
}
|
|
// Script data escape start dash state
|
|
//------------------------------------------------------------------
|
|
_stateScriptDataEscapeStartDash(cp) {
|
|
if (cp === unicode_js_1.CODE_POINTS.HYPHEN_MINUS) {
|
|
this.state = State.SCRIPT_DATA_ESCAPED_DASH_DASH;
|
|
this._emitChars('-');
|
|
}
|
|
else {
|
|
this.state = State.SCRIPT_DATA;
|
|
this._stateScriptData(cp);
|
|
}
|
|
}
|
|
// Script data escaped state
|
|
//------------------------------------------------------------------
|
|
_stateScriptDataEscaped(cp) {
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.HYPHEN_MINUS: {
|
|
this.state = State.SCRIPT_DATA_ESCAPED_DASH;
|
|
this._emitChars('-');
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.LESS_THAN_SIGN: {
|
|
this.state = State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.NULL: {
|
|
this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
|
|
this._emitChars(unicode_js_1.REPLACEMENT_CHARACTER);
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EOF: {
|
|
this._err(error_codes_js_1.ERR.eofInScriptHtmlCommentLikeText);
|
|
this._emitEOFToken();
|
|
break;
|
|
}
|
|
default: {
|
|
this._emitCodePoint(cp);
|
|
}
|
|
}
|
|
}
|
|
// Script data escaped dash state
|
|
//------------------------------------------------------------------
|
|
_stateScriptDataEscapedDash(cp) {
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.HYPHEN_MINUS: {
|
|
this.state = State.SCRIPT_DATA_ESCAPED_DASH_DASH;
|
|
this._emitChars('-');
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.LESS_THAN_SIGN: {
|
|
this.state = State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.NULL: {
|
|
this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
|
|
this.state = State.SCRIPT_DATA_ESCAPED;
|
|
this._emitChars(unicode_js_1.REPLACEMENT_CHARACTER);
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EOF: {
|
|
this._err(error_codes_js_1.ERR.eofInScriptHtmlCommentLikeText);
|
|
this._emitEOFToken();
|
|
break;
|
|
}
|
|
default: {
|
|
this.state = State.SCRIPT_DATA_ESCAPED;
|
|
this._emitCodePoint(cp);
|
|
}
|
|
}
|
|
}
|
|
// Script data escaped dash dash state
|
|
//------------------------------------------------------------------
|
|
_stateScriptDataEscapedDashDash(cp) {
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.HYPHEN_MINUS: {
|
|
this._emitChars('-');
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.LESS_THAN_SIGN: {
|
|
this.state = State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
|
|
this.state = State.SCRIPT_DATA;
|
|
this._emitChars('>');
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.NULL: {
|
|
this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
|
|
this.state = State.SCRIPT_DATA_ESCAPED;
|
|
this._emitChars(unicode_js_1.REPLACEMENT_CHARACTER);
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EOF: {
|
|
this._err(error_codes_js_1.ERR.eofInScriptHtmlCommentLikeText);
|
|
this._emitEOFToken();
|
|
break;
|
|
}
|
|
default: {
|
|
this.state = State.SCRIPT_DATA_ESCAPED;
|
|
this._emitCodePoint(cp);
|
|
}
|
|
}
|
|
}
|
|
// Script data escaped less-than sign state
|
|
//------------------------------------------------------------------
|
|
_stateScriptDataEscapedLessThanSign(cp) {
|
|
if (cp === unicode_js_1.CODE_POINTS.SOLIDUS) {
|
|
this.state = State.SCRIPT_DATA_ESCAPED_END_TAG_OPEN;
|
|
}
|
|
else if (isAsciiLetter(cp)) {
|
|
this._emitChars('<');
|
|
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPE_START;
|
|
this._stateScriptDataDoubleEscapeStart(cp);
|
|
}
|
|
else {
|
|
this._emitChars('<');
|
|
this.state = State.SCRIPT_DATA_ESCAPED;
|
|
this._stateScriptDataEscaped(cp);
|
|
}
|
|
}
|
|
// Script data escaped end tag open state
|
|
//------------------------------------------------------------------
|
|
_stateScriptDataEscapedEndTagOpen(cp) {
|
|
if (isAsciiLetter(cp)) {
|
|
this.state = State.SCRIPT_DATA_ESCAPED_END_TAG_NAME;
|
|
this._stateScriptDataEscapedEndTagName(cp);
|
|
}
|
|
else {
|
|
this._emitChars('</');
|
|
this.state = State.SCRIPT_DATA_ESCAPED;
|
|
this._stateScriptDataEscaped(cp);
|
|
}
|
|
}
|
|
// Script data escaped end tag name state
|
|
//------------------------------------------------------------------
|
|
_stateScriptDataEscapedEndTagName(cp) {
|
|
if (this.handleSpecialEndTag(cp)) {
|
|
this._emitChars('</');
|
|
this.state = State.SCRIPT_DATA_ESCAPED;
|
|
this._stateScriptDataEscaped(cp);
|
|
}
|
|
}
|
|
// Script data double escape start state
|
|
//------------------------------------------------------------------
|
|
_stateScriptDataDoubleEscapeStart(cp) {
|
|
if (this.preprocessor.startsWith(unicode_js_1.SEQUENCES.SCRIPT, false) &&
|
|
isScriptDataDoubleEscapeSequenceEnd(this.preprocessor.peek(unicode_js_1.SEQUENCES.SCRIPT.length))) {
|
|
this._emitCodePoint(cp);
|
|
for (let i = 0; i < unicode_js_1.SEQUENCES.SCRIPT.length; i++) {
|
|
this._emitCodePoint(this._consume());
|
|
}
|
|
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
|
|
}
|
|
else if (!this._ensureHibernation()) {
|
|
this.state = State.SCRIPT_DATA_ESCAPED;
|
|
this._stateScriptDataEscaped(cp);
|
|
}
|
|
}
|
|
// Script data double escaped state
|
|
//------------------------------------------------------------------
|
|
_stateScriptDataDoubleEscaped(cp) {
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.HYPHEN_MINUS: {
|
|
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH;
|
|
this._emitChars('-');
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.LESS_THAN_SIGN: {
|
|
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
|
|
this._emitChars('<');
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.NULL: {
|
|
this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
|
|
this._emitChars(unicode_js_1.REPLACEMENT_CHARACTER);
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EOF: {
|
|
this._err(error_codes_js_1.ERR.eofInScriptHtmlCommentLikeText);
|
|
this._emitEOFToken();
|
|
break;
|
|
}
|
|
default: {
|
|
this._emitCodePoint(cp);
|
|
}
|
|
}
|
|
}
|
|
// Script data double escaped dash state
|
|
//------------------------------------------------------------------
|
|
_stateScriptDataDoubleEscapedDash(cp) {
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.HYPHEN_MINUS: {
|
|
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH;
|
|
this._emitChars('-');
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.LESS_THAN_SIGN: {
|
|
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
|
|
this._emitChars('<');
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.NULL: {
|
|
this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
|
|
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
|
|
this._emitChars(unicode_js_1.REPLACEMENT_CHARACTER);
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EOF: {
|
|
this._err(error_codes_js_1.ERR.eofInScriptHtmlCommentLikeText);
|
|
this._emitEOFToken();
|
|
break;
|
|
}
|
|
default: {
|
|
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
|
|
this._emitCodePoint(cp);
|
|
}
|
|
}
|
|
}
|
|
// Script data double escaped dash dash state
|
|
//------------------------------------------------------------------
|
|
_stateScriptDataDoubleEscapedDashDash(cp) {
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.HYPHEN_MINUS: {
|
|
this._emitChars('-');
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.LESS_THAN_SIGN: {
|
|
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
|
|
this._emitChars('<');
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
|
|
this.state = State.SCRIPT_DATA;
|
|
this._emitChars('>');
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.NULL: {
|
|
this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
|
|
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
|
|
this._emitChars(unicode_js_1.REPLACEMENT_CHARACTER);
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EOF: {
|
|
this._err(error_codes_js_1.ERR.eofInScriptHtmlCommentLikeText);
|
|
this._emitEOFToken();
|
|
break;
|
|
}
|
|
default: {
|
|
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
|
|
this._emitCodePoint(cp);
|
|
}
|
|
}
|
|
}
|
|
// Script data double escaped less-than sign state
|
|
//------------------------------------------------------------------
|
|
_stateScriptDataDoubleEscapedLessThanSign(cp) {
|
|
if (cp === unicode_js_1.CODE_POINTS.SOLIDUS) {
|
|
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPE_END;
|
|
this._emitChars('/');
|
|
}
|
|
else {
|
|
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
|
|
this._stateScriptDataDoubleEscaped(cp);
|
|
}
|
|
}
|
|
// Script data double escape end state
|
|
//------------------------------------------------------------------
|
|
_stateScriptDataDoubleEscapeEnd(cp) {
|
|
if (this.preprocessor.startsWith(unicode_js_1.SEQUENCES.SCRIPT, false) &&
|
|
isScriptDataDoubleEscapeSequenceEnd(this.preprocessor.peek(unicode_js_1.SEQUENCES.SCRIPT.length))) {
|
|
this._emitCodePoint(cp);
|
|
for (let i = 0; i < unicode_js_1.SEQUENCES.SCRIPT.length; i++) {
|
|
this._emitCodePoint(this._consume());
|
|
}
|
|
this.state = State.SCRIPT_DATA_ESCAPED;
|
|
}
|
|
else if (!this._ensureHibernation()) {
|
|
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
|
|
this._stateScriptDataDoubleEscaped(cp);
|
|
}
|
|
}
|
|
// Before attribute name state
|
|
//------------------------------------------------------------------
|
|
_stateBeforeAttributeName(cp) {
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.SPACE:
|
|
case unicode_js_1.CODE_POINTS.LINE_FEED:
|
|
case unicode_js_1.CODE_POINTS.TABULATION:
|
|
case unicode_js_1.CODE_POINTS.FORM_FEED: {
|
|
// Ignore whitespace
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.SOLIDUS:
|
|
case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN:
|
|
case unicode_js_1.CODE_POINTS.EOF: {
|
|
this.state = State.AFTER_ATTRIBUTE_NAME;
|
|
this._stateAfterAttributeName(cp);
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EQUALS_SIGN: {
|
|
this._err(error_codes_js_1.ERR.unexpectedEqualsSignBeforeAttributeName);
|
|
this._createAttr('=');
|
|
this.state = State.ATTRIBUTE_NAME;
|
|
break;
|
|
}
|
|
default: {
|
|
this._createAttr('');
|
|
this.state = State.ATTRIBUTE_NAME;
|
|
this._stateAttributeName(cp);
|
|
}
|
|
}
|
|
}
|
|
// Attribute name state
|
|
//------------------------------------------------------------------
|
|
_stateAttributeName(cp) {
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.SPACE:
|
|
case unicode_js_1.CODE_POINTS.LINE_FEED:
|
|
case unicode_js_1.CODE_POINTS.TABULATION:
|
|
case unicode_js_1.CODE_POINTS.FORM_FEED:
|
|
case unicode_js_1.CODE_POINTS.SOLIDUS:
|
|
case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN:
|
|
case unicode_js_1.CODE_POINTS.EOF: {
|
|
this._leaveAttrName();
|
|
this.state = State.AFTER_ATTRIBUTE_NAME;
|
|
this._stateAfterAttributeName(cp);
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EQUALS_SIGN: {
|
|
this._leaveAttrName();
|
|
this.state = State.BEFORE_ATTRIBUTE_VALUE;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.QUOTATION_MARK:
|
|
case unicode_js_1.CODE_POINTS.APOSTROPHE:
|
|
case unicode_js_1.CODE_POINTS.LESS_THAN_SIGN: {
|
|
this._err(error_codes_js_1.ERR.unexpectedCharacterInAttributeName);
|
|
this.currentAttr.name += String.fromCodePoint(cp);
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.NULL: {
|
|
this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
|
|
this.currentAttr.name += unicode_js_1.REPLACEMENT_CHARACTER;
|
|
break;
|
|
}
|
|
default: {
|
|
this.currentAttr.name += String.fromCodePoint(isAsciiUpper(cp) ? toAsciiLower(cp) : cp);
|
|
}
|
|
}
|
|
}
|
|
// After attribute name state
|
|
//------------------------------------------------------------------
|
|
_stateAfterAttributeName(cp) {
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.SPACE:
|
|
case unicode_js_1.CODE_POINTS.LINE_FEED:
|
|
case unicode_js_1.CODE_POINTS.TABULATION:
|
|
case unicode_js_1.CODE_POINTS.FORM_FEED: {
|
|
// Ignore whitespace
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.SOLIDUS: {
|
|
this.state = State.SELF_CLOSING_START_TAG;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EQUALS_SIGN: {
|
|
this.state = State.BEFORE_ATTRIBUTE_VALUE;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
|
|
this.state = State.DATA;
|
|
this.emitCurrentTagToken();
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EOF: {
|
|
this._err(error_codes_js_1.ERR.eofInTag);
|
|
this._emitEOFToken();
|
|
break;
|
|
}
|
|
default: {
|
|
this._createAttr('');
|
|
this.state = State.ATTRIBUTE_NAME;
|
|
this._stateAttributeName(cp);
|
|
}
|
|
}
|
|
}
|
|
// Before attribute value state
|
|
//------------------------------------------------------------------
|
|
_stateBeforeAttributeValue(cp) {
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.SPACE:
|
|
case unicode_js_1.CODE_POINTS.LINE_FEED:
|
|
case unicode_js_1.CODE_POINTS.TABULATION:
|
|
case unicode_js_1.CODE_POINTS.FORM_FEED: {
|
|
// Ignore whitespace
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.QUOTATION_MARK: {
|
|
this.state = State.ATTRIBUTE_VALUE_DOUBLE_QUOTED;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.APOSTROPHE: {
|
|
this.state = State.ATTRIBUTE_VALUE_SINGLE_QUOTED;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
|
|
this._err(error_codes_js_1.ERR.missingAttributeValue);
|
|
this.state = State.DATA;
|
|
this.emitCurrentTagToken();
|
|
break;
|
|
}
|
|
default: {
|
|
this.state = State.ATTRIBUTE_VALUE_UNQUOTED;
|
|
this._stateAttributeValueUnquoted(cp);
|
|
}
|
|
}
|
|
}
|
|
// Attribute value (double-quoted) state
|
|
//------------------------------------------------------------------
|
|
_stateAttributeValueDoubleQuoted(cp) {
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.QUOTATION_MARK: {
|
|
this.state = State.AFTER_ATTRIBUTE_VALUE_QUOTED;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.AMPERSAND: {
|
|
this._startCharacterReference();
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.NULL: {
|
|
this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
|
|
this.currentAttr.value += unicode_js_1.REPLACEMENT_CHARACTER;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EOF: {
|
|
this._err(error_codes_js_1.ERR.eofInTag);
|
|
this._emitEOFToken();
|
|
break;
|
|
}
|
|
default: {
|
|
this.currentAttr.value += String.fromCodePoint(cp);
|
|
}
|
|
}
|
|
}
|
|
// Attribute value (single-quoted) state
|
|
//------------------------------------------------------------------
|
|
_stateAttributeValueSingleQuoted(cp) {
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.APOSTROPHE: {
|
|
this.state = State.AFTER_ATTRIBUTE_VALUE_QUOTED;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.AMPERSAND: {
|
|
this._startCharacterReference();
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.NULL: {
|
|
this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
|
|
this.currentAttr.value += unicode_js_1.REPLACEMENT_CHARACTER;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EOF: {
|
|
this._err(error_codes_js_1.ERR.eofInTag);
|
|
this._emitEOFToken();
|
|
break;
|
|
}
|
|
default: {
|
|
this.currentAttr.value += String.fromCodePoint(cp);
|
|
}
|
|
}
|
|
}
|
|
// Attribute value (unquoted) state
|
|
//------------------------------------------------------------------
|
|
_stateAttributeValueUnquoted(cp) {
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.SPACE:
|
|
case unicode_js_1.CODE_POINTS.LINE_FEED:
|
|
case unicode_js_1.CODE_POINTS.TABULATION:
|
|
case unicode_js_1.CODE_POINTS.FORM_FEED: {
|
|
this._leaveAttrValue();
|
|
this.state = State.BEFORE_ATTRIBUTE_NAME;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.AMPERSAND: {
|
|
this._startCharacterReference();
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
|
|
this._leaveAttrValue();
|
|
this.state = State.DATA;
|
|
this.emitCurrentTagToken();
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.NULL: {
|
|
this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
|
|
this.currentAttr.value += unicode_js_1.REPLACEMENT_CHARACTER;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.QUOTATION_MARK:
|
|
case unicode_js_1.CODE_POINTS.APOSTROPHE:
|
|
case unicode_js_1.CODE_POINTS.LESS_THAN_SIGN:
|
|
case unicode_js_1.CODE_POINTS.EQUALS_SIGN:
|
|
case unicode_js_1.CODE_POINTS.GRAVE_ACCENT: {
|
|
this._err(error_codes_js_1.ERR.unexpectedCharacterInUnquotedAttributeValue);
|
|
this.currentAttr.value += String.fromCodePoint(cp);
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EOF: {
|
|
this._err(error_codes_js_1.ERR.eofInTag);
|
|
this._emitEOFToken();
|
|
break;
|
|
}
|
|
default: {
|
|
this.currentAttr.value += String.fromCodePoint(cp);
|
|
}
|
|
}
|
|
}
|
|
// After attribute value (quoted) state
|
|
//------------------------------------------------------------------
|
|
_stateAfterAttributeValueQuoted(cp) {
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.SPACE:
|
|
case unicode_js_1.CODE_POINTS.LINE_FEED:
|
|
case unicode_js_1.CODE_POINTS.TABULATION:
|
|
case unicode_js_1.CODE_POINTS.FORM_FEED: {
|
|
this._leaveAttrValue();
|
|
this.state = State.BEFORE_ATTRIBUTE_NAME;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.SOLIDUS: {
|
|
this._leaveAttrValue();
|
|
this.state = State.SELF_CLOSING_START_TAG;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
|
|
this._leaveAttrValue();
|
|
this.state = State.DATA;
|
|
this.emitCurrentTagToken();
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EOF: {
|
|
this._err(error_codes_js_1.ERR.eofInTag);
|
|
this._emitEOFToken();
|
|
break;
|
|
}
|
|
default: {
|
|
this._err(error_codes_js_1.ERR.missingWhitespaceBetweenAttributes);
|
|
this.state = State.BEFORE_ATTRIBUTE_NAME;
|
|
this._stateBeforeAttributeName(cp);
|
|
}
|
|
}
|
|
}
|
|
// Self-closing start tag state
|
|
//------------------------------------------------------------------
|
|
_stateSelfClosingStartTag(cp) {
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
|
|
const token = this.currentToken;
|
|
token.selfClosing = true;
|
|
this.state = State.DATA;
|
|
this.emitCurrentTagToken();
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EOF: {
|
|
this._err(error_codes_js_1.ERR.eofInTag);
|
|
this._emitEOFToken();
|
|
break;
|
|
}
|
|
default: {
|
|
this._err(error_codes_js_1.ERR.unexpectedSolidusInTag);
|
|
this.state = State.BEFORE_ATTRIBUTE_NAME;
|
|
this._stateBeforeAttributeName(cp);
|
|
}
|
|
}
|
|
}
|
|
// Bogus comment state
|
|
//------------------------------------------------------------------
|
|
_stateBogusComment(cp) {
|
|
const token = this.currentToken;
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
|
|
this.state = State.DATA;
|
|
this.emitCurrentComment(token);
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EOF: {
|
|
this.emitCurrentComment(token);
|
|
this._emitEOFToken();
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.NULL: {
|
|
this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
|
|
token.data += unicode_js_1.REPLACEMENT_CHARACTER;
|
|
break;
|
|
}
|
|
default: {
|
|
token.data += String.fromCodePoint(cp);
|
|
}
|
|
}
|
|
}
|
|
// Markup declaration open state
|
|
//------------------------------------------------------------------
|
|
_stateMarkupDeclarationOpen(cp) {
|
|
if (this._consumeSequenceIfMatch(unicode_js_1.SEQUENCES.DASH_DASH, true)) {
|
|
this._createCommentToken(unicode_js_1.SEQUENCES.DASH_DASH.length + 1);
|
|
this.state = State.COMMENT_START;
|
|
}
|
|
else if (this._consumeSequenceIfMatch(unicode_js_1.SEQUENCES.DOCTYPE, false)) {
|
|
// NOTE: Doctypes tokens are created without fixed offsets. We keep track of the moment a doctype *might* start here.
|
|
this.currentLocation = this.getCurrentLocation(unicode_js_1.SEQUENCES.DOCTYPE.length + 1);
|
|
this.state = State.DOCTYPE;
|
|
}
|
|
else if (this._consumeSequenceIfMatch(unicode_js_1.SEQUENCES.CDATA_START, true)) {
|
|
if (this.inForeignNode) {
|
|
this.state = State.CDATA_SECTION;
|
|
}
|
|
else {
|
|
this._err(error_codes_js_1.ERR.cdataInHtmlContent);
|
|
this._createCommentToken(unicode_js_1.SEQUENCES.CDATA_START.length + 1);
|
|
this.currentToken.data = '[CDATA[';
|
|
this.state = State.BOGUS_COMMENT;
|
|
}
|
|
}
|
|
//NOTE: Sequence lookups can be abrupted by hibernation. In that case, lookup
|
|
//results are no longer valid and we will need to start over.
|
|
else if (!this._ensureHibernation()) {
|
|
this._err(error_codes_js_1.ERR.incorrectlyOpenedComment);
|
|
this._createCommentToken(2);
|
|
this.state = State.BOGUS_COMMENT;
|
|
this._stateBogusComment(cp);
|
|
}
|
|
}
|
|
// Comment start state
|
|
//------------------------------------------------------------------
|
|
_stateCommentStart(cp) {
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.HYPHEN_MINUS: {
|
|
this.state = State.COMMENT_START_DASH;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
|
|
this._err(error_codes_js_1.ERR.abruptClosingOfEmptyComment);
|
|
this.state = State.DATA;
|
|
const token = this.currentToken;
|
|
this.emitCurrentComment(token);
|
|
break;
|
|
}
|
|
default: {
|
|
this.state = State.COMMENT;
|
|
this._stateComment(cp);
|
|
}
|
|
}
|
|
}
|
|
// Comment start dash state
|
|
//------------------------------------------------------------------
|
|
_stateCommentStartDash(cp) {
|
|
const token = this.currentToken;
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.HYPHEN_MINUS: {
|
|
this.state = State.COMMENT_END;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
|
|
this._err(error_codes_js_1.ERR.abruptClosingOfEmptyComment);
|
|
this.state = State.DATA;
|
|
this.emitCurrentComment(token);
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EOF: {
|
|
this._err(error_codes_js_1.ERR.eofInComment);
|
|
this.emitCurrentComment(token);
|
|
this._emitEOFToken();
|
|
break;
|
|
}
|
|
default: {
|
|
token.data += '-';
|
|
this.state = State.COMMENT;
|
|
this._stateComment(cp);
|
|
}
|
|
}
|
|
}
|
|
// Comment state
|
|
//------------------------------------------------------------------
|
|
_stateComment(cp) {
|
|
const token = this.currentToken;
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.HYPHEN_MINUS: {
|
|
this.state = State.COMMENT_END_DASH;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.LESS_THAN_SIGN: {
|
|
token.data += '<';
|
|
this.state = State.COMMENT_LESS_THAN_SIGN;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.NULL: {
|
|
this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
|
|
token.data += unicode_js_1.REPLACEMENT_CHARACTER;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EOF: {
|
|
this._err(error_codes_js_1.ERR.eofInComment);
|
|
this.emitCurrentComment(token);
|
|
this._emitEOFToken();
|
|
break;
|
|
}
|
|
default: {
|
|
token.data += String.fromCodePoint(cp);
|
|
}
|
|
}
|
|
}
|
|
// Comment less-than sign state
|
|
//------------------------------------------------------------------
|
|
_stateCommentLessThanSign(cp) {
|
|
const token = this.currentToken;
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.EXCLAMATION_MARK: {
|
|
token.data += '!';
|
|
this.state = State.COMMENT_LESS_THAN_SIGN_BANG;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.LESS_THAN_SIGN: {
|
|
token.data += '<';
|
|
break;
|
|
}
|
|
default: {
|
|
this.state = State.COMMENT;
|
|
this._stateComment(cp);
|
|
}
|
|
}
|
|
}
|
|
// Comment less-than sign bang state
|
|
//------------------------------------------------------------------
|
|
_stateCommentLessThanSignBang(cp) {
|
|
if (cp === unicode_js_1.CODE_POINTS.HYPHEN_MINUS) {
|
|
this.state = State.COMMENT_LESS_THAN_SIGN_BANG_DASH;
|
|
}
|
|
else {
|
|
this.state = State.COMMENT;
|
|
this._stateComment(cp);
|
|
}
|
|
}
|
|
// Comment less-than sign bang dash state
|
|
//------------------------------------------------------------------
|
|
_stateCommentLessThanSignBangDash(cp) {
|
|
if (cp === unicode_js_1.CODE_POINTS.HYPHEN_MINUS) {
|
|
this.state = State.COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH;
|
|
}
|
|
else {
|
|
this.state = State.COMMENT_END_DASH;
|
|
this._stateCommentEndDash(cp);
|
|
}
|
|
}
|
|
// Comment less-than sign bang dash dash state
|
|
//------------------------------------------------------------------
|
|
_stateCommentLessThanSignBangDashDash(cp) {
|
|
if (cp !== unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN && cp !== unicode_js_1.CODE_POINTS.EOF) {
|
|
this._err(error_codes_js_1.ERR.nestedComment);
|
|
}
|
|
this.state = State.COMMENT_END;
|
|
this._stateCommentEnd(cp);
|
|
}
|
|
// Comment end dash state
|
|
//------------------------------------------------------------------
|
|
_stateCommentEndDash(cp) {
|
|
const token = this.currentToken;
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.HYPHEN_MINUS: {
|
|
this.state = State.COMMENT_END;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EOF: {
|
|
this._err(error_codes_js_1.ERR.eofInComment);
|
|
this.emitCurrentComment(token);
|
|
this._emitEOFToken();
|
|
break;
|
|
}
|
|
default: {
|
|
token.data += '-';
|
|
this.state = State.COMMENT;
|
|
this._stateComment(cp);
|
|
}
|
|
}
|
|
}
|
|
// Comment end state
|
|
//------------------------------------------------------------------
|
|
_stateCommentEnd(cp) {
|
|
const token = this.currentToken;
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
|
|
this.state = State.DATA;
|
|
this.emitCurrentComment(token);
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EXCLAMATION_MARK: {
|
|
this.state = State.COMMENT_END_BANG;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.HYPHEN_MINUS: {
|
|
token.data += '-';
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EOF: {
|
|
this._err(error_codes_js_1.ERR.eofInComment);
|
|
this.emitCurrentComment(token);
|
|
this._emitEOFToken();
|
|
break;
|
|
}
|
|
default: {
|
|
token.data += '--';
|
|
this.state = State.COMMENT;
|
|
this._stateComment(cp);
|
|
}
|
|
}
|
|
}
|
|
// Comment end bang state
|
|
//------------------------------------------------------------------
|
|
_stateCommentEndBang(cp) {
|
|
const token = this.currentToken;
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.HYPHEN_MINUS: {
|
|
token.data += '--!';
|
|
this.state = State.COMMENT_END_DASH;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
|
|
this._err(error_codes_js_1.ERR.incorrectlyClosedComment);
|
|
this.state = State.DATA;
|
|
this.emitCurrentComment(token);
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EOF: {
|
|
this._err(error_codes_js_1.ERR.eofInComment);
|
|
this.emitCurrentComment(token);
|
|
this._emitEOFToken();
|
|
break;
|
|
}
|
|
default: {
|
|
token.data += '--!';
|
|
this.state = State.COMMENT;
|
|
this._stateComment(cp);
|
|
}
|
|
}
|
|
}
|
|
// DOCTYPE state
|
|
//------------------------------------------------------------------
|
|
_stateDoctype(cp) {
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.SPACE:
|
|
case unicode_js_1.CODE_POINTS.LINE_FEED:
|
|
case unicode_js_1.CODE_POINTS.TABULATION:
|
|
case unicode_js_1.CODE_POINTS.FORM_FEED: {
|
|
this.state = State.BEFORE_DOCTYPE_NAME;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
|
|
this.state = State.BEFORE_DOCTYPE_NAME;
|
|
this._stateBeforeDoctypeName(cp);
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EOF: {
|
|
this._err(error_codes_js_1.ERR.eofInDoctype);
|
|
this._createDoctypeToken(null);
|
|
const token = this.currentToken;
|
|
token.forceQuirks = true;
|
|
this.emitCurrentDoctype(token);
|
|
this._emitEOFToken();
|
|
break;
|
|
}
|
|
default: {
|
|
this._err(error_codes_js_1.ERR.missingWhitespaceBeforeDoctypeName);
|
|
this.state = State.BEFORE_DOCTYPE_NAME;
|
|
this._stateBeforeDoctypeName(cp);
|
|
}
|
|
}
|
|
}
|
|
// Before DOCTYPE name state
|
|
//------------------------------------------------------------------
|
|
_stateBeforeDoctypeName(cp) {
|
|
if (isAsciiUpper(cp)) {
|
|
this._createDoctypeToken(String.fromCharCode(toAsciiLower(cp)));
|
|
this.state = State.DOCTYPE_NAME;
|
|
}
|
|
else
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.SPACE:
|
|
case unicode_js_1.CODE_POINTS.LINE_FEED:
|
|
case unicode_js_1.CODE_POINTS.TABULATION:
|
|
case unicode_js_1.CODE_POINTS.FORM_FEED: {
|
|
// Ignore whitespace
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.NULL: {
|
|
this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
|
|
this._createDoctypeToken(unicode_js_1.REPLACEMENT_CHARACTER);
|
|
this.state = State.DOCTYPE_NAME;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
|
|
this._err(error_codes_js_1.ERR.missingDoctypeName);
|
|
this._createDoctypeToken(null);
|
|
const token = this.currentToken;
|
|
token.forceQuirks = true;
|
|
this.emitCurrentDoctype(token);
|
|
this.state = State.DATA;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EOF: {
|
|
this._err(error_codes_js_1.ERR.eofInDoctype);
|
|
this._createDoctypeToken(null);
|
|
const token = this.currentToken;
|
|
token.forceQuirks = true;
|
|
this.emitCurrentDoctype(token);
|
|
this._emitEOFToken();
|
|
break;
|
|
}
|
|
default: {
|
|
this._createDoctypeToken(String.fromCodePoint(cp));
|
|
this.state = State.DOCTYPE_NAME;
|
|
}
|
|
}
|
|
}
|
|
// DOCTYPE name state
|
|
//------------------------------------------------------------------
|
|
_stateDoctypeName(cp) {
|
|
const token = this.currentToken;
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.SPACE:
|
|
case unicode_js_1.CODE_POINTS.LINE_FEED:
|
|
case unicode_js_1.CODE_POINTS.TABULATION:
|
|
case unicode_js_1.CODE_POINTS.FORM_FEED: {
|
|
this.state = State.AFTER_DOCTYPE_NAME;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
|
|
this.state = State.DATA;
|
|
this.emitCurrentDoctype(token);
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.NULL: {
|
|
this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
|
|
token.name += unicode_js_1.REPLACEMENT_CHARACTER;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EOF: {
|
|
this._err(error_codes_js_1.ERR.eofInDoctype);
|
|
token.forceQuirks = true;
|
|
this.emitCurrentDoctype(token);
|
|
this._emitEOFToken();
|
|
break;
|
|
}
|
|
default: {
|
|
token.name += String.fromCodePoint(isAsciiUpper(cp) ? toAsciiLower(cp) : cp);
|
|
}
|
|
}
|
|
}
|
|
// After DOCTYPE name state
|
|
//------------------------------------------------------------------
|
|
_stateAfterDoctypeName(cp) {
|
|
const token = this.currentToken;
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.SPACE:
|
|
case unicode_js_1.CODE_POINTS.LINE_FEED:
|
|
case unicode_js_1.CODE_POINTS.TABULATION:
|
|
case unicode_js_1.CODE_POINTS.FORM_FEED: {
|
|
// Ignore whitespace
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
|
|
this.state = State.DATA;
|
|
this.emitCurrentDoctype(token);
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EOF: {
|
|
this._err(error_codes_js_1.ERR.eofInDoctype);
|
|
token.forceQuirks = true;
|
|
this.emitCurrentDoctype(token);
|
|
this._emitEOFToken();
|
|
break;
|
|
}
|
|
default: {
|
|
if (this._consumeSequenceIfMatch(unicode_js_1.SEQUENCES.PUBLIC, false)) {
|
|
this.state = State.AFTER_DOCTYPE_PUBLIC_KEYWORD;
|
|
}
|
|
else if (this._consumeSequenceIfMatch(unicode_js_1.SEQUENCES.SYSTEM, false)) {
|
|
this.state = State.AFTER_DOCTYPE_SYSTEM_KEYWORD;
|
|
}
|
|
//NOTE: sequence lookup can be abrupted by hibernation. In that case lookup
|
|
//results are no longer valid and we will need to start over.
|
|
else if (!this._ensureHibernation()) {
|
|
this._err(error_codes_js_1.ERR.invalidCharacterSequenceAfterDoctypeName);
|
|
token.forceQuirks = true;
|
|
this.state = State.BOGUS_DOCTYPE;
|
|
this._stateBogusDoctype(cp);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
// After DOCTYPE public keyword state
|
|
//------------------------------------------------------------------
|
|
_stateAfterDoctypePublicKeyword(cp) {
|
|
const token = this.currentToken;
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.SPACE:
|
|
case unicode_js_1.CODE_POINTS.LINE_FEED:
|
|
case unicode_js_1.CODE_POINTS.TABULATION:
|
|
case unicode_js_1.CODE_POINTS.FORM_FEED: {
|
|
this.state = State.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.QUOTATION_MARK: {
|
|
this._err(error_codes_js_1.ERR.missingWhitespaceAfterDoctypePublicKeyword);
|
|
token.publicId = '';
|
|
this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.APOSTROPHE: {
|
|
this._err(error_codes_js_1.ERR.missingWhitespaceAfterDoctypePublicKeyword);
|
|
token.publicId = '';
|
|
this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
|
|
this._err(error_codes_js_1.ERR.missingDoctypePublicIdentifier);
|
|
token.forceQuirks = true;
|
|
this.state = State.DATA;
|
|
this.emitCurrentDoctype(token);
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EOF: {
|
|
this._err(error_codes_js_1.ERR.eofInDoctype);
|
|
token.forceQuirks = true;
|
|
this.emitCurrentDoctype(token);
|
|
this._emitEOFToken();
|
|
break;
|
|
}
|
|
default: {
|
|
this._err(error_codes_js_1.ERR.missingQuoteBeforeDoctypePublicIdentifier);
|
|
token.forceQuirks = true;
|
|
this.state = State.BOGUS_DOCTYPE;
|
|
this._stateBogusDoctype(cp);
|
|
}
|
|
}
|
|
}
|
|
// Before DOCTYPE public identifier state
|
|
//------------------------------------------------------------------
|
|
_stateBeforeDoctypePublicIdentifier(cp) {
|
|
const token = this.currentToken;
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.SPACE:
|
|
case unicode_js_1.CODE_POINTS.LINE_FEED:
|
|
case unicode_js_1.CODE_POINTS.TABULATION:
|
|
case unicode_js_1.CODE_POINTS.FORM_FEED: {
|
|
// Ignore whitespace
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.QUOTATION_MARK: {
|
|
token.publicId = '';
|
|
this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.APOSTROPHE: {
|
|
token.publicId = '';
|
|
this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
|
|
this._err(error_codes_js_1.ERR.missingDoctypePublicIdentifier);
|
|
token.forceQuirks = true;
|
|
this.state = State.DATA;
|
|
this.emitCurrentDoctype(token);
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EOF: {
|
|
this._err(error_codes_js_1.ERR.eofInDoctype);
|
|
token.forceQuirks = true;
|
|
this.emitCurrentDoctype(token);
|
|
this._emitEOFToken();
|
|
break;
|
|
}
|
|
default: {
|
|
this._err(error_codes_js_1.ERR.missingQuoteBeforeDoctypePublicIdentifier);
|
|
token.forceQuirks = true;
|
|
this.state = State.BOGUS_DOCTYPE;
|
|
this._stateBogusDoctype(cp);
|
|
}
|
|
}
|
|
}
|
|
// DOCTYPE public identifier (double-quoted) state
|
|
//------------------------------------------------------------------
|
|
_stateDoctypePublicIdentifierDoubleQuoted(cp) {
|
|
const token = this.currentToken;
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.QUOTATION_MARK: {
|
|
this.state = State.AFTER_DOCTYPE_PUBLIC_IDENTIFIER;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.NULL: {
|
|
this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
|
|
token.publicId += unicode_js_1.REPLACEMENT_CHARACTER;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
|
|
this._err(error_codes_js_1.ERR.abruptDoctypePublicIdentifier);
|
|
token.forceQuirks = true;
|
|
this.emitCurrentDoctype(token);
|
|
this.state = State.DATA;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EOF: {
|
|
this._err(error_codes_js_1.ERR.eofInDoctype);
|
|
token.forceQuirks = true;
|
|
this.emitCurrentDoctype(token);
|
|
this._emitEOFToken();
|
|
break;
|
|
}
|
|
default: {
|
|
token.publicId += String.fromCodePoint(cp);
|
|
}
|
|
}
|
|
}
|
|
// DOCTYPE public identifier (single-quoted) state
|
|
//------------------------------------------------------------------
|
|
_stateDoctypePublicIdentifierSingleQuoted(cp) {
|
|
const token = this.currentToken;
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.APOSTROPHE: {
|
|
this.state = State.AFTER_DOCTYPE_PUBLIC_IDENTIFIER;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.NULL: {
|
|
this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
|
|
token.publicId += unicode_js_1.REPLACEMENT_CHARACTER;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
|
|
this._err(error_codes_js_1.ERR.abruptDoctypePublicIdentifier);
|
|
token.forceQuirks = true;
|
|
this.emitCurrentDoctype(token);
|
|
this.state = State.DATA;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EOF: {
|
|
this._err(error_codes_js_1.ERR.eofInDoctype);
|
|
token.forceQuirks = true;
|
|
this.emitCurrentDoctype(token);
|
|
this._emitEOFToken();
|
|
break;
|
|
}
|
|
default: {
|
|
token.publicId += String.fromCodePoint(cp);
|
|
}
|
|
}
|
|
}
|
|
// After DOCTYPE public identifier state
|
|
//------------------------------------------------------------------
|
|
_stateAfterDoctypePublicIdentifier(cp) {
|
|
const token = this.currentToken;
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.SPACE:
|
|
case unicode_js_1.CODE_POINTS.LINE_FEED:
|
|
case unicode_js_1.CODE_POINTS.TABULATION:
|
|
case unicode_js_1.CODE_POINTS.FORM_FEED: {
|
|
this.state = State.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
|
|
this.state = State.DATA;
|
|
this.emitCurrentDoctype(token);
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.QUOTATION_MARK: {
|
|
this._err(error_codes_js_1.ERR.missingWhitespaceBetweenDoctypePublicAndSystemIdentifiers);
|
|
token.systemId = '';
|
|
this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.APOSTROPHE: {
|
|
this._err(error_codes_js_1.ERR.missingWhitespaceBetweenDoctypePublicAndSystemIdentifiers);
|
|
token.systemId = '';
|
|
this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EOF: {
|
|
this._err(error_codes_js_1.ERR.eofInDoctype);
|
|
token.forceQuirks = true;
|
|
this.emitCurrentDoctype(token);
|
|
this._emitEOFToken();
|
|
break;
|
|
}
|
|
default: {
|
|
this._err(error_codes_js_1.ERR.missingQuoteBeforeDoctypeSystemIdentifier);
|
|
token.forceQuirks = true;
|
|
this.state = State.BOGUS_DOCTYPE;
|
|
this._stateBogusDoctype(cp);
|
|
}
|
|
}
|
|
}
|
|
// Between DOCTYPE public and system identifiers state
|
|
//------------------------------------------------------------------
|
|
_stateBetweenDoctypePublicAndSystemIdentifiers(cp) {
|
|
const token = this.currentToken;
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.SPACE:
|
|
case unicode_js_1.CODE_POINTS.LINE_FEED:
|
|
case unicode_js_1.CODE_POINTS.TABULATION:
|
|
case unicode_js_1.CODE_POINTS.FORM_FEED: {
|
|
// Ignore whitespace
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
|
|
this.emitCurrentDoctype(token);
|
|
this.state = State.DATA;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.QUOTATION_MARK: {
|
|
token.systemId = '';
|
|
this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.APOSTROPHE: {
|
|
token.systemId = '';
|
|
this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EOF: {
|
|
this._err(error_codes_js_1.ERR.eofInDoctype);
|
|
token.forceQuirks = true;
|
|
this.emitCurrentDoctype(token);
|
|
this._emitEOFToken();
|
|
break;
|
|
}
|
|
default: {
|
|
this._err(error_codes_js_1.ERR.missingQuoteBeforeDoctypeSystemIdentifier);
|
|
token.forceQuirks = true;
|
|
this.state = State.BOGUS_DOCTYPE;
|
|
this._stateBogusDoctype(cp);
|
|
}
|
|
}
|
|
}
|
|
// After DOCTYPE system keyword state
|
|
//------------------------------------------------------------------
|
|
_stateAfterDoctypeSystemKeyword(cp) {
|
|
const token = this.currentToken;
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.SPACE:
|
|
case unicode_js_1.CODE_POINTS.LINE_FEED:
|
|
case unicode_js_1.CODE_POINTS.TABULATION:
|
|
case unicode_js_1.CODE_POINTS.FORM_FEED: {
|
|
this.state = State.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.QUOTATION_MARK: {
|
|
this._err(error_codes_js_1.ERR.missingWhitespaceAfterDoctypeSystemKeyword);
|
|
token.systemId = '';
|
|
this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.APOSTROPHE: {
|
|
this._err(error_codes_js_1.ERR.missingWhitespaceAfterDoctypeSystemKeyword);
|
|
token.systemId = '';
|
|
this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
|
|
this._err(error_codes_js_1.ERR.missingDoctypeSystemIdentifier);
|
|
token.forceQuirks = true;
|
|
this.state = State.DATA;
|
|
this.emitCurrentDoctype(token);
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EOF: {
|
|
this._err(error_codes_js_1.ERR.eofInDoctype);
|
|
token.forceQuirks = true;
|
|
this.emitCurrentDoctype(token);
|
|
this._emitEOFToken();
|
|
break;
|
|
}
|
|
default: {
|
|
this._err(error_codes_js_1.ERR.missingQuoteBeforeDoctypeSystemIdentifier);
|
|
token.forceQuirks = true;
|
|
this.state = State.BOGUS_DOCTYPE;
|
|
this._stateBogusDoctype(cp);
|
|
}
|
|
}
|
|
}
|
|
// Before DOCTYPE system identifier state
|
|
//------------------------------------------------------------------
|
|
_stateBeforeDoctypeSystemIdentifier(cp) {
|
|
const token = this.currentToken;
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.SPACE:
|
|
case unicode_js_1.CODE_POINTS.LINE_FEED:
|
|
case unicode_js_1.CODE_POINTS.TABULATION:
|
|
case unicode_js_1.CODE_POINTS.FORM_FEED: {
|
|
// Ignore whitespace
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.QUOTATION_MARK: {
|
|
token.systemId = '';
|
|
this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.APOSTROPHE: {
|
|
token.systemId = '';
|
|
this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
|
|
this._err(error_codes_js_1.ERR.missingDoctypeSystemIdentifier);
|
|
token.forceQuirks = true;
|
|
this.state = State.DATA;
|
|
this.emitCurrentDoctype(token);
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EOF: {
|
|
this._err(error_codes_js_1.ERR.eofInDoctype);
|
|
token.forceQuirks = true;
|
|
this.emitCurrentDoctype(token);
|
|
this._emitEOFToken();
|
|
break;
|
|
}
|
|
default: {
|
|
this._err(error_codes_js_1.ERR.missingQuoteBeforeDoctypeSystemIdentifier);
|
|
token.forceQuirks = true;
|
|
this.state = State.BOGUS_DOCTYPE;
|
|
this._stateBogusDoctype(cp);
|
|
}
|
|
}
|
|
}
|
|
// DOCTYPE system identifier (double-quoted) state
|
|
//------------------------------------------------------------------
|
|
_stateDoctypeSystemIdentifierDoubleQuoted(cp) {
|
|
const token = this.currentToken;
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.QUOTATION_MARK: {
|
|
this.state = State.AFTER_DOCTYPE_SYSTEM_IDENTIFIER;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.NULL: {
|
|
this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
|
|
token.systemId += unicode_js_1.REPLACEMENT_CHARACTER;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
|
|
this._err(error_codes_js_1.ERR.abruptDoctypeSystemIdentifier);
|
|
token.forceQuirks = true;
|
|
this.emitCurrentDoctype(token);
|
|
this.state = State.DATA;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EOF: {
|
|
this._err(error_codes_js_1.ERR.eofInDoctype);
|
|
token.forceQuirks = true;
|
|
this.emitCurrentDoctype(token);
|
|
this._emitEOFToken();
|
|
break;
|
|
}
|
|
default: {
|
|
token.systemId += String.fromCodePoint(cp);
|
|
}
|
|
}
|
|
}
|
|
// DOCTYPE system identifier (single-quoted) state
|
|
//------------------------------------------------------------------
|
|
_stateDoctypeSystemIdentifierSingleQuoted(cp) {
|
|
const token = this.currentToken;
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.APOSTROPHE: {
|
|
this.state = State.AFTER_DOCTYPE_SYSTEM_IDENTIFIER;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.NULL: {
|
|
this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
|
|
token.systemId += unicode_js_1.REPLACEMENT_CHARACTER;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
|
|
this._err(error_codes_js_1.ERR.abruptDoctypeSystemIdentifier);
|
|
token.forceQuirks = true;
|
|
this.emitCurrentDoctype(token);
|
|
this.state = State.DATA;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EOF: {
|
|
this._err(error_codes_js_1.ERR.eofInDoctype);
|
|
token.forceQuirks = true;
|
|
this.emitCurrentDoctype(token);
|
|
this._emitEOFToken();
|
|
break;
|
|
}
|
|
default: {
|
|
token.systemId += String.fromCodePoint(cp);
|
|
}
|
|
}
|
|
}
|
|
// After DOCTYPE system identifier state
|
|
//------------------------------------------------------------------
|
|
_stateAfterDoctypeSystemIdentifier(cp) {
|
|
const token = this.currentToken;
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.SPACE:
|
|
case unicode_js_1.CODE_POINTS.LINE_FEED:
|
|
case unicode_js_1.CODE_POINTS.TABULATION:
|
|
case unicode_js_1.CODE_POINTS.FORM_FEED: {
|
|
// Ignore whitespace
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
|
|
this.emitCurrentDoctype(token);
|
|
this.state = State.DATA;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EOF: {
|
|
this._err(error_codes_js_1.ERR.eofInDoctype);
|
|
token.forceQuirks = true;
|
|
this.emitCurrentDoctype(token);
|
|
this._emitEOFToken();
|
|
break;
|
|
}
|
|
default: {
|
|
this._err(error_codes_js_1.ERR.unexpectedCharacterAfterDoctypeSystemIdentifier);
|
|
this.state = State.BOGUS_DOCTYPE;
|
|
this._stateBogusDoctype(cp);
|
|
}
|
|
}
|
|
}
|
|
// Bogus DOCTYPE state
|
|
//------------------------------------------------------------------
|
|
_stateBogusDoctype(cp) {
|
|
const token = this.currentToken;
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
|
|
this.emitCurrentDoctype(token);
|
|
this.state = State.DATA;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.NULL: {
|
|
this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EOF: {
|
|
this.emitCurrentDoctype(token);
|
|
this._emitEOFToken();
|
|
break;
|
|
}
|
|
default:
|
|
// Do nothing
|
|
}
|
|
}
|
|
// CDATA section state
|
|
//------------------------------------------------------------------
|
|
_stateCdataSection(cp) {
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.RIGHT_SQUARE_BRACKET: {
|
|
this.state = State.CDATA_SECTION_BRACKET;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.EOF: {
|
|
this._err(error_codes_js_1.ERR.eofInCdata);
|
|
this._emitEOFToken();
|
|
break;
|
|
}
|
|
default: {
|
|
this._emitCodePoint(cp);
|
|
}
|
|
}
|
|
}
|
|
// CDATA section bracket state
|
|
//------------------------------------------------------------------
|
|
_stateCdataSectionBracket(cp) {
|
|
if (cp === unicode_js_1.CODE_POINTS.RIGHT_SQUARE_BRACKET) {
|
|
this.state = State.CDATA_SECTION_END;
|
|
}
|
|
else {
|
|
this._emitChars(']');
|
|
this.state = State.CDATA_SECTION;
|
|
this._stateCdataSection(cp);
|
|
}
|
|
}
|
|
// CDATA section end state
|
|
//------------------------------------------------------------------
|
|
_stateCdataSectionEnd(cp) {
|
|
switch (cp) {
|
|
case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
|
|
this.state = State.DATA;
|
|
break;
|
|
}
|
|
case unicode_js_1.CODE_POINTS.RIGHT_SQUARE_BRACKET: {
|
|
this._emitChars(']');
|
|
break;
|
|
}
|
|
default: {
|
|
this._emitChars(']]');
|
|
this.state = State.CDATA_SECTION;
|
|
this._stateCdataSection(cp);
|
|
}
|
|
}
|
|
}
|
|
// Character reference state
|
|
//------------------------------------------------------------------
|
|
_stateCharacterReference() {
|
|
let length = this.entityDecoder.write(this.preprocessor.html, this.preprocessor.pos);
|
|
if (length < 0) {
|
|
if (this.preprocessor.lastChunkWritten) {
|
|
length = this.entityDecoder.end();
|
|
}
|
|
else {
|
|
// Wait for the rest of the entity.
|
|
this.active = false;
|
|
// Mark the entire buffer as read.
|
|
this.preprocessor.pos = this.preprocessor.html.length - 1;
|
|
this.consumedAfterSnapshot = 0;
|
|
this.preprocessor.endOfChunkHit = true;
|
|
return;
|
|
}
|
|
}
|
|
if (length === 0) {
|
|
// This was not a valid entity. Go back to the beginning, and
|
|
// figure out what to do.
|
|
this.preprocessor.pos = this.entityStartPos;
|
|
this._flushCodePointConsumedAsCharacterReference(unicode_js_1.CODE_POINTS.AMPERSAND);
|
|
this.state =
|
|
!this._isCharacterReferenceInAttribute() && isAsciiAlphaNumeric(this.preprocessor.peek(1))
|
|
? State.AMBIGUOUS_AMPERSAND
|
|
: this.returnState;
|
|
}
|
|
else {
|
|
// We successfully parsed an entity. Switch to the return state.
|
|
this.state = this.returnState;
|
|
}
|
|
}
|
|
// Ambiguous ampersand state
|
|
//------------------------------------------------------------------
|
|
_stateAmbiguousAmpersand(cp) {
|
|
if (isAsciiAlphaNumeric(cp)) {
|
|
this._flushCodePointConsumedAsCharacterReference(cp);
|
|
}
|
|
else {
|
|
if (cp === unicode_js_1.CODE_POINTS.SEMICOLON) {
|
|
this._err(error_codes_js_1.ERR.unknownNamedCharacterReference);
|
|
}
|
|
this.state = this.returnState;
|
|
this._callState(cp);
|
|
}
|
|
}
|
|
}
|
|
exports.Tokenizer = Tokenizer;
|