Bhargava 6063bd1724 Help Project:
1. Initial commit - boilerplate code and a proof of concept (POC) that realizes
the concept of context-sensitive help
2. Frontend code written in ReactJS
3. Backend code written in Java, Spring Boot Framework
4. Frontend Start:
        pre-requisites : node, npm
	npm run dev  ==> to start the frontend vite server
5. Backend Start:
	pre-requisites : java, mvn
        mvn spring-boot:run  ==> to start the backend server
6. Visit http://localhost:5173/ for basic demo of help, press F1 in textboxes
7. Visit http://localhost:5173/editor and enter "admin123" to add/modify texts.

Happy Coding !!!

Thank you,
Bhargava.
2025-07-04 15:54:13 +05:30

2711 lines
96 KiB
JavaScript

import { Preprocessor } from './preprocessor.js';
import { CODE_POINTS as $, SEQUENCES as $$, REPLACEMENT_CHARACTER, isSurrogate, isUndefinedCodePoint, isControlCodePoint, } from '../common/unicode.js';
import { TokenType, getTokenAttr, } from '../common/token.js';
import { htmlDecodeTree, EntityDecoder, DecodingMode } from 'entities/decode';
import { ERR } from '../common/error-codes.js';
import { TAG_ID, getTagID } from '../common/html.js';
//States
// Tokenizer states, numbered by their position in the list below.
// The object is a two-way map: State.DATA === 0 and State[0] === "DATA".
var State;
(function (State) {
    const orderedNames = [
        "DATA",
        "RCDATA",
        "RAWTEXT",
        "SCRIPT_DATA",
        "PLAINTEXT",
        "TAG_OPEN",
        "END_TAG_OPEN",
        "TAG_NAME",
        "RCDATA_LESS_THAN_SIGN",
        "RCDATA_END_TAG_OPEN",
        "RCDATA_END_TAG_NAME",
        "RAWTEXT_LESS_THAN_SIGN",
        "RAWTEXT_END_TAG_OPEN",
        "RAWTEXT_END_TAG_NAME",
        "SCRIPT_DATA_LESS_THAN_SIGN",
        "SCRIPT_DATA_END_TAG_OPEN",
        "SCRIPT_DATA_END_TAG_NAME",
        "SCRIPT_DATA_ESCAPE_START",
        "SCRIPT_DATA_ESCAPE_START_DASH",
        "SCRIPT_DATA_ESCAPED",
        "SCRIPT_DATA_ESCAPED_DASH",
        "SCRIPT_DATA_ESCAPED_DASH_DASH",
        "SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN",
        "SCRIPT_DATA_ESCAPED_END_TAG_OPEN",
        "SCRIPT_DATA_ESCAPED_END_TAG_NAME",
        "SCRIPT_DATA_DOUBLE_ESCAPE_START",
        "SCRIPT_DATA_DOUBLE_ESCAPED",
        "SCRIPT_DATA_DOUBLE_ESCAPED_DASH",
        "SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH",
        "SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN",
        "SCRIPT_DATA_DOUBLE_ESCAPE_END",
        "BEFORE_ATTRIBUTE_NAME",
        "ATTRIBUTE_NAME",
        "AFTER_ATTRIBUTE_NAME",
        "BEFORE_ATTRIBUTE_VALUE",
        "ATTRIBUTE_VALUE_DOUBLE_QUOTED",
        "ATTRIBUTE_VALUE_SINGLE_QUOTED",
        "ATTRIBUTE_VALUE_UNQUOTED",
        "AFTER_ATTRIBUTE_VALUE_QUOTED",
        "SELF_CLOSING_START_TAG",
        "BOGUS_COMMENT",
        "MARKUP_DECLARATION_OPEN",
        "COMMENT_START",
        "COMMENT_START_DASH",
        "COMMENT",
        "COMMENT_LESS_THAN_SIGN",
        "COMMENT_LESS_THAN_SIGN_BANG",
        "COMMENT_LESS_THAN_SIGN_BANG_DASH",
        "COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH",
        "COMMENT_END_DASH",
        "COMMENT_END",
        "COMMENT_END_BANG",
        "DOCTYPE",
        "BEFORE_DOCTYPE_NAME",
        "DOCTYPE_NAME",
        "AFTER_DOCTYPE_NAME",
        "AFTER_DOCTYPE_PUBLIC_KEYWORD",
        "BEFORE_DOCTYPE_PUBLIC_IDENTIFIER",
        "DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED",
        "DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED",
        "AFTER_DOCTYPE_PUBLIC_IDENTIFIER",
        "BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS",
        "AFTER_DOCTYPE_SYSTEM_KEYWORD",
        "BEFORE_DOCTYPE_SYSTEM_IDENTIFIER",
        "DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED",
        "DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED",
        "AFTER_DOCTYPE_SYSTEM_IDENTIFIER",
        "BOGUS_DOCTYPE",
        "CDATA_SECTION",
        "CDATA_SECTION_BRACKET",
        "CDATA_SECTION_END",
        "CHARACTER_REFERENCE",
        "AMBIGUOUS_AMPERSAND",
    ];
    for (let index = 0; index < orderedNames.length; index++) {
        const name = orderedNames[index];
        // Forward (name -> number) and reverse (number -> name) entries.
        State[name] = index;
        State[index] = name;
    }
})(State || (State = {}));
//Tokenizer initial states for different modes
// Public map from mode name to the tokenizer state of the same name.
export const TokenizerMode = (({ DATA, RCDATA, RAWTEXT, SCRIPT_DATA, PLAINTEXT, CDATA_SECTION }) => ({
    DATA,
    RCDATA,
    RAWTEXT,
    SCRIPT_DATA,
    PLAINTEXT,
    CDATA_SECTION,
}))(State);
//Utils
//OPTIMIZATION: these utility functions should not be moved out of this module. V8 Crankshaft will not inline
//these functions if they are located in another module, due to the context switch.
//Always perform an inlining check before modifying these functions ('node --trace-inlining').
/** True when `cp` lies in the inclusive range `$.DIGIT_0`..`$.DIGIT_9`. */
function isAsciiDigit(cp) {
    return $.DIGIT_0 <= cp && cp <= $.DIGIT_9;
}
/** True when `cp` lies in the inclusive range `$.LATIN_CAPITAL_A`..`$.LATIN_CAPITAL_Z`. */
function isAsciiUpper(cp) {
    return $.LATIN_CAPITAL_A <= cp && cp <= $.LATIN_CAPITAL_Z;
}
/** True when `cp` lies in the inclusive range `$.LATIN_SMALL_A`..`$.LATIN_SMALL_Z`. */
function isAsciiLower(cp) {
    return $.LATIN_SMALL_A <= cp && cp <= $.LATIN_SMALL_Z;
}
/** True when `cp` is accepted by `isAsciiLower` or by `isAsciiUpper`. */
function isAsciiLetter(cp) {
    if (isAsciiLower(cp)) {
        return true;
    }
    return isAsciiUpper(cp);
}
/** True when `cp` is accepted by `isAsciiLetter` or by `isAsciiDigit`. */
function isAsciiAlphaNumeric(cp) {
    if (isAsciiLetter(cp)) {
        return true;
    }
    return isAsciiDigit(cp);
}
/**
 * Adds 0x20 to `cp` unconditionally. The function itself performs no range check;
 * the visible caller only passes code points accepted by `isAsciiUpper`.
 */
function toAsciiLower(cp) {
    return cp + 0x20;
}
/** True when `cp` is one of SPACE, LINE_FEED, TABULATION or FORM_FEED. */
function isWhitespace(cp) {
    switch (cp) {
        case $.SPACE:
        case $.LINE_FEED:
        case $.TABULATION:
        case $.FORM_FEED:
            return true;
        default:
            return false;
    }
}
/** True when `cp` is SOLIDUS, GREATER_THAN_SIGN, or accepted by `isWhitespace`. */
function isScriptDataDoubleEscapeSequenceEnd(cp) {
    if (cp === $.SOLIDUS || cp === $.GREATER_THAN_SIGN) {
        return true;
    }
    return isWhitespace(cp);
}
/**
 * Maps the numeric value of a character reference to a parse-error code.
 * The checks run in a fixed order and the first match wins.
 *
 * @param code Numeric value of the character reference.
 * @returns The matching `ERR` code, or `null` when none of the checks match.
 */
function getErrorForNumericCharacterReference(code) {
    if (code === $.NULL) {
        return ERR.nullCharacterReference;
    }
    // 0x10ffff (1114111) is the upper bound tested here.
    if (code > 0x10ffff) {
        return ERR.characterReferenceOutsideUnicodeRange;
    }
    if (isSurrogate(code)) {
        return ERR.surrogateCharacterReference;
    }
    if (isUndefinedCodePoint(code)) {
        return ERR.noncharacterCharacterReference;
    }
    if (isControlCodePoint(code) || code === $.CARRIAGE_RETURN) {
        return ERR.controlCharacterReference;
    }
    return null;
}
//Tokenizer
export class Tokenizer {
constructor(options, handler) {
this.options = options;
this.handler = handler;
this.paused = false;
/** Ensures that the parsing loop isn't run multiple times at once. */
this.inLoop = false;
/**
* Indicates that the current adjusted node exists, is not an element in the HTML namespace,
* and that it is not an integration point for either MathML or HTML.
*
* @see {@link https://html.spec.whatwg.org/multipage/parsing.html#tree-construction}
*/
this.inForeignNode = false;
this.lastStartTagName = '';
this.active = false;
this.state = State.DATA;
this.returnState = State.DATA;
this.entityStartPos = 0;
this.consumedAfterSnapshot = -1;
this.currentCharacterToken = null;
this.currentToken = null;
this.currentAttr = { name: '', value: '' };
this.preprocessor = new Preprocessor(handler);
this.currentLocation = this.getCurrentLocation(-1);
this.entityDecoder = new EntityDecoder(htmlDecodeTree, (cp, consumed) => {
// Note: Set `pos` _before_ flushing, as flushing might drop
// the current chunk and invalidate `entityStartPos`.
this.preprocessor.pos = this.entityStartPos + consumed - 1;
this._flushCodePointConsumedAsCharacterReference(cp);
}, handler.onParseError
? {
missingSemicolonAfterCharacterReference: () => {
this._err(ERR.missingSemicolonAfterCharacterReference, 1);
},
absenceOfDigitsInNumericCharacterReference: (consumed) => {
this._err(ERR.absenceOfDigitsInNumericCharacterReference, this.entityStartPos - this.preprocessor.pos + consumed);
},
validateNumericCharacterReference: (code) => {
const error = getErrorForNumericCharacterReference(code);
if (error)
this._err(error, 1);
},
}
: undefined);
}
//Errors
_err(code, cpOffset = 0) {
var _a, _b;
(_b = (_a = this.handler).onParseError) === null || _b === void 0 ? void 0 : _b.call(_a, this.preprocessor.getError(code, cpOffset));
}
// NOTE: `offset` may never run across line boundaries.
getCurrentLocation(offset) {
if (!this.options.sourceCodeLocationInfo) {
return null;
}
return {
startLine: this.preprocessor.line,
startCol: this.preprocessor.col - offset,
startOffset: this.preprocessor.offset - offset,
endLine: -1,
endCol: -1,
endOffset: -1,
};
}
_runParsingLoop() {
if (this.inLoop)
return;
this.inLoop = true;
while (this.active && !this.paused) {
this.consumedAfterSnapshot = 0;
const cp = this._consume();
if (!this._ensureHibernation()) {
this._callState(cp);
}
}
this.inLoop = false;
}
//API
/** Sets the `paused` flag; `_runParsingLoop` tests it before each iteration. */
pause() {
this.paused = true;
}
resume(writeCallback) {
if (!this.paused) {
throw new Error('Parser was already resumed');
}
this.paused = false;
// Necessary for synchronous resume.
if (this.inLoop)
return;
this._runParsingLoop();
if (!this.paused) {
writeCallback === null || writeCallback === void 0 ? void 0 : writeCallback();
}
}
write(chunk, isLastChunk, writeCallback) {
this.active = true;
this.preprocessor.write(chunk, isLastChunk);
this._runParsingLoop();
if (!this.paused) {
writeCallback === null || writeCallback === void 0 ? void 0 : writeCallback();
}
}
/**
 * Marks the tokenizer active, forwards `chunk` to `preprocessor.insertHtmlAtCurrentPos`
 * and runs the parsing loop.
 */
insertHtmlAtCurrentPos(chunk) {
this.active = true;
this.preprocessor.insertHtmlAtCurrentPos(chunk);
this._runParsingLoop();
}
//Hibernation
_ensureHibernation() {
if (this.preprocessor.endOfChunkHit) {
this.preprocessor.retreat(this.consumedAfterSnapshot);
this.consumedAfterSnapshot = 0;
this.active = false;
return true;
}
return false;
}
//Consumption
/**
 * Advances the preprocessor by one step and returns what `advance()` returns.
 * The step is counted in `consumedAfterSnapshot`, which `_ensureHibernation` passes to `retreat`.
 */
_consume() {
this.consumedAfterSnapshot++;
return this.preprocessor.advance();
}
_advanceBy(count) {
this.consumedAfterSnapshot += count;
for (let i = 0; i < count; i++) {
this.preprocessor.advance();
}
}
_consumeSequenceIfMatch(pattern, caseSensitive) {
if (this.preprocessor.startsWith(pattern, caseSensitive)) {
// We will already have consumed one character before calling this method.
this._advanceBy(pattern.length - 1);
return true;
}
return false;
}
//Token creation
_createStartTagToken() {
this.currentToken = {
type: TokenType.START_TAG,
tagName: '',
tagID: TAG_ID.UNKNOWN,
selfClosing: false,
ackSelfClosing: false,
attrs: [],
location: this.getCurrentLocation(1),
};
}
_createEndTagToken() {
this.currentToken = {
type: TokenType.END_TAG,
tagName: '',
tagID: TAG_ID.UNKNOWN,
selfClosing: false,
ackSelfClosing: false,
attrs: [],
location: this.getCurrentLocation(2),
};
}
_createCommentToken(offset) {
this.currentToken = {
type: TokenType.COMMENT,
data: '',
location: this.getCurrentLocation(offset),
};
}
_createDoctypeToken(initialName) {
this.currentToken = {
type: TokenType.DOCTYPE,
name: initialName,
forceQuirks: false,
publicId: null,
systemId: null,
location: this.currentLocation,
};
}
_createCharacterToken(type, chars) {
this.currentCharacterToken = {
type,
chars,
location: this.currentLocation,
};
}
//Tag attributes
_createAttr(attrNameFirstCh) {
this.currentAttr = {
name: attrNameFirstCh,
value: '',
};
this.currentLocation = this.getCurrentLocation(0);
}
_leaveAttrName() {
var _a;
var _b;
const token = this.currentToken;
if (getTokenAttr(token, this.currentAttr.name) === null) {
token.attrs.push(this.currentAttr);
if (token.location && this.currentLocation) {
const attrLocations = ((_a = (_b = token.location).attrs) !== null && _a !== void 0 ? _a : (_b.attrs = Object.create(null)));
attrLocations[this.currentAttr.name] = this.currentLocation;
// Set end location
this._leaveAttrValue();
}
}
else {
this._err(ERR.duplicateAttribute);
}
}
_leaveAttrValue() {
if (this.currentLocation) {
this.currentLocation.endLine = this.preprocessor.line;
this.currentLocation.endCol = this.preprocessor.col;
this.currentLocation.endOffset = this.preprocessor.offset;
}
}
//Token emission
prepareToken(ct) {
this._emitCurrentCharacterToken(ct.location);
this.currentToken = null;
if (ct.location) {
ct.location.endLine = this.preprocessor.line;
ct.location.endCol = this.preprocessor.col + 1;
ct.location.endOffset = this.preprocessor.offset + 1;
}
this.currentLocation = this.getCurrentLocation(-1);
}
emitCurrentTagToken() {
const ct = this.currentToken;
this.prepareToken(ct);
ct.tagID = getTagID(ct.tagName);
if (ct.type === TokenType.START_TAG) {
this.lastStartTagName = ct.tagName;
this.handler.onStartTag(ct);
}
else {
if (ct.attrs.length > 0) {
this._err(ERR.endTagWithAttributes);
}
if (ct.selfClosing) {
this._err(ERR.endTagWithTrailingSolidus);
}
this.handler.onEndTag(ct);
}
this.preprocessor.dropParsedChunk();
}
/**
 * Runs `prepareToken` on the comment token `ct`, passes it to `handler.onComment`,
 * then calls `preprocessor.dropParsedChunk()`.
 */
emitCurrentComment(ct) {
this.prepareToken(ct);
this.handler.onComment(ct);
this.preprocessor.dropParsedChunk();
}
/**
 * Runs `prepareToken` on the doctype token `ct`, passes it to `handler.onDoctype`,
 * then calls `preprocessor.dropParsedChunk()`.
 */
emitCurrentDoctype(ct) {
this.prepareToken(ct);
this.handler.onDoctype(ct);
this.preprocessor.dropParsedChunk();
}
_emitCurrentCharacterToken(nextLocation) {
if (this.currentCharacterToken) {
//NOTE: if we have a pending character token, make it's end location equal to the
//current token's start location.
if (nextLocation && this.currentCharacterToken.location) {
this.currentCharacterToken.location.endLine = nextLocation.startLine;
this.currentCharacterToken.location.endCol = nextLocation.startCol;
this.currentCharacterToken.location.endOffset = nextLocation.startOffset;
}
switch (this.currentCharacterToken.type) {
case TokenType.CHARACTER: {
this.handler.onCharacter(this.currentCharacterToken);
break;
}
case TokenType.NULL_CHARACTER: {
this.handler.onNullCharacter(this.currentCharacterToken);
break;
}
case TokenType.WHITESPACE_CHARACTER: {
this.handler.onWhitespaceCharacter(this.currentCharacterToken);
break;
}
}
this.currentCharacterToken = null;
}
}
_emitEOFToken() {
const location = this.getCurrentLocation(0);
if (location) {
location.endLine = location.startLine;
location.endCol = location.startCol;
location.endOffset = location.startOffset;
}
this._emitCurrentCharacterToken(location);
this.handler.onEof({ type: TokenType.EOF, location });
this.active = false;
}
//Characters emission
//OPTIMIZATION: The specification uses only one type of character token (one token per character).
//This causes a huge memory overhead and a lot of unnecessary parser loops. parse5 uses 3 groups of characters.
//If we have a sequence of characters that belong to the same group, the parser can process it
//as a single solid character token.
//So, there are 3 types of character tokens in parse5:
//1)TokenType.NULL_CHARACTER - \u0000-character sequences (e.g. '\u0000\u0000\u0000')
//2)TokenType.WHITESPACE_CHARACTER - any whitespace/new-line character sequences (e.g. '\n \r\t \f')
//3)TokenType.CHARACTER - any character sequence that doesn't belong to groups 1 and 2 (e.g. 'abcdef1234@@#$%^')
_appendCharToCurrentCharacterToken(type, ch) {
if (this.currentCharacterToken) {
if (this.currentCharacterToken.type === type) {
this.currentCharacterToken.chars += ch;
return;
}
else {
this.currentLocation = this.getCurrentLocation(0);
this._emitCurrentCharacterToken(this.currentLocation);
this.preprocessor.dropParsedChunk();
}
}
this._createCharacterToken(type, ch);
}
_emitCodePoint(cp) {
const type = isWhitespace(cp)
? TokenType.WHITESPACE_CHARACTER
: cp === $.NULL
? TokenType.NULL_CHARACTER
: TokenType.CHARACTER;
this._appendCharToCurrentCharacterToken(type, String.fromCodePoint(cp));
}
//NOTE: used when we emit characters explicitly.
//This is always for non-whitespace and non-null characters, which allows us to avoid additional checks.
/** Appends `ch` as `TokenType.CHARACTER` data; no whitespace/NULL classification is done here. */
_emitChars(ch) {
this._appendCharToCurrentCharacterToken(TokenType.CHARACTER, ch);
}
// Character reference helpers
_startCharacterReference() {
this.returnState = this.state;
this.state = State.CHARACTER_REFERENCE;
this.entityStartPos = this.preprocessor.pos;
this.entityDecoder.startEntity(this._isCharacterReferenceInAttribute() ? DecodingMode.Attribute : DecodingMode.Legacy);
}
_isCharacterReferenceInAttribute() {
return (this.returnState === State.ATTRIBUTE_VALUE_DOUBLE_QUOTED ||
this.returnState === State.ATTRIBUTE_VALUE_SINGLE_QUOTED ||
this.returnState === State.ATTRIBUTE_VALUE_UNQUOTED);
}
_flushCodePointConsumedAsCharacterReference(cp) {
if (this._isCharacterReferenceInAttribute()) {
this.currentAttr.value += String.fromCodePoint(cp);
}
else {
this._emitCodePoint(cp);
}
}
// Calling states this way turns out to be much faster than any other approach.
_callState(cp) {
switch (this.state) {
case State.DATA: {
this._stateData(cp);
break;
}
case State.RCDATA: {
this._stateRcdata(cp);
break;
}
case State.RAWTEXT: {
this._stateRawtext(cp);
break;
}
case State.SCRIPT_DATA: {
this._stateScriptData(cp);
break;
}
case State.PLAINTEXT: {
this._statePlaintext(cp);
break;
}
case State.TAG_OPEN: {
this._stateTagOpen(cp);
break;
}
case State.END_TAG_OPEN: {
this._stateEndTagOpen(cp);
break;
}
case State.TAG_NAME: {
this._stateTagName(cp);
break;
}
case State.RCDATA_LESS_THAN_SIGN: {
this._stateRcdataLessThanSign(cp);
break;
}
case State.RCDATA_END_TAG_OPEN: {
this._stateRcdataEndTagOpen(cp);
break;
}
case State.RCDATA_END_TAG_NAME: {
this._stateRcdataEndTagName(cp);
break;
}
case State.RAWTEXT_LESS_THAN_SIGN: {
this._stateRawtextLessThanSign(cp);
break;
}
case State.RAWTEXT_END_TAG_OPEN: {
this._stateRawtextEndTagOpen(cp);
break;
}
case State.RAWTEXT_END_TAG_NAME: {
this._stateRawtextEndTagName(cp);
break;
}
case State.SCRIPT_DATA_LESS_THAN_SIGN: {
this._stateScriptDataLessThanSign(cp);
break;
}
case State.SCRIPT_DATA_END_TAG_OPEN: {
this._stateScriptDataEndTagOpen(cp);
break;
}
case State.SCRIPT_DATA_END_TAG_NAME: {
this._stateScriptDataEndTagName(cp);
break;
}
case State.SCRIPT_DATA_ESCAPE_START: {
this._stateScriptDataEscapeStart(cp);
break;
}
case State.SCRIPT_DATA_ESCAPE_START_DASH: {
this._stateScriptDataEscapeStartDash(cp);
break;
}
case State.SCRIPT_DATA_ESCAPED: {
this._stateScriptDataEscaped(cp);
break;
}
case State.SCRIPT_DATA_ESCAPED_DASH: {
this._stateScriptDataEscapedDash(cp);
break;
}
case State.SCRIPT_DATA_ESCAPED_DASH_DASH: {
this._stateScriptDataEscapedDashDash(cp);
break;
}
case State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN: {
this._stateScriptDataEscapedLessThanSign(cp);
break;
}
case State.SCRIPT_DATA_ESCAPED_END_TAG_OPEN: {
this._stateScriptDataEscapedEndTagOpen(cp);
break;
}
case State.SCRIPT_DATA_ESCAPED_END_TAG_NAME: {
this._stateScriptDataEscapedEndTagName(cp);
break;
}
case State.SCRIPT_DATA_DOUBLE_ESCAPE_START: {
this._stateScriptDataDoubleEscapeStart(cp);
break;
}
case State.SCRIPT_DATA_DOUBLE_ESCAPED: {
this._stateScriptDataDoubleEscaped(cp);
break;
}
case State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH: {
this._stateScriptDataDoubleEscapedDash(cp);
break;
}
case State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH: {
this._stateScriptDataDoubleEscapedDashDash(cp);
break;
}
case State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN: {
this._stateScriptDataDoubleEscapedLessThanSign(cp);
break;
}
case State.SCRIPT_DATA_DOUBLE_ESCAPE_END: {
this._stateScriptDataDoubleEscapeEnd(cp);
break;
}
case State.BEFORE_ATTRIBUTE_NAME: {
this._stateBeforeAttributeName(cp);
break;
}
case State.ATTRIBUTE_NAME: {
this._stateAttributeName(cp);
break;
}
case State.AFTER_ATTRIBUTE_NAME: {
this._stateAfterAttributeName(cp);
break;
}
case State.BEFORE_ATTRIBUTE_VALUE: {
this._stateBeforeAttributeValue(cp);
break;
}
case State.ATTRIBUTE_VALUE_DOUBLE_QUOTED: {
this._stateAttributeValueDoubleQuoted(cp);
break;
}
case State.ATTRIBUTE_VALUE_SINGLE_QUOTED: {
this._stateAttributeValueSingleQuoted(cp);
break;
}
case State.ATTRIBUTE_VALUE_UNQUOTED: {
this._stateAttributeValueUnquoted(cp);
break;
}
case State.AFTER_ATTRIBUTE_VALUE_QUOTED: {
this._stateAfterAttributeValueQuoted(cp);
break;
}
case State.SELF_CLOSING_START_TAG: {
this._stateSelfClosingStartTag(cp);
break;
}
case State.BOGUS_COMMENT: {
this._stateBogusComment(cp);
break;
}
case State.MARKUP_DECLARATION_OPEN: {
this._stateMarkupDeclarationOpen(cp);
break;
}
case State.COMMENT_START: {
this._stateCommentStart(cp);
break;
}
case State.COMMENT_START_DASH: {
this._stateCommentStartDash(cp);
break;
}
case State.COMMENT: {
this._stateComment(cp);
break;
}
case State.COMMENT_LESS_THAN_SIGN: {
this._stateCommentLessThanSign(cp);
break;
}
case State.COMMENT_LESS_THAN_SIGN_BANG: {
this._stateCommentLessThanSignBang(cp);
break;
}
case State.COMMENT_LESS_THAN_SIGN_BANG_DASH: {
this._stateCommentLessThanSignBangDash(cp);
break;
}
case State.COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH: {
this._stateCommentLessThanSignBangDashDash(cp);
break;
}
case State.COMMENT_END_DASH: {
this._stateCommentEndDash(cp);
break;
}
case State.COMMENT_END: {
this._stateCommentEnd(cp);
break;
}
case State.COMMENT_END_BANG: {
this._stateCommentEndBang(cp);
break;
}
case State.DOCTYPE: {
this._stateDoctype(cp);
break;
}
case State.BEFORE_DOCTYPE_NAME: {
this._stateBeforeDoctypeName(cp);
break;
}
case State.DOCTYPE_NAME: {
this._stateDoctypeName(cp);
break;
}
case State.AFTER_DOCTYPE_NAME: {
this._stateAfterDoctypeName(cp);
break;
}
case State.AFTER_DOCTYPE_PUBLIC_KEYWORD: {
this._stateAfterDoctypePublicKeyword(cp);
break;
}
case State.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER: {
this._stateBeforeDoctypePublicIdentifier(cp);
break;
}
case State.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED: {
this._stateDoctypePublicIdentifierDoubleQuoted(cp);
break;
}
case State.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED: {
this._stateDoctypePublicIdentifierSingleQuoted(cp);
break;
}
case State.AFTER_DOCTYPE_PUBLIC_IDENTIFIER: {
this._stateAfterDoctypePublicIdentifier(cp);
break;
}
case State.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS: {
this._stateBetweenDoctypePublicAndSystemIdentifiers(cp);
break;
}
case State.AFTER_DOCTYPE_SYSTEM_KEYWORD: {
this._stateAfterDoctypeSystemKeyword(cp);
break;
}
case State.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER: {
this._stateBeforeDoctypeSystemIdentifier(cp);
break;
}
case State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED: {
this._stateDoctypeSystemIdentifierDoubleQuoted(cp);
break;
}
case State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED: {
this._stateDoctypeSystemIdentifierSingleQuoted(cp);
break;
}
case State.AFTER_DOCTYPE_SYSTEM_IDENTIFIER: {
this._stateAfterDoctypeSystemIdentifier(cp);
break;
}
case State.BOGUS_DOCTYPE: {
this._stateBogusDoctype(cp);
break;
}
case State.CDATA_SECTION: {
this._stateCdataSection(cp);
break;
}
case State.CDATA_SECTION_BRACKET: {
this._stateCdataSectionBracket(cp);
break;
}
case State.CDATA_SECTION_END: {
this._stateCdataSectionEnd(cp);
break;
}
case State.CHARACTER_REFERENCE: {
this._stateCharacterReference();
break;
}
case State.AMBIGUOUS_AMPERSAND: {
this._stateAmbiguousAmpersand(cp);
break;
}
default: {
throw new Error('Unknown state');
}
}
}
// State machine
// Data state
//------------------------------------------------------------------
_stateData(cp) {
switch (cp) {
case $.LESS_THAN_SIGN: {
this.state = State.TAG_OPEN;
break;
}
case $.AMPERSAND: {
this._startCharacterReference();
break;
}
case $.NULL: {
this._err(ERR.unexpectedNullCharacter);
this._emitCodePoint(cp);
break;
}
case $.EOF: {
this._emitEOFToken();
break;
}
default: {
this._emitCodePoint(cp);
}
}
}
// RCDATA state
//------------------------------------------------------------------
_stateRcdata(cp) {
switch (cp) {
case $.AMPERSAND: {
this._startCharacterReference();
break;
}
case $.LESS_THAN_SIGN: {
this.state = State.RCDATA_LESS_THAN_SIGN;
break;
}
case $.NULL: {
this._err(ERR.unexpectedNullCharacter);
this._emitChars(REPLACEMENT_CHARACTER);
break;
}
case $.EOF: {
this._emitEOFToken();
break;
}
default: {
this._emitCodePoint(cp);
}
}
}
// RAWTEXT state
//------------------------------------------------------------------
_stateRawtext(cp) {
switch (cp) {
case $.LESS_THAN_SIGN: {
this.state = State.RAWTEXT_LESS_THAN_SIGN;
break;
}
case $.NULL: {
this._err(ERR.unexpectedNullCharacter);
this._emitChars(REPLACEMENT_CHARACTER);
break;
}
case $.EOF: {
this._emitEOFToken();
break;
}
default: {
this._emitCodePoint(cp);
}
}
}
// Script data state
//------------------------------------------------------------------
_stateScriptData(cp) {
switch (cp) {
case $.LESS_THAN_SIGN: {
this.state = State.SCRIPT_DATA_LESS_THAN_SIGN;
break;
}
case $.NULL: {
this._err(ERR.unexpectedNullCharacter);
this._emitChars(REPLACEMENT_CHARACTER);
break;
}
case $.EOF: {
this._emitEOFToken();
break;
}
default: {
this._emitCodePoint(cp);
}
}
}
// PLAINTEXT state
//------------------------------------------------------------------
_statePlaintext(cp) {
switch (cp) {
case $.NULL: {
this._err(ERR.unexpectedNullCharacter);
this._emitChars(REPLACEMENT_CHARACTER);
break;
}
case $.EOF: {
this._emitEOFToken();
break;
}
default: {
this._emitCodePoint(cp);
}
}
}
// Tag open state
//------------------------------------------------------------------
_stateTagOpen(cp) {
if (isAsciiLetter(cp)) {
this._createStartTagToken();
this.state = State.TAG_NAME;
this._stateTagName(cp);
}
else
switch (cp) {
case $.EXCLAMATION_MARK: {
this.state = State.MARKUP_DECLARATION_OPEN;
break;
}
case $.SOLIDUS: {
this.state = State.END_TAG_OPEN;
break;
}
case $.QUESTION_MARK: {
this._err(ERR.unexpectedQuestionMarkInsteadOfTagName);
this._createCommentToken(1);
this.state = State.BOGUS_COMMENT;
this._stateBogusComment(cp);
break;
}
case $.EOF: {
this._err(ERR.eofBeforeTagName);
this._emitChars('<');
this._emitEOFToken();
break;
}
default: {
this._err(ERR.invalidFirstCharacterOfTagName);
this._emitChars('<');
this.state = State.DATA;
this._stateData(cp);
}
}
}
// End tag open state
//------------------------------------------------------------------
_stateEndTagOpen(cp) {
if (isAsciiLetter(cp)) {
this._createEndTagToken();
this.state = State.TAG_NAME;
this._stateTagName(cp);
}
else
switch (cp) {
case $.GREATER_THAN_SIGN: {
this._err(ERR.missingEndTagName);
this.state = State.DATA;
break;
}
case $.EOF: {
this._err(ERR.eofBeforeTagName);
this._emitChars('</');
this._emitEOFToken();
break;
}
default: {
this._err(ERR.invalidFirstCharacterOfTagName);
this._createCommentToken(2);
this.state = State.BOGUS_COMMENT;
this._stateBogusComment(cp);
}
}
}
// Tag name state
//------------------------------------------------------------------
_stateTagName(cp) {
const token = this.currentToken;
switch (cp) {
case $.SPACE:
case $.LINE_FEED:
case $.TABULATION:
case $.FORM_FEED: {
this.state = State.BEFORE_ATTRIBUTE_NAME;
break;
}
case $.SOLIDUS: {
this.state = State.SELF_CLOSING_START_TAG;
break;
}
case $.GREATER_THAN_SIGN: {
this.state = State.DATA;
this.emitCurrentTagToken();
break;
}
case $.NULL: {
this._err(ERR.unexpectedNullCharacter);
token.tagName += REPLACEMENT_CHARACTER;
break;
}
case $.EOF: {
this._err(ERR.eofInTag);
this._emitEOFToken();
break;
}
default: {
token.tagName += String.fromCodePoint(isAsciiUpper(cp) ? toAsciiLower(cp) : cp);
}
}
}
// RCDATA less-than sign state
//------------------------------------------------------------------
_stateRcdataLessThanSign(cp) {
if (cp === $.SOLIDUS) {
this.state = State.RCDATA_END_TAG_OPEN;
}
else {
this._emitChars('<');
this.state = State.RCDATA;
this._stateRcdata(cp);
}
}
// RCDATA end tag open state
//------------------------------------------------------------------
_stateRcdataEndTagOpen(cp) {
if (isAsciiLetter(cp)) {
this.state = State.RCDATA_END_TAG_NAME;
this._stateRcdataEndTagName(cp);
}
else {
this._emitChars('</');
this.state = State.RCDATA;
this._stateRcdata(cp);
}
}
handleSpecialEndTag(_cp) {
if (!this.preprocessor.startsWith(this.lastStartTagName, false)) {
return !this._ensureHibernation();
}
this._createEndTagToken();
const token = this.currentToken;
token.tagName = this.lastStartTagName;
const cp = this.preprocessor.peek(this.lastStartTagName.length);
switch (cp) {
case $.SPACE:
case $.LINE_FEED:
case $.TABULATION:
case $.FORM_FEED: {
this._advanceBy(this.lastStartTagName.length);
this.state = State.BEFORE_ATTRIBUTE_NAME;
return false;
}
case $.SOLIDUS: {
this._advanceBy(this.lastStartTagName.length);
this.state = State.SELF_CLOSING_START_TAG;
return false;
}
case $.GREATER_THAN_SIGN: {
this._advanceBy(this.lastStartTagName.length);
this.emitCurrentTagToken();
this.state = State.DATA;
return false;
}
default: {
return !this._ensureHibernation();
}
}
}
// RCDATA end tag name state
//------------------------------------------------------------------
_stateRcdataEndTagName(cp) {
if (this.handleSpecialEndTag(cp)) {
this._emitChars('</');
this.state = State.RCDATA;
this._stateRcdata(cp);
}
}
// RAWTEXT less-than sign state
//------------------------------------------------------------------
_stateRawtextLessThanSign(cp) {
if (cp === $.SOLIDUS) {
this.state = State.RAWTEXT_END_TAG_OPEN;
}
else {
this._emitChars('<');
this.state = State.RAWTEXT;
this._stateRawtext(cp);
}
}
// RAWTEXT end tag open state
//------------------------------------------------------------------
_stateRawtextEndTagOpen(cp) {
if (isAsciiLetter(cp)) {
this.state = State.RAWTEXT_END_TAG_NAME;
this._stateRawtextEndTagName(cp);
}
else {
this._emitChars('</');
this.state = State.RAWTEXT;
this._stateRawtext(cp);
}
}
// RAWTEXT end tag name state
//------------------------------------------------------------------
_stateRawtextEndTagName(cp) {
if (this.handleSpecialEndTag(cp)) {
this._emitChars('</');
this.state = State.RAWTEXT;
this._stateRawtext(cp);
}
}
// Script data less-than sign state
//------------------------------------------------------------------
_stateScriptDataLessThanSign(cp) {
switch (cp) {
case $.SOLIDUS: {
this.state = State.SCRIPT_DATA_END_TAG_OPEN;
break;
}
case $.EXCLAMATION_MARK: {
this.state = State.SCRIPT_DATA_ESCAPE_START;
this._emitChars('<!');
break;
}
default: {
this._emitChars('<');
this.state = State.SCRIPT_DATA;
this._stateScriptData(cp);
}
}
}
// Script data end tag open state
//------------------------------------------------------------------
_stateScriptDataEndTagOpen(cp) {
if (isAsciiLetter(cp)) {
this.state = State.SCRIPT_DATA_END_TAG_NAME;
this._stateScriptDataEndTagName(cp);
}
else {
this._emitChars('</');
this.state = State.SCRIPT_DATA;
this._stateScriptData(cp);
}
}
// Script data end tag name state
//------------------------------------------------------------------
_stateScriptDataEndTagName(cp) {
if (this.handleSpecialEndTag(cp)) {
this._emitChars('</');
this.state = State.SCRIPT_DATA;
this._stateScriptData(cp);
}
}
// Script data escape start state
//------------------------------------------------------------------
_stateScriptDataEscapeStart(cp) {
if (cp === $.HYPHEN_MINUS) {
this.state = State.SCRIPT_DATA_ESCAPE_START_DASH;
this._emitChars('-');
}
else {
this.state = State.SCRIPT_DATA;
this._stateScriptData(cp);
}
}
// Script data escape start dash state
//------------------------------------------------------------------
_stateScriptDataEscapeStartDash(cp) {
if (cp === $.HYPHEN_MINUS) {
this.state = State.SCRIPT_DATA_ESCAPED_DASH_DASH;
this._emitChars('-');
}
else {
this.state = State.SCRIPT_DATA;
this._stateScriptData(cp);
}
}
// Script data escaped state
//------------------------------------------------------------------
_stateScriptDataEscaped(cp) {
switch (cp) {
case $.HYPHEN_MINUS: {
this.state = State.SCRIPT_DATA_ESCAPED_DASH;
this._emitChars('-');
break;
}
case $.LESS_THAN_SIGN: {
this.state = State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
break;
}
case $.NULL: {
this._err(ERR.unexpectedNullCharacter);
this._emitChars(REPLACEMENT_CHARACTER);
break;
}
case $.EOF: {
this._err(ERR.eofInScriptHtmlCommentLikeText);
this._emitEOFToken();
break;
}
default: {
this._emitCodePoint(cp);
}
}
}
// Script data escaped dash state
//------------------------------------------------------------------
_stateScriptDataEscapedDash(cp) {
switch (cp) {
case $.HYPHEN_MINUS: {
this.state = State.SCRIPT_DATA_ESCAPED_DASH_DASH;
this._emitChars('-');
break;
}
case $.LESS_THAN_SIGN: {
this.state = State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
break;
}
case $.NULL: {
this._err(ERR.unexpectedNullCharacter);
this.state = State.SCRIPT_DATA_ESCAPED;
this._emitChars(REPLACEMENT_CHARACTER);
break;
}
case $.EOF: {
this._err(ERR.eofInScriptHtmlCommentLikeText);
this._emitEOFToken();
break;
}
default: {
this.state = State.SCRIPT_DATA_ESCAPED;
this._emitCodePoint(cp);
}
}
}
// Script data escaped dash dash state
//------------------------------------------------------------------
_stateScriptDataEscapedDashDash(cp) {
switch (cp) {
case $.HYPHEN_MINUS: {
this._emitChars('-');
break;
}
case $.LESS_THAN_SIGN: {
this.state = State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
break;
}
case $.GREATER_THAN_SIGN: {
this.state = State.SCRIPT_DATA;
this._emitChars('>');
break;
}
case $.NULL: {
this._err(ERR.unexpectedNullCharacter);
this.state = State.SCRIPT_DATA_ESCAPED;
this._emitChars(REPLACEMENT_CHARACTER);
break;
}
case $.EOF: {
this._err(ERR.eofInScriptHtmlCommentLikeText);
this._emitEOFToken();
break;
}
default: {
this.state = State.SCRIPT_DATA_ESCAPED;
this._emitCodePoint(cp);
}
}
}
// Script data escaped less-than sign state
//------------------------------------------------------------------
_stateScriptDataEscapedLessThanSign(cp) {
if (cp === $.SOLIDUS) {
this.state = State.SCRIPT_DATA_ESCAPED_END_TAG_OPEN;
}
else if (isAsciiLetter(cp)) {
this._emitChars('<');
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPE_START;
this._stateScriptDataDoubleEscapeStart(cp);
}
else {
this._emitChars('<');
this.state = State.SCRIPT_DATA_ESCAPED;
this._stateScriptDataEscaped(cp);
}
}
// Script data escaped end tag open state
//------------------------------------------------------------------
_stateScriptDataEscapedEndTagOpen(cp) {
if (isAsciiLetter(cp)) {
this.state = State.SCRIPT_DATA_ESCAPED_END_TAG_NAME;
this._stateScriptDataEscapedEndTagName(cp);
}
else {
this._emitChars('</');
this.state = State.SCRIPT_DATA_ESCAPED;
this._stateScriptDataEscaped(cp);
}
}
// Script data escaped end tag name state
//------------------------------------------------------------------
_stateScriptDataEscapedEndTagName(cp) {
if (this.handleSpecialEndTag(cp)) {
this._emitChars('</');
this.state = State.SCRIPT_DATA_ESCAPED;
this._stateScriptDataEscaped(cp);
}
}
// Script data double escape start state
//------------------------------------------------------------------
_stateScriptDataDoubleEscapeStart(cp) {
// Enter the double escaped state only if the preprocessor input matches
// $$.SCRIPT (second argument `false`; presumably a case-insensitive match,
// confirm against Preprocessor.startsWith) and the code point peeked at
// offset $$.SCRIPT.length is accepted by isScriptDataDoubleEscapeSequenceEnd.
if (this.preprocessor.startsWith($$.SCRIPT, false) &&
isScriptDataDoubleEscapeSequenceEnd(this.preprocessor.peek($$.SCRIPT.length))) {
// Emit cp, then consume and emit $$.SCRIPT.length further code points.
this._emitCodePoint(cp);
for (let i = 0; i < $$.SCRIPT.length; i++) {
this._emitCodePoint(this._consume());
}
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
}
// No match: unless _ensureHibernation() reports truthy (in which case
// nothing is done here), reprocess cp in the script data escaped state.
else if (!this._ensureHibernation()) {
this.state = State.SCRIPT_DATA_ESCAPED;
this._stateScriptDataEscaped(cp);
}
}
// Script data double escaped state
//------------------------------------------------------------------
_stateScriptDataDoubleEscaped(cp) {
switch (cp) {
case $.HYPHEN_MINUS: {
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH;
this._emitChars('-');
break;
}
case $.LESS_THAN_SIGN: {
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
this._emitChars('<');
break;
}
case $.NULL: {
this._err(ERR.unexpectedNullCharacter);
this._emitChars(REPLACEMENT_CHARACTER);
break;
}
case $.EOF: {
this._err(ERR.eofInScriptHtmlCommentLikeText);
this._emitEOFToken();
break;
}
default: {
this._emitCodePoint(cp);
}
}
}
// Script data double escaped dash state
//------------------------------------------------------------------
_stateScriptDataDoubleEscapedDash(cp) {
switch (cp) {
case $.HYPHEN_MINUS: {
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH;
this._emitChars('-');
break;
}
case $.LESS_THAN_SIGN: {
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
this._emitChars('<');
break;
}
case $.NULL: {
this._err(ERR.unexpectedNullCharacter);
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
this._emitChars(REPLACEMENT_CHARACTER);
break;
}
case $.EOF: {
this._err(ERR.eofInScriptHtmlCommentLikeText);
this._emitEOFToken();
break;
}
default: {
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
this._emitCodePoint(cp);
}
}
}
// Script data double escaped dash dash state
//------------------------------------------------------------------
_stateScriptDataDoubleEscapedDashDash(cp) {
switch (cp) {
case $.HYPHEN_MINUS: {
this._emitChars('-');
break;
}
case $.LESS_THAN_SIGN: {
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
this._emitChars('<');
break;
}
case $.GREATER_THAN_SIGN: {
this.state = State.SCRIPT_DATA;
this._emitChars('>');
break;
}
case $.NULL: {
this._err(ERR.unexpectedNullCharacter);
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
this._emitChars(REPLACEMENT_CHARACTER);
break;
}
case $.EOF: {
this._err(ERR.eofInScriptHtmlCommentLikeText);
this._emitEOFToken();
break;
}
default: {
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
this._emitCodePoint(cp);
}
}
}
// Script data double escaped less-than sign state
//------------------------------------------------------------------
_stateScriptDataDoubleEscapedLessThanSign(cp) {
if (cp === $.SOLIDUS) {
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPE_END;
this._emitChars('/');
}
else {
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
this._stateScriptDataDoubleEscaped(cp);
}
}
// Script data double escape end state
//------------------------------------------------------------------
_stateScriptDataDoubleEscapeEnd(cp) {
// Leave the double escaped state only if the preprocessor input matches
// $$.SCRIPT (second argument `false`; presumably a case-insensitive match,
// confirm against Preprocessor.startsWith) and the code point peeked at
// offset $$.SCRIPT.length is accepted by isScriptDataDoubleEscapeSequenceEnd.
if (this.preprocessor.startsWith($$.SCRIPT, false) &&
isScriptDataDoubleEscapeSequenceEnd(this.preprocessor.peek($$.SCRIPT.length))) {
// Emit cp, then consume and emit $$.SCRIPT.length further code points.
this._emitCodePoint(cp);
for (let i = 0; i < $$.SCRIPT.length; i++) {
this._emitCodePoint(this._consume());
}
// Back to the singly escaped state.
this.state = State.SCRIPT_DATA_ESCAPED;
}
// No match: unless _ensureHibernation() reports truthy (in which case
// nothing is done here), reprocess cp in the double escaped state.
else if (!this._ensureHibernation()) {
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
this._stateScriptDataDoubleEscaped(cp);
}
}
// Before attribute name state
//------------------------------------------------------------------
_stateBeforeAttributeName(cp) {
switch (cp) {
case $.SPACE:
case $.LINE_FEED:
case $.TABULATION:
case $.FORM_FEED: {
// Ignore whitespace
break;
}
case $.SOLIDUS:
case $.GREATER_THAN_SIGN:
case $.EOF: {
this.state = State.AFTER_ATTRIBUTE_NAME;
this._stateAfterAttributeName(cp);
break;
}
case $.EQUALS_SIGN: {
this._err(ERR.unexpectedEqualsSignBeforeAttributeName);
this._createAttr('=');
this.state = State.ATTRIBUTE_NAME;
break;
}
default: {
this._createAttr('');
this.state = State.ATTRIBUTE_NAME;
this._stateAttributeName(cp);
}
}
}
// Attribute name state
//------------------------------------------------------------------
_stateAttributeName(cp) {
switch (cp) {
case $.SPACE:
case $.LINE_FEED:
case $.TABULATION:
case $.FORM_FEED:
case $.SOLIDUS:
case $.GREATER_THAN_SIGN:
case $.EOF: {
this._leaveAttrName();
this.state = State.AFTER_ATTRIBUTE_NAME;
this._stateAfterAttributeName(cp);
break;
}
case $.EQUALS_SIGN: {
this._leaveAttrName();
this.state = State.BEFORE_ATTRIBUTE_VALUE;
break;
}
case $.QUOTATION_MARK:
case $.APOSTROPHE:
case $.LESS_THAN_SIGN: {
this._err(ERR.unexpectedCharacterInAttributeName);
this.currentAttr.name += String.fromCodePoint(cp);
break;
}
case $.NULL: {
this._err(ERR.unexpectedNullCharacter);
this.currentAttr.name += REPLACEMENT_CHARACTER;
break;
}
default: {
this.currentAttr.name += String.fromCodePoint(isAsciiUpper(cp) ? toAsciiLower(cp) : cp);
}
}
}
// After attribute name state
//------------------------------------------------------------------
_stateAfterAttributeName(cp) {
switch (cp) {
case $.SPACE:
case $.LINE_FEED:
case $.TABULATION:
case $.FORM_FEED: {
// Ignore whitespace
break;
}
case $.SOLIDUS: {
this.state = State.SELF_CLOSING_START_TAG;
break;
}
case $.EQUALS_SIGN: {
this.state = State.BEFORE_ATTRIBUTE_VALUE;
break;
}
case $.GREATER_THAN_SIGN: {
this.state = State.DATA;
this.emitCurrentTagToken();
break;
}
case $.EOF: {
this._err(ERR.eofInTag);
this._emitEOFToken();
break;
}
default: {
this._createAttr('');
this.state = State.ATTRIBUTE_NAME;
this._stateAttributeName(cp);
}
}
}
// Before attribute value state
//------------------------------------------------------------------
_stateBeforeAttributeValue(cp) {
switch (cp) {
case $.SPACE:
case $.LINE_FEED:
case $.TABULATION:
case $.FORM_FEED: {
// Ignore whitespace
break;
}
case $.QUOTATION_MARK: {
this.state = State.ATTRIBUTE_VALUE_DOUBLE_QUOTED;
break;
}
case $.APOSTROPHE: {
this.state = State.ATTRIBUTE_VALUE_SINGLE_QUOTED;
break;
}
case $.GREATER_THAN_SIGN: {
this._err(ERR.missingAttributeValue);
this.state = State.DATA;
this.emitCurrentTagToken();
break;
}
default: {
this.state = State.ATTRIBUTE_VALUE_UNQUOTED;
this._stateAttributeValueUnquoted(cp);
}
}
}
// Attribute value (double-quoted) state
//------------------------------------------------------------------
_stateAttributeValueDoubleQuoted(cp) {
switch (cp) {
case $.QUOTATION_MARK: {
this.state = State.AFTER_ATTRIBUTE_VALUE_QUOTED;
break;
}
case $.AMPERSAND: {
this._startCharacterReference();
break;
}
case $.NULL: {
this._err(ERR.unexpectedNullCharacter);
this.currentAttr.value += REPLACEMENT_CHARACTER;
break;
}
case $.EOF: {
this._err(ERR.eofInTag);
this._emitEOFToken();
break;
}
default: {
this.currentAttr.value += String.fromCodePoint(cp);
}
}
}
// Attribute value (single-quoted) state
//------------------------------------------------------------------
_stateAttributeValueSingleQuoted(cp) {
switch (cp) {
case $.APOSTROPHE: {
this.state = State.AFTER_ATTRIBUTE_VALUE_QUOTED;
break;
}
case $.AMPERSAND: {
this._startCharacterReference();
break;
}
case $.NULL: {
this._err(ERR.unexpectedNullCharacter);
this.currentAttr.value += REPLACEMENT_CHARACTER;
break;
}
case $.EOF: {
this._err(ERR.eofInTag);
this._emitEOFToken();
break;
}
default: {
this.currentAttr.value += String.fromCodePoint(cp);
}
}
}
// Attribute value (unquoted) state
//------------------------------------------------------------------
_stateAttributeValueUnquoted(cp) {
switch (cp) {
case $.SPACE:
case $.LINE_FEED:
case $.TABULATION:
case $.FORM_FEED: {
this._leaveAttrValue();
this.state = State.BEFORE_ATTRIBUTE_NAME;
break;
}
case $.AMPERSAND: {
this._startCharacterReference();
break;
}
case $.GREATER_THAN_SIGN: {
this._leaveAttrValue();
this.state = State.DATA;
this.emitCurrentTagToken();
break;
}
case $.NULL: {
this._err(ERR.unexpectedNullCharacter);
this.currentAttr.value += REPLACEMENT_CHARACTER;
break;
}
case $.QUOTATION_MARK:
case $.APOSTROPHE:
case $.LESS_THAN_SIGN:
case $.EQUALS_SIGN:
case $.GRAVE_ACCENT: {
this._err(ERR.unexpectedCharacterInUnquotedAttributeValue);
this.currentAttr.value += String.fromCodePoint(cp);
break;
}
case $.EOF: {
this._err(ERR.eofInTag);
this._emitEOFToken();
break;
}
default: {
this.currentAttr.value += String.fromCodePoint(cp);
}
}
}
// After attribute value (quoted) state
//------------------------------------------------------------------
_stateAfterAttributeValueQuoted(cp) {
switch (cp) {
case $.SPACE:
case $.LINE_FEED:
case $.TABULATION:
case $.FORM_FEED: {
this._leaveAttrValue();
this.state = State.BEFORE_ATTRIBUTE_NAME;
break;
}
case $.SOLIDUS: {
this._leaveAttrValue();
this.state = State.SELF_CLOSING_START_TAG;
break;
}
case $.GREATER_THAN_SIGN: {
this._leaveAttrValue();
this.state = State.DATA;
this.emitCurrentTagToken();
break;
}
case $.EOF: {
this._err(ERR.eofInTag);
this._emitEOFToken();
break;
}
default: {
this._err(ERR.missingWhitespaceBetweenAttributes);
this.state = State.BEFORE_ATTRIBUTE_NAME;
this._stateBeforeAttributeName(cp);
}
}
}
// Self-closing start tag state
//------------------------------------------------------------------
_stateSelfClosingStartTag(cp) {
switch (cp) {
case $.GREATER_THAN_SIGN: {
const token = this.currentToken;
token.selfClosing = true;
this.state = State.DATA;
this.emitCurrentTagToken();
break;
}
case $.EOF: {
this._err(ERR.eofInTag);
this._emitEOFToken();
break;
}
default: {
this._err(ERR.unexpectedSolidusInTag);
this.state = State.BEFORE_ATTRIBUTE_NAME;
this._stateBeforeAttributeName(cp);
}
}
}
// Bogus comment state
//------------------------------------------------------------------
_stateBogusComment(cp) {
const token = this.currentToken;
switch (cp) {
case $.GREATER_THAN_SIGN: {
this.state = State.DATA;
this.emitCurrentComment(token);
break;
}
case $.EOF: {
this.emitCurrentComment(token);
this._emitEOFToken();
break;
}
case $.NULL: {
this._err(ERR.unexpectedNullCharacter);
token.data += REPLACEMENT_CHARACTER;
break;
}
default: {
token.data += String.fromCodePoint(cp);
}
}
}
// Markup declaration open state
//------------------------------------------------------------------
_stateMarkupDeclarationOpen(cp) {
// Try the three recognised openers in order: $$.DASH_DASH, $$.DOCTYPE
// and $$.CDATA_START. The second argument of _consumeSequenceIfMatch
// differs per sequence (true, false, true); presumably it selects
// case-sensitive matching (confirm against _consumeSequenceIfMatch).
if (this._consumeSequenceIfMatch($$.DASH_DASH, true)) {
// Comment opener: create the comment token with an offset covering the
// matched sequence plus one more position.
this._createCommentToken($$.DASH_DASH.length + 1);
this.state = State.COMMENT_START;
}
else if (this._consumeSequenceIfMatch($$.DOCTYPE, false)) {
// NOTE: Doctype tokens are created without fixed offsets. We keep track of the moment a doctype *might* start here.
this.currentLocation = this.getCurrentLocation($$.DOCTYPE.length + 1);
this.state = State.DOCTYPE;
}
else if (this._consumeSequenceIfMatch($$.CDATA_START, true)) {
// A CDATA section is only honoured when this.inForeignNode is truthy.
if (this.inForeignNode) {
this.state = State.CDATA_SECTION;
}
else {
// Otherwise it is an error and the content becomes a bogus comment
// whose data starts with the literal '[CDATA['.
this._err(ERR.cdataInHtmlContent);
this._createCommentToken($$.CDATA_START.length + 1);
this.currentToken.data = '[CDATA[';
this.state = State.BOGUS_COMMENT;
}
}
//NOTE: Sequence lookups can be interrupted by hibernation. In that case, lookup
//results are no longer valid and we will need to start over.
else if (!this._ensureHibernation()) {
// Nothing matched and no hibernation was needed: report the error and
// reprocess cp as the start of a bogus comment.
this._err(ERR.incorrectlyOpenedComment);
this._createCommentToken(2);
this.state = State.BOGUS_COMMENT;
this._stateBogusComment(cp);
}
}
// Comment start state
//------------------------------------------------------------------
_stateCommentStart(cp) {
switch (cp) {
case $.HYPHEN_MINUS: {
this.state = State.COMMENT_START_DASH;
break;
}
case $.GREATER_THAN_SIGN: {
this._err(ERR.abruptClosingOfEmptyComment);
this.state = State.DATA;
const token = this.currentToken;
this.emitCurrentComment(token);
break;
}
default: {
this.state = State.COMMENT;
this._stateComment(cp);
}
}
}
// Comment start dash state
//------------------------------------------------------------------
_stateCommentStartDash(cp) {
const token = this.currentToken;
switch (cp) {
case $.HYPHEN_MINUS: {
this.state = State.COMMENT_END;
break;
}
case $.GREATER_THAN_SIGN: {
this._err(ERR.abruptClosingOfEmptyComment);
this.state = State.DATA;
this.emitCurrentComment(token);
break;
}
case $.EOF: {
this._err(ERR.eofInComment);
this.emitCurrentComment(token);
this._emitEOFToken();
break;
}
default: {
token.data += '-';
this.state = State.COMMENT;
this._stateComment(cp);
}
}
}
// Comment state
//------------------------------------------------------------------
_stateComment(cp) {
const token = this.currentToken;
switch (cp) {
case $.HYPHEN_MINUS: {
this.state = State.COMMENT_END_DASH;
break;
}
case $.LESS_THAN_SIGN: {
token.data += '<';
this.state = State.COMMENT_LESS_THAN_SIGN;
break;
}
case $.NULL: {
this._err(ERR.unexpectedNullCharacter);
token.data += REPLACEMENT_CHARACTER;
break;
}
case $.EOF: {
this._err(ERR.eofInComment);
this.emitCurrentComment(token);
this._emitEOFToken();
break;
}
default: {
token.data += String.fromCodePoint(cp);
}
}
}
// Comment less-than sign state
//------------------------------------------------------------------
_stateCommentLessThanSign(cp) {
const token = this.currentToken;
switch (cp) {
case $.EXCLAMATION_MARK: {
token.data += '!';
this.state = State.COMMENT_LESS_THAN_SIGN_BANG;
break;
}
case $.LESS_THAN_SIGN: {
token.data += '<';
break;
}
default: {
this.state = State.COMMENT;
this._stateComment(cp);
}
}
}
// Comment less-than sign bang state
//------------------------------------------------------------------
_stateCommentLessThanSignBang(cp) {
if (cp === $.HYPHEN_MINUS) {
this.state = State.COMMENT_LESS_THAN_SIGN_BANG_DASH;
}
else {
this.state = State.COMMENT;
this._stateComment(cp);
}
}
// Comment less-than sign bang dash state
//------------------------------------------------------------------
_stateCommentLessThanSignBangDash(cp) {
if (cp === $.HYPHEN_MINUS) {
this.state = State.COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH;
}
else {
this.state = State.COMMENT_END_DASH;
this._stateCommentEndDash(cp);
}
}
// Comment less-than sign bang dash dash state
//------------------------------------------------------------------
_stateCommentLessThanSignBangDashDash(cp) {
if (cp !== $.GREATER_THAN_SIGN && cp !== $.EOF) {
this._err(ERR.nestedComment);
}
this.state = State.COMMENT_END;
this._stateCommentEnd(cp);
}
// Comment end dash state
//------------------------------------------------------------------
_stateCommentEndDash(cp) {
const token = this.currentToken;
switch (cp) {
case $.HYPHEN_MINUS: {
this.state = State.COMMENT_END;
break;
}
case $.EOF: {
this._err(ERR.eofInComment);
this.emitCurrentComment(token);
this._emitEOFToken();
break;
}
default: {
token.data += '-';
this.state = State.COMMENT;
this._stateComment(cp);
}
}
}
// Comment end state
//------------------------------------------------------------------
_stateCommentEnd(cp) {
const token = this.currentToken;
switch (cp) {
case $.GREATER_THAN_SIGN: {
this.state = State.DATA;
this.emitCurrentComment(token);
break;
}
case $.EXCLAMATION_MARK: {
this.state = State.COMMENT_END_BANG;
break;
}
case $.HYPHEN_MINUS: {
token.data += '-';
break;
}
case $.EOF: {
this._err(ERR.eofInComment);
this.emitCurrentComment(token);
this._emitEOFToken();
break;
}
default: {
token.data += '--';
this.state = State.COMMENT;
this._stateComment(cp);
}
}
}
// Comment end bang state
//------------------------------------------------------------------
_stateCommentEndBang(cp) {
const token = this.currentToken;
switch (cp) {
case $.HYPHEN_MINUS: {
token.data += '--!';
this.state = State.COMMENT_END_DASH;
break;
}
case $.GREATER_THAN_SIGN: {
this._err(ERR.incorrectlyClosedComment);
this.state = State.DATA;
this.emitCurrentComment(token);
break;
}
case $.EOF: {
this._err(ERR.eofInComment);
this.emitCurrentComment(token);
this._emitEOFToken();
break;
}
default: {
token.data += '--!';
this.state = State.COMMENT;
this._stateComment(cp);
}
}
}
// DOCTYPE state
//------------------------------------------------------------------
_stateDoctype(cp) {
switch (cp) {
case $.SPACE:
case $.LINE_FEED:
case $.TABULATION:
case $.FORM_FEED: {
this.state = State.BEFORE_DOCTYPE_NAME;
break;
}
case $.GREATER_THAN_SIGN: {
this.state = State.BEFORE_DOCTYPE_NAME;
this._stateBeforeDoctypeName(cp);
break;
}
case $.EOF: {
this._err(ERR.eofInDoctype);
this._createDoctypeToken(null);
const token = this.currentToken;
token.forceQuirks = true;
this.emitCurrentDoctype(token);
this._emitEOFToken();
break;
}
default: {
this._err(ERR.missingWhitespaceBeforeDoctypeName);
this.state = State.BEFORE_DOCTYPE_NAME;
this._stateBeforeDoctypeName(cp);
}
}
}
// Before DOCTYPE name state
//------------------------------------------------------------------
_stateBeforeDoctypeName(cp) {
if (isAsciiUpper(cp)) {
this._createDoctypeToken(String.fromCharCode(toAsciiLower(cp)));
this.state = State.DOCTYPE_NAME;
}
else
switch (cp) {
case $.SPACE:
case $.LINE_FEED:
case $.TABULATION:
case $.FORM_FEED: {
// Ignore whitespace
break;
}
case $.NULL: {
this._err(ERR.unexpectedNullCharacter);
this._createDoctypeToken(REPLACEMENT_CHARACTER);
this.state = State.DOCTYPE_NAME;
break;
}
case $.GREATER_THAN_SIGN: {
this._err(ERR.missingDoctypeName);
this._createDoctypeToken(null);
const token = this.currentToken;
token.forceQuirks = true;
this.emitCurrentDoctype(token);
this.state = State.DATA;
break;
}
case $.EOF: {
this._err(ERR.eofInDoctype);
this._createDoctypeToken(null);
const token = this.currentToken;
token.forceQuirks = true;
this.emitCurrentDoctype(token);
this._emitEOFToken();
break;
}
default: {
this._createDoctypeToken(String.fromCodePoint(cp));
this.state = State.DOCTYPE_NAME;
}
}
}
// DOCTYPE name state
//------------------------------------------------------------------
_stateDoctypeName(cp) {
const token = this.currentToken;
switch (cp) {
case $.SPACE:
case $.LINE_FEED:
case $.TABULATION:
case $.FORM_FEED: {
this.state = State.AFTER_DOCTYPE_NAME;
break;
}
case $.GREATER_THAN_SIGN: {
this.state = State.DATA;
this.emitCurrentDoctype(token);
break;
}
case $.NULL: {
this._err(ERR.unexpectedNullCharacter);
token.name += REPLACEMENT_CHARACTER;
break;
}
case $.EOF: {
this._err(ERR.eofInDoctype);
token.forceQuirks = true;
this.emitCurrentDoctype(token);
this._emitEOFToken();
break;
}
default: {
token.name += String.fromCodePoint(isAsciiUpper(cp) ? toAsciiLower(cp) : cp);
}
}
}
// After DOCTYPE name state
//------------------------------------------------------------------
_stateAfterDoctypeName(cp) {
// Handles the code point following a completed DOCTYPE name; `token` is
// the tokenizer's current token, captured before any emit call.
const token = this.currentToken;
switch (cp) {
case $.SPACE:
case $.LINE_FEED:
case $.TABULATION:
case $.FORM_FEED: {
// Ignore whitespace
break;
}
case $.GREATER_THAN_SIGN: {
// End of the declaration: switch to DATA, then emit the doctype.
this.state = State.DATA;
this.emitCurrentDoctype(token);
break;
}
case $.EOF: {
// Input ended inside the declaration: emit it with forceQuirks set,
// followed by the EOF token.
this._err(ERR.eofInDoctype);
token.forceQuirks = true;
this.emitCurrentDoctype(token);
this._emitEOFToken();
break;
}
default: {
// Look for the $$.PUBLIC or $$.SYSTEM keyword. The second argument is
// `false` for both; presumably a case-insensitive match (confirm
// against _consumeSequenceIfMatch).
if (this._consumeSequenceIfMatch($$.PUBLIC, false)) {
this.state = State.AFTER_DOCTYPE_PUBLIC_KEYWORD;
}
else if (this._consumeSequenceIfMatch($$.SYSTEM, false)) {
this.state = State.AFTER_DOCTYPE_SYSTEM_KEYWORD;
}
//NOTE: sequence lookup can be interrupted by hibernation. In that case lookup
//results are no longer valid and we will need to start over.
else if (!this._ensureHibernation()) {
// Neither keyword matched: report it, set forceQuirks and reprocess
// cp in the bogus DOCTYPE state.
this._err(ERR.invalidCharacterSequenceAfterDoctypeName);
token.forceQuirks = true;
this.state = State.BOGUS_DOCTYPE;
this._stateBogusDoctype(cp);
}
}
}
}
// After DOCTYPE public keyword state
//------------------------------------------------------------------
_stateAfterDoctypePublicKeyword(cp) {
const token = this.currentToken;
switch (cp) {
case $.SPACE:
case $.LINE_FEED:
case $.TABULATION:
case $.FORM_FEED: {
this.state = State.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER;
break;
}
case $.QUOTATION_MARK: {
this._err(ERR.missingWhitespaceAfterDoctypePublicKeyword);
token.publicId = '';
this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED;
break;
}
case $.APOSTROPHE: {
this._err(ERR.missingWhitespaceAfterDoctypePublicKeyword);
token.publicId = '';
this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED;
break;
}
case $.GREATER_THAN_SIGN: {
this._err(ERR.missingDoctypePublicIdentifier);
token.forceQuirks = true;
this.state = State.DATA;
this.emitCurrentDoctype(token);
break;
}
case $.EOF: {
this._err(ERR.eofInDoctype);
token.forceQuirks = true;
this.emitCurrentDoctype(token);
this._emitEOFToken();
break;
}
default: {
this._err(ERR.missingQuoteBeforeDoctypePublicIdentifier);
token.forceQuirks = true;
this.state = State.BOGUS_DOCTYPE;
this._stateBogusDoctype(cp);
}
}
}
// Before DOCTYPE public identifier state
//------------------------------------------------------------------
_stateBeforeDoctypePublicIdentifier(cp) {
const token = this.currentToken;
switch (cp) {
case $.SPACE:
case $.LINE_FEED:
case $.TABULATION:
case $.FORM_FEED: {
// Ignore whitespace
break;
}
case $.QUOTATION_MARK: {
token.publicId = '';
this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED;
break;
}
case $.APOSTROPHE: {
token.publicId = '';
this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED;
break;
}
case $.GREATER_THAN_SIGN: {
this._err(ERR.missingDoctypePublicIdentifier);
token.forceQuirks = true;
this.state = State.DATA;
this.emitCurrentDoctype(token);
break;
}
case $.EOF: {
this._err(ERR.eofInDoctype);
token.forceQuirks = true;
this.emitCurrentDoctype(token);
this._emitEOFToken();
break;
}
default: {
this._err(ERR.missingQuoteBeforeDoctypePublicIdentifier);
token.forceQuirks = true;
this.state = State.BOGUS_DOCTYPE;
this._stateBogusDoctype(cp);
}
}
}
// DOCTYPE public identifier (double-quoted) state
//------------------------------------------------------------------
_stateDoctypePublicIdentifierDoubleQuoted(cp) {
const token = this.currentToken;
switch (cp) {
case $.QUOTATION_MARK: {
this.state = State.AFTER_DOCTYPE_PUBLIC_IDENTIFIER;
break;
}
case $.NULL: {
this._err(ERR.unexpectedNullCharacter);
token.publicId += REPLACEMENT_CHARACTER;
break;
}
case $.GREATER_THAN_SIGN: {
this._err(ERR.abruptDoctypePublicIdentifier);
token.forceQuirks = true;
this.emitCurrentDoctype(token);
this.state = State.DATA;
break;
}
case $.EOF: {
this._err(ERR.eofInDoctype);
token.forceQuirks = true;
this.emitCurrentDoctype(token);
this._emitEOFToken();
break;
}
default: {
token.publicId += String.fromCodePoint(cp);
}
}
}
// DOCTYPE public identifier (single-quoted) state
//------------------------------------------------------------------
_stateDoctypePublicIdentifierSingleQuoted(cp) {
const token = this.currentToken;
switch (cp) {
case $.APOSTROPHE: {
this.state = State.AFTER_DOCTYPE_PUBLIC_IDENTIFIER;
break;
}
case $.NULL: {
this._err(ERR.unexpectedNullCharacter);
token.publicId += REPLACEMENT_CHARACTER;
break;
}
case $.GREATER_THAN_SIGN: {
this._err(ERR.abruptDoctypePublicIdentifier);
token.forceQuirks = true;
this.emitCurrentDoctype(token);
this.state = State.DATA;
break;
}
case $.EOF: {
this._err(ERR.eofInDoctype);
token.forceQuirks = true;
this.emitCurrentDoctype(token);
this._emitEOFToken();
break;
}
default: {
token.publicId += String.fromCodePoint(cp);
}
}
}
// After DOCTYPE public identifier state
//------------------------------------------------------------------
_stateAfterDoctypePublicIdentifier(cp) {
const token = this.currentToken;
switch (cp) {
case $.SPACE:
case $.LINE_FEED:
case $.TABULATION:
case $.FORM_FEED: {
this.state = State.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS;
break;
}
case $.GREATER_THAN_SIGN: {
this.state = State.DATA;
this.emitCurrentDoctype(token);
break;
}
case $.QUOTATION_MARK: {
this._err(ERR.missingWhitespaceBetweenDoctypePublicAndSystemIdentifiers);
token.systemId = '';
this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
break;
}
case $.APOSTROPHE: {
this._err(ERR.missingWhitespaceBetweenDoctypePublicAndSystemIdentifiers);
token.systemId = '';
this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
break;
}
case $.EOF: {
this._err(ERR.eofInDoctype);
token.forceQuirks = true;
this.emitCurrentDoctype(token);
this._emitEOFToken();
break;
}
default: {
this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier);
token.forceQuirks = true;
this.state = State.BOGUS_DOCTYPE;
this._stateBogusDoctype(cp);
}
}
}
// Between DOCTYPE public and system identifiers state
//------------------------------------------------------------------
_stateBetweenDoctypePublicAndSystemIdentifiers(cp) {
const token = this.currentToken;
switch (cp) {
case $.SPACE:
case $.LINE_FEED:
case $.TABULATION:
case $.FORM_FEED: {
// Ignore whitespace
break;
}
case $.GREATER_THAN_SIGN: {
this.emitCurrentDoctype(token);
this.state = State.DATA;
break;
}
case $.QUOTATION_MARK: {
token.systemId = '';
this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
break;
}
case $.APOSTROPHE: {
token.systemId = '';
this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
break;
}
case $.EOF: {
this._err(ERR.eofInDoctype);
token.forceQuirks = true;
this.emitCurrentDoctype(token);
this._emitEOFToken();
break;
}
default: {
this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier);
token.forceQuirks = true;
this.state = State.BOGUS_DOCTYPE;
this._stateBogusDoctype(cp);
}
}
}
// After DOCTYPE system keyword state
//------------------------------------------------------------------
_stateAfterDoctypeSystemKeyword(cp) {
const token = this.currentToken;
switch (cp) {
case $.SPACE:
case $.LINE_FEED:
case $.TABULATION:
case $.FORM_FEED: {
this.state = State.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER;
break;
}
case $.QUOTATION_MARK: {
this._err(ERR.missingWhitespaceAfterDoctypeSystemKeyword);
token.systemId = '';
this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
break;
}
case $.APOSTROPHE: {
this._err(ERR.missingWhitespaceAfterDoctypeSystemKeyword);
token.systemId = '';
this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
break;
}
case $.GREATER_THAN_SIGN: {
this._err(ERR.missingDoctypeSystemIdentifier);
token.forceQuirks = true;
this.state = State.DATA;
this.emitCurrentDoctype(token);
break;
}
case $.EOF: {
this._err(ERR.eofInDoctype);
token.forceQuirks = true;
this.emitCurrentDoctype(token);
this._emitEOFToken();
break;
}
default: {
this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier);
token.forceQuirks = true;
this.state = State.BOGUS_DOCTYPE;
this._stateBogusDoctype(cp);
}
}
}
// Before DOCTYPE system identifier state
//------------------------------------------------------------------
_stateBeforeDoctypeSystemIdentifier(cp) {
const token = this.currentToken;
switch (cp) {
case $.SPACE:
case $.LINE_FEED:
case $.TABULATION:
case $.FORM_FEED: {
// Ignore whitespace
break;
}
case $.QUOTATION_MARK: {
token.systemId = '';
this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
break;
}
case $.APOSTROPHE: {
token.systemId = '';
this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
break;
}
case $.GREATER_THAN_SIGN: {
this._err(ERR.missingDoctypeSystemIdentifier);
token.forceQuirks = true;
this.state = State.DATA;
this.emitCurrentDoctype(token);
break;
}
case $.EOF: {
this._err(ERR.eofInDoctype);
token.forceQuirks = true;
this.emitCurrentDoctype(token);
this._emitEOFToken();
break;
}
default: {
this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier);
token.forceQuirks = true;
this.state = State.BOGUS_DOCTYPE;
this._stateBogusDoctype(cp);
}
}
}
// DOCTYPE system identifier (double-quoted) state
//------------------------------------------------------------------
_stateDoctypeSystemIdentifierDoubleQuoted(cp) {
const token = this.currentToken;
switch (cp) {
case $.QUOTATION_MARK: {
this.state = State.AFTER_DOCTYPE_SYSTEM_IDENTIFIER;
break;
}
case $.NULL: {
this._err(ERR.unexpectedNullCharacter);
token.systemId += REPLACEMENT_CHARACTER;
break;
}
case $.GREATER_THAN_SIGN: {
this._err(ERR.abruptDoctypeSystemIdentifier);
token.forceQuirks = true;
this.emitCurrentDoctype(token);
this.state = State.DATA;
break;
}
case $.EOF: {
this._err(ERR.eofInDoctype);
token.forceQuirks = true;
this.emitCurrentDoctype(token);
this._emitEOFToken();
break;
}
default: {
token.systemId += String.fromCodePoint(cp);
}
}
}
// DOCTYPE system identifier (single-quoted) state
//------------------------------------------------------------------
_stateDoctypeSystemIdentifierSingleQuoted(cp) {
const token = this.currentToken;
switch (cp) {
case $.APOSTROPHE: {
this.state = State.AFTER_DOCTYPE_SYSTEM_IDENTIFIER;
break;
}
case $.NULL: {
this._err(ERR.unexpectedNullCharacter);
token.systemId += REPLACEMENT_CHARACTER;
break;
}
case $.GREATER_THAN_SIGN: {
this._err(ERR.abruptDoctypeSystemIdentifier);
token.forceQuirks = true;
this.emitCurrentDoctype(token);
this.state = State.DATA;
break;
}
case $.EOF: {
this._err(ERR.eofInDoctype);
token.forceQuirks = true;
this.emitCurrentDoctype(token);
this._emitEOFToken();
break;
}
default: {
token.systemId += String.fromCodePoint(cp);
}
}
}
// After DOCTYPE system identifier state
//------------------------------------------------------------------
_stateAfterDoctypeSystemIdentifier(cp) {
const token = this.currentToken;
switch (cp) {
case $.SPACE:
case $.LINE_FEED:
case $.TABULATION:
case $.FORM_FEED: {
// Ignore whitespace
break;
}
case $.GREATER_THAN_SIGN: {
this.emitCurrentDoctype(token);
this.state = State.DATA;
break;
}
case $.EOF: {
this._err(ERR.eofInDoctype);
token.forceQuirks = true;
this.emitCurrentDoctype(token);
this._emitEOFToken();
break;
}
default: {
this._err(ERR.unexpectedCharacterAfterDoctypeSystemIdentifier);
this.state = State.BOGUS_DOCTYPE;
this._stateBogusDoctype(cp);
}
}
}
// Bogus DOCTYPE state
//------------------------------------------------------------------
_stateBogusDoctype(cp) {
const token = this.currentToken;
switch (cp) {
case $.GREATER_THAN_SIGN: {
this.emitCurrentDoctype(token);
this.state = State.DATA;
break;
}
case $.NULL: {
this._err(ERR.unexpectedNullCharacter);
break;
}
case $.EOF: {
this.emitCurrentDoctype(token);
this._emitEOFToken();
break;
}
default:
// Do nothing
}
}
// CDATA section state
//------------------------------------------------------------------
_stateCdataSection(cp) {
switch (cp) {
case $.RIGHT_SQUARE_BRACKET: {
this.state = State.CDATA_SECTION_BRACKET;
break;
}
case $.EOF: {
this._err(ERR.eofInCdata);
this._emitEOFToken();
break;
}
default: {
this._emitCodePoint(cp);
}
}
}
// CDATA section bracket state
//------------------------------------------------------------------
_stateCdataSectionBracket(cp) {
if (cp === $.RIGHT_SQUARE_BRACKET) {
this.state = State.CDATA_SECTION_END;
}
else {
this._emitChars(']');
this.state = State.CDATA_SECTION;
this._stateCdataSection(cp);
}
}
// CDATA section end state
//------------------------------------------------------------------
_stateCdataSectionEnd(cp) {
switch (cp) {
case $.GREATER_THAN_SIGN: {
this.state = State.DATA;
break;
}
case $.RIGHT_SQUARE_BRACKET: {
this._emitChars(']');
break;
}
default: {
this._emitChars(']]');
this.state = State.CDATA_SECTION;
this._stateCdataSection(cp);
}
}
}
// Character reference state
//------------------------------------------------------------------
_stateCharacterReference() {
let length = this.entityDecoder.write(this.preprocessor.html, this.preprocessor.pos);
if (length < 0) {
if (this.preprocessor.lastChunkWritten) {
length = this.entityDecoder.end();
}
else {
// Wait for the rest of the entity.
this.active = false;
// Mark the entire buffer as read.
this.preprocessor.pos = this.preprocessor.html.length - 1;
this.consumedAfterSnapshot = 0;
this.preprocessor.endOfChunkHit = true;
return;
}
}
if (length === 0) {
// This was not a valid entity. Go back to the beginning, and
// figure out what to do.
this.preprocessor.pos = this.entityStartPos;
this._flushCodePointConsumedAsCharacterReference($.AMPERSAND);
this.state =
!this._isCharacterReferenceInAttribute() && isAsciiAlphaNumeric(this.preprocessor.peek(1))
? State.AMBIGUOUS_AMPERSAND
: this.returnState;
}
else {
// We successfully parsed an entity. Switch to the return state.
this.state = this.returnState;
}
}
// Ambiguous ampersand state
//------------------------------------------------------------------
_stateAmbiguousAmpersand(cp) {
if (isAsciiAlphaNumeric(cp)) {
this._flushCodePointConsumedAsCharacterReference(cp);
}
else {
if (cp === $.SEMICOLON) {
this._err(ERR.unknownNamedCharacterReference);
}
this.state = this.returnState;
this._callState(cp);
}
}
}