refactor: move brother_node development artifact to dev/test-nodes subdirectory
Development Artifact Cleanup:

✅ BROTHER_NODE REORGANIZATION: Moved development test node to appropriate location
- dev/test-nodes/brother_node/: Moved from root directory for better organization
- Contains development configuration, test logs, and test chain data
- No impact on production systems - purely development/testing artifact

✅ DEVELOPMENT ARTIFACTS IDENTIFIED:
- Chain ID: aitbc-brother-chain (test/development chain)
- Ports: 8010 (P2P) and 8011 (RPC) - different from production
- Environment: .env file with test configuration
- Logs: rpc.log and node.log from development testing session (March 15, 2026)

✅ ROOT DIRECTORY CLEANUP: Removed development clutter from production directory
- brother_node/ moved to dev/test-nodes/brother_node/
- Root directory now contains only production-ready components
- Development artifacts properly organized in dev/ subdirectory

DIRECTORY STRUCTURE IMPROVEMENT:
📁 dev/test-nodes/: Development and testing node configurations
🏗️ Root Directory: Clean production structure with only essential components
🧪 Development Isolation: Test environments separated from production

BENEFITS:
✅ Clean Production Directory: No development artifacts in root
✅ Better Organization: Development nodes grouped in dev/ subdirectory
✅ Clear Separation: Production vs development environments clearly distinguished
✅ Maintainability: Easier to identify and manage development components

RESULT: Successfully moved brother_node development artifact to dev/test-nodes/ subdirectory, cleaning up the root directory while preserving development testing environment for future use.
This commit is contained in:
851
dev/env/node_modules/@streamparser/json/dist/deno/tokenizer.ts
generated
vendored
Executable file
851
dev/env/node_modules/@streamparser/json/dist/deno/tokenizer.ts
generated
vendored
Executable file
@@ -0,0 +1,851 @@
|
||||
import { charset, escapedSequences } from "./utils/utf-8.ts";
|
||||
import {
|
||||
type StringBuilder,
|
||||
NonBufferedString,
|
||||
BufferedString,
|
||||
} from "./utils/bufferedString.ts";
|
||||
import TokenType from "./utils/types/tokenType.ts";
|
||||
import type { ParsedTokenInfo } from "./utils/types/parsedTokenInfo.ts";
|
||||
|
||||
// Tokenizer States
|
||||
/**
 * States of the tokenizer's byte-level state machine.
 *
 * The numeric values are significant:
 * - they are used as indices into the name table of `TokenizerStateToString`,
 *   so both lists must stay in the same order;
 * - the `STRING_UNICODE_DIGIT_*` members must remain consecutive because the
 *   tokenizer steps through them with `state += 1`.
 */
const enum TokenizerStates {
  // General
  START = 0,
  ENDED = 1,
  ERROR = 2,
  // Literal `true` (after "t", "tr", "tru")
  TRUE1 = 3,
  TRUE2 = 4,
  TRUE3 = 5,
  // Literal `false` (after "f", "fa", "fal", "fals")
  FALSE1 = 6,
  FALSE2 = 7,
  FALSE3 = 8,
  FALSE4 = 9,
  // Literal `null` (after "n", "nu", "nul")
  NULL1 = 10,
  NULL2 = 11,
  NULL3 = 12,
  // Strings
  STRING_DEFAULT = 13,
  STRING_AFTER_BACKSLASH = 14,
  STRING_UNICODE_DIGIT_1 = 15,
  STRING_UNICODE_DIGIT_2 = 16,
  STRING_UNICODE_DIGIT_3 = 17,
  STRING_UNICODE_DIGIT_4 = 18,
  STRING_INCOMPLETE_CHAR = 19,
  // Numbers
  NUMBER_AFTER_INITIAL_MINUS = 20,
  NUMBER_AFTER_INITIAL_ZERO = 21,
  NUMBER_AFTER_INITIAL_NON_ZERO = 22,
  NUMBER_AFTER_FULL_STOP = 23,
  NUMBER_AFTER_DECIMAL = 24,
  NUMBER_AFTER_E = 25,
  NUMBER_AFTER_E_AND_SIGN = 26,
  NUMBER_AFTER_E_AND_DIGIT = 27,
  // Separator and byte-order mark
  SEPARATOR = 28,
  BOM_OR_START = 29,
  BOM = 30,
}
|
||||
|
||||
/**
 * Translates a tokenizer state into its symbolic name, for use in error
 * messages.
 *
 * @param tokenizerState - State value to describe.
 * @returns The name stored at the position matching the state's numeric
 * value (`undefined` at runtime if the value lies outside the table).
 */
function TokenizerStateToString(tokenizerState: TokenizerStates): string {
  // Kept in the declaration order of `TokenizerStates`, so that the entry at
  // index i is the name of the state whose numeric value is i.
  const stateNames = [
    "START",
    "ENDED",
    "ERROR",
    "TRUE1",
    "TRUE2",
    "TRUE3",
    "FALSE1",
    "FALSE2",
    "FALSE3",
    "FALSE4",
    "NULL1",
    "NULL2",
    "NULL3",
    "STRING_DEFAULT",
    "STRING_AFTER_BACKSLASH",
    "STRING_UNICODE_DIGIT_1",
    "STRING_UNICODE_DIGIT_2",
    "STRING_UNICODE_DIGIT_3",
    "STRING_UNICODE_DIGIT_4",
    "STRING_INCOMPLETE_CHAR",
    "NUMBER_AFTER_INITIAL_MINUS",
    "NUMBER_AFTER_INITIAL_ZERO",
    "NUMBER_AFTER_INITIAL_NON_ZERO",
    "NUMBER_AFTER_FULL_STOP",
    "NUMBER_AFTER_DECIMAL",
    "NUMBER_AFTER_E",
    "NUMBER_AFTER_E_AND_SIGN",
    "NUMBER_AFTER_E_AND_DIGIT",
    "SEPARATOR",
    "BOM_OR_START",
    "BOM",
  ];

  return stateNames[tokenizerState];
}
|
||||
|
||||
/** Options accepted by the `Tokenizer` constructor. All members are optional. */
export interface TokenizerOptions {
  /**
   * Size handed to `BufferedString` for accumulating string tokens. The
   * buffered builder is only used when the value is greater than 4; otherwise
   * a `NonBufferedString` is used.
   */
  stringBufferSize?: number;

  /**
   * Size handed to `BufferedString` for accumulating number tokens. The
   * buffered builder is only used when the value is greater than 0; otherwise
   * a `NonBufferedString` is used.
   */
  numberBufferSize?: number;

  /**
   * Text that, when encountered between values, is reported as a `SEPARATOR`
   * token. When unset (or empty) no separator is recognised.
   */
  separator?: string;

  /**
   * When exactly `true`, each `write` call that stops inside a literal,
   * string or number reports what has been read so far as a token flagged
   * `partial: true`.
   */
  emitPartialTokens?: boolean;
}
|
||||
|
||||
/**
 * Baseline option values. The `Tokenizer` constructor spreads the caller's
 * options over this object, so any member the caller omits takes the value
 * listed here.
 */
const defaultOpts: TokenizerOptions = {
  // 0 selects the non-buffered string builder.
  stringBufferSize: 0,
  // 0 selects the non-buffered number builder.
  numberBufferSize: 0,
  // No separator is recognised between values.
  separator: undefined,
  // Tokens are only reported once complete.
  emitPartialTokens: false,
};
|
||||
|
||||
/**
 * Error type used by the tokenizer for invalid input and for misuse (such as
 * emitting tokens before an `onToken` callback has been installed).
 */
export class TokenizerError extends Error {
  /**
   * @param message - Human-readable description of the failure.
   */
  constructor(message: string) {
    super(message);
    // When TypeScript targets ES5, calling `super(...)` on a built-in such as
    // Error yields an object with Error.prototype, so the prototype chain has
    // to be restored manually. `new.target` is used instead of a hard-coded
    // `TokenizerError.prototype` so that subclasses keep their own prototype
    // (and therefore still satisfy `instanceof Subclass`).
    Object.setPrototypeOf(this, new.target.prototype);
  }
}
|
||||
|
||||
/**
 * Streaming JSON tokenizer.
 *
 * Input is fed in chunks through `write`; every chunk is scanned byte by byte
 * by a state machine that reports tokens through the overridable `onToken`
 * callback. Because state is kept between calls, a token (including a
 * multi-byte UTF-8 character inside a string) may be split across any number
 * of chunks. `end` finishes the stream; failures are routed through `error`
 * and the overridable `onError` callback.
 */
export default class Tokenizer {
  /** Current state of the state machine; a fresh tokenizer first looks for a BOM. */
  private state = TokenizerStates.BOM_OR_START;

  /** Byte sequence of the byte-order mark currently being matched, if any. */
  private bom?: number[];
  /** Index into `bom` of the next byte expected. */
  private bomIndex = 0;

  /** Whether unfinished tokens are reported at the end of each `write` call. */
  private emitPartialTokens: boolean;
  /** Separator text as configured; used as the value of SEPARATOR tokens. */
  private separator?: string;
  /** Encoded form of `separator`, matched byte by byte against the input. */
  private separatorBytes?: Uint8Array;
  /** Index into `separatorBytes` of the last separator byte matched so far. */
  private separatorIndex = 0;
  /**
   * For the string being read: number of raw input bytes taken up by escape
   * sequences beyond the bytes they contributed to `bufferedString`. Added to
   * the buffered byte length to advance `offset` past the whole string.
   */
  private escapedCharsByteLength = 0;
  /** Accumulates the decoded content of the string token being read. */
  private bufferedString: StringBuilder;
  /** Accumulates the characters of the number token being read. */
  private bufferedNumber: StringBuilder;

  /** Hex digits collected so far for the `\uXXXX` escape being read. */
  private unicode?: string; // unicode escapes
  /** High surrogate from a previous `\uXXXX` escape, waiting for its low surrogate. */
  private highSurrogate?: number;
  private bytes_remaining = 0; // number of bytes remaining in multi byte utf8 char to read after split boundary
  private bytes_in_sequence = 0; // bytes in multi byte utf8 char to read
  private char_split_buffer = new Uint8Array(4); // for rebuilding chars split before boundary is reached
  /** Encodes string input, the separator and decoded `\uXXXX` escapes. */
  private encoder = new TextEncoder();
  /**
   * Running byte position used for the `offset` reported with each token.
   * Starts at -1 and is advanced as input is consumed.
   */
  private offset = -1;

  /**
   * @param opts - Optional settings; members that are omitted fall back to
   * `defaultOpts`.
   */
  constructor(opts?: TokenizerOptions) {
    opts = { ...defaultOpts, ...opts };

    this.emitPartialTokens = opts.emitPartialTokens === true;
    this.bufferedString =
      opts.stringBufferSize && opts.stringBufferSize > 4
        ? new BufferedString(opts.stringBufferSize)
        : new NonBufferedString();
    this.bufferedNumber =
      opts.numberBufferSize && opts.numberBufferSize > 0
        ? new BufferedString(opts.numberBufferSize)
        : new NonBufferedString();

    this.separator = opts.separator;
    // An empty or missing separator disables separator matching altogether.
    this.separatorBytes = opts.separator
      ? this.encoder.encode(opts.separator)
      : undefined;
  }

  /** `true` once `end` has completed successfully. */
  public get isEnded(): boolean {
    return this.state === TokenizerStates.ENDED;
  }

  /**
   * Feeds the next chunk of input to the tokenizer.
   *
   * Complete tokens are reported through `onToken` as they are recognised.
   * Anything thrown while processing the chunk (including errors thrown by
   * `onToken`) is passed to `error`, which moves the tokenizer to the ERROR
   * state and calls `onError`.
   *
   * @param input - A string (encoded with `TextEncoder`), a `Uint8Array`
   * (used as is), an array of byte values, or any other typed array / data
   * view (its underlying bytes are read). Any other value results in a
   * `TypeError` being reported through `error`.
   */
  public write(input: Iterable<number> | string): void {
    try {
      let buffer: Uint8Array;
      if (input instanceof Uint8Array) {
        buffer = input;
      } else if (typeof input === "string") {
        buffer = this.encoder.encode(input);
      } else if (Array.isArray(input)) {
        buffer = Uint8Array.from(input);
      } else if (ArrayBuffer.isView(input)) {
        buffer = new Uint8Array(
          input.buffer,
          input.byteOffset,
          input.byteLength,
        );
      } else {
        throw new TypeError(
          "Unexpected type. The `write` function only accepts Arrays, TypedArrays and Strings.",
        );
      }

      for (let i = 0; i < buffer.length; i += 1) {
        const n = buffer[i]; // get current byte from buffer
        switch (this.state) {
          // @ts-expect-error fall through case
          case TokenizerStates.BOM_OR_START:
            // The first byte of the stream may open a byte-order mark; which
            // marks are considered depends on the type of the input. If no
            // mark starts here, fall through and treat the byte as regular
            // input.
            if (input instanceof Uint8Array && n === 0xef) {
              this.bom = [0xef, 0xbb, 0xbf];
              this.bomIndex += 1;
              this.state = TokenizerStates.BOM;
              continue;
            }

            if (input instanceof Uint16Array) {
              if (n === 0xfe) {
                this.bom = [0xfe, 0xff];
                this.bomIndex += 1;
                this.state = TokenizerStates.BOM;
                continue;
              }
              if (n === 0xff) {
                this.bom = [0xff, 0xfe];
                this.bomIndex += 1;
                this.state = TokenizerStates.BOM;
                continue;
              }
            }

            if (input instanceof Uint32Array) {
              if (n === 0x00) {
                this.bom = [0x00, 0x00, 0xfe, 0xff];
                this.bomIndex += 1;
                this.state = TokenizerStates.BOM;
                continue;
              }
              if (n === 0xff) {
                this.bom = [0xff, 0xfe, 0x00, 0x00];
                this.bomIndex += 1;
                this.state = TokenizerStates.BOM;
                continue;
              }
            }
          // eslint-disable-next-line no-fallthrough
          case TokenizerStates.START:
            this.offset += 1;

            // The separator is checked before whitespace so that a separator
            // starting with a whitespace byte is still recognised.
            if (this.separatorBytes && n === this.separatorBytes[0]) {
              if (this.separatorBytes.length === 1) {
                this.state = TokenizerStates.START;
                this.onToken({
                  token: TokenType.SEPARATOR,
                  value: this.separator as string,
                  offset: this.offset + this.separatorBytes.length - 1,
                });
                continue;
              }
              this.state = TokenizerStates.SEPARATOR;
              continue;
            }

            if (
              n === charset.SPACE ||
              n === charset.NEWLINE ||
              n === charset.CARRIAGE_RETURN ||
              n === charset.TAB
            ) {
              // whitespace
              continue;
            }

            if (n === charset.LEFT_CURLY_BRACKET) {
              this.onToken({
                token: TokenType.LEFT_BRACE,
                value: "{",
                offset: this.offset,
              });
              continue;
            }
            if (n === charset.RIGHT_CURLY_BRACKET) {
              this.onToken({
                token: TokenType.RIGHT_BRACE,
                value: "}",
                offset: this.offset,
              });
              continue;
            }
            if (n === charset.LEFT_SQUARE_BRACKET) {
              this.onToken({
                token: TokenType.LEFT_BRACKET,
                value: "[",
                offset: this.offset,
              });
              continue;
            }
            if (n === charset.RIGHT_SQUARE_BRACKET) {
              this.onToken({
                token: TokenType.RIGHT_BRACKET,
                value: "]",
                offset: this.offset,
              });
              continue;
            }
            if (n === charset.COLON) {
              this.onToken({
                token: TokenType.COLON,
                value: ":",
                offset: this.offset,
              });
              continue;
            }
            if (n === charset.COMMA) {
              this.onToken({
                token: TokenType.COMMA,
                value: ",",
                offset: this.offset,
              });
              continue;
            }

            if (n === charset.LATIN_SMALL_LETTER_T) {
              this.state = TokenizerStates.TRUE1;
              continue;
            }

            if (n === charset.LATIN_SMALL_LETTER_F) {
              this.state = TokenizerStates.FALSE1;
              continue;
            }

            if (n === charset.LATIN_SMALL_LETTER_N) {
              this.state = TokenizerStates.NULL1;
              continue;
            }

            if (n === charset.QUOTATION_MARK) {
              this.bufferedString.reset();
              this.escapedCharsByteLength = 0;
              this.state = TokenizerStates.STRING_DEFAULT;
              continue;
            }

            if (n >= charset.DIGIT_ONE && n <= charset.DIGIT_NINE) {
              this.bufferedNumber.reset();
              this.bufferedNumber.appendChar(n);
              this.state = TokenizerStates.NUMBER_AFTER_INITIAL_NON_ZERO;
              continue;
            }

            if (n === charset.DIGIT_ZERO) {
              this.bufferedNumber.reset();
              this.bufferedNumber.appendChar(n);
              this.state = TokenizerStates.NUMBER_AFTER_INITIAL_ZERO;
              continue;
            }

            if (n === charset.HYPHEN_MINUS) {
              this.bufferedNumber.reset();
              this.bufferedNumber.appendChar(n);
              this.state = TokenizerStates.NUMBER_AFTER_INITIAL_MINUS;
              continue;
            }

            break;
          // STRING
          case TokenizerStates.STRING_DEFAULT:
            if (n === charset.QUOTATION_MARK) {
              const string = this.bufferedString.toString();
              this.state = TokenizerStates.START;
              this.onToken({
                token: TokenType.STRING,
                value: string,
                offset: this.offset,
              });
              // Skip over the raw bytes of the string plus the closing quote.
              this.offset +=
                this.escapedCharsByteLength +
                this.bufferedString.byteLength +
                1;
              continue;
            }

            if (n === charset.REVERSE_SOLIDUS) {
              this.state = TokenizerStates.STRING_AFTER_BACKSLASH;
              continue;
            }

            if (n >= 128) {
              // Parse multi byte (>=128) chars one at a time
              if (n >= 194 && n <= 223) {
                this.bytes_in_sequence = 2;
              } else if (n <= 239) {
                this.bytes_in_sequence = 3;
              } else {
                this.bytes_in_sequence = 4;
              }

              if (this.bytes_in_sequence <= buffer.length - i) {
                // The whole char is inside this chunk: copy it in one go and
                // skip its continuation bytes.
                this.bufferedString.appendBuf(
                  buffer,
                  i,
                  i + this.bytes_in_sequence,
                );
                i += this.bytes_in_sequence - 1;
                continue;
              }

              // The bytes needed to complete the char fall outside this
              // chunk: we have a boundary split. Stash what is available and
              // finish the char when more data arrives.
              this.bytes_remaining = i + this.bytes_in_sequence - buffer.length;
              this.char_split_buffer.set(buffer.subarray(i));
              i = buffer.length - 1;
              this.state = TokenizerStates.STRING_INCOMPLETE_CHAR;
              continue;
            }

            if (n >= charset.SPACE) {
              this.bufferedString.appendChar(n);
              continue;
            }

            break;
          case TokenizerStates.STRING_INCOMPLETE_CHAR: {
            // Carry over of a multi byte char split between data chunks. This
            // state is only entered on the last byte of a chunk, so `i` is 0
            // here. The current chunk may itself hold fewer bytes than are
            // still missing (e.g. a 4-byte char delivered one byte at a
            // time), so copy only what is available and stay in this state
            // until the char is complete.
            const available = Math.min(
              this.bytes_remaining,
              buffer.length - i,
            );
            this.char_split_buffer.set(
              buffer.subarray(i, i + available),
              this.bytes_in_sequence - this.bytes_remaining,
            );
            this.bytes_remaining -= available;
            i += available - 1;
            if (this.bytes_remaining === 0) {
              this.bufferedString.appendBuf(
                this.char_split_buffer,
                0,
                this.bytes_in_sequence,
              );
              this.state = TokenizerStates.STRING_DEFAULT;
            }
            continue;
          }
          case TokenizerStates.STRING_AFTER_BACKSLASH:
            // eslint-disable-next-line no-case-declarations
            const controlChar = escapedSequences[n];
            if (controlChar) {
              this.bufferedString.appendChar(controlChar);
              this.escapedCharsByteLength += 1; // len(\")=2 minus the fact you're appending len(controlChar)=1
              this.state = TokenizerStates.STRING_DEFAULT;
              continue;
            }

            if (n === charset.LATIN_SMALL_LETTER_U) {
              this.unicode = "";
              this.state = TokenizerStates.STRING_UNICODE_DIGIT_1;
              continue;
            }

            break;
          case TokenizerStates.STRING_UNICODE_DIGIT_1:
          case TokenizerStates.STRING_UNICODE_DIGIT_2:
          case TokenizerStates.STRING_UNICODE_DIGIT_3:
            if (
              (n >= charset.DIGIT_ZERO && n <= charset.DIGIT_NINE) ||
              (n >= charset.LATIN_CAPITAL_LETTER_A &&
                n <= charset.LATIN_CAPITAL_LETTER_F) ||
              (n >= charset.LATIN_SMALL_LETTER_A &&
                n <= charset.LATIN_SMALL_LETTER_F)
            ) {
              this.unicode += String.fromCharCode(n);
              // The STRING_UNICODE_DIGIT_* states are consecutive enum values.
              this.state += 1;
              continue;
            }
            break;
          case TokenizerStates.STRING_UNICODE_DIGIT_4:
            if (
              (n >= charset.DIGIT_ZERO && n <= charset.DIGIT_NINE) ||
              (n >= charset.LATIN_CAPITAL_LETTER_A &&
                n <= charset.LATIN_CAPITAL_LETTER_F) ||
              (n >= charset.LATIN_SMALL_LETTER_A &&
                n <= charset.LATIN_SMALL_LETTER_F)
            ) {
              const intVal = parseInt(
                this.unicode + String.fromCharCode(n),
                16,
              );
              let unicodeString: string;
              if (this.highSurrogate === undefined) {
                if (intVal >= 0xd800 && intVal <= 0xdbff) {
                  //<55296,56319> - highSurrogate
                  // Nothing is appended yet, but the 6 raw bytes of this
                  // escape still have to be accounted for, otherwise the
                  // offsets reported after the string come out 6 bytes short.
                  // NOTE(review): a stashed high surrogate is never cleared
                  // when the string ends or when ordinary characters follow
                  // it; it is only consumed by the next \uXXXX escape.
                  this.highSurrogate = intVal;
                  this.escapedCharsByteLength += 6;
                  this.state = TokenizerStates.STRING_DEFAULT;
                  continue;
                } else {
                  unicodeString = String.fromCharCode(intVal);
                }
              } else {
                if (intVal >= 0xdc00 && intVal <= 0xdfff) {
                  //<56320,57343> - lowSurrogate
                  unicodeString = String.fromCharCode(
                    this.highSurrogate,
                    intVal,
                  );
                } else {
                  // NOTE(review): the code unit of the current escape
                  // (intVal) is dropped here; only the stashed high surrogate
                  // is emitted. Left unchanged.
                  unicodeString = String.fromCharCode(this.highSurrogate);
                }
                this.highSurrogate = undefined;
              }
              const unicodeBuffer = this.encoder.encode(unicodeString);
              this.bufferedString.appendBuf(unicodeBuffer);
              // len(\u0000)=6 minus the fact you're appending len(buf)
              this.escapedCharsByteLength += 6 - unicodeBuffer.byteLength;
              this.state = TokenizerStates.STRING_DEFAULT;
              continue;
            }
            break;
          // Number
          case TokenizerStates.NUMBER_AFTER_INITIAL_MINUS:
            if (n === charset.DIGIT_ZERO) {
              this.bufferedNumber.appendChar(n);
              this.state = TokenizerStates.NUMBER_AFTER_INITIAL_ZERO;
              continue;
            }

            if (n >= charset.DIGIT_ONE && n <= charset.DIGIT_NINE) {
              this.bufferedNumber.appendChar(n);
              this.state = TokenizerStates.NUMBER_AFTER_INITIAL_NON_ZERO;
              continue;
            }

            break;
          case TokenizerStates.NUMBER_AFTER_INITIAL_ZERO:
            if (n === charset.FULL_STOP) {
              this.bufferedNumber.appendChar(n);
              this.state = TokenizerStates.NUMBER_AFTER_FULL_STOP;
              continue;
            }

            if (
              n === charset.LATIN_SMALL_LETTER_E ||
              n === charset.LATIN_CAPITAL_LETTER_E
            ) {
              this.bufferedNumber.appendChar(n);
              this.state = TokenizerStates.NUMBER_AFTER_E;
              continue;
            }

            // Any other byte ends the number; step back so it is looked at
            // again in the START state.
            i -= 1;
            this.state = TokenizerStates.START;
            this.emitNumber();
            continue;
          case TokenizerStates.NUMBER_AFTER_INITIAL_NON_ZERO:
            if (n >= charset.DIGIT_ZERO && n <= charset.DIGIT_NINE) {
              this.bufferedNumber.appendChar(n);
              continue;
            }

            if (n === charset.FULL_STOP) {
              this.bufferedNumber.appendChar(n);
              this.state = TokenizerStates.NUMBER_AFTER_FULL_STOP;
              continue;
            }

            if (
              n === charset.LATIN_SMALL_LETTER_E ||
              n === charset.LATIN_CAPITAL_LETTER_E
            ) {
              this.bufferedNumber.appendChar(n);
              this.state = TokenizerStates.NUMBER_AFTER_E;
              continue;
            }

            // Number finished: re-read this byte in the START state.
            i -= 1;
            this.state = TokenizerStates.START;
            this.emitNumber();
            continue;
          case TokenizerStates.NUMBER_AFTER_FULL_STOP:
            if (n >= charset.DIGIT_ZERO && n <= charset.DIGIT_NINE) {
              this.bufferedNumber.appendChar(n);
              this.state = TokenizerStates.NUMBER_AFTER_DECIMAL;
              continue;
            }

            break;
          case TokenizerStates.NUMBER_AFTER_DECIMAL:
            if (n >= charset.DIGIT_ZERO && n <= charset.DIGIT_NINE) {
              this.bufferedNumber.appendChar(n);
              continue;
            }

            if (
              n === charset.LATIN_SMALL_LETTER_E ||
              n === charset.LATIN_CAPITAL_LETTER_E
            ) {
              this.bufferedNumber.appendChar(n);
              this.state = TokenizerStates.NUMBER_AFTER_E;
              continue;
            }

            // Number finished: re-read this byte in the START state.
            i -= 1;
            this.state = TokenizerStates.START;
            this.emitNumber();
            continue;
          // @ts-expect-error fall through case
          case TokenizerStates.NUMBER_AFTER_E:
            if (n === charset.PLUS_SIGN || n === charset.HYPHEN_MINUS) {
              this.bufferedNumber.appendChar(n);
              this.state = TokenizerStates.NUMBER_AFTER_E_AND_SIGN;
              continue;
            }
          // eslint-disable-next-line no-fallthrough
          case TokenizerStates.NUMBER_AFTER_E_AND_SIGN:
            if (n >= charset.DIGIT_ZERO && n <= charset.DIGIT_NINE) {
              this.bufferedNumber.appendChar(n);
              this.state = TokenizerStates.NUMBER_AFTER_E_AND_DIGIT;
              continue;
            }

            break;
          case TokenizerStates.NUMBER_AFTER_E_AND_DIGIT:
            if (n >= charset.DIGIT_ZERO && n <= charset.DIGIT_NINE) {
              this.bufferedNumber.appendChar(n);
              continue;
            }

            // Number finished: re-read this byte in the START state.
            i -= 1;
            this.state = TokenizerStates.START;
            this.emitNumber();
            continue;
          // TRUE
          case TokenizerStates.TRUE1:
            if (n === charset.LATIN_SMALL_LETTER_R) {
              this.state = TokenizerStates.TRUE2;
              continue;
            }
            break;
          case TokenizerStates.TRUE2:
            if (n === charset.LATIN_SMALL_LETTER_U) {
              this.state = TokenizerStates.TRUE3;
              continue;
            }
            break;
          case TokenizerStates.TRUE3:
            if (n === charset.LATIN_SMALL_LETTER_E) {
              this.state = TokenizerStates.START;
              this.onToken({
                token: TokenType.TRUE,
                value: true,
                offset: this.offset,
              });
              // Account for the three bytes following the initial "t".
              this.offset += 3;
              continue;
            }
            break;
          // FALSE
          case TokenizerStates.FALSE1:
            if (n === charset.LATIN_SMALL_LETTER_A) {
              this.state = TokenizerStates.FALSE2;
              continue;
            }
            break;
          case TokenizerStates.FALSE2:
            if (n === charset.LATIN_SMALL_LETTER_L) {
              this.state = TokenizerStates.FALSE3;
              continue;
            }
            break;
          case TokenizerStates.FALSE3:
            if (n === charset.LATIN_SMALL_LETTER_S) {
              this.state = TokenizerStates.FALSE4;
              continue;
            }
            break;
          case TokenizerStates.FALSE4:
            if (n === charset.LATIN_SMALL_LETTER_E) {
              this.state = TokenizerStates.START;
              this.onToken({
                token: TokenType.FALSE,
                value: false,
                offset: this.offset,
              });
              // Account for the four bytes following the initial "f".
              this.offset += 4;
              continue;
            }
            break;
          // NULL
          case TokenizerStates.NULL1:
            if (n === charset.LATIN_SMALL_LETTER_U) {
              this.state = TokenizerStates.NULL2;
              continue;
            }
            break;
          case TokenizerStates.NULL2:
            if (n === charset.LATIN_SMALL_LETTER_L) {
              this.state = TokenizerStates.NULL3;
              continue;
            }
            break;
          case TokenizerStates.NULL3:
            if (n === charset.LATIN_SMALL_LETTER_L) {
              this.state = TokenizerStates.START;
              this.onToken({
                token: TokenType.NULL,
                value: null,
                offset: this.offset,
              });
              // Account for the three bytes following the initial "n".
              this.offset += 3;
              continue;
            }
            break;
          case TokenizerStates.SEPARATOR:
            // Matching the remaining bytes of a multi-byte separator.
            this.separatorIndex += 1;
            if (
              !this.separatorBytes ||
              n !== this.separatorBytes[this.separatorIndex]
            ) {
              break;
            }
            if (this.separatorIndex === this.separatorBytes.length - 1) {
              this.state = TokenizerStates.START;
              this.onToken({
                token: TokenType.SEPARATOR,
                value: this.separator as string,
                offset: this.offset + this.separatorIndex,
              });
              this.separatorIndex = 0;
            }
            continue;
          // BOM support
          case TokenizerStates.BOM:
            if (n === this.bom![this.bomIndex]) {
              if (this.bomIndex === this.bom!.length - 1) {
                this.state = TokenizerStates.START;
                this.bom = undefined;
                this.bomIndex = 0;
                continue;
              }
              this.bomIndex += 1;
              continue;
            }
            break;
          case TokenizerStates.ENDED:
            if (
              n === charset.SPACE ||
              n === charset.NEWLINE ||
              n === charset.CARRIAGE_RETURN ||
              n === charset.TAB
            ) {
              // whitespace
              continue;
            }
        }

        // Every accepted byte leaves the switch through `continue`; reaching
        // this point means the byte is not valid in the current state.
        throw new TokenizerError(
          `Unexpected "${String.fromCharCode(
            n,
          )}" at position "${i}" in state ${TokenizerStateToString(
            this.state,
          )}`,
        );
      }

      if (this.emitPartialTokens) {
        // The chunk ended inside a token: report what is known so far,
        // flagged as partial. The state is left untouched so the token is
        // completed by later chunks.
        switch (this.state) {
          case TokenizerStates.TRUE1:
          case TokenizerStates.TRUE2:
          case TokenizerStates.TRUE3:
            this.onToken({
              token: TokenType.TRUE,
              value: true,
              offset: this.offset,
              partial: true,
            });
            break;
          case TokenizerStates.FALSE1:
          case TokenizerStates.FALSE2:
          case TokenizerStates.FALSE3:
          case TokenizerStates.FALSE4:
            this.onToken({
              token: TokenType.FALSE,
              value: false,
              offset: this.offset,
              partial: true,
            });
            break;
          case TokenizerStates.NULL1:
          case TokenizerStates.NULL2:
          case TokenizerStates.NULL3:
            this.onToken({
              token: TokenType.NULL,
              value: null,
              offset: this.offset,
              partial: true,
            });
            break;
          case TokenizerStates.STRING_DEFAULT: {
            const string = this.bufferedString.toString();
            this.onToken({
              token: TokenType.STRING,
              value: string,
              offset: this.offset,
              partial: true,
            });
            break;
          }
          case TokenizerStates.NUMBER_AFTER_INITIAL_ZERO:
          case TokenizerStates.NUMBER_AFTER_INITIAL_NON_ZERO:
          case TokenizerStates.NUMBER_AFTER_DECIMAL:
          case TokenizerStates.NUMBER_AFTER_E_AND_DIGIT:
            try {
              this.onToken({
                token: TokenType.NUMBER,
                value: this.parseNumber(this.bufferedNumber.toString()),
                offset: this.offset,
                partial: true,
              });
            } catch {
              // Number couldn't be parsed. Do nothing.
            }
        }
      }
    } catch (err: unknown) {
      this.error(err as Error);
    }
  }

  /**
   * Reports the buffered number as a NUMBER token and advances `offset` to
   * the number's last byte.
   */
  private emitNumber(): void {
    this.onToken({
      token: TokenType.NUMBER,
      value: this.parseNumber(this.bufferedNumber.toString()),
      offset: this.offset,
    });
    this.offset += this.bufferedNumber.byteLength - 1;
  }

  /**
   * Converts the text of a number token into its value. Subclasses may
   * override this to parse numbers differently.
   *
   * @param numberStr - Characters collected for the number token.
   * @returns The result of `Number(numberStr)`.
   */
  protected parseNumber(numberStr: string): number {
    return Number(numberStr);
  }

  /**
   * Puts the tokenizer into the ERROR state (unless it has already ended)
   * and forwards the error to `onError`.
   *
   * @param err - The error to report.
   */
  public error(err: Error): void {
    if (this.state !== TokenizerStates.ENDED) {
      this.state = TokenizerStates.ERROR;
    }

    this.onError(err);
  }

  /**
   * Signals that no more input will be written.
   *
   * A number that is still being read is emitted first, since only the end
   * of input can tell that it is complete. If the tokenizer is in the middle
   * of any other token, a `TokenizerError` is reported through `error`
   * instead and `onEnd` is not called.
   */
  public end(): void {
    switch (this.state) {
      case TokenizerStates.NUMBER_AFTER_INITIAL_ZERO:
      case TokenizerStates.NUMBER_AFTER_INITIAL_NON_ZERO:
      case TokenizerStates.NUMBER_AFTER_DECIMAL:
      case TokenizerStates.NUMBER_AFTER_E_AND_DIGIT:
        this.state = TokenizerStates.ENDED;
        this.emitNumber();
        this.onEnd();
        break;
      case TokenizerStates.BOM_OR_START:
      case TokenizerStates.START:
      case TokenizerStates.ERROR:
      case TokenizerStates.SEPARATOR:
        this.state = TokenizerStates.ENDED;
        this.onEnd();
        break;
      default:
        this.error(
          new TokenizerError(
            `Tokenizer ended in the middle of a token (state: ${TokenizerStateToString(
              this.state,
            )}). Either not all the data was received or the data was invalid.`,
          ),
        );
    }
  }

  /**
   * Callback invoked for every token. It must be replaced (by assignment or
   * in a subclass) before any input is written; this default implementation
   * always throws.
   *
   * @param parsedToken - The token, its value and its offset.
   * @throws TokenizerError Always, until the callback is replaced.
   */
  // eslint-disable-next-line @typescript-eslint/no-unused-vars
  public onToken(parsedToken: ParsedTokenInfo): void {
    // Override me
    throw new TokenizerError(
      'Can\'t emit tokens before the "onToken" callback has been set up.',
    );
  }

  /**
   * Callback invoked by `error`. The default implementation rethrows.
   *
   * @param err - The error being reported.
   */
  public onError(err: Error): void {
    // Override me
    throw err;
  }

  /** Callback invoked once `end` has completed successfully. No-op by default. */
  public onEnd(): void {
    // Override me
  }
}
|
||||
Reference in New Issue
Block a user