2021-12-01 16:24:29 +00:00
|
|
|
/// SPDX-License-Identifier: GPL-3.0-or-later
|
2023-01-17 10:43:15 +00:00
|
|
|
/// SPDX-FileCopyrightText: Copyright © 2016-2023 Tony Garnock-Jones <tonyg@leastfixedpoint.com>
|
2021-12-01 16:24:29 +00:00
|
|
|
|
2021-01-18 22:11:53 +00:00
|
|
|
import { TokenType, Token, Item, GroupInProgress } from './tokens.js';
|
2021-01-14 12:09:53 +00:00
|
|
|
import { Pos, advancePos } from './position.js';
|
|
|
|
|
|
|
|
/**
 * Character-level tokenizer base class.
 *
 * Subclasses supply the raw character source through `_peekChar` and `_dropChar`. This class
 * layers one character and one token of lookahead on top of that source, keeps `pos` up to
 * date as characters are consumed, and groups characters into `Token`s. A scanner is its own
 * iterator, so it can be consumed with `for...of` or spread syntax.
 */
export abstract class Scanner implements IterableIterator<Token> {
    /** Current position. Passed to `advancePos` for every character consumed by `dropChar`. */
    readonly pos: Pos;

    /** When truthy, tokens and groups built by this scanner carry `synthetic: true`. */
    readonly synthetic: boolean | undefined;

    /** One character of lookahead; `null` when nothing is buffered. */
    charBuffer: string | null = null;

    /** One token of lookahead; `null` when nothing is buffered. */
    tokenBuffer: Token | null = null;

    /**
     * Characters that terminate an atom (see `_atom`). A delimiter that `_peek` does not
     * handle specially is emitted as a single-character ATOM token.
     */
    delimiters = ' \t\n\r\'"`.,;()[]{}/\\';

    /**
     * @param pos Starting position; copied, so the caller's object is never mutated.
     * @param synthetic Optional flag propagated onto every token and group produced.
     */
    constructor(pos: Pos, synthetic?: boolean) {
        this.pos = { ...pos };
        this.synthetic = synthetic;
    }

    /** The scanner is its own iterator. */
    [Symbol.iterator](): IterableIterator<Token> {
        return this;
    }

    /** Returns the next raw character without consuming it, or `null` at end of input. */
    abstract _peekChar(): string | null;

    /** Consumes the character most recently reported by `_peekChar`. */
    abstract _dropChar(): void;

    /** Returns the next character without consuming it, or `null` at end of input. */
    peekChar(): string | null {
        if (this.charBuffer === null) {
            this.charBuffer = this._peekChar();
        }
        return this.charBuffer;
    }

    /** Consumes the next character, if there is one, and advances `pos` past it. */
    dropChar(): void {
        const ch = this.peekChar();
        if (ch === null) return; // End of input: nothing to consume.
        this._dropChar();
        advancePos(this.pos, ch);
        this.charBuffer = null;
    }

    /** Consumes and returns the next character, or returns `null` at end of input. */
    shiftChar(): string | null {
        const ch = this.peekChar();
        this.dropChar();
        return ch;
    }

    /**
     * Builds a token that ends at the current position.
     *
     * @param start Position of the first character of the token.
     * @param type Token classification.
     * @param text Text of the token.
     */
    makeToken(start: Pos, type: TokenType, text: string): Token {
        return {
            type,
            start,
            end: this.mark(),
            text,
            // Spreading a falsy value adds no properties, so `synthetic` only appears when set.
            ...(this.synthetic && { synthetic: true }),
        };
    }

    /**
     * Builds a not-yet-closed group that starts at `open`.
     *
     * @param open The opening token of the group.
     * @param items Items already known to belong to the group (defaults to a fresh empty array).
     */
    makeGroupInProgress(open: Token, items: Array<Item> = []): GroupInProgress {
        return {
            start: open.start,
            open,
            close: null,
            items,
            ...(this.synthetic && { synthetic: true }),
        };
    }

    /** Returns a snapshot (copy) of the current position. */
    mark(): Pos {
        return { ...this.pos };
    }

    /**
     * Consumes characters for as long as `pred` accepts the upcoming character.
     *
     * @param pred Decides, from the peeked character (`null` at end of input), whether to continue.
     * @param f Called with each accepted character after it has been consumed.
     */
    _while(pred: (ch: string | null) => boolean, f: (ch: string | null) => void): void {
        for (let ch = this.peekChar(); pred(ch); ch = this.peekChar()) {
            this.dropChar();
            f(ch);
        }
    }

    /**
     * Consumes a run of space characters (see `isSpace`) and returns it as a SPACE token.
     *
     * @param buf Text already consumed that belongs to the token (e.g. a comment).
     * @param start Position at which the token began.
     */
    _collectSpace(buf = '', start = this.mark()): Token {
        this._while(ch => ch !== null && this.isSpace(ch), ch => buf = buf + ch);
        return this.makeToken(start, TokenType.SPACE, buf);
    }

    /**
     * Consumes exactly one character and returns it as a token of the given type.
     * Callers are expected to have checked that a character is available.
     */
    _punct(type: TokenType): Token {
        const start = this.mark(); // Must be taken before the character is consumed.
        const text = this.shiftChar()!;
        return this.makeToken(start, type, text);
    }

    /**
     * Scans the remainder of a string-like token and returns it as a STRING token.
     *
     * Scanning stops after the closing quote `q`, at end of input, and additionally:
     *  - when `isTemplate` is false, after a newline;
     *  - when `isTemplate` is true, after a `$` that is immediately followed by `{`
     *    (the `{` itself is left unconsumed).
     *
     * A backslash is kept in the text together with the character following it, except that
     * a newline following a backslash is consumed without being added (line continuation).
     *
     * @param q The closing quote character.
     * @param buf Text already consumed that belongs to the token (e.g. the opening quote).
     * @param isTemplate Selects template-string termination rules, as described above.
     * @param start Position of the first character of the token. Defaults to the current
     *   position, which is only right when `buf` is empty; callers that have already consumed
     *   an opening quote should pass the position from before consuming it.
     */
    _str(q: string, buf: string, isTemplate: boolean, start: Pos = this.mark()): Token {
        while (true) {
            let ch = this.shiftChar();
            if (ch !== null) buf = buf + ch;

            if (ch === null ||
                ch === q ||
                (isTemplate ? (ch === '$' && this.peekChar() === '{') : (ch === '\n')))
            {
                return this.makeToken(start, TokenType.STRING, buf);
            }

            if (ch === '\\') {
                ch = this.shiftChar();
                if (ch === '\n') {
                    // Do nothing. Line continuation.
                } else if (ch !== null) {
                    buf = buf + ch;
                }
            }
        }
    }

    /**
     * Scans a constant fragment of a template string, starting at the current position.
     *
     * @returns The fragment as a STRING token, or `null` if no text could be scanned.
     * @throws Error if a token is currently buffered by `peek`.
     */
    templateConstantFragment(): Token | null {
        if (this.tokenBuffer !== null) throw new Error("Internal error: templateConstantFragment");
        const fragment = this._str('`', '', true);
        return fragment.text.length === 0 ? null : fragment;
    }

    /** True for space, tab and carriage return. Newline is deliberately not included. */
    isSpace(ch: string): boolean {
        return ' \t\r'.includes(ch);
    }

    /** True if `ch` is one of the current `delimiters`. */
    isDelimiter(ch: string): boolean {
        return this.delimiters.includes(ch);
    }

    /** Appends `newDelimiters` to the set of delimiter characters. */
    addDelimiters(newDelimiters: string): void {
        this.delimiters = this.delimiters + newDelimiters;
    }

    /**
     * Consumes characters up to (not including) the next delimiter or end of input and
     * returns them as an ATOM token.
     *
     * @param start Position at which the token began.
     * @param buf Text already consumed that belongs to the token.
     */
    _atom(start = this.mark(), buf = ''): Token {
        while (true) {
            const ch = this.peekChar();
            if (ch === null || this.isDelimiter(ch)) {
                return this.makeToken(start, TokenType.ATOM, buf);
            }
            buf = buf + ch;
            this.dropChar();
        }
    }

    /**
     * Handles a `/`: scans a `//` comment, a delimited comment, or an atom starting with `/`.
     *
     * Comments are returned as SPACE tokens and absorb any space characters that directly
     * follow them. A single-line comment stops before the newline. A delimited comment that
     * is still open at end of input is returned as far as it goes. Anything else, including
     * a `/` that is the last character of the input, is scanned as an ATOM.
     */
    _maybeComment(): Token {
        const start = this.mark();
        let buf = this.shiftChar()!;
        switch (this.peekChar()) {
            case '/': // single-line comment.
                this._while(c => c !== null && c !== '\n', c => buf = buf + c);
                return this._collectSpace(buf, start);

            case '*': // delimited comment.
            {
                buf = buf + this.shiftChar();
                // Tracks whether the previous character was `*`, so that `*/` closes the
                // comment but the `*` of the opening `/*` cannot double as the closer's star.
                let seenStar = false;
                while (true) {
                    const c = this.shiftChar();
                    if (c === null) break;
                    buf = buf + c;
                    if (c === '/' && seenStar) break;
                    seenStar = (c === '*');
                }
                return this._collectSpace(buf, start);
            }

            default:
                // Not a comment. At end of input `_atom` returns immediately, yielding an
                // ATOM containing just the `/`, rather than misclassifying it as SPACE.
                return this._atom(start, buf);
        }
    }

    /** Scans and returns the next token, or `null` at end of input. Does not touch `tokenBuffer`. */
    _peek(): Token | null {
        const ch = this.peekChar();
        if (ch === null) return null;

        switch (ch) {
            case ' ':
            case '\t':
            case '\r':
                return this._collectSpace();

            case '\n':
                return this._punct(TokenType.NEWLINE);

            case '(':
            case '[':
            case '{':
                return this._punct(TokenType.OPEN);

            case ')':
            case ']':
            case '}':
                return this._punct(TokenType.CLOSE);

            case '\'':
            case '"': {
                // Record the start before consuming the quote so the token's `start`
                // covers the opening quote that its text begins with.
                const start = this.mark();
                this.dropChar();
                return this._str(ch, ch, false, start);
            }

            case '`': {
                const start = this.mark();
                this.dropChar();
                return this._str(ch, ch, true, start);
            }

            case '/':
                return this._maybeComment();

            default: {
                if (this.isDelimiter(ch)) {
                    return this._punct(TokenType.ATOM);
                }
                const start = this.mark();
                return this._atom(start, this.shiftChar()!);
            }
        }
    }

    /** Returns the next token without consuming it, or `null` at end of input. */
    peek(): Token | null {
        if (this.tokenBuffer === null) this.tokenBuffer = this._peek();
        return this.tokenBuffer;
    }

    /** Consumes the next token, scanning it first if it has not been peeked yet. */
    drop(): void {
        if (this.tokenBuffer === null) this.peek();
        this.tokenBuffer = null;
    }

    /** Consumes and returns the next token, or returns `null` at end of input. */
    shift(): Token | null {
        const t = this.peek();
        this.drop();
        return t;
    }

    /** Iterator protocol: yields tokens until the input is exhausted. */
    next(): IteratorResult<Token> {
        const t = this.shift();
        return t === null ? { done: true, value: null } : { done: false, value: t };
    }
}
|
|
|
|
|
|
|
|
/**
 * A `Scanner` whose characters come from an in-memory string, read one
 * string index (UTF-16 code unit) at a time.
 */
export class StringScanner extends Scanner {
    /** The text being scanned. */
    readonly input: string;

    /** Index into `input` of the next character to be returned by `_peekChar`. */
    index: number;

    /**
     * @param pos Position passed on to the `Scanner` constructor.
     * @param input The text to scan.
     * @param synthetic Optional flag passed on to the `Scanner` constructor.
     */
    constructor(pos: Pos, input: string, synthetic?: boolean) {
        super(pos, synthetic);
        this.index = 0;
        this.input = input;
    }

    /** Returns the character at `index`, or `null` if there is none there. */
    _peekChar(): string | null {
        const ch = this.input[this.index];
        if (ch === undefined || ch === null) {
            return null;
        }
        return ch;
    }

    /** Moves `index` forward by one. */
    _dropChar(): void {
        this.index += 1;
    }
}
|