From 2105886c5e6286cc5a40280099688a957045ebcf Mon Sep 17 00:00:00 2001 From: Tony Garnock-Jones Date: Thu, 14 Jan 2021 13:09:53 +0100 Subject: [PATCH] WIP lax parser, matcher, rewriter and compiler --- README.md | 2 +- packages/core/bin/syndicatec.js | 4 + packages/core/package.json | 3 + packages/core/src/bin/syndicate-compiler.ts | 1 + packages/core/src/compiler/grammar.ts | 168 +++++++++++++++ packages/core/src/compiler/index.ts | 1 + packages/core/src/compiler/main.ts | 52 +++++ packages/core/src/index.ts | 3 + packages/core/src/syntax/codewriter.ts | 165 +++++++++++++++ packages/core/src/syntax/index.ts | 9 + packages/core/src/syntax/list.ts | 44 ++++ packages/core/src/syntax/matcher.ts | 222 ++++++++++++++++++++ packages/core/src/syntax/position.ts | 32 +++ packages/core/src/syntax/reader.ts | 109 ++++++++++ packages/core/src/syntax/scanner.ts | 214 +++++++++++++++++++ packages/core/src/syntax/template.ts | 59 ++++++ packages/core/src/syntax/tokens.ts | 85 ++++++++ packages/core/src/syntax/vlq.ts | 37 ++++ 18 files changed, 1209 insertions(+), 1 deletion(-) create mode 100755 packages/core/bin/syndicatec.js create mode 100644 packages/core/src/bin/syndicate-compiler.ts create mode 100644 packages/core/src/compiler/grammar.ts create mode 100644 packages/core/src/compiler/index.ts create mode 100644 packages/core/src/compiler/main.ts create mode 100644 packages/core/src/syntax/codewriter.ts create mode 100644 packages/core/src/syntax/index.ts create mode 100644 packages/core/src/syntax/list.ts create mode 100644 packages/core/src/syntax/matcher.ts create mode 100644 packages/core/src/syntax/position.ts create mode 100644 packages/core/src/syntax/reader.ts create mode 100644 packages/core/src/syntax/scanner.ts create mode 100644 packages/core/src/syntax/template.ts create mode 100644 packages/core/src/syntax/tokens.ts create mode 100644 packages/core/src/syntax/vlq.ts diff --git a/README.md b/README.md index 895e8eb..702d49d 100644 --- a/README.md +++ 
b/README.md @@ -110,7 +110,7 @@ The output should be something like: ## Licence @syndicate-lang, an implementation of Syndicate for JS. -Copyright (C) 2016-2018 Tony Garnock-Jones +Copyright (C) 2016-2021 Tony Garnock-Jones This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/packages/core/bin/syndicatec.js b/packages/core/bin/syndicatec.js new file mode 100755 index 0000000..c713452 --- /dev/null +++ b/packages/core/bin/syndicatec.js @@ -0,0 +1,4 @@ +#!/usr/bin/env node + +import { main } from '../lib/compiler/main.js'; +main(process.argv); diff --git a/packages/core/package.json b/packages/core/package.json index a52a2cc..50d5653 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -21,5 +21,8 @@ "author": "Tony Garnock-Jones ", "dependencies": { "preserves": "0.4.0" + }, + "bin": { + "syndicatec": "./bin/syndicatec.js" } } diff --git a/packages/core/src/bin/syndicate-compiler.ts b/packages/core/src/bin/syndicate-compiler.ts new file mode 100644 index 0000000..ec10f3e --- /dev/null +++ b/packages/core/src/bin/syndicate-compiler.ts @@ -0,0 +1 @@ +console.log('hi'); diff --git a/packages/core/src/compiler/grammar.ts b/packages/core/src/compiler/grammar.ts new file mode 100644 index 0000000..5e1e07b --- /dev/null +++ b/packages/core/src/compiler/grammar.ts @@ -0,0 +1,168 @@ +import { + Item, Items, + Pattern, + + scope, bind, seq, alt, upTo, atom, group, exec, + repeat, option, withoutSpace, map, rest, discard, + value, + +} from '../syntax/index.js'; +import * as Matcher from '../syntax/matcher.js'; + +export type Expr = Items; +export type Statement = Items; +export type Identifier = Item; + +export const block = (acc?: Items) => + (acc === void 0) + ? group('{', discard) + : group('{', map(rest, items => acc.push(... 
items))); + +export const statementBoundary = alt(atom(';'), Matcher.newline, Matcher.end); +export const exprBoundary = alt(atom(';'), atom(','), group('{', discard), Matcher.end); + +export interface SpawnStatement { + isDataspace: boolean; + name?: Expr; + initialAssertions: Expr[]; + parentIds: Identifier[]; + parentInits: Expr[]; + bootProcBody: Statement; +} + +export const identifier: Pattern = atom(); + +export function expr(... extraStops: Pattern[]): Pattern { + return withoutSpace(upTo(alt(exprBoundary, ... extraStops))); +} + +export function statement(acc: Items): Pattern { + return alt(group('{', map(rest, items => acc.push(... items))), + withoutSpace(seq(map(upTo(statementBoundary), items => acc.push(... items)), + map(statementBoundary, i => i ? acc.push(i) : void 0)))); +} + +export const spawn: Pattern & { headerExpr: Pattern } = + Object.assign(scope((o: SpawnStatement) => { + o.isDataspace = false; + o.initialAssertions = []; + o.parentIds = []; + o.parentInits = []; + o.bootProcBody = []; + return seq(atom('spawn'), + option(seq(atom('dataspace'), exec(() => o.isDataspace = true))), + option(seq(atom('named'), + bind(o, 'name', spawn.headerExpr))), + repeat(alt(seq(atom(':asserting'), + map(spawn.headerExpr, e => o.initialAssertions.push(e))), + map(scope((l: { id: Identifier, init: Expr }) => + seq(atom(':let'), + bind(l, 'id', identifier), + atom('='), + bind(l, 'init', spawn.headerExpr))), + l => { + o.parentIds.push(l.id); + o.parentInits.push(l.init); + }))), + statement(o.bootProcBody)); + }), { + headerExpr: expr(atom(':asserting'), atom(':let')), + }); + +export interface FieldDeclarationStatement { + member: Expr; + expr?: Expr; +} + +export const fieldDeclarationStatement: Pattern = + scope(o => seq(atom('field'), + bind(o, 'member', expr(atom('='))), + option(seq(atom('='), bind(o, 'expr', expr()))))); + +export interface AssertionEndpointStatement { + isDynamic: boolean, + template: Expr, + test?: Expr, +} + +export const 
assertionEndpointStatement: Pattern = + scope(o => { + o.isDynamic = true; + return seq(atom('assert'), + option(map(atom(':snapshot'), _ => o.isDynamic = false)), + bind(o, 'template', expr(seq(atom('when'), group('(', discard)))), + option(seq(atom('when'), group('(', bind(o, 'test', expr()))))); + }); + +export const dataflowStatement: Pattern = + value(o => { + o.value = []; + return seq(atom('dataflow'), statement(o.value)); + }); + +export interface EventHandlerEndpointStatement { + terminal: boolean; + triggerType: 'dataflow' | 'start' | 'stop' | 'asserted' | 'retracted' | 'message'; + isDynamic: boolean; + pattern?: Expr; + body: Statement; +} + +export const eventHandlerEndpointStatement: Pattern = + scope(o => { + o.terminal = false; + o.isDynamic = true; + o.body = []; + return seq(option(map(atom('stop'), _ => o.terminal = true)), + atom('on'), + alt(map(group('(', bind(o, 'pattern', expr())), _ => o.triggerType = 'dataflow'), + seq(bind(o, 'triggerType', + map(alt(atom('start'), atom('stop')), e => e.text)), + option(statement(o.body))), + seq(bind(o, 'triggerType', + map(alt(atom('asserted'), + atom('retracted'), + atom('message')), + e => e.text)), + option(map(atom(':snapshot'), _ => o.isDynamic = false)), + bind(o, 'pattern', expr()), + option(statement(o.body))))); + }); + +export interface TypeDefinitionStatement { + expectedUse: 'message' | 'assertion'; + label: Identifier; + fields: Identifier[]; + wireName?: Expr; +} + +export const typeDefinitionStatement: Pattern = + scope(o => seq(bind(o, 'expectedUse', map(alt(atom('message'), atom('assertion')), e => e.text)), + atom('type'), + bind(o, 'label', identifier), + group('(', bind(o, 'fields', repeat(identifier, { separator: atom(',') }))), + option(seq(atom('='), + bind(o, 'wireName', withoutSpace(upTo(statementBoundary))))), + statementBoundary)); + +export const messageSendStatement: Pattern = + value(o => seq(atom('send'), bind(o, 'value', withoutSpace(upTo(statementBoundary))), 
statementBoundary)); + +export interface DuringStatement { + pattern: Expr; + body: Statement; +} + +export const duringStatement: Pattern = + scope(o => { + o.body = []; + return seq(atom('during'), + bind(o, 'pattern', expr()), + statement(o.body)); + }); + +export const reactStatement: Pattern = + value(o => { + o.value = []; + return seq(atom('react'), statement(o.value)); + }); diff --git a/packages/core/src/compiler/index.ts b/packages/core/src/compiler/index.ts new file mode 100644 index 0000000..05f3c9e --- /dev/null +++ b/packages/core/src/compiler/index.ts @@ -0,0 +1 @@ +export * as Grammar from './grammar.js'; diff --git a/packages/core/src/compiler/main.ts b/packages/core/src/compiler/main.ts new file mode 100644 index 0000000..3bd97b9 --- /dev/null +++ b/packages/core/src/compiler/main.ts @@ -0,0 +1,52 @@ +import fs from 'fs'; +import * as S from '../syntax/index.js'; +import * as G from './grammar.js'; + +export function main(argv: string[]) { + let [ inputFilename ] = argv.slice(2); + inputFilename = inputFilename ?? '/dev/stdin'; + const source = fs.readFileSync(inputFilename, 'utf-8'); + + const scanner = new S.StringScanner(S.startPos(inputFilename), source); + const reader = new S.LaxReader(scanner); + let tree = reader.readToEnd(); + let macro = new S.Templates(); + + let expansionNeeded = true; + function expand(p: S.Pattern, f: (t: T) => S.Items) { + tree = S.replace(tree, p, t => { + expansionNeeded = true; + return f(t); + }); + } + while (expansionNeeded) { + expansionNeeded = false; + expand(G.spawn, + s => macro.template()`SPAWN[${s.name ?? []}][${S.joinItems(s.initialAssertions, ', ')}][[${s.bootProcBody}]]`); + expand(G.fieldDeclarationStatement, + s => macro.template()`FIELD[${s.member}][${s.expr ?? []}]`); + expand(G.assertionEndpointStatement, + s => macro.template()`ASSERT[${''+s.isDynamic}][${s.template}][${s.test ?? 
[]}`); + expand(G.dataflowStatement, + e => macro.template()`DATAFLOW[${e}]`); + expand(G.eventHandlerEndpointStatement, + s => macro.template()`EVENTHANDLER[${`${s.terminal}/${s.isDynamic}`}][${s.triggerType}][${s.pattern ?? []}][${s.body}]`); // `on start`/`on stop` bind no pattern; substitute an empty item list like the sibling expansions do + expand(G.typeDefinitionStatement, + s => macro.template()`TYPEDEF[${s.expectedUse}][${[s.label]}][${S.joinItems(s.fields.map(f => [f]), ' -- ')}][${s.wireName ?? []}]`); + expand(G.messageSendStatement, + e => macro.template()`SEND[${e}]`); + expand(G.duringStatement, + s => macro.template()`DURING[${s.pattern}][${s.body}]`); + expand(G.reactStatement, + e => macro.template()`REACT[${e}]`); + } + + console.log(S.itemText(tree, { color: true, missing: '\x1b[41m□\x1b[0m' })); + + const cw = new S.CodeWriter(inputFilename); + cw.emit(tree); + fs.writeFileSync('/tmp/adhoc.syndicate', cw.text); + const mm = cw.map; + mm.sourcesContent = [source]; + fs.writeFileSync('/tmp/adhoc.syndicate.map', JSON.stringify(mm)); +} diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 3d162aa..f3d4664 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -27,6 +27,9 @@ export * from './runtime/ground.js'; export * from './runtime/relay.js'; // export * as Worker from './runtime/worker.js'; +export * as Syntax from './syntax/index.js'; +export * as Compiler from './compiler/index.js'; + import { randomId } from './runtime/randomid.js'; // These aren't so much "Universal" as they are "VM-wide-unique". 
diff --git a/packages/core/src/syntax/codewriter.ts b/packages/core/src/syntax/codewriter.ts new file mode 100644 index 0000000..f5b933a --- /dev/null +++ b/packages/core/src/syntax/codewriter.ts @@ -0,0 +1,165 @@ +import { TokenType, Item, Items, isGroup } from './tokens.js'; +import { Pos, startPos, advancePos } from './position.js'; +import { vlqEncode } from './vlq.js'; + +export interface SourceMap { + version: 3; + file?: string; + sourceRoot?: string, // default: "" + sources: Array; + sourcesContent?: Array; // default: null at each entry + names: Array; + mappings: string; +} + +export interface Mapping { + generatedStartColumn?: number; // zero-based + sourceIndex?: number; + sourceStartLine?: number; // zero-based (!!) + sourceStartColumn?: number; // zero-based + nameIndex?: number; +} + +function encodeMapping(entry: Mapping): Array { + const a = [entry.generatedStartColumn]; + if ('sourceIndex' in entry) { + a.push(entry.sourceIndex); + a.push(entry.sourceStartLine); + a.push(entry.sourceStartColumn); + if ('nameIndex' in entry) { + a.push(entry.nameIndex); + } + } + return a; +} + +function maybeDelta(newValue: number | undefined, oldValue: number | undefined) { + // console.log('maybeDelta', oldValue, newValue); + return (newValue === void 0) ? void 0 + : (oldValue === void 0) ? newValue + : newValue - oldValue; +} + +export class CodeWriter { + readonly file: string | null; + readonly pos: Pos; + readonly sources: Array = []; + readonly chunks: Array = []; + readonly mappings: Array> = []; + previous: Mapping = {}; + previousPos: Pos | null = null; + + constructor(file: string | null) { + this.file = file; + this.pos = startPos(this.file ?? 
''); + } + + get text(): string { + return this.chunks.join(''); + } + + get map(): SourceMap { + // console.log(this.mappings.map(segs => segs.map(encodeMapping))); + const mappings = this.mappings.map(segments => + segments.map(encodeMapping).map(vlqEncode).join(',')).join(';'); + const m: SourceMap = { + version: 3, + sources: [... this.sources], + names: [], + mappings, + }; + if (this.file !== null) m.file = this.file; + return m; + } + + finishLine() { + // console.log('newline'); + this.mappings.push([]); + this.previous.generatedStartColumn = undefined; + this.previousPos = null; + } + + sourceIndexFor(name: string) { + let i = this.sources.indexOf(name); + if (i === -1) { + this.sources.push(name); + i = this.sources.length - 1; + } + return i; + } + + addMapping(p: Pos, type: TokenType) { + // console.log('considering', p, type); + + const oldPos = this.previousPos; + + if ((oldPos === null || oldPos.name === p.name) && + (type === TokenType.SPACE || type === TokenType.NEWLINE)) + { + // console.log('whitespace skip'); + if (this.previousPos !== null) { + this.previousPos = p; + } + return; + } + + this.previousPos = p; + + if ((oldPos?.name === p.name) && + ((p.name === null) || + ((oldPos?.column === p.column) && (oldPos?.line === p.line)))) + { + // console.log('skipping', this.previous, oldPos, p); + return; + } + + const n: Mapping = {}; + n.generatedStartColumn = maybeDelta(this.pos.column, this.previous.generatedStartColumn); + this.previous.generatedStartColumn = this.pos.column; + + if (p.name !== null) { + const sourceIndex = this.sourceIndexFor(p.name); + n.sourceIndex = maybeDelta(sourceIndex, this.previous.sourceIndex); + this.previous.sourceIndex = sourceIndex; + n.sourceStartColumn = maybeDelta(p.column, this.previous.sourceStartColumn); + this.previous.sourceStartColumn = p.column; + n.sourceStartLine = maybeDelta(p.line - 1, this.previous.sourceStartLine); + this.previous.sourceStartLine = p.line - 1; + } + + // console.log('pushing', + 
// n, + // this.previous, + // oldPos?.line + ':' + oldPos?.column, + // p.line + ':' + p.column); + this.mappings[this.mappings.length - 1].push(n); + } + + chunk(p: Pos, s: string, type: TokenType) { + p = { ... p }; + this.chunks.push(s); + if (this.mappings.length === 0) this.finishLine(); + this.addMapping(p, type); + for (const ch of s) { + advancePos(p, ch); + if (advancePos(this.pos, ch)) { + this.finishLine(); + this.addMapping(p, type); + } + } + } + + emit(i: Item | Items) { + if (Array.isArray(i)) { + i.forEach(j => this.emit(j)); + } else if (isGroup(i)) { + this.emit(i.start); + this.emit(i.items); + if (i.end) this.emit(i.end); + } else if (i === null) { + // Do nothing. + } else { + this.chunk(i.start, i.text, i.type); + } + } +} diff --git a/packages/core/src/syntax/index.ts b/packages/core/src/syntax/index.ts new file mode 100644 index 0000000..556eb87 --- /dev/null +++ b/packages/core/src/syntax/index.ts @@ -0,0 +1,9 @@ +export * from './codewriter.js'; +export * from './list.js'; +export * from './matcher.js'; +export * from './position.js'; +export * from './reader.js'; +export * from './scanner.js'; +export * from './template.js'; +export * from './tokens.js'; +export * from './vlq.js'; diff --git a/packages/core/src/syntax/list.ts b/packages/core/src/syntax/list.ts new file mode 100644 index 0000000..4ec1296 --- /dev/null +++ b/packages/core/src/syntax/list.ts @@ -0,0 +1,44 @@ +export interface List extends Iterable { + item: T | null; + next: List | null; + + toArray(): Array; +} + +export function atEnd(xs: List): boolean { + return xs.item === null; +} + +export class ArrayList implements List { + readonly items: Array; + readonly index: number = 0; + + constructor(items: Array, index = 0) { + this.items = items; + this.index = index; + } + + get item(): T | null { + return this.items[this.index] ?? 
null; + } + + get next(): List | null { + if (this.index >= this.items.length) return null; + return new ArrayList(this.items, this.index + 1); + } + + toArray(): Array { + return this.items.slice(this.index); + } + + [Symbol.iterator](): Iterator { + let i: List = this; + return { + next(): IteratorResult { + const value = i.item, done = atEnd(i); // test for the end BEFORE advancing, so the final element is still yielded + if (!done) i = i.next; + return { done, value }; + } + }; + } +} diff --git a/packages/core/src/syntax/matcher.ts b/packages/core/src/syntax/matcher.ts new file mode 100644 index 0000000..13464db --- /dev/null +++ b/packages/core/src/syntax/matcher.ts @@ -0,0 +1,222 @@ +import { Token, TokenType, Items, Item, isGroup, isToken, isSpace, isTokenType } from './tokens.js'; +import { Pos } from './position.js'; +import { List, ArrayList, atEnd } from './list.js'; + +//--------------------------------------------------------------------------- +// Patterns over Item + +export type PatternResult = [T, List] | null; +export type Pattern = (i: List) => PatternResult; + +export const noItems = new ArrayList([]); + +export const fail: Pattern = _i => null; +export const succeed: Pattern = i => [void 0, i]; +export const discard: Pattern = _i => [void 0, noItems]; +export const rest: Pattern = i => [i.toArray(), noItems]; +export const end: Pattern = i => atEnd(i) ? [void 0, noItems] : null; +export const pos: Pattern = i => atEnd(i) ? null : [isGroup(i.item) ? 
i.item.start.start : i.item.start, i]; + +export const newline: Pattern = i => { + while (!atEnd(i) && isTokenType(i.item, TokenType.SPACE)) i = i.next; + if (!isTokenType(i.item, TokenType.NEWLINE)) return null; + return [i.item, i.next]; +}; + +export function skipSpace(i: List): List { + while (!atEnd(i) && isSpace(i.item)) i = i.next; + return i; +} + +export function collectSpace(i: List, acc: Array): List { + while (!atEnd(i) && isSpace(i.item)) { + acc.push(i.item); + i = i.next; + } + return i; +} + +export function withoutSpace(p: Pattern): Pattern { + return i => p(skipSpace(i)); +} + +export function seq(... patterns: Pattern[]): Pattern { + return i => { + for (const p of patterns) { + const r = p(i); + if (r === null) return null; + i = r[1]; + } + return [void 0, i]; + }; +} + +export function alt(... alts: Pattern[]): Pattern { + return i => { + for (const a of alts) { + const r = a(i); + if (r !== null) return r; + } + return null; + }; +} + +export function scope(pf: (scope: T) => Pattern): Pattern { + return i => { + const scope = Object.create(null); + const r = pf(scope)(i); + if (r === null) return null; + return [scope, r[1]]; + }; +} + +export function value(pf: (scope: {value: T}) => Pattern): Pattern { + return i => { + const scope = Object.create(null); + const r = pf(scope)(i); + if (r === null) return null; + return [scope.value, r[1]]; + }; +} + +export function bind(target: T, key: K, pattern: Pattern): Pattern { + return i => { + const r = pattern(i); + if (r === null) return null; + target[key] = r[0]; + return r; + }; +} + +export function exec(thunk: (i: List) => void): Pattern { + return i => { + thunk(i); + return [void 0, i]; + }; +} + +export function map(p: Pattern, f: (T) => R): Pattern { + return i => { + const r = p(i); + if (r === null) return null; + return [f(r[0]), r[1]]; + }; +} + +export interface ItemOptions { + skipSpace?: boolean, // default: true +} + +export interface GroupOptions extends ItemOptions { +} + 
+export interface TokenOptions extends ItemOptions { + tokenType?: TokenType, // default: TokenType.ATOM +} + +export function group(opener: string, items: Pattern, options: GroupOptions = {}): Pattern { + return i => { + if (options.skipSpace ?? true) i = skipSpace(i); + if (!isGroup(i.item)) return null; + if (i.item.start.text !== opener) return null; + const r = items(new ArrayList(i.item.items)); + if (r === null) return null; + if (!atEnd(r[1])) return null; + return [r[0], i.next]; + }; +} + +export function atom(text?: string | undefined, options: TokenOptions = {}): Pattern { + return i => { + if (options.skipSpace ?? true) i = skipSpace(i); + if (!isToken(i.item)) return null; + if (i.item.type !== (options.tokenType ?? TokenType.ATOM)) return null; + if (text !== void 0 && i.item.text !== text) return null; + return [i.item, i.next]; + } +} + +export function anything(options: ItemOptions = {}): Pattern { + return i => { + if (options.skipSpace ?? true) i = skipSpace(i); + if (atEnd(i)) return null; + return [i.item, i.next]; + }; +} + +export function upTo(p: Pattern): Pattern { + return i => { + const acc = []; + while (true) { + const r = p(i); + if (r !== null) return [acc, i]; + if (atEnd(i)) break; + acc.push(i.item); + i = i.next; + } + return null; + }; +} + +export interface RepeatOptions { + min?: number; + max?: number; + separator?: Pattern; +} + +export function repeat(p: Pattern, options: RepeatOptions = {}): Pattern { + return i => { + const acc: T[] = []; + let needSeparator = false; + const finish = (): PatternResult => (acc.length < (options.min ?? 0)) ? null : [acc, i]; + while (true) { + if (acc.length == (options.max ?? 
Infinity)) return [acc, i]; + if (needSeparator) { + if (options.separator) { + const r = options.separator(i); + if (r === null) return finish(); + i = r[1]; + } + } else { + needSeparator = true; + } + const r = p(i); + if (r === null) return finish(); + acc.push(r[0]); + i = r[1]; + } + }; +} + +export function option(p: Pattern): Pattern { + return repeat(p, { max: 1 }); +} + +//--------------------------------------------------------------------------- +// Search-and-replace over Item + +export function replace(items: Items, + p: Pattern, + f: (t: T) => Items): Items +{ + const walkItems = (items: Items): Items => { + let i: List = new ArrayList(items); + const acc: Items = []; + while (!atEnd(i = collectSpace(i, acc))) { + const r = p(i); + + if (r !== null) { + acc.push(... f(r[0])); + i = r[1]; + } else if (isToken(i.item)) { + acc.push(i.item); + i = i.next; + } else { + acc.push({ ... i.item, items: walkItems(i.item.items) }); + i = i.next; + } + } + return acc; + }; + return walkItems(items); +} diff --git a/packages/core/src/syntax/position.ts b/packages/core/src/syntax/position.ts new file mode 100644 index 0000000..83551aa --- /dev/null +++ b/packages/core/src/syntax/position.ts @@ -0,0 +1,32 @@ +export interface Pos { + line: number; + column: number; + pos: number; + name: string | null; +} + +export function startPos(name: string | null): Pos { + return { line: 1, column: 0, pos: 0, name }; +} + +export function advancePos(p: Pos, ch: string): boolean { + let advancedLine = false; + p.pos++; + switch (ch) { + case '\t': + p.column = (p.column + 8) & ~7; + break; + case '\n': + p.column = 0; + p.line++; + advancedLine = true; + break; + case '\r': + p.column = 0; + break; + default: + p.column++; + break; + } + return advancedLine; +} diff --git a/packages/core/src/syntax/reader.ts b/packages/core/src/syntax/reader.ts new file mode 100644 index 0000000..61e149d --- /dev/null +++ b/packages/core/src/syntax/reader.ts @@ -0,0 +1,109 @@ +import { 
TokenType, Token, Group, Item, Items } from './tokens.js'; +import { Scanner } from './scanner.js'; + +function matchingParen(c: string): string | null { + switch (c) { + case ')': return '('; + case ']': return '['; + case '}': return '{'; + default: return null; + } +} + +export class LaxReader implements IterableIterator { + readonly scanner: Scanner; + readonly stack: Array = []; + + constructor(scanner: Scanner) { + this.scanner = scanner; + } + + [Symbol.iterator](): IterableIterator { + return this; + } + + stackTop(): Group | null { + return this.stack[this.stack.length - 1] ?? null; + } + + popUntilMatch(t: Token): Group | 'continue' | 'eof' { + const m = matchingParen(t.text); + + if (m !== null && !this.stack.some(g => g.start.text === m)) { + if (this.stack.length > 0) { + this.stackTop().items.push(t); + return 'continue'; + } + } else { + while (this.stack.length > 0) { + const inner = this.stack.pop(); + if (inner.start.text === m) { + inner.end = t; + } + + if (this.stack.length === 0) { + return inner; + } else { + const outer = this.stackTop(); + outer.items.push(inner); + if (inner.start.text === m) { + return 'continue'; + } + } + } + } + + return 'eof'; + } + + shift(): Token { + return this.scanner.shift() ?? 
this.scanner.makeToken(this.scanner.mark(), TokenType.CLOSE, ''); + } + + read(): Item | null { + while (true) { + let g = this.stackTop(); + const t = this.shift(); + switch (t.type) { + case TokenType.SPACE: + case TokenType.NEWLINE: + case TokenType.ATOM: + case TokenType.STRING: + if (g === null) return t; + if (t.text === ';') { + while (this.stack.length > 1 && '(['.indexOf(g.start.text) >= 0) { // a ';' force-closes unclosed '(' / '[' groups, but never the outermost one: it has no parent to receive it + this.stack.pop(); + this.stackTop().items.push(g); + g = this.stackTop(); + } + } + g.items.push(t); + break; + + case TokenType.OPEN: + this.stack.push({ start: t, end: null, items: [] }); + break; + + case TokenType.CLOSE: { + const i = this.popUntilMatch(t); + if (i === 'eof') return null; + if (i === 'continue') break; + return i; + } + } + } + } + + readToEnd(): Items { + return Array.from(this); + } + + next(): IteratorResult { + const i = this.read(); + if (i === null) { + return { done: true, value: null }; + } else { + return { done: false, value: i }; + } + } +} diff --git a/packages/core/src/syntax/scanner.ts b/packages/core/src/syntax/scanner.ts new file mode 100644 index 0000000..acafaf3 --- /dev/null +++ b/packages/core/src/syntax/scanner.ts @@ -0,0 +1,214 @@ +import { TokenType, Token } from './tokens.js'; +import { Pos, advancePos } from './position.js'; + +export abstract class Scanner implements IterableIterator { + readonly pos: Pos; + charBuffer: string | null = null; + tokenBuffer: Token | null = null; + delimiters = ' \t\n\r\'"`,;()[]{}/'; + + constructor(pos: Pos) { + this.pos = pos; + } + + [Symbol.iterator](): IterableIterator { + return this; + } + + abstract _peekChar(): string | null; + + peekChar(): string | null { + if (this.charBuffer !== null) return this.charBuffer; + this.charBuffer = this._peekChar(); + return this.charBuffer; + } + + dropChar() { + if (this.charBuffer === null) this.peekChar(); + if (this.charBuffer !== null) { + advancePos(this.pos, this.charBuffer); + this.charBuffer = null; + } + } + + shiftChar(): string | null { + const ch = 
this.peekChar(); + this.dropChar(); + return ch; + } + + makeToken(start: Pos, type: TokenType, text: string): Token { + return { type, start, end: this.mark(), text }; + } + + mark(): Pos { + return { ... this.pos }; + } + + _while(pred: (ch: string | null) => boolean, f: (ch: string | null) => void) { + while (true) { + const ch = this.peekChar(); + if (!pred(ch)) return; + this.dropChar(); + f(ch); + } + } + + _collectSpace(buf = '', start = this.mark()): Token { + this._while(ch => this.isSpace(ch), ch => buf = buf + ch); + return this.makeToken(start, TokenType.SPACE, buf); + } + + _punct(type: TokenType): Token { + return this.makeToken(this.mark(), type, this.shiftChar()); + } + + _str(forbidNewlines: boolean): Token { + const start = this.mark(); + const q = this.shiftChar(); + let buf = q; + let ch: string; + while (true) { + ch = this.shiftChar(); + if (ch !== null) buf = buf + ch; + if (ch === null || ch === q || (forbidNewlines && (ch === '\n'))) { + return this.makeToken(start, TokenType.STRING, buf); + } + if (ch === '\\') { + ch = this.shiftChar(); + if (ch === '\n') { + // Do nothing. Line continuation. + } else if (ch !== null) { + buf = buf + ch; + } + } + } + } + + isSpace(ch: string): boolean { + return ' \t\r'.indexOf(ch) >= 0; + } + + isDelimiter(ch: string): boolean { + return this.delimiters.indexOf(ch) >= 0; + } + + addDelimiters(newDelimiters: string) { + this.delimiters = this.delimiters + newDelimiters; + } + + _atom(start = this.mark(), buf = ''): Token { + let ch: string; + while (true) { + ch = this.peekChar(); + if (ch === null || this.isDelimiter(ch)) { + return this.makeToken(start, TokenType.ATOM, buf); + } + buf = buf + ch; + this.dropChar(); + } + } + + _maybeComment(): Token { + const start = this.mark(); + let buf = this.shiftChar(); + let ch = this.peekChar(); + if (ch === null) return this._collectSpace(buf, start); + switch (ch) { + case '/': // single-line comment. 
+ this._while(ch => ch !== null && ch !== '\n', ch => buf = buf + ch); + return this._collectSpace(buf, start); + case '*': // delimited comment. + { + let seenStar = false; + buf = buf + this.shiftChar(); + while (true) { + ch = this.shiftChar(); + if (ch !== null) buf = buf + ch; // append first, so the closing '/' stays in the token text + if ((ch === null) || ((ch === '/') && seenStar)) break; + seenStar = (ch === '*'); + } + return this._collectSpace(buf, start); + } + default: + return this._atom(start, buf); + } + } + + _peek(): Token | null { + let ch = this.peekChar(); + if (ch === null) return null; + switch (ch) { + case ' ': + case '\t': + case '\r': + return this._collectSpace(); + + case '\n': + return this._punct(TokenType.NEWLINE); + + case '(': + case '[': + case '{': + return this._punct(TokenType.OPEN); + case ')': + case ']': + case '}': + return this._punct(TokenType.CLOSE); + + case '\'': + case '"': + return this._str(true); + case '`': + return this._str(false); + + case ',': + case ';': + return this._punct(TokenType.ATOM); + + case '/': + return this._maybeComment(); + + default: + return this._atom(this.mark(), this.shiftChar()); + } + } + + peek(): Token | null { + if (this.tokenBuffer === null) this.tokenBuffer = this._peek(); + return this.tokenBuffer; + } + + drop() { + if (this.tokenBuffer === null) this.peek(); + this.tokenBuffer = null; + } + + shift(): Token | null { + const t = this.peek(); + this.drop(); + return t; + } + + next(): IteratorResult { + const t = this.shift(); + if (t === null) { + return { done: true, value: null }; + } else { + return { done: false, value: t }; + } + } +} + +export class StringScanner extends Scanner { + readonly input: string; + + constructor(pos: Pos, input: string) { + super(pos); + this.input = input; + } + + _peekChar(): string | null { + return this.input[this.pos.pos] ?? 
null; + } +} diff --git a/packages/core/src/syntax/template.ts b/packages/core/src/syntax/template.ts new file mode 100644 index 0000000..b168db4 --- /dev/null +++ b/packages/core/src/syntax/template.ts @@ -0,0 +1,59 @@ +import { Items, TokenType } from './tokens.js'; +import { Pos, startPos } from './position.js'; +import { StringScanner } from './scanner.js'; +import { LaxReader } from './reader.js'; +import * as M from './matcher.js'; + +const substPat = M.scope((o: { pos: Pos }) => + M.seq(M.atom('$'), + M.seq(M.bind(o, 'pos', M.pos), M.group('{', M.end, { skipSpace: false })))); + +export type Substitution = Items | string; + +function toItems(s: Substitution, pos: Pos): Items { + return typeof s === 'string' ? [{ type: TokenType.ATOM, text: s, start: pos, end: pos }] : s; +} + +export class Templates { + readonly sources: { [name: string]: string } = {}; + + template(start0: Pos | string = startPos(null)): (consts: TemplateStringsArray, ... vars: Substitution[]) => Items { + const start = (typeof start0 === 'string') ? startPos(start0) : start0; + return (consts, ... 
vars) => { + const sourcePieces = [consts[0]]; + for (let i = 1; i < consts.length; i++) { + sourcePieces.push('${}'); + sourcePieces.push(consts[i]); + } + const source = sourcePieces.join(''); + if (start.name !== null) { + if (start.name in this.sources && this.sources[start.name] !== source) { + throw new Error(`Duplicate template name: ${start.name}`); + } + this.sources[start.name] = source; + } + const reader = new LaxReader(new StringScanner({ ... start }, source)); // the scanner advances its Pos in place; pass a copy so a reused tag function restarts at `start` + reader.scanner.addDelimiters('$'); + let i = 0; + return M.replace(reader.readToEnd(), substPat, sub => toItems(vars[i++], sub.pos)); + }; + } + + sourceFor(name: string): string | undefined { + return this.sources[name]; + } +} + +export function joinItems(itemss: Items[], separator0: Substitution): Items { + if (itemss.length === 0) return []; + const separator = toItems(separator0, startPos(null)); + const acc = [... itemss[0]]; // copy: joining must not mutate the caller's first array + for (let i = 1; i < itemss.length; i++) { + acc.push(... separator, ... itemss[i]); + } + return acc; +} + +// const lib = new Templates(); +// const t = (o: {xs: Items}) => lib.template('testTemplate')`YOYOYOYO ${o.xs}><`; +// console.log(t({xs: lib.template()`hello there`})); diff --git a/packages/core/src/syntax/tokens.ts b/packages/core/src/syntax/tokens.ts new file mode 100644 index 0000000..6999a58 --- /dev/null +++ b/packages/core/src/syntax/tokens.ts @@ -0,0 +1,85 @@ +import { Pos, startPos } from './position.js'; + +export enum TokenType { + SPACE, + NEWLINE, + ATOM, + STRING, + OPEN, + CLOSE, +} + +export interface Token { + type: TokenType; + start: Pos; + end: Pos; + text: string; +} + +export interface Group { + start: Token; + end: Token | null; + items: Items; +} + +export type Item = Token | Group; +export type Items = Array; + +export function makeToken(text: string, name?: string | null, type: TokenType = TokenType.ATOM): Token { + const p = startPos(name ?? 
null); + return { + start: p, + end: p, + type, + text + }; +} + +export function makeGroup(start: Token, items: Array, end?: Token) { + return { start, end: end ?? null, items }; +} + +export function isSpace(i: Item): i is Token { + return isTokenType(i, TokenType.SPACE) || isTokenType(i, TokenType.NEWLINE); +} + +export function isGroup(i: Item): i is Group { + return i && ('items' in i); +} + +export function isToken(i: Item): i is Token { + return i && ('type' in i); +} + +export function isTokenType(i: Item, t: TokenType): i is Token { + return isToken(i) && i.type === t; +} + +export type ItemTextOptions = { + missing?: string, + color?: boolean, +}; + +export function itemText(i: Items, options: ItemTextOptions = {}): string { + const walkItems = (i: Items): string => i.map(walk).join(''); + const walk = (i: Item): string => { + if (isGroup(i)) { + return walk(i.start) + walkItems(i.items) + (i.end ? walk(i.end) : options.missing ?? ''); + } else { + if (options.color ?? false) { + switch (i.type) { + case TokenType.SPACE: + case TokenType.NEWLINE: + return '\x1b[31m' + i.text + '\x1b[0m'; + case TokenType.STRING: + return '\x1b[34m' + i.text + '\x1b[0m'; + default: + return i.text; + } + } else { + return i.text; + } + } + }; + return walkItems(i); +} diff --git a/packages/core/src/syntax/vlq.ts b/packages/core/src/syntax/vlq.ts new file mode 100644 index 0000000..4cce577 --- /dev/null +++ b/packages/core/src/syntax/vlq.ts @@ -0,0 +1,37 @@ +const alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; +const inverse_alphabet = + new Map(Object.entries(alphabet).map(([i,c])=>[c,parseInt(i)])); + +export function vlqDecode(s: string): Array { + let acc = 0; + let shift_amount = 0; + const buf = []; + for (const ch of s) { + const sextet = inverse_alphabet.get(ch); + acc |= (sextet & 0x1f) << shift_amount; + shift_amount += 5; + if (!(sextet & 0x20)) { + const negative = !!(acc & 1); + acc = acc >> 1; + if (negative) acc = -acc; + 
buf.push(acc); + acc = 0; + shift_amount = 0; + } + } + return buf; +} + +export function vlqEncode(ns: Array): string { + const buf = []; + for (let n of ns) { + n = (n < 0) ? ((-n) << 1) | 1 : (n << 1); + do { + const m = n & 0x1f; + n = n >> 5; + const sextet = (n > 0) ? m | 0x20 : m; + buf.push(alphabet[sextet]); + } while (n > 0); + } + return buf.join(''); +}