preserves/implementations/javascript/packages/schema/src/compiler.ts

575 lines
25 KiB
TypeScript

import { Pattern, NamedPattern, Schema, Input, Environment, Ref, lookup } from "./meta";
import * as M from './meta';
import { Annotated, Bytes, Dictionary, Fold, fold, KeyedSet, Position, preserves, Record, Set, Tuple, Value } from "@preserves/core";
import { Formatter, parens, seq, Item, opseq, block, commas, brackets, anglebrackets, braces } from "./block";
import { refPosition } from "./reader";
/**
 * Optional settings accepted by `compile`.
 */
export interface CompilerOptions {
/**
 * Module specifier that the generated code imports as `_`. `compile` falls
 * back to `'@preserves/core'` when this is omitted.
 */
preservesModule?: string;
/**
 * NOTE(review): not read anywhere in the visible part of this file;
 * presumably a fallback pointer type. Confirm against callers.
 */
defaultPointer?: Ref;
/**
 * NOTE(review): not invoked anywhere in the visible part of this file;
 * presumably a diagnostics callback receiving a message and an optional
 * source position. Confirm against callers.
 */
warn?(message: string, pos: Position | null): void;
};
/**
 * Wraps the given items in a block that is used as the body of an
 * immediately-invoked arrow function, i.e. `((() => <block>)())`, so that a
 * sequence of statements can appear where an expression is expected.
 */
function fnblock(... items: Item[]): Item {
    const wrapped = block(... items);
    return seq('((() => ', wrapped, ')())');
}
/**
 * Compiles a schema into TypeScript source text.
 *
 * For every definition in the schema the output contains an exported type
 * alias, an `is...` type guard, an `as...` checked cast and a `decode...`
 * function. These are preceded by the module imports and the literal
 * constants that the generated code refers to.
 *
 * @param env environment handed to `lookup` when resolving `ref` patterns
 * @param schema the schema whose definitions are compiled
 * @param options optional settings; see `CompilerOptions`
 * @returns the generated module as a single string
 */
export function compile(env: Environment, schema: Schema, options: CompilerOptions = {}): string {
// Maps each literal value used by the schema to the name of its generated constant.
const literals = new Dictionary<string, never>();
// Generated type declarations, in emission order.
const types: Array<Item> = [];
// Generated function declarations, in emission order.
const functions: Array<Item> = [];
// (identifier, path) pairs for the modules the generated code must import.
const imports = new KeyedSet<[string, string]>();
// Temporaries requested through gentemp() for the body currently being collected.
let temps: Array<string> = [];
// Statements added through emit() for the body currently being collected.
let body: Array<Item> = [];
// Counter used by gentemp() so that every temporary name is distinct.
let tempCounter = 0;
// The schema's pointer entry; compared against `false` further down to
// detect schemas in which pointers are forbidden.
const pointerName = Schema._.details(schema).get(M.$pointer);
/**
 * Allocates a fresh temporary variable name of the form `_tmpN`, records it
 * among the temporaries of the body currently being collected, and returns
 * it.
 */
function gentemp(): string {
    const fresh = `_tmp${tempCounter}`;
    tempCounter += 1;
    temps.push(fresh);
    return fresh;
}
/**
 * Returns the name of the generated constant that holds the literal `v`,
 * allocating a name the first time a given literal is seen.
 *
 * The name is derived from the literal's Preserves text: every `_` is
 * escaped as `__` and every `*` as `_STAR_`. If the escaped text (checked
 * with a leading `_`) is a valid token, the name is `$` followed by that
 * text; otherwise a positional fallback `__litN` is used, where N is the
 * number of literals registered so far.
 *
 * @param v the literal value
 * @returns the identifier of the constant, usable directly as an `Item`
 */
function literal(v: Input): Item {
    let varname = literals.get(v);
    if (varname === void 0) {
        // String.prototype.replace with a *string* pattern rewrites only the
        // first match. Global regular expressions are required so that all
        // occurrences are escaped; otherwise names containing a second `*`
        // needlessly fall back to `__litN`, and distinct literals such as
        // `_*` and `__STAR_` would be given the same identifier.
        const s = v.asPreservesText()
            .replace(/_/g, '__')
            .replace(/\*/g, '_STAR_');
        varname = M.isValidToken('_' + s, true) ? '$' + s : '__lit' + literals.size;
        literals.set(v, varname);
    }
    return varname;
}
/**
 * Appends one generated item to the end of the body that is currently being
 * collected.
 */
function emit(item: Item): void {
    body[body.length] = item;
}
/**
 * Runs `f` with a fresh, empty statement list and temporary list, then
 * restores the previous ones and returns a block made of whatever `f`
 * emitted. When `f` allocated temporaries, the block starts with a single
 * `let` declaration naming all of them.
 *
 * @param f callback that emits the statements of the nested body
 * @returns the block holding the collected statements
 */
function collectBody(f: () => void): Item {
    const [outerTemps, outerBody] = [temps, body];
    temps = [];
    body = [];

    f();

    const [innerTemps, innerBody] = [temps, body];
    temps = outerTemps;
    body = outerBody;

    const declarations: Item[] =
        innerTemps.length > 0 ? [seq('let ', commas(... innerTemps), ': any')] : [];
    return block(... declarations, ... innerBody);
}
/**
 * Builds a generated `while (!d.closeCompound())` loop that decodes one
 * element matching `p` per iteration into a temporary.
 *
 * @param p pattern each element must match
 * @param kFail produces the statements to run, just before `break`, when an
 *        element fails to decode (its temporary is still `void 0`)
 * @param kAcc produces the statements that accumulate a successfully decoded
 *        element, given the name of the temporary holding it
 * @returns the loop as an `Item`
 */
function accumulateCompound(p: Pattern,
                            kFail: () => Item[],
                            kAcc: (temp: string) => Item[]): Item
{
    // The temporary is allocated in the enclosing body, not inside the loop.
    const element = gentemp();
    const loopBody = collectBody(() => {
        emit(seq(`${element} = void 0`));
        decoderFor(p, element);
        const onFailure = block(... kFail(), seq(`break`));
        emit(seq(`if (${element} === void 0) `, onFailure));
        for (const stmt of kAcc(element)) {
            emit(stmt);
        }
    });
    return seq(`while (!d.closeCompound()) `, loopBody);
}
/**
 * Resolves a `ref` pattern through `lookup` and returns the pattern it
 * yields; any other pattern is returned unchanged. When the third `lookup`
 * callback is given no pattern, the original reference itself is returned.
 */
function derefPattern(p: Pattern): Pattern {
    if (p.label !== M.$ref) {
        return p;
    }
    return lookup(refPosition(p), p, env,
                  (found) => found,
                  (found) => found,
                  (_mod, _modPath, found) => found ?? p);
}
/**
 * Emits code that decodes a fixed sequence of patterns, optionally followed
 * by a repeated tail pattern, into the generated variable `dest`.
 *
 * One temporary is allocated per fixed position. Each position is decoded
 * inside the `if` guarding the success of the previous one, so decoding
 * stops at the first mismatch and `dest` is left untouched.
 *
 * @param ps patterns of the fixed leading positions
 * @param dest name of the generated variable receiving the result
 * @param recordFields when true, no `d.openSequence()` guard is generated
 * @param variablePattern pattern of the repeated tail, or `undefined` when
 *        the sequence must end right after the fixed positions
 */
function decoderForTuple(ps: Pattern[],
                         dest: string,
                         recordFields: boolean,
                         variablePattern: Pattern | undefined): void {
    const slots = ps.map(() => gentemp());

    // Emitted once every fixed position has been decoded successfully.
    const finish = (): void => {
        if (variablePattern === void 0) {
            emit(seq(`if (d.closeCompound()) ${dest} = `, brackets(... slots)));
            return;
        }
        emit(block(
            seq(`let vN: Array<`, typeFor(variablePattern),
                `> | undefined = `, brackets(... slots)),
            accumulateCompound(variablePattern,
                               () => [`vN = void 0`],
                               (t) => [`vN.push(${t})`]),
            seq(`${dest} = vN`)));
    };

    // Decodes position `index` and nests the remaining positions inside the
    // success check for it.
    const decodeFrom = (index: number): void => {
        if (index >= ps.length) {
            finish();
            return;
        }
        decoderFor(ps[index], slots[index]);
        emit(seq(`if (${slots[index]} !== void 0) `,
                 collectBody(() => decodeFrom(index + 1))));
    };

    if (recordFields) {
        decodeFrom(0);
        return;
    }
    emit(seq(`if (d.openSequence()) `, collectBody(() => decodeFrom(0))));
}
/**
 * Emits (through emit()) the statements that decode one value matching
 * pattern `p` from the generated decoder variable `d` and store it in the
 * generated variable named `dest`.
 *
 * Several branches assume `dest` already holds `void 0` on entry and only
 * assign it on success; callers test `dest !== void 0` afterwards.
 *
 * @param p pattern to decode
 * @param dest name of the generated variable that receives the result
 * @param recordFields forwarded to decoderForTuple for tuple patterns; when
 *        true the tuple is read without a `d.openSequence()` guard
 */
function decoderFor(p: Pattern, dest: string, recordFields = false): void {
switch (p.label) {
// Atoms map one-to-one onto the typed `next...` readers of the decoder.
case M.$atom:
switch (p[0]) {
case M.$Boolean: emit(`${dest} = d.nextBoolean()`); break;
case M.$Float: emit(`${dest} = d.nextFloat()`); break;
case M.$Double: emit(`${dest} = d.nextDouble()`); break;
case M.$SignedInteger: emit(`${dest} = d.nextSignedInteger()`); break;
case M.$String: emit(`${dest} = d.nextString()`); break;
case M.$ByteString: emit(`${dest} = d.nextByteString()`); break;
case M.$Symbol: emit(`${dest} = d.nextSymbol()`); break;
}
break;
// Literals: pick a reader from the JavaScript type of the literal, then
// hand the value and the literal's constant to `_.asLiteral`
// (presumably yields undefined on mismatch; confirm in the core library).
case M.$lit: {
let n: string;
switch (typeof p[0]) {
case 'boolean': n = `d.nextBoolean()`; break;
case 'string': n = `d.nextString()`; break;
case 'number': n = `d.nextSignedInteger()`; break;
case 'symbol': n = `d.nextSymbol()`; break;
default: n = `d.next()`; break;
}
emit(`${dest} = _.asLiteral(${n}, ${literal(p[0])})`);
break;
}
// References: the three lookup callbacks respectively call the
// `decode<Name>` function of this module, decode the pattern handed back
// by lookup in place, or register an import and call `decode<Name>` on
// that module.
case M.$ref:
lookup(refPosition(p), p, env,
(_p) => emit(`${dest} = decode${p[1].description!}(d)`),
(p) => decoderFor(p, dest),
(mod, modPath,_p) => {
imports.add([mod, modPath]);
emit(`${dest} = ${mod}.decode${p[1].description!}(d)`);
});
break;
case M.$or: {
const alts = p[0];
const recs = alts.map(derefPattern);
if (recs.length > 1 && recs.every(pp => pp.label === M.$rec)) {
// Hoist the record check up.
// This is pretty hacky. If we lift the level of
// discourse a little, we can do this
// automatically and generically...
emit(seq(`if (d.openRecord()) `, collectBody(() => {
// The label is read once; the mark taken right after it lets every
// alternative re-read the fields from the same position.
const label = gentemp();
emit(seq(`${label} = d.next()`));
const mark = gentemp();
emit(seq(`${mark} = d.mark()`));
function loop(i: number) {
const alt = recs[i];
if (alt.label !== M.$rec) return; // avoid a cast
emit(seq(`if (`, predicateFor(label, alt[0]), `) `, collectBody(() => {
const fs = gentemp();
decoderFor(alt[1], fs, true);
emit(seq(`if (${fs} !== void 0) ${dest} = _.Record`,
anglebrackets(typeFor(alt[0]), typeFor(alt[1])),
parens(seq(label, ` as any`),
seq(fs, ` as any`))));
})));
// Remaining alternatives are tried only while dest is still unset,
// after rewinding the decoder to the mark.
if (i < recs.length - 1) {
emit(seq(`if (${dest} === void 0) `, collectBody(() => {
emit(`d.restoreMark(${mark})`);
loop(i + 1);
})));
}
}
loop(0);
})));
} else {
switch (alts.length) {
case 0: break; // assume dest is already void 0
case 1: decoderFor(alts[0], dest); break;
default: {
// General alternation: mark the position, try each alternative in
// order and rewind before trying the next one.
const mark = gentemp();
emit(`${mark} = d.mark()`);
function loop(i: number) {
decoderFor(alts[i], dest);
if (i < alts.length - 1) {
emit(seq(`if (${dest} === void 0) `, collectBody(() => {
emit(`d.restoreMark(${mark})`);
loop(i + 1);
})));
}
}
loop(0);
break;
}
}
}
break;
}
// Conjunction: decode using the first pattern, then check the decoded
// value against the remaining patterns with predicates and reset dest
// when any of them fails.
case M.$and:
switch (p[0].length) {
case 0: emit(`${dest} = d.next()`); break;
case 1: decoderFor(p[0][0], dest); break;
default: {
const [pp0, ... ppN] = p[0];
decoderFor(pp0, dest);
const otherChecks =
opseq('true', ' && ', ... ppN.map(pp => predicateFor(dest, pp)));
emit(seq(`if (!`, otherChecks, `) ${dest} = void 0`));
break;
}
}
break;
case M.$pointer:
emit(`${dest} = _decodePtr(d)`);
break;
case M.$rec:
// assume dest is already void 0
emit(seq(`if (d.openRecord()) `, collectBody(() => {
const label = gentemp();
decoderFor(p[0], label);
emit(seq(`if (${label} !== void 0) `,
collectBody(() => {
const fs = gentemp();
// Fields are decoded in record-field mode (third argument true).
decoderFor(p[1], fs, true);
emit(seq(
`if (${fs} !== void 0) ${dest} = _.Record`,
anglebrackets(typeFor(p[0]), typeFor(p[1])),
parens(seq(label, ` as any`),
seq(fs, ` as any`))));
})));
})));
break;
case M.$tuple:
// assume dest is already void 0
decoderForTuple(p[0].map(unname), dest, recordFields, void 0);
break;
case M.$tuple_STAR_:
// assume dest is already void 0
decoderForTuple(p[0].map(unname), dest, recordFields, unname(p[1]));
break;
case M.$setof:
// assume dest is already void 0
emit(seq(`if (d.openSet()) `, collectBody(() => {
emit(seq(`let r: `, typeFor(p), ` | undefined = new _.KeyedSet()`));
emit(accumulateCompound(p[0],
() => [`r = void 0`],
(t) => [`r.add(${t})`]));
emit(`${dest} = r`);
})));
break;
case M.$dictof:
// assume dest is already void 0
emit(seq(`if (d.openDictionary()) `, collectBody(() => {
emit(seq(`let r: `, typeFor(p), ` | undefined = new _.KeyedDictionary()`));
// Each iteration decodes one key and one value; a failure on either
// discards the whole dictionary and leaves the loop.
emit(seq(`while (!d.closeCompound()) `, collectBody(() => {
emit(seq(`let K: undefined | `, typeFor(p[0]), ` = void 0`));
decoderFor(p[0], 'K');
emit(seq(`if (K === void 0) { r = void 0; break; }`));
emit(seq(`let V: undefined | `, typeFor(p[1]), ` = void 0`));
decoderFor(p[1], 'V');
emit(seq(`if (V === void 0) { r = void 0; break; }`));
emit(seq(`r.set(K, V)`));
})));
emit(seq(`${dest} = r`));
})));
break;
// Fixed-key dictionaries are read as a whole value and then validated
// with the predicate for the pattern.
case M.$dict:
emit(seq(`${dest} = d.next()`));
emit(seq(
`if (${dest} !== void 0 && !(`, predicateFor(dest, p), `)) ${dest} = void 0`));
break;
default:
// Compile-time exhaustiveness check: `p` must have type `never` here.
((_p: never) => {})(p);
throw new Error("Unreachable");
}
}
/**
 * Builds the TypeScript type expression, as an `Item`, that describes the
 * values matching pattern `p`.
 *
 * Side effects: literal patterns register a constant through literal(), and
 * references handled by the third lookup callback add an entry to `imports`.
 *
 * @param p pattern to describe
 * @returns source text of the corresponding type
 */
function typeFor(p: Pattern): Item {
switch (p.label) {
case M.$atom:
// No default branch and no break after this inner switch: every listed
// atom kind returns; an unlisted one would fall through to the `lit`
// case below.
switch (p[0]) {
case M.$Boolean: return `boolean`;
case M.$Float: return `_.SingleFloat`;
case M.$Double: return `_.DoubleFloat`;
case M.$SignedInteger: return `number`;
case M.$String: return `string`;
case M.$ByteString: return `_.Bytes`;
case M.$Symbol: return `symbol`;
}
case M.$lit:
// The type of a literal is the type of its generated constant.
return `(typeof ${literal(p[0])})`;
case M.$ref:
// The three lookup callbacks respectively yield the bare definition name,
// the type of the pattern handed back by lookup, or a module-qualified
// name (after registering the import).
return lookup(refPosition(p), p, env,
(_p) => p[1].description!,
(p) => typeFor(p),
(mod, modPath,_p) => {
imports.add([mod, modPath]);
return `${mod}.${p[1].description!}`;
});
case M.$or:
// `never` is the first argument given to opseq for the union.
return opseq('never', ' | ', ... p[0].map(pp => typeFor(pp)));
case M.$and:
// `_val` is the first argument given to opseq for the intersection.
return opseq('_val', ' & ', ... p[0].map(pp => typeFor(pp)));
case M.$pointer:
return `_ptr`;
case M.$rec:
return seq('_.Record', anglebrackets(typeFor(p[0]), typeFor(p[1]), '_ptr'));
case M.$tuple:
return brackets(... p[0].map(pp => typeFor(unname(pp))));
case M.$tuple_STAR_:
// With no fixed positions the type is a plain array; otherwise it is a
// tuple type ending in a rest element.
if (p[0].length === 0) {
return seq('Array<', typeFor(unname(p[1])), '>');
} else {
return brackets(... p[0].map(pp => typeFor(unname(pp))),
seq('... Array<', typeFor(unname(p[1])), '>'));
}
case M.$setof:
return seq('_.KeyedSet', anglebrackets(typeFor(p[0]), '_ptr'));
case M.$dictof:
return seq('_.KeyedDictionary', anglebrackets(typeFor(p[0]), typeFor(p[1]), '_ptr'));
case M.$dict:
// Fixed-key dictionary: a `get` and a `has` signature per known key,
// intersected with the general dictionary type.
return parens(seq(
block(
... Array.from(p[0]).map(([k, vp]) =>
seq(`get(k: typeof ${literal(k)}): `, typeFor(vp))),
... Array.from(p[0]).map(([k, _vp]) =>
seq(`has(k: typeof ${literal(k)}): true`))),
' & _.Dictionary<_val, _ptr>'));
default:
// Compile-time exhaustiveness check: `p` must have type `never` here.
((_p: never) => {})(p);
throw new Error("Unreachable");
}
}
/**
 * Builds a generated boolean expression that tests whether the generated
 * expression `v` matches pattern `p`.
 *
 * Side effects: may register literals and imports, and for `dict` patterns
 * allocates temporaries through gentemp(), which the surrounding generated
 * function must declare.
 *
 * @param v source text of the expression being tested
 * @param p pattern to test against
 * @param recordOkAsTuple when true, tuple patterns skip the
 *        "is an array and not a record" checks; used when `v` is a record
 *        whose fields are being checked
 * @returns source text of the predicate
 */
function predicateFor(v: string, p: Pattern, recordOkAsTuple = false): Item {
switch (p.label) {
case M.$atom:
// No default branch and no break after this inner switch: every listed
// atom kind returns; an unlisted one would fall through to the `lit`
// case below.
switch (p[0]) {
case M.$Boolean: return `typeof ${v} === 'boolean'`;
case M.$Float: return `_.Float.isSingle(${v})`;
case M.$Double: return `_.Float.isDouble(${v})`;
case M.$SignedInteger: return `typeof ${v} === 'number'`;
case M.$String: return `typeof ${v} === 'string'`;
case M.$ByteString: return `_.Bytes.isBytes(${v})`;
case M.$Symbol: return `typeof ${v} === 'symbol'`;
}
case M.$lit:
return `_.is(${v}, ${literal(p[0])})`;
case M.$ref:
// The three lookup callbacks respectively call the `is<Name>` guard of
// this module, test against the pattern handed back by lookup, or call
// `is<Name>` on another module (after registering the import).
return lookup(refPosition(p), p, env,
(_p) => `is${Ref._.name(p).description!}(${v})`,
(pp) => predicateFor(v, pp),
(mod, modPath, _p) => {
imports.add([mod, modPath]);
return `${mod}.is${Ref._.name(p).description!}(${v})`;
});
case M.$or: {
const alts = p[0];
const recs = alts.map(derefPattern);
// When every alternative resolves to a record pattern, the record check
// is done once up front and each alternative only tests label and fields.
if (recs.length > 1 && recs.every(pp => pp.label === M.$rec)) {
return seq(
`_.Record.isRecord<_val, _.Tuple<_val>, _ptr>(${v}) && `,
parens(opseq('false', ' || ',
... recs.map(r =>
(r.label !== M.$rec) ? '' : parens(seq(
predicateFor(`${v}.label`, r[0]),
' && ',
predicateFor(v, r[1], true)))))));
} else {
return opseq('false', ' || ', ... p[0].map(pp => predicateFor(v, pp)));
}
}
case M.$and:
return opseq('true', ' && ', ... p[0].map(pp => predicateFor(v, pp)));
case M.$pointer:
return `_.isPointer(${v})`;
case M.$rec:
// The fields are tested on `v` itself with recordOkAsTuple set.
return opseq('true', ' && ',
`_.Record.isRecord<_val, _.Tuple<_val>, _ptr>(${v})`,
predicateFor(`${v}.label`, p[0]),
predicateFor(v, p[1], true));
case M.$tuple:
// Exact length, then one predicate per position.
return opseq('true', ' && ',
... (recordOkAsTuple ? []
: [`_.Array.isArray(${v})`, `!_.Record.isRecord<_val, _.Tuple<_val>, _ptr>(${v})`]),
`(${v}.length === ${p[0].length})`,
... p[0].map((pp, i) => predicateFor(`${v}[${i}]`, unname(pp))));
case M.$tuple_STAR_:
// Minimum length, every element past the fixed positions matches the
// tail pattern, then one predicate per fixed position.
return opseq('true', ' && ',
... (recordOkAsTuple ? []
: [`_.Array.isArray(${v})`, `!_.Record.isRecord<_val, _.Tuple<_val>, _ptr>(${v})`]),
`(${v}.length >= ${p[0].length})`,
seq(p[0].length > 0 ? `${v}.slice(${p[0].length})` : v,
`.every(v => `,
parens(predicateFor('v', unname(p[1]))),
`)`),
... p[0].map((pp, i) => predicateFor(`${v}[${i}]`, unname(pp))));
case M.$setof:
// Element checks need a loop, so they are wrapped in an
// immediately-invoked function.
return opseq('true', ' && ',
`_.Set.isSet<_val>(${v})`,
fnblock(
seq(`for (const vv of ${v}) `, block(
seq('if (!(', predicateFor('vv', p[0]), ')) return false'))),
seq('return true')));
case M.$dictof:
// Each entry `e` is tested as a key (`e[0]`) and a value (`e[1]`).
return opseq('true', ' && ',
`_.Dictionary.isDictionary<_val, _ptr>(${v})`,
fnblock(
seq(`for (const e of ${v}) `, block(
seq('if (!(', predicateFor('e[0]', p[0]), ')) return false'),
seq('if (!(', predicateFor('e[1]', p[1]), ')) return false'))),
seq('return true')));
case M.$dict:
// Each known key is fetched into a fresh temporary, which must be
// present and must match the pattern for that key.
return opseq('true', ' && ',
`_.Dictionary.isDictionary<_val, _ptr>(${v})`,
... Array.from(p[0]).map(([k, vp]) => {
const tmp = gentemp();
return parens(seq(
`(${tmp} = ${v}.get(${literal(k)})) !== void 0 && `,
predicateFor(tmp, vp)));
}));
default:
// Compile-time exhaustiveness check: `p` must have type `never` here.
((_p: never) => {})(p);
throw new Error("Unreachable");
}
}
/**
 * Strips the name from a named pattern, returning the pattern it wraps; a
 * pattern that is not named is returned as is.
 */
function unname(p: NamedPattern): Pattern {
    if (p.label === M.$named) {
        return p[1];
    }
    return p;
}
/**
 * Returns the field name for the pattern at position `index`: the
 * description of the name symbol when the pattern is named, and the
 * positional fallback `_field<index>` otherwise.
 */
function fieldName(np: NamedPattern, index: number): string {
    if (np.label !== M.$named) {
        return `_field${index}`;
    }
    return np[0].description!;
}
/**
 * Builds one `"name": Type` entry for the pattern at position `index`,
 * using the JSON-quoted field name and the type of the unwrapped pattern.
 */
function fieldEntry(np: NamedPattern, index: number): Item {
    const quotedName = JSON.stringify(fieldName(np, index));
    const valueType = typeFor(unname(np));
    return seq(quotedName, ': ', valueType);
}
// Generate the declarations for every definition in the schema. The order
// of the calls below matters: literal() numbers its fallback names by the
// count of literals seen so far, and `temps` must hold exactly the
// temporaries requested while building the recognizer.
for (const [name0, pattern] of Schema._.details(schema).get(M.$definitions)) {
const name = name0 as symbol;
// Start with no temporaries so that only those of this recognizer are
// declared inside the generated `is...` function.
temps = [];
const recognizer = predicateFor('v', pattern);
// A record with a literal label and a fixed tuple of fields additionally
// gets an exported constructor built with `_.Record.makeConstructor`.
if (pattern.label === M.$rec &&
pattern[0].label === M.$lit &&
pattern[1].label === M.$tuple)
{
types.push(
seq(`export const ${name.description!} = _.Record.makeConstructor<`,
braces(... pattern[1][0].map(fieldEntry)),
`, _ptr>()(${literal(pattern[0][0])}, `,
JSON.stringify(pattern[1][0].map(fieldName)), `);`));
}
// Type alias for the definition.
types.push(
seq(`export type ${name.description!} = `, typeFor(pattern), `;`));
// `is<Name>`: type guard returning the recognizer expression, preceded by
// a declaration of any temporaries the recognizer uses.
functions.push(
seq(`export function is${name.description!}`,
'(v: any): v is ', name.description!, ' ',
block(
... temps.length > 0 ? [seq('let ', commas(... temps), ': any')] : [],
seq('return ', recognizer))));
// `as<Name>`: returns its argument when the guard accepts it and throws
// a TypeError otherwise.
functions.push(
seq(`export function as${name.description!}`,
'(v: any): ', name.description!, ' ',
block(
seq(`if (!is${name.description!}(v)) `,
block(`throw new TypeError(\`Invalid ${name.description!}: \${_.stringify(v)}\`)`),
' else ',
block(`return v`)))));
// `decode<Name>`: reads a value from a typed decoder; `result` stays
// unassigned when decoding fails.
functions.push(
seq(`export function decode${name.description!}`,
`(d: _.TypedDecoder<_ptr>): ${name.description!} | undefined `,
collectBody(() => {
emit(seq(`let result`));
decoderFor(pattern, 'result');
emit(seq(`return result`));
})));
}
// Pointer support: `_ptr` is `never` when the schema forbids pointers and
// the type of the pointer pattern otherwise; `_val` is the value type
// specialised to `_ptr`.
const pointerType: Item = pointerName === false ? 'never' : typeFor(pointerName);
types.push(seq('export type _ptr = ', pointerType, `;`));
types.push(`export type _val = _.Value<_ptr>;`);

// `_decodePtr` either always throws (pointers forbidden) or decodes the
// pointer pattern in the same way a `decode...` function does.
let pointerDecoder: Item;
if (pointerName === false) {
    pointerDecoder = '() => { throw new _.DecodeError("Pointers forbidden"); }';
} else {
    pointerDecoder = seq(`(d: _.TypedDecoder<_ptr>) => `,
                         collectBody(() => {
                             emit(seq(`let result`));
                             decoderFor(pointerName, 'result');
                             emit(seq(`return result`));
                         }));
}
functions.push(seq(`export const _decodePtr = `, pointerDecoder, `;`));
// Assemble the module text: imports first, then literal constants, type
// declarations and finally functions.
const out = new Formatter();
const coreModule = options.preservesModule ?? '@preserves/core';
out.write(`import * as _ from ${JSON.stringify(coreModule)};\n`);
imports.forEach(([identifier, path]) => {
    out.write(`import * as ${identifier} from ${JSON.stringify(path)};\n`);
});
out.newline();

// Literal constants are written in order of their variable names.
const literalEntries = Array.from(literals);
literalEntries.sort(([, x], [, y]) => x < y ? -1 : x === y ? 0 : 1);
for (const [lit, varname] of literalEntries) {
    out.write(seq(`export const ${varname} = `, sourceCodeFor(lit), `;\n`));
}
out.newline();

// Each declaration is followed by two newlines.
for (const typeDecl of types) {
    out.write(typeDecl);
    out.newline();
    out.newline();
}
out.newline();

for (const fnDecl of functions) {
    out.write(fnDecl);
    out.newline();
    out.newline();
}

return out.toString();
}
/**
 * Renders a string as a double-quoted source literal by JSON-encoding it.
 *
 * @param s the string to render
 * @returns the quoted and escaped literal text
 */
export function stringSource(s: string) {
    const quoted = JSON.stringify(s);
    return quoted;
}
/**
 * Produces source text that reconstructs the value `v` in generated code,
 * by folding over the value with one handler per kind of value.
 *
 * @param v the value to render
 * @returns an `Item` holding the expression source
 * @throws Error when the value contains a pointer, for which no source can
 *         be emitted
 */
export function sourceCodeFor(v: Value<any>): Item {
    return fold(v, {
        // Booleans and numbers are rendered through their own toString().
        boolean(flag: boolean): Item { return flag.toString(); },
        single(value: number): Item { return value.toString(); },
        double(value: number): Item { return value.toString(); },
        integer(value: number): Item { return value.toString(); },

        string(text: string): Item { return stringSource(text); },

        bytes(b: Bytes): Item {
            const octets = Array.from(b).map(octet => octet.toString());
            return seq(`Uint8Array.from(`, brackets(... octets), `)`);
        },

        symbol(sym: symbol): Item {
            const quotedName = stringSource(sym.description!);
            return `Symbol.for(${quotedName})`;
        },

        record(r: Record<Value<any>, Tuple<Value<any>>, any>, k: Fold<any, Item>): Item {
            const label = k(r.label);
            const fields = brackets(... r.map(k));
            return seq(`_.Record<_val, _.Tuple<_val>, _ptr>`, parens(label, fields));
        },

        array(a: Array<Value<any>>, k: Fold<any, Item>): Item {
            return brackets(... a.map(k));
        },

        set(s: Set<any>, k: Fold<any, Item>): Item {
            const members = brackets(... Array.from(s).map(k));
            return seq('new _.Set<_val>', parens(members));
        },

        dictionary(d: Dictionary<Value<any>, any>, k: Fold<any, Item>): Item {
            const entries = Array.from(d).map(([kk, vv]) => brackets(k(kk), k(vv)));
            return seq('new _.Dictionary<_val, _ptr>', parens(brackets(... entries)));
        },

        annotated(a: Annotated<any>, k: Fold<any, Item>): Item {
            return seq('_.annotate<_ptr>', parens(k(a.item), ... a.annotations.map(k)));
        },

        pointer(t: any, _k: Fold<any, Item>): Item {
            throw new Error(preserves`Cannot emit source code for construction of pointer ${t}`);
        },
    });
}