From 6e3950cbc5660f1cf4e1ff3d5e07197e238fc58c Mon Sep 17 00:00:00 2001 From: Tony Garnock-Jones Date: Sat, 4 Nov 2023 16:10:08 +0100 Subject: [PATCH] pexprs.rkt --- .../racket/preserves/preserves/main.rkt | 30 ++- .../racket/preserves/preserves/pexprs.rkt | 229 ++++++++++++++++++ .../preserves/preserves/read-text-generic.rkt | 26 +- .../racket/preserves/preserves/read-text.rkt | 20 +- 4 files changed, 283 insertions(+), 22 deletions(-) create mode 100644 implementations/racket/preserves/preserves/pexprs.rkt diff --git a/implementations/racket/preserves/preserves/main.rkt b/implementations/racket/preserves/preserves/main.rkt index c7583fc..75f9fc2 100644 --- a/implementations/racket/preserves/preserves/main.rkt +++ b/implementations/racket/preserves/preserves/main.rkt @@ -7,6 +7,7 @@ (all-from-out "order.rkt") (all-from-out "embedded.rkt") (all-from-out "merge.rkt") + (all-from-out "pexprs.rkt") (all-from-out "read-binary.rkt") (all-from-out "read-text.rkt") @@ -18,7 +19,9 @@ detect-preserve-syntax read-preserve port->preserves - file->preserves) + file->preserves + port->pexprs + file->pexprs) (require racket/dict) (require racket/match) @@ -32,6 +35,7 @@ (require "order.rkt") (require "embedded.rkt") (require "merge.rkt") +(require "pexprs.rkt") (require "read-binary.rkt") (require "read-text.rkt") @@ -79,7 +83,7 @@ #:decode-embedded decode-embedded #:source path)))) -(define (port->preserves in-port +(define (port->preserves [in-port (current-input-port)] #:read-syntax? [read-syntax? #f] #:decode-embedded [decode-embedded #f] #:source [source (object-name in-port)]) @@ -89,3 +93,25 @@ #:decode-embedded decode-embedded #:source source)) in-port)) + +(define (file->pexprs path + #:read-syntax? [read-syntax? #f] + #:decode-embedded [decode-embedded #f]) + (call-with-input-file path (lambda (p) (port->pexprs p + #:read-syntax? read-syntax? + #:decode-embedded decode-embedded)))) + +(define (port->pexprs [in-port (current-input-port)] + #:read-syntax? [read-syntax? #f] + #:decode-embedded [decode-embedded #f] + #:source [source (object-name in-port)]) + (define vs + (port->list (lambda (in-port) + (read-pexpr in-port + #:read-syntax? read-syntax? + #:decode-embedded decode-embedded + #:source source)) + in-port)) + (if read-syntax? + vs + (remove-trailer vs))) diff --git a/implementations/racket/preserves/preserves/pexprs.rkt b/implementations/racket/preserves/preserves/pexprs.rkt new file mode 100644 index 0000000..88dce23 --- /dev/null +++ b/implementations/racket/preserves/preserves/pexprs.rkt @@ -0,0 +1,229 @@ +#lang racket/base + +(provide read-pexpr + string->pexpr + + pexpr->preserve + preserve->pexpr + + COMMA + SEMICOLON + TRAILER-ANCHOR + COLONS + + *COMMA* + *SEMICOLON* + *TRAILER-ANCHOR* + + RECORD + BLOCK + GROUP + SET + + remove-trailer) + +(require (for-syntax racket/base)) +(require racket/match) +(require racket/set) +(require "record.rkt") +(require "annotation.rkt") +(require "embedded.rkt") +(require "read-text-generic.rkt") +(require (only-in racket/list append-map)) + +(define *reader-name* 'read-pexpr) + +(define *COMMA* (record 'p '(|,|))) +(define *SEMICOLON* (record 'p '(|;|))) +(define *TRAILER-ANCHOR* (record 'a '())) + +(define-match-expander COMMA + (syntax-rules () [(_) (strip-annotations (== *COMMA*) #:depth 3)]) + (lambda (stx) + (syntax-case stx () + [(_) #'*COMMA*] + [_ #'(lambda () *COMMA*)]))) + +(define-match-expander SEMICOLON + (syntax-rules () [(_) (strip-annotations (== *SEMICOLON*) #:depth 3)]) + (lambda (stx) + (syntax-case stx () + [(_) #'*SEMICOLON*] + [_ #'(lambda () *SEMICOLON*)]))) + +(define-match-expander TRAILER-ANCHOR + (syntax-rules () [(_) (strip-annotations (== *TRAILER-ANCHOR*) #:depth 2)]) + (lambda (stx) + (syntax-case stx () + [(_) #'*TRAILER-ANCHOR*] + [_ #'(lambda () *TRAILER-ANCHOR*)]))) + +(define-match-expander RECORD + (syntax-rules () [(_ ps ...) (strip-annotations (record 'r (list ps ...)) #:depth 2)]) + (lambda (stx) + (syntax-case stx () + [(_ v ...) #'(record 'r (list v ...))] + [_ #'(lambda vs (record 'r vs))]))) + +(define-match-expander BLOCK + (syntax-rules () [(_ ps ...) (strip-annotations (record 'b (list ps ...)) #:depth 2)]) + (lambda (stx) + (syntax-case stx () + [(_ v ...) #'(record 'b (list v ...))] + [_ #'(lambda vs (record 'b vs))]))) + +(define-match-expander GROUP + (syntax-rules () [(_ ps ...) (strip-annotations (record 'g (list ps ...)) #:depth 2)]) + (lambda (stx) + (syntax-case stx () + [(_ v ...) #'(record 'g (list v ...))] + [_ #'(lambda vs (record 'g vs))]))) + +(define-match-expander SET + (syntax-rules () [(_ ps ...) (strip-annotations (record 's (list ps ...)) #:depth 2)]) + (lambda (stx) + (syntax-case stx () + [(_ v ...) #'(record 's (list v ...))] + [_ #'(lambda vs (record 's vs))]))) + +(define (colon-sym-length s) + (and (symbol? s) + (let ((s (string->list (symbol->string s)))) + (and (andmap (lambda (c) (eqv? c #\:)) s) + (length s))))) + +(define (colon-sym n) (string->symbol (make-string n #\:))) +(define (make-colons n) (record 'p (list (colon-sym n)))) + +(define-match-expander COLONS + (syntax-rules () [(_ n) (strip-annotations (record 'p (list (app colon-sym-length n))) #:depth 3)]) + (lambda (stx) + (syntax-case stx () + [(_ n) (let ((s (string->symbol (make-string (syntax->datum #'n) #\:)))) #`(record 'p '(#,s)))] + [_ #'make-colons]))) + +(define (remove-trailer ps) + (filter (match-lambda [(TRAILER-ANCHOR) #f] [_ #t]) ps)) + +(define pexpr-reader + (make-preserve-text-reader + #:reader-name *reader-name* + #:read-annotated-value (lambda (in-port source next parse-error*) + (lambda () + (skip-whitespace in-port) + (match (peek-char in-port) + [(or (? eof-object?) #\] #\> #\} #\)) *TRAILER-ANCHOR*] + [_ (next)]))) + #:on-hash (lambda (in-port source next parse-error* default) + (match-lambda + [#\{ (read-sequence 's in-port source next #\})] + [c (default c)])) + #:on-char (lambda (in-port source next parse-error* default) + (match-lambda + [#\< (read-sequence 'r in-port source next #\>)] + [#\[ (read-sequence #f in-port source next #\])] + [#\{ (read-sequence 'b in-port source next #\})] + [#\( (read-sequence 'g in-port source next #\))] + [#\, *COMMA*] + [#\; *SEMICOLON*] + [#\: (let loop ((acc '(#\:))) + (match (peek-char in-port) + [#\: (loop (cons (read-char in-port) acc))] + [_ (record 'p (list (string->symbol (list->string (reverse acc)))))]))] + [c (default c)])))) + +(define string->pexpr (make-preserve-string-reader *reader-name* pexpr-reader)) + +(define (read-sequence record-label in-port source next terminator-char) + (let loop ((acc '())) + (skip-whitespace in-port) + (match (eof-guard *reader-name* in-port source (peek-char in-port)) + [(== terminator-char) + (read-char in-port) + (if record-label + (record record-label (reverse acc)) + (reverse acc))] + [_ + (match (next) + [(record 'a '()) ;; skip bare annotation-anchors + ;; NB. Not matching (TRAILER-ANCHOR)! That would skip *non-bare* anchors too! + (loop acc)] + [v (loop (cons v acc))])]))) + +(define (read-pexpr [in-port (current-input-port)] + #:read-syntax? [read-syntax? #f] + #:decode-embedded [decode-embedded #f] + #:source [source (object-name in-port)]) + (pexpr-reader in-port source read-syntax? decode-embedded)) + +;;--------------------------------------------------------------------------- + +(define (uncomma p + #:map-embedded [map-embedded (lambda (walk v) (walk v))]) + (define (walk-seq ps) + (map walk (filter (match-lambda [(COMMA) #f] [_ #t]) ps))) + + (define (walk p) + (match p + [(? list? ps) (walk-seq ps)] + [(RECORD ps ...) (apply RECORD (walk-seq ps))] + [(GROUP ps ...) (apply GROUP (walk-seq ps))] + [(BLOCK ps ...) (apply BLOCK (walk-seq ps))] + [(SET ps ...) (apply SET (walk-seq ps))] + [(embedded v) (map-embedded walk v)] + [(annotated as loc v) (annotated (map walk as) loc (walk v))] + [(COMMA) (error 'uncomma "Cannot remove commas from term with comma outside container")] + [v v])) + + (walk p)) + +(define (pexpr->preserve p + #:discard-trailers? [discard-trailers? #f] + #:map-embedded [map-embedded (lambda (walk v) (walk v))]) + (define untrailer (if discard-trailers? remove-trailer values)) + (let walk ((p (uncomma p #:map-embedded map-embedded))) + (match p + [(list ps ...) + (map walk (untrailer ps))] + [(RECORD l ps ...) (record (walk l) (map walk (untrailer ps)))] + [(GROUP _ ...) (error 'pexpr->preserve "Cannot convert uninterpreted grouping")] + [(BLOCK ps ...) + (let loop ((acc (hash)) (ps (untrailer ps))) + (match ps + [(list) + acc] + [(list k0 (COLONS 1) v more ...) + (define k (walk k0)) + (if (hash-has-key? acc k) + (error 'pexpr->preserve "Duplicate key in dictionary: ~v" k) + (loop (hash-set acc k (walk v)) more))] + [_ + (error 'pexpr->preserve "Cannot convert invalid dictionary")]))] + [(SET ps ...) + (let loop ((acc (set)) (ps (untrailer ps))) + (match ps + ['() acc] + [(cons v0 more) + (define v (walk v0)) + (if (set-member? acc v) + (error 'pexpr->preserve "Duplicate item in set: ~v" v) + (loop (set-add acc v) more))]))] + [(SEMICOLON) (error 'pexpr->preserve "Cannot convert semicolon")] + [(COLONS _) (error 'pexpr->preserve "Cannot convert colons")] + [(TRAILER-ANCHOR) (error 'pexpr->preserve "Cannot convert trailer")] + [(embedded v) (map-embedded walk v)] + [(annotated as loc v) (annotated (map walk as) loc (walk v))] + [v v]))) + +(define (preserve->pexpr v #:map-embedded [map-embedded (lambda (walk v) (walk v))]) + (let walk ((v v)) + (match v + [(list vs ...) (map walk vs)] + [(? set?) (record 's (map walk (set->list v)))] + [(hash-table (kk vv) ...) + (record 'b (append-map (lambda (kk vv) (list (walk kk) (COLONS 1) (walk vv))) + kk vv))] + [(record l fs) (record 'r (map walk (cons l fs)))] + [(embedded v) (map-embedded walk v)] + [(annotated as loc v) (annotated (map walk as) loc (walk v))] + [v v]))) diff --git a/implementations/racket/preserves/preserves/read-text-generic.rkt b/implementations/racket/preserves/preserves/read-text-generic.rkt index bdc88ce..d66bccf 100644 --- a/implementations/racket/preserves/preserves/read-text-generic.rkt +++ b/implementations/racket/preserves/preserves/read-text-generic.rkt @@ -1,6 +1,7 @@ #lang racket/base (provide make-preserve-text-reader + make-preserve-string-reader parse-error skip-whitespace @@ -18,6 +19,21 @@ (define PIPE #\|) +(define (make-preserve-string-reader reader-name reader) + (lambda (s + #:read-syntax? [read-syntax? #f] + #:decode-embedded [decode-embedded #f] + #:source [source ""]) + (define p (open-input-string s)) + (when read-syntax? (port-count-lines! p)) + (define v (reader p source read-syntax? decode-embedded)) + (when (eof-object? v) + (parse-error #:raise-proc raise-read-eof-error reader-name p source "Unexpected end of input")) + (skip-whitespace p) + (when (not (eof-object? (peek-char p))) + (parse-error reader-name p source "Unexpected following text")) + v)) + (define (parse-error #:raise-proc [raise-proc raise-read-error] reader-name i source fmt . args) (define-values [line column pos] (port-next-location i)) (raise-proc (format "~a: ~a" reader-name (apply format fmt args)) @@ -48,7 +64,8 @@ (define ((make-preserve-text-reader #:reader-name reader-name #:on-char on-char0 - #:on-hash on-hash0) + #:on-hash on-hash0 + #:read-annotated-value read-annotated-value0) in-port source read-syntax? decode-embedded0) (define read-annotations? read-syntax?) (define decode-embedded (or decode-embedded0 @@ -104,10 +121,13 @@ (on-hash0 in-port source next parse-error* (lambda (c) (parse-error* "Invalid # syntax: ~v" c)))) + (define read-annotated-value + (read-annotated-value0 in-port source next parse-error*)) + (define (annotate-next-with a) (if read-annotations? - (annotate (next) a) - (next))) + (annotate (read-annotated-value) a) + (read-annotated-value))) ;;--------------------------------------------------------------------------- ;; Source location tracking diff --git a/implementations/racket/preserves/preserves/read-text.rkt b/implementations/racket/preserves/preserves/read-text.rkt index 5a46450..1099232 100644 --- a/implementations/racket/preserves/preserves/read-text.rkt +++ b/implementations/racket/preserves/preserves/read-text.rkt @@ -11,26 +11,10 @@ (define *reader-name* 'read-preserve/text) -(define (string->preserve s - #:read-syntax? [read-syntax? #f] - #:decode-embedded [decode-embedded #f] - #:source [source ""]) - (define p (open-input-string s)) - (when read-syntax? (port-count-lines! p)) - (define v (read-preserve/text p - #:read-syntax? read-syntax? - #:decode-embedded decode-embedded - #:source source)) - (when (eof-object? v) - (parse-error #:raise-proc raise-read-eof-error *reader-name* p source "Unexpected end of input")) - (skip-whitespace p) - (when (not (eof-object? (peek-char p))) - (parse-error *reader-name* p source "Unexpected text following preserve")) - v) - (define text-reader (make-preserve-text-reader #:reader-name *reader-name* + #:read-annotated-value (lambda (in-port source next parse-error*) next) #:on-hash (lambda (in-port source next parse-error* default) (match-lambda [#\{ (sequence-fold in-port @@ -62,6 +46,8 @@ [c (default c)])))) +(define string->preserve (make-preserve-string-reader *reader-name* text-reader)) + (define (sequence-fold in-port source next skip-ws acc accumulate-one finish terminator-char) (let loop ((acc acc)) (skip-ws in-port)