Split apart text reader in prep for p-expressions
This commit is contained in:
parent
d540ee6faf
commit
cd4f8e410f
|
@ -0,0 +1,290 @@
|
||||||
|
#lang racket/base
|
||||||
|
|
||||||
|
(provide make-preserve-text-reader
|
||||||
|
|
||||||
|
parse-error
|
||||||
|
skip-whitespace
|
||||||
|
skip-whitespace/commas
|
||||||
|
eof-guard)
|
||||||
|
|
||||||
|
(require racket/match)
|
||||||
|
(require "embedded.rkt")
|
||||||
|
(require "annotation.rkt")
|
||||||
|
(require "float.rkt")
|
||||||
|
(require "float-bytes.rkt")
|
||||||
|
(require syntax/readerr)
|
||||||
|
(require (only-in file/sha1 hex-string->bytes))
|
||||||
|
(require (only-in net/base64 base64-decode))
|
||||||
|
|
||||||
|
(define PIPE #\|)
|
||||||
|
|
||||||
|
(define (parse-error #:raise-proc [raise-proc raise-read-error] reader-name i source fmt . args)
|
||||||
|
(define-values [line column pos] (port-next-location i))
|
||||||
|
(raise-proc (format "~a: ~a" reader-name (apply format fmt args))
|
||||||
|
source
|
||||||
|
line
|
||||||
|
column
|
||||||
|
pos
|
||||||
|
#f))
|
||||||
|
|
||||||
|
(define (skip-whitespace i)
|
||||||
|
(regexp-match? #px#"^\\s*" i)) ;; side effect: consumes matched portion of input
|
||||||
|
|
||||||
|
(define (skip-whitespace/commas i)
|
||||||
|
(regexp-match? #px#"^(\\s|,)*" i)) ;; side effect: consumes matched portion of input
|
||||||
|
|
||||||
|
(define (eof-guard reader-name in-port source v)
|
||||||
|
(when (eof-object? v)
|
||||||
|
(parse-error #:raise-proc raise-read-eof-error reader-name in-port source
|
||||||
|
"Unexpected end of input"))
|
||||||
|
v)
|
||||||
|
|
||||||
|
(define (next-char reader-name in-port source)
|
||||||
|
(eof-guard reader-name in-port source (read-char in-port)))
|
||||||
|
|
||||||
|
(define-match-expander px
|
||||||
|
(syntax-rules ()
|
||||||
|
[(_ re pat) (app (lambda (v) (regexp-try-match re v)) pat)]))
|
||||||
|
|
||||||
|
(define ((make-preserve-text-reader #:reader-name reader-name
|
||||||
|
#:on-char on-char0
|
||||||
|
#:on-hash on-hash0)
|
||||||
|
in-port source read-syntax? decode-embedded0)
|
||||||
|
(define read-annotations? read-syntax?)
|
||||||
|
(define decode-embedded (or decode-embedded0
|
||||||
|
(lambda (v)
|
||||||
|
(error reader-name "No decode-embedded function supplied"))))
|
||||||
|
|
||||||
|
;;---------------------------------------------------------------------------
|
||||||
|
;; Basic I/O
|
||||||
|
|
||||||
|
(define (parse-error* fmt . args)
|
||||||
|
(apply parse-error reader-name in-port source fmt args))
|
||||||
|
|
||||||
|
(define (next-char*) (next-char reader-name in-port source))
|
||||||
|
|
||||||
|
(define (skip-whitespace*) (skip-whitespace in-port))
|
||||||
|
(define (skip-whitespace/commas*) (skip-whitespace/commas in-port))
|
||||||
|
|
||||||
|
;;---------------------------------------------------------------------------
|
||||||
|
;; Core of parser
|
||||||
|
|
||||||
|
(define (next) (wrap (pos) (next*)))
|
||||||
|
|
||||||
|
(define (next*)
|
||||||
|
(skip-whitespace*)
|
||||||
|
(match (next-char*)
|
||||||
|
[#\" (read-string #\")]
|
||||||
|
[(== PIPE) (string->symbol (read-string PIPE))]
|
||||||
|
|
||||||
|
[#\@ (annotate-next-with (next))]
|
||||||
|
|
||||||
|
[#\# (match (next-char*)
|
||||||
|
[(or #\space #\tab) (annotate-next-with (read-comment-line))]
|
||||||
|
[(or #\newline #\return) (annotate-next-with "")]
|
||||||
|
[#\f (unless (delimiter-follows?) (parse-error* "Delimiter must follow #f")) #f]
|
||||||
|
[#\t (unless (delimiter-follows?) (parse-error* "Delimiter must follow #t")) #t]
|
||||||
|
[#\" (read-literal-binary)]
|
||||||
|
[#\x (match (next-char*)
|
||||||
|
[#\" (read-hex-binary '())]
|
||||||
|
[#\f (read-hex-float 'float)]
|
||||||
|
[#\d (read-hex-float 'double)]
|
||||||
|
[c (parse-error* "Invalid #x syntax: ~v" c)])]
|
||||||
|
[#\[ (read-base64-binary '())]
|
||||||
|
[#\! (embedded (decode-embedded (next)))]
|
||||||
|
[c (on-hash c)])]
|
||||||
|
|
||||||
|
[c (on-char c)]))
|
||||||
|
|
||||||
|
(define on-char
|
||||||
|
(on-char0 in-port source next parse-error*
|
||||||
|
(lambda (c) (read-raw-symbol-or-number (list c)))))
|
||||||
|
|
||||||
|
(define on-hash
|
||||||
|
(on-hash0 in-port source next parse-error*
|
||||||
|
(lambda (c) (parse-error* "Invalid # syntax: ~v" c))))
|
||||||
|
|
||||||
|
(define (annotate-next-with a)
|
||||||
|
(if read-annotations?
|
||||||
|
(annotate (next) a)
|
||||||
|
(next)))
|
||||||
|
|
||||||
|
;;---------------------------------------------------------------------------
|
||||||
|
;; Source location tracking
|
||||||
|
|
||||||
|
(define pos
|
||||||
|
(if read-syntax?
|
||||||
|
(lambda ()
|
||||||
|
(define-values (line column position) (port-next-location in-port))
|
||||||
|
(list line column position))
|
||||||
|
(lambda () #f)))
|
||||||
|
|
||||||
|
(define wrap
|
||||||
|
(if read-syntax?
|
||||||
|
(lambda (pos0 v)
|
||||||
|
(if (annotated? v)
|
||||||
|
v
|
||||||
|
(let ()
|
||||||
|
(match-define (list line0 column0 position0) pos0)
|
||||||
|
(match-define (list line1 column1 position1) (pos))
|
||||||
|
(define loc (and line0 column0 position0 position1
|
||||||
|
(srcloc source line0 column0 position0 (- position1 position0))))
|
||||||
|
(annotated '() loc v))))
|
||||||
|
(lambda (pos0 v) v)))
|
||||||
|
|
||||||
|
;;---------------------------------------------------------------------------
|
||||||
|
;; String-like things
|
||||||
|
|
||||||
|
(define (read-stringlike xform-item finish terminator-char hexescape-char hexescape-proc)
|
||||||
|
(let loop ((acc '()))
|
||||||
|
(match (next-char*)
|
||||||
|
[(== terminator-char) (finish (reverse acc))]
|
||||||
|
[#\\ (match (next-char*)
|
||||||
|
[(== hexescape-char) (loop (cons (hexescape-proc) acc))]
|
||||||
|
[(and c (or (== terminator-char) #\\ #\/)) (loop (cons (xform-item c) acc))]
|
||||||
|
[#\b (loop (cons (xform-item #\u08) acc))]
|
||||||
|
[#\f (loop (cons (xform-item #\u0C) acc))]
|
||||||
|
[#\n (loop (cons (xform-item #\u0A) acc))]
|
||||||
|
[#\r (loop (cons (xform-item #\u0D) acc))]
|
||||||
|
[#\t (loop (cons (xform-item #\u09) acc))]
|
||||||
|
[c (parse-error* "Invalid escape code \\~a" c)])]
|
||||||
|
[c (loop (cons (xform-item c) acc))])))
|
||||||
|
|
||||||
|
(define (read-string terminator-char)
|
||||||
|
(read-stringlike values
|
||||||
|
list->string
|
||||||
|
terminator-char
|
||||||
|
#\u
|
||||||
|
(lambda ()
|
||||||
|
(integer->char
|
||||||
|
(match in-port
|
||||||
|
[(px #px#"^[a-fA-F0-9]{4}" (list hexdigits))
|
||||||
|
(define n1 (string->number (bytes->string/utf-8 hexdigits) 16))
|
||||||
|
(if (<= #xd800 n1 #xdfff) ;; surrogate pair
|
||||||
|
(if (>= n1 #xdc00)
|
||||||
|
(parse-error* "Bad first half of surrogate pair")
|
||||||
|
(match in-port
|
||||||
|
[(px #px#"^\\\\u([a-fA-F0-9]{4})" (list _ hexdigits2))
|
||||||
|
(define n2 (string->number (bytes->string/utf-8 hexdigits2) 16))
|
||||||
|
(if (<= #xdc00 n2 #xdfff)
|
||||||
|
(+ (arithmetic-shift (- n1 #xd800) 10)
|
||||||
|
(- n2 #xdc00)
|
||||||
|
#x10000)
|
||||||
|
(parse-error* "Bad second half of surrogate pair"))]
|
||||||
|
[_ (parse-error* "Missing second half of surrogate pair")]))
|
||||||
|
n1)]
|
||||||
|
[_ (parse-error* "Bad string \\u escape")])))))
|
||||||
|
|
||||||
|
(define (read-literal-binary)
|
||||||
|
(read-stringlike (lambda (c)
|
||||||
|
(define b (char->integer c))
|
||||||
|
(when (>= b 256)
|
||||||
|
(parse-error* "Invalid code point ~a (~v) in literal binary" b c))
|
||||||
|
b)
|
||||||
|
list->bytes
|
||||||
|
#\"
|
||||||
|
#\x
|
||||||
|
(lambda ()
|
||||||
|
(match in-port
|
||||||
|
[(px #px#"^[a-fA-F0-9]{2}" (list hexdigits))
|
||||||
|
(string->number (bytes->string/utf-8 hexdigits) 16)]
|
||||||
|
[_ (parse-error* "Bad binary \\x escape")]))))
|
||||||
|
|
||||||
|
;;---------------------------------------------------------------------------
|
||||||
|
;; Hex-encoded ByteStrings
|
||||||
|
|
||||||
|
(define (hexdigit? ch)
|
||||||
|
(or (and (char>=? ch #\A) (char<=? ch #\F))
|
||||||
|
(and (char>=? ch #\a) (char<=? ch #\f))
|
||||||
|
(and (char>=? ch #\0) (char<=? ch #\9))))
|
||||||
|
|
||||||
|
(define (read-hex-binary acc)
|
||||||
|
(skip-whitespace*)
|
||||||
|
(define ch (next-char*))
|
||||||
|
(cond [(eqv? ch #\")
|
||||||
|
(hex-string->bytes (list->string (reverse acc)))]
|
||||||
|
[(hexdigit? ch)
|
||||||
|
(define ch2 (next-char*))
|
||||||
|
(when (not (hexdigit? ch2))
|
||||||
|
(parse-error* "Hex-encoded binary digits must come in pairs"))
|
||||||
|
(read-hex-binary (cons ch2 (cons ch acc)))]
|
||||||
|
[else
|
||||||
|
(parse-error* "Invalid hex character")]))
|
||||||
|
|
||||||
|
;;---------------------------------------------------------------------------
|
||||||
|
;; Hex-encoded floating point numbers
|
||||||
|
|
||||||
|
(define (read-hex-float precision)
|
||||||
|
(unless (eqv? (next-char*) #\")
|
||||||
|
(parse-error* "Missing open-double-quote in hex-encoded floating-point number"))
|
||||||
|
(define bs (read-hex-binary '()))
|
||||||
|
(unless (= (bytes-length bs) (match precision ['float 4] ['double 8]))
|
||||||
|
(parse-error* "Incorrect number of bytes in hex-encoded floating-point number"))
|
||||||
|
(match precision
|
||||||
|
['float (bytes->float bs)]
|
||||||
|
['double (bytes->double bs)]))
|
||||||
|
|
||||||
|
;;---------------------------------------------------------------------------
|
||||||
|
;; Base64-encoded ByteStrings
|
||||||
|
|
||||||
|
(define (read-base64-binary acc)
|
||||||
|
(skip-whitespace*)
|
||||||
|
(define ch (next-char*))
|
||||||
|
(cond [(eqv? ch #\])
|
||||||
|
(base64-decode (string->bytes/latin-1 (list->string (reverse acc))))]
|
||||||
|
[(char=? ch #\-) (read-base64-binary (cons #\+ acc))]
|
||||||
|
[(char=? ch #\_) (read-base64-binary (cons #\/ acc))]
|
||||||
|
[(or (and (char>=? ch #\A) (char<=? ch #\Z))
|
||||||
|
(and (char>=? ch #\a) (char<=? ch #\z))
|
||||||
|
(and (char>=? ch #\0) (char<=? ch #\9))
|
||||||
|
(memv ch '(#\+ #\/ #\=)))
|
||||||
|
(read-base64-binary (cons ch acc))]
|
||||||
|
[else
|
||||||
|
(parse-error* "Invalid base64 character")]))
|
||||||
|
|
||||||
|
;;---------------------------------------------------------------------------
|
||||||
|
;; Comments
|
||||||
|
|
||||||
|
(define (read-comment-line)
|
||||||
|
(define pos0 (pos))
|
||||||
|
(let loop ((acc '()))
|
||||||
|
(match (next-char*)
|
||||||
|
[(or #\newline #\return)
|
||||||
|
(wrap pos0 (list->string (reverse acc)))]
|
||||||
|
[c (loop (cons c acc))])))
|
||||||
|
|
||||||
|
;;---------------------------------------------------------------------------
|
||||||
|
;; "Raw" symbols and numbers
|
||||||
|
|
||||||
|
(define (delimiter-follows?)
|
||||||
|
(define c (peek-char in-port))
|
||||||
|
(or (eof-object? c)
|
||||||
|
(char-whitespace? c)
|
||||||
|
(eqv? c PIPE)
|
||||||
|
(memv c '(#\< #\> #\[ #\] #\{ #\} #\( #\)
|
||||||
|
#\# #\: #\" #\@ #\; #\,))))
|
||||||
|
|
||||||
|
(define (read-raw-symbol-or-number acc)
|
||||||
|
(if (delimiter-follows?)
|
||||||
|
(let ((input (list->string (reverse acc))))
|
||||||
|
(or (analyze-number input)
|
||||||
|
(string->symbol input)))
|
||||||
|
(read-raw-symbol-or-number (cons (read-char in-port) acc))))
|
||||||
|
|
||||||
|
(define (analyze-number input)
|
||||||
|
(match input
|
||||||
|
[(pregexp #px"^([-+]?\\d+)(((\\.\\d+([eE][-+]?\\d+)?)|([eE][-+]?\\d+))([fF]?))?$"
|
||||||
|
(list _ whole _ frac _ _ _ f))
|
||||||
|
(define n (string->number (if frac (string-append whole frac) whole)))
|
||||||
|
(cond [(not n) #f]
|
||||||
|
[(and f (positive? (string-length f))) (float n)]
|
||||||
|
[else n])]
|
||||||
|
[_ #f]))
|
||||||
|
|
||||||
|
;;---------------------------------------------------------------------------
|
||||||
|
;; Main entry point to parser
|
||||||
|
|
||||||
|
(skip-whitespace*)
|
||||||
|
(match (peek-char in-port)
|
||||||
|
[(? eof-object?) eof]
|
||||||
|
[_ (next)]))
|
|
@ -5,29 +5,11 @@
|
||||||
|
|
||||||
(require racket/match)
|
(require racket/match)
|
||||||
(require racket/set)
|
(require racket/set)
|
||||||
(require "embedded.rkt")
|
|
||||||
(require "annotation.rkt")
|
|
||||||
(require "read-binary.rkt")
|
|
||||||
(require "record.rkt")
|
(require "record.rkt")
|
||||||
(require "float.rkt")
|
(require "read-text-generic.rkt")
|
||||||
(require "float-bytes.rkt")
|
|
||||||
(require syntax/readerr)
|
(require syntax/readerr)
|
||||||
(require (only-in file/sha1 hex-string->bytes))
|
|
||||||
(require (only-in net/base64 base64-decode))
|
|
||||||
|
|
||||||
(define PIPE #\|)
|
(define *reader-name* 'read-preserve/text)
|
||||||
|
|
||||||
(define (parse-error* #:raise-proc [raise-proc raise-read-error] i source fmt . args)
|
|
||||||
(define-values [line column pos] (port-next-location i))
|
|
||||||
(raise-proc (format "read-preserve: ~a" (apply format fmt args))
|
|
||||||
source
|
|
||||||
line
|
|
||||||
column
|
|
||||||
pos
|
|
||||||
#f))
|
|
||||||
|
|
||||||
(define (default-decode-embedded v)
|
|
||||||
(error 'read-preserve/text "No decode-embedded function supplied"))
|
|
||||||
|
|
||||||
(define (string->preserve s
|
(define (string->preserve s
|
||||||
#:read-syntax? [read-syntax? #f]
|
#:read-syntax? [read-syntax? #f]
|
||||||
|
@ -40,302 +22,74 @@
|
||||||
#:decode-embedded decode-embedded
|
#:decode-embedded decode-embedded
|
||||||
#:source source))
|
#:source source))
|
||||||
(when (eof-object? v)
|
(when (eof-object? v)
|
||||||
(parse-error* #:raise-proc raise-read-eof-error p source "Unexpected end of input"))
|
(parse-error #:raise-proc raise-read-eof-error *reader-name* p source "Unexpected end of input"))
|
||||||
(skip-whitespace* p)
|
(skip-whitespace p)
|
||||||
(when (not (eof-object? (peek-char p)))
|
(when (not (eof-object? (peek-char p)))
|
||||||
(parse-error* p source "Unexpected text following preserve"))
|
(parse-error *reader-name* p source "Unexpected text following preserve"))
|
||||||
v)
|
v)
|
||||||
|
|
||||||
(define (skip-whitespace* i)
|
(define text-reader
|
||||||
(regexp-match? #px#"^\\s*" i)) ;; side effect: consumes matched portion of input
|
(make-preserve-text-reader
|
||||||
|
#:reader-name *reader-name*
|
||||||
|
#:on-hash (lambda (in-port source next parse-error* default)
|
||||||
|
(match-lambda
|
||||||
|
[#\{ (sequence-fold in-port
|
||||||
|
source
|
||||||
|
next
|
||||||
|
skip-whitespace/commas
|
||||||
|
(set)
|
||||||
|
(lambda (s e)
|
||||||
|
(when (set-member? s e)
|
||||||
|
(parse-error* "Duplicate set element: ~v" e))
|
||||||
|
(set-add s e))
|
||||||
|
values
|
||||||
|
#\})]
|
||||||
|
[c (default c)]))
|
||||||
|
#:on-char (lambda (in-port source next parse-error* default)
|
||||||
|
(match-lambda
|
||||||
|
[#\< (match (read-sequence in-port source next skip-whitespace #\>)
|
||||||
|
['() (parse-error* "Missing record label")]
|
||||||
|
[(cons label fields) (record label fields)])]
|
||||||
|
[#\[ (read-sequence in-port source next skip-whitespace/commas #\])]
|
||||||
|
[#\{ (read-dictionary in-port source next parse-error*)]
|
||||||
|
|
||||||
(define-match-expander px
|
[#\; (parse-error* "Semicolon is reserved syntax")]
|
||||||
(syntax-rules ()
|
[#\: (parse-error* "Unexpected key/value separator between items")]
|
||||||
[(_ re pat) (app (lambda (v) (regexp-try-match re v)) pat)]))
|
[#\> (parse-error* "Unexpected >")]
|
||||||
|
[#\] (parse-error* "Unexpected ]")]
|
||||||
|
[#\} (parse-error* "Unexpected }")]
|
||||||
|
[#\, (parse-error* "Unexpected ,")]
|
||||||
|
|
||||||
|
[c (default c)]))))
|
||||||
|
|
||||||
|
(define (sequence-fold in-port source next skip-ws acc accumulate-one finish terminator-char)
|
||||||
|
(let loop ((acc acc))
|
||||||
|
(skip-ws in-port)
|
||||||
|
(match (eof-guard *reader-name* in-port source (peek-char in-port))
|
||||||
|
[(== terminator-char) (read-char in-port) (finish acc)]
|
||||||
|
[_ (loop (accumulate-one acc (next)))])))
|
||||||
|
|
||||||
|
(define (read-sequence in-port source next skip-ws terminator)
|
||||||
|
(sequence-fold in-port source next skip-ws '() (lambda (acc v) (cons v acc)) reverse terminator))
|
||||||
|
|
||||||
|
(define (read-dictionary in-port source next parse-error*)
|
||||||
|
(sequence-fold in-port
|
||||||
|
source
|
||||||
|
next
|
||||||
|
skip-whitespace/commas
|
||||||
|
(hash)
|
||||||
|
(lambda (acc k)
|
||||||
|
(skip-whitespace in-port)
|
||||||
|
(match (peek-char in-port)
|
||||||
|
[#\: (read-char in-port)
|
||||||
|
(when (hash-has-key? acc k) (parse-error* "Duplicate key: ~v" k))
|
||||||
|
(hash-set acc k (next))]
|
||||||
|
[_ (parse-error* "Missing expected key/value separator")]))
|
||||||
|
values
|
||||||
|
#\}))
|
||||||
|
|
||||||
(define (read-preserve/text [in-port (current-input-port)]
|
(define (read-preserve/text [in-port (current-input-port)]
|
||||||
#:read-syntax? [read-syntax? #f]
|
#:read-syntax? [read-syntax? #f]
|
||||||
#:decode-embedded [decode-embedded0 #f]
|
#:decode-embedded [decode-embedded #f]
|
||||||
#:source [source (object-name in-port)])
|
#:source [source (object-name in-port)])
|
||||||
(define read-annotations? read-syntax?)
|
(text-reader in-port source read-syntax? decode-embedded))
|
||||||
(define decode-embedded (or decode-embedded0 default-decode-embedded))
|
|
||||||
|
|
||||||
;;---------------------------------------------------------------------------
|
|
||||||
;; Core of parser
|
|
||||||
|
|
||||||
(define (next) (wrap (pos) (next*)))
|
|
||||||
|
|
||||||
(define (next*)
|
|
||||||
(skip-whitespace)
|
|
||||||
(match (next-char)
|
|
||||||
[#\" (read-string #\")]
|
|
||||||
[(== PIPE) (string->symbol (read-string PIPE))]
|
|
||||||
|
|
||||||
[#\@ (annotate-next-with (next))]
|
|
||||||
|
|
||||||
[#\; (parse-error "Semicolon is reserved syntax")]
|
|
||||||
[#\: (parse-error "Unexpected key/value separator between items")]
|
|
||||||
|
|
||||||
[#\# (match (next-char)
|
|
||||||
[(or #\space #\tab) (annotate-next-with (read-comment-line))]
|
|
||||||
[(or #\newline #\return) (annotate-next-with "")]
|
|
||||||
[#\f (unless (delimiter-follows?) (parse-error "Delimiter must follow #f")) #f]
|
|
||||||
[#\t (unless (delimiter-follows?) (parse-error "Delimiter must follow #t")) #t]
|
|
||||||
[#\{ (sequence-fold #t (set) set-add* values #\})]
|
|
||||||
[#\" (read-literal-binary)]
|
|
||||||
[#\x (match (next-char)
|
|
||||||
[#\" (read-hex-binary '())]
|
|
||||||
[#\f (read-hex-float 'float)]
|
|
||||||
[#\d (read-hex-float 'double)]
|
|
||||||
[c (parse-error "Invalid #x syntax: ~v" c)])]
|
|
||||||
[#\[ (read-base64-binary '())]
|
|
||||||
[#\! (embedded (decode-embedded (next)))]
|
|
||||||
[c (parse-error "Invalid # syntax: ~v" c)])]
|
|
||||||
|
|
||||||
[#\< (match (read-sequence #\> #f)
|
|
||||||
['() (parse-error "Missing record label")]
|
|
||||||
[(cons label fields) (record label fields)])]
|
|
||||||
[#\[ (read-sequence #\] #t)]
|
|
||||||
[#\{ (read-dictionary)]
|
|
||||||
|
|
||||||
[#\> (parse-error "Unexpected >")]
|
|
||||||
[#\] (parse-error "Unexpected ]")]
|
|
||||||
[#\} (parse-error "Unexpected }")]
|
|
||||||
[#\, (parse-error "Unexpected ,")]
|
|
||||||
|
|
||||||
[c (read-raw-symbol-or-number (list c))]))
|
|
||||||
|
|
||||||
(define (set-add* s e)
|
|
||||||
(when (set-member? s e) (parse-error "Duplicate set element: ~v" e))
|
|
||||||
(set-add s e))
|
|
||||||
|
|
||||||
(define (annotate-next-with a)
|
|
||||||
(if read-annotations?
|
|
||||||
(annotate (next) a)
|
|
||||||
(next)))
|
|
||||||
|
|
||||||
;;---------------------------------------------------------------------------
|
|
||||||
;; Basic I/O
|
|
||||||
|
|
||||||
(define (parse-error fmt . args)
|
|
||||||
(apply parse-error* in-port source fmt args))
|
|
||||||
|
|
||||||
(define (eof-guard v)
|
|
||||||
(when (eof-object? v)
|
|
||||||
(parse-error* #:raise-proc raise-read-eof-error in-port source "Unexpected end of input"))
|
|
||||||
v)
|
|
||||||
|
|
||||||
(define (next-char) (eof-guard (read-char in-port)))
|
|
||||||
|
|
||||||
(define (skip-whitespace) (skip-whitespace* in-port))
|
|
||||||
|
|
||||||
(define (skip-whitespace/commas)
|
|
||||||
(regexp-match? #px#"^(\\s|,)*" in-port)) ;; side effect: consumes matched portion of input
|
|
||||||
|
|
||||||
;;---------------------------------------------------------------------------
|
|
||||||
;; Source location tracking
|
|
||||||
|
|
||||||
(define pos
|
|
||||||
(if read-syntax?
|
|
||||||
(lambda ()
|
|
||||||
(define-values (line column position) (port-next-location in-port))
|
|
||||||
(list line column position))
|
|
||||||
(lambda () #f)))
|
|
||||||
|
|
||||||
(define wrap
|
|
||||||
(if read-syntax?
|
|
||||||
(lambda (pos0 v)
|
|
||||||
(if (annotated? v)
|
|
||||||
v
|
|
||||||
(let ()
|
|
||||||
(match-define (list line0 column0 position0) pos0)
|
|
||||||
(match-define (list line1 column1 position1) (pos))
|
|
||||||
(define loc (and line0 column0 position0 position1
|
|
||||||
(srcloc source line0 column0 position0 (- position1 position0))))
|
|
||||||
(annotated '() loc v))))
|
|
||||||
(lambda (pos0 v) v)))
|
|
||||||
|
|
||||||
;;---------------------------------------------------------------------------
|
|
||||||
;; String-like things
|
|
||||||
|
|
||||||
(define (read-stringlike xform-item finish terminator-char hexescape-char hexescape-proc)
|
|
||||||
(let loop ((acc '()))
|
|
||||||
(match (next-char)
|
|
||||||
[(== terminator-char) (finish (reverse acc))]
|
|
||||||
[#\\ (match (next-char)
|
|
||||||
[(== hexescape-char) (loop (cons (hexescape-proc) acc))]
|
|
||||||
[(and c (or (== terminator-char) #\\ #\/)) (loop (cons (xform-item c) acc))]
|
|
||||||
[#\b (loop (cons (xform-item #\u08) acc))]
|
|
||||||
[#\f (loop (cons (xform-item #\u0C) acc))]
|
|
||||||
[#\n (loop (cons (xform-item #\u0A) acc))]
|
|
||||||
[#\r (loop (cons (xform-item #\u0D) acc))]
|
|
||||||
[#\t (loop (cons (xform-item #\u09) acc))]
|
|
||||||
[c (parse-error "Invalid escape code \\~a" c)])]
|
|
||||||
[c (loop (cons (xform-item c) acc))])))
|
|
||||||
|
|
||||||
(define (read-string terminator-char)
|
|
||||||
(read-stringlike values
|
|
||||||
list->string
|
|
||||||
terminator-char
|
|
||||||
#\u
|
|
||||||
(lambda ()
|
|
||||||
(integer->char
|
|
||||||
(match in-port
|
|
||||||
[(px #px#"^[a-fA-F0-9]{4}" (list hexdigits))
|
|
||||||
(define n1 (string->number (bytes->string/utf-8 hexdigits) 16))
|
|
||||||
(if (<= #xd800 n1 #xdfff) ;; surrogate pair
|
|
||||||
(if (>= n1 #xdc00)
|
|
||||||
(parse-error "Bad first half of surrogate pair")
|
|
||||||
(match in-port
|
|
||||||
[(px #px#"^\\\\u([a-fA-F0-9]{4})" (list _ hexdigits2))
|
|
||||||
(define n2 (string->number (bytes->string/utf-8 hexdigits2) 16))
|
|
||||||
(if (<= #xdc00 n2 #xdfff)
|
|
||||||
(+ (arithmetic-shift (- n1 #xd800) 10)
|
|
||||||
(- n2 #xdc00)
|
|
||||||
#x10000)
|
|
||||||
(parse-error "Bad second half of surrogate pair"))]
|
|
||||||
[_ (parse-error "Missing second half of surrogate pair")]))
|
|
||||||
n1)]
|
|
||||||
[_ (parse-error "Bad string \\u escape")])))))
|
|
||||||
|
|
||||||
(define (read-literal-binary)
|
|
||||||
(read-stringlike (lambda (c)
|
|
||||||
(define b (char->integer c))
|
|
||||||
(when (>= b 256)
|
|
||||||
(parse-error "Invalid code point ~a (~v) in literal binary" b c))
|
|
||||||
b)
|
|
||||||
list->bytes
|
|
||||||
#\"
|
|
||||||
#\x
|
|
||||||
(lambda ()
|
|
||||||
(match in-port
|
|
||||||
[(px #px#"^[a-fA-F0-9]{2}" (list hexdigits))
|
|
||||||
(string->number (bytes->string/utf-8 hexdigits) 16)]
|
|
||||||
[_ (parse-error "Bad binary \\x escape")]))))
|
|
||||||
|
|
||||||
;;---------------------------------------------------------------------------
|
|
||||||
;; Hex-encoded ByteStrings
|
|
||||||
|
|
||||||
(define (hexdigit? ch)
|
|
||||||
(or (and (char>=? ch #\A) (char<=? ch #\F))
|
|
||||||
(and (char>=? ch #\a) (char<=? ch #\f))
|
|
||||||
(and (char>=? ch #\0) (char<=? ch #\9))))
|
|
||||||
|
|
||||||
(define (read-hex-binary acc)
|
|
||||||
(skip-whitespace)
|
|
||||||
(define ch (next-char))
|
|
||||||
(cond [(eqv? ch #\")
|
|
||||||
(hex-string->bytes (list->string (reverse acc)))]
|
|
||||||
[(hexdigit? ch)
|
|
||||||
(define ch2 (next-char))
|
|
||||||
(when (not (hexdigit? ch2))
|
|
||||||
(parse-error "Hex-encoded binary digits must come in pairs"))
|
|
||||||
(read-hex-binary (cons ch2 (cons ch acc)))]
|
|
||||||
[else
|
|
||||||
(parse-error "Invalid hex character")]))
|
|
||||||
|
|
||||||
;;---------------------------------------------------------------------------
|
|
||||||
;; Hex-encoded floating point numbers
|
|
||||||
|
|
||||||
(define (read-hex-float precision)
|
|
||||||
(unless (eqv? (next-char) #\")
|
|
||||||
(parse-error "Missing open-double-quote in hex-encoded floating-point number"))
|
|
||||||
(define bs (read-hex-binary '()))
|
|
||||||
(unless (= (bytes-length bs) (match precision ['float 4] ['double 8]))
|
|
||||||
(parse-error "Incorrect number of bytes in hex-encoded floating-point number"))
|
|
||||||
(match precision
|
|
||||||
['float (bytes->float bs)]
|
|
||||||
['double (bytes->double bs)]))
|
|
||||||
|
|
||||||
;;---------------------------------------------------------------------------
|
|
||||||
;; Base64-encoded ByteStrings
|
|
||||||
|
|
||||||
(define (read-base64-binary acc)
|
|
||||||
(skip-whitespace)
|
|
||||||
(define ch (next-char))
|
|
||||||
(cond [(eqv? ch #\])
|
|
||||||
(base64-decode (string->bytes/latin-1 (list->string (reverse acc))))]
|
|
||||||
[(char=? ch #\-) (read-base64-binary (cons #\+ acc))]
|
|
||||||
[(char=? ch #\_) (read-base64-binary (cons #\/ acc))]
|
|
||||||
[(or (and (char>=? ch #\A) (char<=? ch #\Z))
|
|
||||||
(and (char>=? ch #\a) (char<=? ch #\z))
|
|
||||||
(and (char>=? ch #\0) (char<=? ch #\9))
|
|
||||||
(memv ch '(#\+ #\/ #\=)))
|
|
||||||
(read-base64-binary (cons ch acc))]
|
|
||||||
[else
|
|
||||||
(parse-error "Invalid base64 character")]))
|
|
||||||
|
|
||||||
;;---------------------------------------------------------------------------
|
|
||||||
;; Comments
|
|
||||||
|
|
||||||
(define (read-comment-line)
|
|
||||||
(define pos0 (pos))
|
|
||||||
(let loop ((acc '()))
|
|
||||||
(match (next-char)
|
|
||||||
[(or #\newline #\return)
|
|
||||||
(wrap pos0 (list->string (reverse acc)))]
|
|
||||||
[c (loop (cons c acc))])))
|
|
||||||
|
|
||||||
;;---------------------------------------------------------------------------
|
|
||||||
;; Collections
|
|
||||||
|
|
||||||
(define (sequence-fold commas-allowed? acc accumulate-one finish terminator-char)
|
|
||||||
(let loop ((acc acc))
|
|
||||||
(if commas-allowed?
|
|
||||||
(skip-whitespace/commas)
|
|
||||||
(skip-whitespace))
|
|
||||||
(match (eof-guard (peek-char in-port))
|
|
||||||
[(== terminator-char) (read-char in-port) (finish acc)]
|
|
||||||
[_ (loop (accumulate-one acc (next)))])))
|
|
||||||
|
|
||||||
(define (read-sequence terminator commas-allowed?)
|
|
||||||
(sequence-fold commas-allowed? '() (lambda (acc v) (cons v acc)) reverse terminator))
|
|
||||||
|
|
||||||
(define (read-dictionary)
|
|
||||||
(sequence-fold #t
|
|
||||||
(hash)
|
|
||||||
(lambda (acc k)
|
|
||||||
(skip-whitespace)
|
|
||||||
(match (peek-char in-port)
|
|
||||||
[#\: (read-char in-port)
|
|
||||||
(when (hash-has-key? acc k) (parse-error "Duplicate key: ~v" k))
|
|
||||||
(hash-set acc k (next))]
|
|
||||||
[_ (parse-error "Missing expected key/value separator")]))
|
|
||||||
values
|
|
||||||
#\}))
|
|
||||||
|
|
||||||
;;---------------------------------------------------------------------------
|
|
||||||
;; "Raw" symbols and numbers
|
|
||||||
|
|
||||||
(define (delimiter-follows?)
|
|
||||||
(define c (peek-char in-port))
|
|
||||||
(or (eof-object? c)
|
|
||||||
(char-whitespace? c)
|
|
||||||
(eqv? c PIPE)
|
|
||||||
(memv c '(#\< #\> #\[ #\] #\{ #\} #\( #\)
|
|
||||||
#\# #\: #\" #\@ #\; #\,))))
|
|
||||||
|
|
||||||
(define (read-raw-symbol-or-number acc)
|
|
||||||
(if (delimiter-follows?)
|
|
||||||
(let ((input (list->string (reverse acc))))
|
|
||||||
(or (analyze-number input)
|
|
||||||
(string->symbol input)))
|
|
||||||
(read-raw-symbol-or-number (cons (read-char in-port) acc))))
|
|
||||||
|
|
||||||
(define (analyze-number input)
|
|
||||||
(match input
|
|
||||||
[(pregexp #px"^([-+]?\\d+)(((\\.\\d+([eE][-+]?\\d+)?)|([eE][-+]?\\d+))([fF]?))?$"
|
|
||||||
(list _ whole _ frac _ _ _ f))
|
|
||||||
(define n (string->number (if frac (string-append whole frac) whole)))
|
|
||||||
(cond [(not n) #f]
|
|
||||||
[(and f (positive? (string-length f))) (float n)]
|
|
||||||
[else n])]
|
|
||||||
[_ #f]))
|
|
||||||
|
|
||||||
;;---------------------------------------------------------------------------
|
|
||||||
;; Main entry point to parser
|
|
||||||
|
|
||||||
(skip-whitespace)
|
|
||||||
(match (peek-char in-port)
|
|
||||||
[(? eof-object?) eof]
|
|
||||||
[_ (next)]))
|
|
||||||
|
|
Loading…
Reference in New Issue