preserves/implementations/racket/preserves/preserves/read-binary.rkt

193 lines
7.9 KiB
Racket

#lang racket/base
(provide read-preserve/binary
bytes->preserve
preserve-sequence-reader)
(require racket/match)
(require "record.rkt")
(require "embedded.rkt")
(require "float.rkt")
(require "annotation.rkt")
(require "varint.rkt")
(require racket/set)
(require (only-in racket/port call-with-input-bytes port->bytes))
(define (default-on-short) (error 'read-preserve/binary "Short Preserves binary"))
(define (default-on-fail message . args) (error 'read-preserve/binary (apply format message args)))
(define (default-decode-embedded v)
(error 'read-preserve/binary "No decode-embedded function supplied"))
(define (bytes->preserve bs
#:read-syntax? [read-syntax? #f]
#:decode-embedded [decode-embedded #f]
#:on-short [on-short default-on-short]
[on-fail default-on-fail]
[expected-input-length #f])
(call-with-input-bytes
bs
(lambda (p)
(match (read-preserve/binary p
#:read-syntax? read-syntax?
#:decode-embedded decode-embedded
#:on-short on-short
on-fail
expected-input-length)
[(? eof-object?) (on-short)]
[v v]))))
(define (read-preserve/binary [in-port (current-input-port)]
#:read-syntax? [read-syntax? #f]
#:decode-embedded [decode-embedded0 #f]
#:on-short [on-short default-on-short]
[on-fail default-on-fail]
[expected-input-length #f])
(define read-annotations? read-syntax?)
(define decode-embedded (or decode-embedded0 default-decode-embedded))
(let/ec return
(define count expected-input-length)
(define (eof-guard v)
(if (eof-object? v)
(return (on-short))
v))
(define (next-byte) (eof-guard (next-byte*)))
(define (next-byte*)
(cond [(not count) (read-byte in-port)]
[(zero? count) eof]
[else (begin0 (read-byte in-port) (set! count (- count 1)))]))
(define (remaining-bytes)
(if (not count)
(port->bytes in-port)
(let ((bs (eof-guard (read-bytes count in-port))))
(if (< (bytes-length bs) count)
(return (on-short))
(begin0 bs (set! count 0))))))
(define (next) (wrap (pos) (next* (next-byte))))
(define pos
(if read-syntax?
(lambda ()
(define-values (_line _column position) (port-next-location in-port))
position)
(lambda () #f)))
(define wrap
(if read-syntax?
(lambda (pos0 v)
(if (annotated? v)
v
(annotated '() (srcloc #f #f #f pos0 (- (pos) pos0)) v)))
(lambda (pos0 v) v)))
(define (next* tag)
(match tag
[#xA0 #f]
[#xA1 #t]
[#xA2 (let ((bs (remaining-bytes)))
(match (bytes-length bs)
[4 (float (floating-point-bytes->real bs #t 0 4))]
[8 (floating-point-bytes->real bs #t 0 8)]
[n (return (on-fail "Invalid floating-point length: ~v" n))]))]
[#xA3 (let* ((acc0 (initial-integer))
(acc (if (< acc0 128) acc0 (- acc0 256))))
(for/fold [(acc acc)] [(b (remaining-bytes))] (+ (* acc 256) b)))]
[#xA4 (let* ((bs (remaining-bytes))
(n (bytes-length bs)))
(if (or (zero? n) (not (zero? (bytes-ref bs (- n 1)))))
(return (on-fail "String not NUL terminated"))
(bytes->string/utf-8 (subbytes bs 0 (- n 1)))))]
[#xA5 (remaining-bytes)]
[#xA6 (string->symbol (bytes->string/utf-8 (remaining-bytes)))]
[#xA7 (apply (lambda (label . fields) (record label fields)) (next-items))]
[#xA8 (next-items)]
[#xA9 (list->set (next-items))]
[#xAA (build-dictionary (next-items))]
[#xAB (embedded (decode-embedded (next)))]
[#xBF (if read-annotations?
(apply annotate (next-items))
(begin0 (next-item (next-byte)) (remaining-bytes)))]
[_ (return (on-fail "Invalid Preserves binary tag: ~v" tag))]))
(define (initial-integer)
(cond [(not count) (match (read-byte in-port) [(? eof-object?) 0] [n n])]
[(zero? count) 0]
[else (next-byte)]))
(define (next-item first-varint-byte)
(define block-len (eof-guard (read-varint in-port first-varint-byte)))
(define next-count (and count (- count block-len)))
(set! count block-len)
(begin0 (next) (set! count next-count)))
(define (next-items)
(cond [(not count) (match (read-byte in-port)
[(? eof-object?) '()]
[n (cons (next-item n) (next-items))])]
[(zero? count) '()]
[else (cons (next-item (next-byte)) (next-items))]))
(define (build-dictionary items)
(when (not (even? (length items))) (return (on-fail "Odd number of items in dictionary")))
(apply hash items))
(if (not count)
(let ((pos0 (pos)))
(match (next-byte*)
[(? eof-object?) eof]
[tag (wrap pos0 (next* tag))]))
(next))))
(define (read-preserve/binary/length-prefix [in-port (current-input-port)]
#:read-syntax? [read-syntax? #f]
#:decode-embedded [decode-embedded #f]
#:on-short [on-short default-on-short]
[on-fail default-on-fail])
(if (eof-object? (peek-byte in-port))
eof
(match (read-varint in-port)
[(? eof-object?) (on-short)]
[block-len (read-preserve/binary in-port
#:read-syntax? read-syntax?
#:decode-embedded decode-embedded
#:on-short on-short
on-fail
block-len)])))
(define (preserve-sequence-reader [in-port (current-input-port)]
#:read-syntax? [read-syntax? #f]
#:decode-embedded [decode-embedded #f]
#:on-short [on-short default-on-short]
[on-fail default-on-fail])
(if (eqv? (peek-byte in-port) #xA8)
(begin (read-byte in-port)
(lambda () (read-preserve/binary/length-prefix in-port
#:read-syntax? read-syntax?
#:decode-embedded decode-embedded
#:on-short on-short
on-fail)))
#f))
(module+ test
(require rackunit)
(require (only-in file/sha1 hex-string->bytes))
(let ((r (preserve-sequence-reader (open-input-bytes
(hex-string->bytes "a882a30182a30281a381")))))
(check-equal? (r) 1)
(check-equal? (r) 2)
(check-equal? (r) 0)
(check-exn #px"Short Preserves binary" (lambda () (r))))
(check-equal? (read-preserve/binary/length-prefix (open-input-bytes (hex-string->bytes "82a301"))) 1)
(check-equal? (read-preserve/binary/length-prefix (open-input-bytes (hex-string->bytes "82a302"))) 2)
(check-equal? (read-preserve/binary/length-prefix (open-input-bytes (hex-string->bytes "81a3"))) 0)
(check-equal? (read-preserve/binary/length-prefix (open-input-bytes (hex-string->bytes "81"))
#:on-short (lambda () 'short))
'short))