Compare commits

...

7 Commits

Author SHA1 Message Date
Tony Garnock-Jones 4713e957ca Adapt route.rkt to use sorted-map; doesn't work, because they don't have canonical forms 2014-07-18 19:58:58 -07:00
Tony Garnock-Jones 3bfc9b910a sorted-map-has-key? 2014-07-18 19:58:58 -07:00
Tony Garnock-Jones 26bd5c7638 changed to use the order? convention from data/order 2014-07-18 19:58:58 -07:00
Tony Garnock-Jones f6a8b84d81 sorted-map-values 2014-07-18 19:58:57 -07:00
Tony Garnock-Jones a5dc977d73 Add sorted-map-keys 2014-07-18 19:58:57 -07:00
Tony Garnock-Jones 0b0020153e Constant-time sorted-map-size 2014-07-18 19:58:57 -07:00
Tony Garnock-Jones 10803adcd2 Hash-consed red-black treemap 2014-07-18 19:58:57 -07:00
3 changed files with 725 additions and 161 deletions

52
minimart/memoize.rkt Normal file
View File

@ -0,0 +1,52 @@
#lang racket/base
;; Poor-man's memoization.
(provide memoize1)
(define sentinel (cons #f #f))
(define (memoize1 f)
(define results (make-weak-hash))
(lambda (arg)
(hash-ref results arg (lambda ()
(define val (f arg))
(hash-set! results arg val)
val))))
(module+ test
(require rackunit)
(define call-counter 0)
(define (raw x)
(set! call-counter (+ call-counter 1))
(gensym 'raw-result))
(define cooked (memoize1 raw))
;; These tests will *likely* pass, but if garbage collection strikes
;; at an inopportune moment, they may fail.
(collect-garbage)
(define v (cons 1 2))
(check-equal? call-counter 0)
(check-eq? (cooked v) (cooked v))
(check-equal? call-counter 1)
(set! v (cons 1 2))
(check-equal? call-counter 1)
(check-equal? (cooked v) (cooked v))
(check-equal? call-counter 1)
(set! v (cons 1 2))
(collect-garbage)
(collect-garbage)
(collect-garbage)
(check-equal? call-counter 1)
(check-equal? (cooked v) (cooked v))
(check-equal? call-counter 2))

View File

@ -55,6 +55,8 @@
(require (only-in racket/port call-with-output-string with-output-to-string))
(require (only-in racket/class object?))
(require "canonicalize.rkt")
(require "sorted-map.rkt")
(require data/order)
(require rackunit)
@ -102,13 +104,13 @@
;; - #f, indicating no further matches possible
;; - (success Any), representing a successful match (if the end of
;; the input has been reached)
;; - (HashTable (U Sigma Wildcard) Matcher), {TODO}
;; TODO::: reimplement to use (ordinary-state (Option Matcher) (HashTable Sigma Matcher)), {TODO}
;; - (SortedMap (U Sigma Wildcard) Matcher), {TODO}
;; TODO::: reimplement to use (ordinary-state (Option Matcher) (SortedMap Sigma Matcher)), {TODO}
;; - (wildcard-sequence Matcher), {TODO}
;; If, in a hashtable matcher, a wild key is present, it is intended
;; If, in a sorted-map matcher, a wild key is present, it is intended
;; to catch all and ONLY those keys not otherwise present in the
;; table.
;; INVARIANT: if a key is present in a hashtable, then the
;; INVARIANT: if a key is present in a sorted-map, then the
;; corresponding value MUST NOT be equal to the wildcard
;; continuation, bearing in mind that
;; - if the wildcard is absent, it is implicitly #f;
@ -127,7 +129,7 @@
;; - ILM, signifying the transition into the cdr position of a pair
;; - EOS, signifying the notional close-paren at the end of a compound.
;; - any other value, representing itself.
;; N.B. hash-tables cannot be Sigmas at present.
;; N.B. sorted-maps cannot be Sigmas at present.
(define-singleton-struct SOL start-of-list "<")
(define-singleton-struct SOV start-of-vector "<vector")
(define-singleton-struct ILM improper-list-marker "|")
@ -162,8 +164,8 @@
(or (eq? x #f)
(success? x)
(wildcard-sequence? x)
(and (hash? x)
(for/and ([v (in-hash-values x)])
(and (sorted-map? x)
(for/and ([v (sorted-map-values x)])
(matcher? v)))))
;; -> Matcher
@ -192,14 +194,33 @@
(define (rsuccess v)
(and v (canonicalize (success v))))
;; Order for sigmas
(define (sigma-order a b)
(define sta? (struct-type? a))
(define stb? (struct-type? b))
(cond
[(and sta? stb?) (datum-order (struct-type-name a) (struct-type-name b))]
[sta? '<]
[stb? '>]
[else (datum-order a b)]))
;; (SortedMap (U Sigma Wildcard) Matcher)
;; The empty branch-matcher
(define empty-smap (sorted-map-empty sigma-order))
;; (U Sigma Wildcard) Matcher -> Matcher
;; Prepends e to r, if r is non-empty.
(define (rseq e r)
(if (matcher-empty? r) r (canonicalize (hash e r))))
(if (matcher-empty? r)
r
(sorted-map-insert empty-smap e r)))
;; [ (U Sigma Wildcard) Matcher ] ... -> Matcher
(define (rseq-multi . ers)
(canonicalize (apply hash ers)))
(let walk ((ers ers))
(match ers
[(list* e r rest) (sorted-map-insert (walk rest) e r)]
[(list) empty-smap])))
;; Matcher -> Matcher
;; Prepends the wildcard pseudo-Sigma to r, if r is non-empty.
@ -221,27 +242,27 @@
[_ #f]))
;; Matcher (U Sigma Wildcard) Matcher -> Matcher
;; r must be a hashtable matcher. Retrieves the continuation after
;; r must be a sorted-map matcher. Retrieves the continuation after
;; accepting key. If key is absent, returns wild-edge-value, modified
;; depending on key.
(define (rlookup r key wild-edge-value)
(hash-ref r key (lambda ()
(cond
[(key-open? key) (rwildseq wild-edge-value)]
[(key-close? key) (runwildseq wild-edge-value)]
[else wild-edge-value]))))
(sorted-map-get r key (lambda ()
(cond
[(key-open? key) (rwildseq wild-edge-value)]
[(key-close? key) (runwildseq wild-edge-value)]
[else wild-edge-value]))))
;; (Option (HashTable (U Sigma Wildcard) Matcher)) Sigma Matcher -> Matcher
;; (Option (SortedMap (U Sigma Wildcard) Matcher)) Sigma Matcher -> Matcher
;; Updates (installs or removes) a continuation in the Matcher r. r
;; must be either #f or a hashtable matcher. key MUST NOT be ?.
;; must be either #f or a sorted-map matcher. key MUST NOT be ?.
;; Preserves invariant that a key is never added if its continuation
;; is the same as the wildcard's continuation (which is implicitly #f
;; if absent, of course).
(define (rupdate r0 key k)
(when (eq? key ?) (error 'rupdate "Internal error: supplied wildcard as key"))
(define r (or r0 (hash)))
(empty-hash-guard
(let ((old-wild (hash-ref r ? (lambda () #f))))
(define r (or r0 empty-smap))
(empty-smap-guard
(let ((old-wild (sorted-map-get r ? (lambda () #f))))
(if (cond [(key-open? key)
(if (wildcard-sequence? k)
(requal? (wildcard-sequence-matcher k) old-wild)
@ -252,14 +273,14 @@
(matcher-empty? k))]
[else
(requal? k old-wild)])
(hash-remove r key)
(hash-set r key k)))))
(sorted-map-delete r key)
(sorted-map-insert r key k)))))
;; Hash -> Matcher
;; SortedMap -> Matcher
;; If the argument is empty, returns the canonical empty matcher;
;; otherwise, (canonicalizes and) returns the argument.
(define (empty-hash-guard h)
(and (positive? (hash-count h)) (canonicalize h)))
;; otherwise, returns the argument.
(define (empty-smap-guard h)
(and (positive? (sorted-map-size h)) h))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Pattern compilation
@ -285,13 +306,13 @@
[(cons p1 p2) (rseq SOL (walk p1 (walk-pair-chain p2 acc)))]
[(? vector? v) (rseq SOV (vector-foldr walk (rseq EOS acc) v))]
[(embedded-matcher m) (matcher-append m (lambda (_mv) acc))]
;; TODO: consider options for treating sorted-maps as compounds
;; rather than (useless) atoms
[(? sorted-map?) (error 'pattern->matcher "Cannot match on sorted-maps at present")]
[(? non-object-struct?)
(rseq (struct->struct-type p)
(walk-pair-chain (cdr (vector->list (struct->vector p)))
acc))]
;; TODO: consider options for treating hash tables as compounds
;; rather than (useless) atoms
[(? hash?) (error 'pattern->matcher "Cannot match on hash tables at present")]
[other (rseq other acc)]))
(walk-pair-chain ps0 (rsuccess v)))
@ -371,7 +392,8 @@
[((wildcard-sequence r1) r2) (f (expand-wildseq r1) r2)]
[(r1 (wildcard-sequence r2)) (f r1 (expand-wildseq r2))]
[((success v1) (success v2)) (rsuccess (vf v1 v2))]
[((? hash? h1) (? hash? h2)) (fold-over-keys h1 h2 f (left-base h1) (right-base h2))]))
[((? sorted-map? h1) (? sorted-map? h2))
(fold-over-keys h1 h2 f (left-base h1) (right-base h2))]))
(define (fold-over-keys h1 h2 f left-base right-base)
(define w1 (rlookup h1 ? #f))
@ -380,35 +402,48 @@
(cond
[(and w1 w2)
(for/fold [(acc (rwild (f w1 w2)))]
[(key (set-remove (set-union (hash-keys h1) (hash-keys h2)) ?))]
[(key (set-remove (set-union (sorted-map-keys h1) (sorted-map-keys h2)) ?))]
(rupdate acc key (f (rlookup h1 key w1) (rlookup h2 key w2))))]
[w1
(for/fold [(acc left-base)] [(key (in-hash-keys h2))]
(for/fold [(acc left-base)] [(key (sorted-map-keys h2))]
(rupdate acc key (f (rlookup h1 key w1) (rlookup h2 key w2))))]
[w2
(for/fold [(acc right-base)] [(key (in-hash-keys h1))]
(for/fold [(acc right-base)] [(key (sorted-map-keys h1))]
(rupdate acc key (f (rlookup h1 key w1) (rlookup h2 key w2))))]
[(< (hash-count h1) (hash-count h2))
(for/fold [(acc right-base)] [(key (in-hash-keys h1))]
[(< (sorted-map-size h1) (sorted-map-size h2))
(for/fold [(acc right-base)] [(key (sorted-map-keys h1))]
(rupdate acc key (f (rlookup h1 key w1) (rlookup h2 key w2))))]
[else
(for/fold [(acc left-base)] [(key (in-hash-keys h2))]
(for/fold [(acc left-base)] [(key (sorted-map-keys h2))]
(rupdate acc key (f (rlookup h1 key w1) (rlookup h2 key w2))))])))
;; Matcher -> Matcher
;; When a matcher contains only entries for (EOS -> m') and (★ ->
;; (wildcard-sequence m')), it is equivalent to (wildcard-sequence m')
;; itself. Also, if it's just (★ -> (wildcard-sequence m')), that's
;; equivalent to (wildcard-sequence m'). This is nearly the inverse of
;; expand-wildseq.
(define (collapse-wildcard-sequences m)
(match m
[(hash-table ((== ?) (and w (wildcard-sequence wk)))
((? key-close?) k))
(if (requal? k wk) w m)]
[(hash-table ((== ?) (and w (wildcard-sequence wk))))
w]
[_ m]))
;; itself. This is the inverse of expand-wildseq.
;;
;; In addition, we rewrite (★ -> (wildcard-sequence m')) to
;; (wildcard-sequence m'), since matcher-match-value will fall back to
;; ★ if EOS is missing, and rlookup adjusts appropriately.
(define collapse-wildcard-sequences
(let ((expanded-keys1 (set ? EOS))
(expanded-keys2 (set ?)))
(lambda (m)
(if (sorted-map? m)
(let ((keys (sorted-map-keys m)))
(cond
[(equal? keys expanded-keys1)
(define w (sorted-map-get m ?))
(define k (sorted-map-get m EOS))
(if (and (wildcard-sequence? w) (requal? k (wildcard-sequence-matcher w)))
w
m)]
[(equal? keys expanded-keys2)
(define w (sorted-map-get m ?))
(if (wildcard-sequence? w) w m)]
[else
m]))
m))))
;; Sigma -> Boolean
;; True iff k represents the start of a compound datum.
@ -425,8 +460,7 @@
;; Matcher -> Matcher
;; Unrolls the implicit recursion in a wildcard-sequence.
(define (expand-wildseq r)
(canonicalize (hash ? (rwildseq r)
EOS r)))
(sorted-map-insert (sorted-map-insert empty-smap ? (rwildseq r)) EOS r))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Matching single keys into a multimap
@ -458,8 +492,8 @@
(null? stack))
result
failure-result)]
[(? hash?)
(define (get key) (hash-ref r key (lambda () #f)))
[(? sorted-map?)
(define (get key) (sorted-map-get r key (lambda () #f)))
(match vs
['()
(match stack
@ -498,17 +532,17 @@
[((wildcard-sequence r1) r2) (walk (expand-wildseq r1) r2 acc)]
[(r1 (wildcard-sequence r2)) (walk r1 (expand-wildseq r2) acc)]
[((success v1) (success v2)) ((matcher-match-matcher-successes) v1 v2 acc)]
[((? hash? h1) (? hash? h2))
[((? sorted-map? h1) (? sorted-map? h2))
(define w1 (rlookup h1 ? #f))
(define w2 (rlookup h2 ? #f))
(define r (walk w1 w2 acc))
(for/fold [(r r)]
[(key (cond
[(and w1 w2) (set-remove (set-union (hash-keys h1) (hash-keys h2)) ?)]
[w1 (hash-keys h2)]
[w2 (hash-keys h1)]
[(< (hash-count h1) (hash-count h2)) (hash-keys h1)]
[else (hash-keys h2)]))]
[(and w1 w2) (set-remove (set-union (sorted-map-keys h1) (sorted-map-keys h2)) ?)]
[w1 (sorted-map-keys h2)]
[w2 (sorted-map-keys h1)]
[(< (sorted-map-size h1) (sorted-map-size h2)) (sorted-map-keys h1)]
[else (sorted-map-keys h2)]))]
(walk (rlookup h1 key w1) (rlookup h2 key w2) r))])))
;; Matcher × (Value → Matcher) → Matcher
@ -523,11 +557,12 @@
[#f #f]
[(success v) (error 'matcher-append "Ill-formed matcher: ~v" m0)]
[(wildcard-sequence m1) (rwildseq (walk m1))]
[(? hash?) (for/fold [(acc (rwild (walk (rlookup m ? #f))))]
[((k v) (in-hash m)) #:when (not (eq? k ?))]
(if (and (key-close? k) (success? v))
(matcher-union acc (m-tail-fn (success-value v)))
(rupdate acc k (walk v))))])))
[(? sorted-map?) (for/fold [(acc (rwild (walk (rlookup m ? #f))))]
[(kv (sorted-map-to-alist m)) #:when (not (eq? (car kv) ?))]
(match-define (cons k v) kv)
(if (and (key-close? k) (success? v))
(matcher-union acc (m-tail-fn (success-value v)))
(rupdate acc k (walk v))))])))
;; Matcher (Value -> (Option Value)) -> Matcher
;; Maps f over success values in m.
@ -537,9 +572,9 @@
[#f #f]
[(success v) (rsuccess (f v))]
[(wildcard-sequence m1) (rwildseq (walk m1))]
[(? hash?) (for/fold [(acc (rwild (walk (rlookup m ? #f))))]
[((k v) (in-hash m)) #:when (not (eq? k ?))]
(rupdate acc k (walk v)))])))
[(? sorted-map?) (for/fold [(acc (rwild (walk (rlookup m ? #f))))]
[(kv (sorted-map-to-alist m)) #:when (not (eq? (car kv) ?))]
(rupdate acc (car kv) (walk (cdr kv))))])))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Projection
@ -561,12 +596,12 @@
[(cons p1 p2) (cons SOL (walk p1 (walk-pair-chain p2 acc)))]
[(? vector? v) (cons SOV (vector-foldr walk (cons EOS acc) v))]
[(embedded-matcher m) (error 'compile-projection "Cannot embed matcher in projection")]
;; TODO: consider options for treating sorted-maps as compounds rather than (useless) atoms
[(? sorted-map?) (error 'compile-projection "Cannot match on sorted-maps at present")]
[(? non-object-struct?)
(cons (struct->struct-type p)
(walk-pair-chain (cdr (vector->list (struct->vector p)))
acc))]
;; TODO: consider options for treating hash tables as compounds rather than (useless) atoms
[(? hash?) (error 'compile-projection "Cannot match on hash tables at present")]
[other (cons other acc)]))
(walk-pair-chain ps0 '()))
@ -584,12 +619,12 @@
[(capture sub) sub] ;; TODO: maybe enforce non-nesting here too?
[(cons p1 p2) (cons (walk p1) (walk p2))]
[(? vector? v) (for/vector [(e (in-vector v))] (walk e))]
;; TODO: consider options for treating sorted-maps as compounds
;; rather than (useless) atoms
[(? sorted-map?) (error 'projection->pattern "Cannot match on sorted-maps at present")]
[(? non-object-struct?)
(apply (struct-type-make-constructor (struct->struct-type p))
(map walk (cdr (vector->list (struct->vector p)))))]
;; TODO: consider options for treating hash tables as compounds
;; rather than (useless) atoms
[(? hash?) (error 'projection->pattern "Cannot match on hash tables at present")]
[other other])))
;; Matcher × CompiledProjection -> Matcher
@ -619,14 +654,14 @@
[(cons (== ?) k)
(match m
[(wildcard-sequence _) (add-wild (walk m k))]
[(? hash?)
(for/fold [(acc (add-wild (walk (rlookup m ? #f) k)))] [((key mk) (in-hash m))]
(if (eq? key ?)
acc
(add-edge acc key (cond
[(key-open? key) (balanced mk (lambda (mk) (walk mk k)))]
[(key-close? key) #f]
[else (walk mk k)]))))]
[(? sorted-map?)
(for/fold [(acc (add-wild (walk (rlookup m ? #f) k)))]
[(key-mk (sorted-map-to-alist m)) #:when (not (eq? (car key-mk) ?))]
(match-define (cons key mk) key-mk)
(add-edge acc key (cond
[(key-open? key) (balanced mk (lambda (mk) (walk mk k)))]
[(key-close? key) #f]
[else (walk mk k)])))]
[_ (matcher-empty)])]
[(cons sigma k)
@ -637,21 +672,21 @@
[(key-open? sigma) (walk (rwildseq m) k)]
[(key-close? sigma) (walk mk k)]
[else (walk m k)])]
[(? hash?) (walk (rlookup m sigma (rlookup m ? #f)) k)]
[(? sorted-map?) (walk (rlookup m sigma (rlookup m ? #f)) k)]
[_ (matcher-empty)]))])))
(define (general-balanced add-wildseq add-wild add-edge m k)
(let walk ((m m) (k k))
(match m
[(wildcard-sequence mk) (add-wildseq (k mk))]
[(? hash?)
(for/fold [(acc (add-wild (walk (rlookup m ? #f) k)))] [((key mk) (in-hash m))]
(if (eq? key ?)
acc
(add-edge acc key (cond
[(key-open? key) (walk mk (lambda (mk) (walk mk k)))]
[(key-close? key) (k mk)]
[else (walk mk k)]))))]
[(? sorted-map?)
(for/fold [(acc (add-wild (walk (rlookup m ? #f) k)))]
[(key-mk (sorted-map-to-alist m)) #:when (not (eq? (car key-mk) ?))]
(match-define (cons key mk) key-mk)
(add-edge acc key (cond
[(key-open? key) (walk mk (lambda (mk) (walk mk k)))]
[(key-close? key) (k mk)]
[else (walk mk k)])))]
[_ (matcher-empty)])))
drop-match))
@ -679,9 +714,10 @@
(define (walk m k)
(match m
[(wildcard-sequence _) #f]
[(? hash?)
(and (not (hash-has-key? m ?))
(for/fold [(acc (set))] [((key mk) (in-hash m))]
[(? sorted-map?)
(and (not (sorted-map-has-key? m ?))
(for/fold [(acc (set))] [(key-mk (sorted-map-to-alist m))]
(match-define (cons key mk) key-mk)
(maybe-union
acc
(cond
@ -702,9 +738,10 @@
(define (walk-seq m k)
(match m
[(wildcard-sequence _) #f]
[(? hash?)
(and (not (hash-has-key? m ?))
(for/fold [(acc (set))] [((key mk) (in-hash m))]
[(? sorted-map?)
(and (not (sorted-map-has-key? m ?))
(for/fold [(acc (set))] [(key-mk (sorted-map-to-alist m))]
(match-define (cons key mk) key-mk)
(maybe-union acc (cond
[(key-close? key) (k (set '()) mk)]
[else (walk (rseq key mk)
@ -756,10 +793,11 @@
(d "{")
(d vs)
(d "}")]
[(? hash? h)
(if (zero? (hash-count h))
(d " ::: empty hash!")
(for/fold [(need-sep? #f)] [((key k) (in-hash h))]
[(? sorted-map? h)
(if (zero? (sorted-map-size h))
(d " ::: empty sorted-map!")
(for/fold [(need-sep? #f)] [(key-k (sorted-map-to-alist h))]
(match-define (cons key k) key-k)
(when need-sep?
(newline port)
(d (make-string i #\space)))
@ -790,16 +828,18 @@
[#f '()]
[(success v) (list "" (success->jsexpr v))]
[(wildcard-sequence m1) (list "...)" (walk m1))]
[(? hash?) (for/list [((k v) (in-hash m))]
(list (match k
[(== ?) (list "__")]
[(== SOL) (list "(")]
[(== SOV) (list "#(")]
[(== EOS) (list ")")]
[(? struct-type? t)
(list (string-append (symbol->string (struct-type-name t)) "("))]
[else k])
(walk v)))])))
[(? sorted-map?)
(for/list [(kv (sorted-map-to-alist m))]
(match-define (cons k v) kv)
(list (match k
[(== ?) (list "__")]
[(== SOL) (list "(")]
[(== SOV) (list "#(")]
[(== EOS) (list ")")]
[(? struct-type? t)
(list (string-append (symbol->string (struct-type-name t)) "("))]
[else k])
(walk v)))])))
;; String -> String
;; Undoes the encoding of struct-type names used in the JSON serialization of Matchers.
@ -817,25 +857,26 @@
[(list "" vj) (rsuccess (jsexpr->success vj))]
[(list "...)" j1) (rwildseq (walk j1))]
[(list (list kjs vjs) ...)
(canonicalize
(for/hash [(kj kjs) (vj vjs)]
(values (match kj
[(list "__") ?]
[(list "(") SOL]
[(list "#(") SOV]
[(list ")") EOS]
[(list (? string? s))
(match (deserialize-struct-type-name s)
[#f (error 'jsexpr->matcher
"Illegal open-parenthesis mark ~v"
kj)]
[tn (match (struct-type-name->struct-type tn)
[#f (error 'jsexpr->matcher
"Unexpected struct type ~v"
tn)]
[t t])])]
[other other])
(walk vj))))])))
(for/fold [(acc empty-smap)]
[(kj kjs) (vj vjs)]
(sorted-map-insert acc
(match kj
[(list "__") ?]
[(list "(") SOL]
[(list "#(") SOV]
[(list ")") EOS]
[(list (? string? s))
(match (deserialize-struct-type-name s)
[#f (error 'jsexpr->matcher
"Illegal open-parenthesis mark ~v"
kj)]
[tn (match (struct-type-name->struct-type tn)
[#f (error 'jsexpr->matcher
"Unexpected struct type ~v"
tn)]
[t t])])]
[other other])
(walk vj)))])))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -1180,39 +1221,32 @@
(check-requal? (intersect (a 'a) (b 'a)) #f)
(check-exn #px"Cannot match on hash tables at present"
(check-exn #px"Cannot match on sorted-maps at present"
(lambda ()
(intersect (canonicalize (hash 'a 1 'b ?))
(canonicalize (hash 'a ? 'b 2)))))
;; (check-requal? (intersect (rseq 'a 1 'b ?) (rseq 'a ? 'b 2)) (rseq 'a 1 'b 2))
;; (check-requal? (intersect (rseq 'a 1 'b ?) (rseq 'a ?)) (void))
;; (check-requal? (intersect (rseq 'a 1 'b ?) (rseq 'a 1 'b ?)) (rseq 'a 1 'b ?))
;; (check-requal? (intersect (rseq 'a 1 'b ?) (rseq 'a ? 'c ?)) (void))
(define (h a b c d)
(sorted-map-insert (sorted-map-insert empty-smap a b) c d))
(intersect (h 'a 1 'b ?)
(h 'a ? 'b 2))))
;; (check-requal? (intersect (rseq 'a 1 'b ?) (rseq 'a 1 'b (list 2 ?)))
;; (rseq 'a 1 'b (list 2 ?)))
;; (check-requal? (intersect (rseq 'a 1 'b (list ? 3)) (rseq 'a 1 'b (list 2 ?)))
;; (rseq 'a 1 'b (list 2 3)))
(let ((H rseq-multi))
(newline)
(printf "Checking that intersection with wildcard is identity-like\n")
(define m1 (pretty-print-matcher*
(foldr matcher-union (matcher-empty)
(list (pattern->matcher SA (list 'a ?))
(pattern->matcher SB (list 'b ?))
(pattern->matcher SC (list 'b 'c))))))
(define m2 (pretty-print-matcher* (pattern->matcher SD ?)))
(define mi (pretty-print-matcher* (matcher-intersect m1 m2)))
(check-requal? mi
(H SOL (H 'a (H ? (H EOS (E (set 'A 'D))))
'b (H ? (H EOS (E (set 'B 'D)))
'c (H EOS (E (set 'B 'C 'D)))))))
(check-requal? (pretty-print-matcher*
(parameterize ((matcher-intersect-successes (lambda (v1 v2) v1)))
(matcher-intersect m1 m2)))
m1))
)
(let ((H rseq-multi))
(newline)
(printf "Checking that intersection with wildcard is identity-like\n")
(define m1 (pretty-print-matcher*
(foldr matcher-union (matcher-empty)
(list (pattern->matcher SA (list 'a ?))
(pattern->matcher SB (list 'b ?))
(pattern->matcher SC (list 'b 'c))))))
(define m2 (pretty-print-matcher* (pattern->matcher SD ?)))
(define mi (pretty-print-matcher* (matcher-intersect m1 m2)))
(check-requal? mi
(H SOL (H 'a (H ? (H EOS (E (set 'A 'D))))
'b (H ? (H EOS (E (set 'B 'D)))
'c (H EOS (E (set 'B 'C 'D)))))))
(check-requal? (pretty-print-matcher*
(parameterize ((matcher-intersect-successes (lambda (v1 v2) v1)))
(matcher-intersect m1 m2)))
m1))
)
(module+ test
(define (matcher-match-matcher-list m1 m2)
@ -1411,13 +1445,13 @@
(pattern->matcher SD (list ? 3))
(pattern->matcher SB (list 3 4)))))
(S '((("(")
((("__") ((2 (((")") (((")") ("" ("A")))))))
(3 (((")") (((")") ("" ("D")))))))))
(1 ((2 (((")") (((")") ("" ("A")))))))
((1 ((2 (((")") (((")") ("" ("A")))))))
(3 (((")") (((")") ("" ("D" "C")))))))))
(3 ((2 (((")") (((")") ("" ("A")))))))
(3 (((")") (((")") ("" ("D")))))))
(4 (((")") (((")") ("" ("B"))))))))))))))
(4 (((")") (((")") ("" ("B")))))))))
(("__") ((2 (((")") (((")") ("" ("A")))))))
(3 (((")") (((")") ("" ("D"))))))))))))))
(check-equal? (matcher->jsexpr M (lambda (v) (map symbol->string (set->list v)))) S)
(check-requal? (jsexpr->matcher S (lambda (v) (list->set (map string->symbol v)))) M)))

478
minimart/sorted-map.rkt Normal file
View File

@ -0,0 +1,478 @@
#lang racket/base
;; Matt Might's red-black tree code from
;; http://matt.might.net/articles/red-black-delete/code/sorted-map.rkt
;;
;; Modified by Tony Garnock-Jones, July 2014:
;; - trees are hashconsed
;; - sorted-map-size is made constant-time
(provide (struct-out sorted-map)
sorted-map-empty
sorted-map-modify-at
sorted-map-insert
sorted-map-insert*
sorted-map-to-tree
sorted-map-to-alist
sorted-map-submap?
sorted-map-get
sorted-map-size
sorted-map-max
sorted-map-delete
sorted-map-has-key?
sorted-map-keys
sorted-map-values
)
(require "canonicalize.rkt")
(require "memoize.rkt")
(require racket/set)
; A purely functional sorted-map library.
; Provides logarithmic insert, update, get & delete.
; Based on Okasaki's red-black trees
; with purely functional red-black delete.
; Author: Matthew Might
; Site: http://matt.might.net/
; Page: http://matt.might.net/articles/red-black-delete/
(require (except-in racket/match define/match))
; Syntactic sugar for define forms
; with match as their body:
(define-syntax define/match
(syntax-rules ()
[(_ (id name) clause ...)
; =>
(define (id name)
(match name clause ...))]))
(define-syntax define/match*
(syntax-rules ()
[(_ (id name ...) clause ...)
; =>
(define (id name ...)
(match* (name ...)
clause ...))]))
; A form for matching the result of a comparison:
;; tonyg 20140718: changed to use the order? convention from data/order
(define-syntax switch-compare
(syntax-rules (= < >)
[(_ (cmp v1 v2)
[< action1 ...]
[= action2 ...]
[> action3 ...])
; =>
(let ((dir (cmp v1 v2)))
(case dir
[(<) action1 ...]
[(=) action2 ...]
[(>) action3 ...]))]))
;; tonyg 20140718: for hash-consing, we have to be able to compare
;; trees using equal?, which necessitates use of #:transparent in our
;; struct definitions.
; Struct definition for sorted-map:
(define-struct sorted-map (compare) #:transparent)
; Internal nodes:
(define-struct (T sorted-map)
(color left key value right) #:transparent)
; Leaf nodes:
(define-struct (L sorted-map) () #:transparent)
; Double-black leaf nodes:
(define-struct (BBL sorted-map) () #:transparent)
; Color manipulators.
; Turns a node black.
(define/match (blacken node)
[(T cmp _ l k v r) (canonicalize (T cmp 'B l k v r))]
[(BBL cmp) (canonicalize (L cmp))]
[(L _) node])
; Turns a node red.
(define/match (redden node)
[(T cmp _ l k v r) (canonicalize (T cmp 'R l k v r))]
[(L _) (error "Can't redden leaf.")])
; Color arithmetic.
(define/match (black+1 color-or-node)
[(T cmp c l k v r) (canonicalize (T cmp (black+1 c) l k v r))]
[(L cmp) (canonicalize (BBL cmp))]
['-B 'R]
['R 'B]
['B 'BB])
(define/match (black-1 color-or-node)
[(T cmp c l k v r) (canonicalize (T cmp (black-1 c) l k v r))]
[(BBL cmp) (canonicalize (L cmp))]
['R '-B]
['B 'R]
['BB 'B])
; Creates an empty map:
(define (sorted-map-empty compare)
(canonicalize (L compare)))
;; Custom patterns.
; Matches internal nodes:
(define-match-expander T!
(syntax-rules ()
[(_) (T _ _ _ _ _ _)]
[(_ l r) (T _ _ l _ _ r)]
[(_ c l r) (T _ c l _ _ r)]
[(_ l k v r) (T _ _ l k v r)]
[(_ c l k v r) (T _ c l k v r)]))
; Matches leaf nodes:
(define-match-expander L!
(syntax-rules ()
[(_) (L _)]))
; Matches black nodes (leaf or internal):
(define-match-expander B
(syntax-rules ()
[(_) (or (T _ 'B _ _ _ _)
(L _))]
[(_ cmp) (or (T cmp 'B _ _ _ _)
(L cmp))]
[(_ l r) (T _ 'B l _ _ r)]
[(_ l k v r) (T _ 'B l k v r)]
[(_ cmp l k v r) (T cmp 'B l k v r)]))
; Matches red nodes:
(define-match-expander R
(syntax-rules ()
[(_) (T _ 'R _ _ _ _)]
[(_ cmp) (T cmp 'R _ _ _ _)]
[(_ l r) (T _ 'R l _ _ r)]
[(_ l k v r) (T _ 'R l k v r)]
[(_ cmp l k v r) (T cmp 'R l k v r)]))
; Matches negative black nodes:
(define-match-expander -B
(syntax-rules ()
[(_) (T _ '-B _ _ _ _)]
[(_ cmp) (T cmp '-B _ _ _ _)]
[(_ l k v r) (T _ '-B l k v r)]
[(_ cmp l k v r) (T cmp '-B l k v r)]))
; Matches double-black nodes (leaf or internal):
(define-match-expander BB
(syntax-rules ()
[(_) (or (T _ 'BB _ _ _ _)
(BBL _))]
[(_ cmp) (or (T cmp 'BB _ _ _ _)
(BBL _))]
[(_ l k v r) (T _ 'BB l k v r)]
[(_ cmp l k v r) (T cmp 'BB l k v r)]))
(define/match (double-black? node)
[(BB) #t]
[_ #f])
; Turns a black-balanced tree with invalid colors
; into a black-balanced tree with valid colors:
(define (balance-node node)
(define cmp (sorted-map-compare node))
(match node
[(or (T! (or 'B 'BB) (R (R a xk xv b) yk yv c) zk zv d)
(T! (or 'B 'BB) (R a xk xv (R b yk yv c)) zk zv d)
(T! (or 'B 'BB) a xk xv (R (R b yk yv c) zk zv d))
(T! (or 'B 'BB) a xk xv (R b yk yv (R c zk zv d))))
; =>
(canonicalize (T cmp
(black-1 (T-color node))
(canonicalize (T cmp 'B a xk xv b))
yk
yv
(canonicalize (T cmp 'B c zk zv d))))]
[(BB a xk xv (-B (B b yk yv c) zk zv (and d (B))))
; =>
(canonicalize (T cmp
'B
(canonicalize (T cmp 'B a xk xv b))
yk
yv
(balance cmp 'B c zk zv (redden d))))]
[(BB (-B (and a (B)) xk xv (B b yk yv c)) zk zv d)
; =>
(canonicalize (T cmp
'B
(balance cmp 'B (redden a) xk xv b)
yk
yv
(canonicalize (T cmp 'B c zk zv d))))]
[else node]))
(define (balance cmp c l k v r)
(balance-node (canonicalize (T cmp c l k v r))))
; Moves to a location in the map and
; peformes an update with the function:
;; tonyg 20140718 added on-missing argument
(define (sorted-map-modify-at node key f [on-missing (lambda () #f)])
(define (internal-modify-at node key f)
(match node
[(T cmp c l k v r)
; =>
(switch-compare (cmp key k)
[< (balance cmp c (internal-modify-at l key f) k v r)]
[= (canonicalize (T cmp c l k (f k v) r))]
[> (balance cmp c l k v (internal-modify-at r key f))])]
[(L cmp)
; =>
(canonicalize (T cmp 'R node key (f key (on-missing)) node))]))
(blacken (internal-modify-at node key f)))
; Inserts an element into the map
(define (sorted-map-insert node key value)
(sorted-map-modify-at node key (lambda (k v) value)))
; Inserts several elements into the map:
(define (sorted-map-insert* node keys values)
(if (or (not (pair? keys))
(not (pair? values)))
node
(sorted-map-insert*
(sorted-map-insert node (car keys) (car values))
(cdr keys) (cdr values))))
; Coverts a sorted map into a tree:
(define/match (sorted-map-to-tree node)
[(L!) 'L]
[(T! c l k v r) `(,c ,(sorted-map-to-tree l) ,k ,v ,(sorted-map-to-tree r))]
[else node])
; Converts a sorted map into an alist:
(define (sorted-map-to-alist node)
(define (sorted-map-prepend-as-alist node alist)
(match node
[(T! l k v r)
; =>
(sorted-map-prepend-as-alist
l
(cons (cons k v)
(sorted-map-prepend-as-alist r alist)))]
[(L _)
; =>
alist]))
(sorted-map-prepend-as-alist node '()))
; Tests whether this map is a submap of another map:
(define (sorted-map-submap? map1 map2 #:by [by equal?])
(define amap1 (sorted-map-to-alist map1))
(define amap2 (sorted-map-to-alist map2))
(define cmp (sorted-map-compare map1))
(define (compare-alists amap1 amap2)
(match* (amap1 amap2)
[(`((,k1 . ,v1) . ,rest1)
`((,k2 . ,v2) . ,rest2))
; =>
(switch-compare (cmp k1 k2)
[< #f]
[= (and (by v1 v2) (compare-alists rest1 rest2))]
[> (compare-alists amap1 rest2)])]
[('() '()) #t]
[(_ '()) #f]
[('() _) #t]))
(compare-alists amap1 amap2))
; Gets an element from a sorted map:
;; tonyg 20140718 add on-missing argument
(define (sorted-map-get node key [on-missing (lambda () #f)])
(let walk ((node node))
(match node
[(L!) (on-missing)]
[(T cmp c l k v r)
; =>
(switch-compare (cmp key k)
[< (walk l)]
[= v]
[> (walk r)])])))
; Returns the size of the sorted map:
;; tonyg 20140718 this is memoized to run in O(1) for every smap
(define sorted-map-size
(memoize1
(lambda (smap)
(match smap
[(T! l r) (+ 1 (sorted-map-size l)
(sorted-map-size r))]
[(L!) 0]))))
; Returns the maxium (key . value) pair:
(define/match (sorted-map-max node)
[(T! _ k v (L!)) (cons k v)]
[(T! _ r) (sorted-map-max r)])
; Performs a check to see if both invariants are met:
(define (sorted-map-is-legal? node)
; Calculates the max black nodes on path:
(define/match (max-black-height node)
[(T! c l r)
; =>
(+ (if (eq? c 'B) 1 0) (max (max-black-height l)
(max-black-height r)))]
[(L!) 1])
; Calculates the min black nodes on a path:
(define/match (min-black-height node)
[(T! c l r)
; =>
(+ (if (eq? c 'B) 1 0) (min (min-black-height l)
(min-black-height r)))]
[(L!) 1])
; Is this tree black-balanced?
(define (black-balanced? node)
(= (max-black-height node)
(min-black-height node)))
; Does this tree contain a red child of red?
(define/match (no-red-red? node)
[(or (B l r)
(R (and l (B)) (and r (B))))
; =>
(and (no-red-red? l) (no-red-red? r))]
[(L!) #t]
[else #f])
(let ((colored? (no-red-red? node))
(balanced? (black-balanced? node)))
(and colored? balanced?)))
; Deletes a key from this map:
(define (sorted-map-delete node key)
(define cmp (sorted-map-compare node))
; Finds the node to be removed:
(define/match (del node)
[(T! c l k v r)
; =>
(switch-compare (cmp key k)
[< (bubble c (del l) k v r)]
[= (remove node)]
[> (bubble c l k v (del r))])]
[else node])
; Removes this node; it might
; leave behind a double-black node:
(define/match (remove node)
; Leaves are easiest to kill:
[(R (L!) (L!)) (canonicalize (L cmp))]
[(B (L!) (L!)) (canonicalize (BBL cmp))]
; Killing a node with one child;
; parent or child is red:
[(or (R child (L!))
(R (L!) child))
; =>
child]
[(or (B (R l k v r) (L!))
(B (L!) (R l k v r)))
; =>
(canonicalize (T cmp 'B l k v r))]
; Killing a black node with one black child:
[(or (B (L!) (and child (B)))
(B (and child (B)) (L!)))
; =>
(black+1 child)]
; Killing a node with two sub-trees:
[(T! c (and l (T!)) (and r (T!)))
; =>
(match-let (((cons k v) (sorted-map-max l))
(l* (remove-max l)))
(bubble c l* k v r))])
; Kills a double-black, or moves it to the top:
(define (bubble c l k v r)
(cond
[(or (double-black? l) (double-black? r))
; =>
(balance cmp (black+1 c) (black-1 l) k v (black-1 r))]
[else (canonicalize (T cmp c l k v r))]))
; Removes the max node:
(define/match (remove-max node)
[(T! l (L!)) (remove node)]
[(T! c l k v r ) (bubble c l k v (remove-max r))])
; Delete the key, and color the new root black:
(blacken (del node)))
;; tonyg 20140718 True iff key is in node
(define (sorted-map-has-key? node key)
(let walk ((node node))
(match node
[(L!) #f]
[(T cmp c l k v r)
(switch-compare (cmp key k)
[< (walk l)]
[= #t]
[> (walk r)])])))
;; tonyg 20140718 Retrieve a set of the keys of smap
(define (sorted-map-keys smap [empty-set (set)])
(let walk ((node smap) (acc empty-set))
(match node
[(T! l k v r) (walk l (set-add (walk r acc) k))]
[(L _) acc])))
;; tonyg 20140718 Retrieve a list of the values of smap
(define (sorted-map-values smap)
(let walk ((node smap) (acc '()))
(match node
[(T! l k v r) (walk l (cons v (walk r acc)))]
[(L _) acc])))