Work on cheatsheets
This commit is contained in:
parent
01a766c974
commit
3360e013a4
|
@ -1,3 +1,57 @@
|
|||
For a value <span class="postcard-grammar binarysyntax">*V*</span>, we write <span
|
||||
class="postcard-grammar binarysyntax">«*V*»</span> for the binary encoding of <span
|
||||
class="postcard-grammar binarysyntax">*V*</span>.
|
||||
|
||||
{:.postcard-grammar.binarysyntax}
|
||||
«`#f`» | = | `80`
|
||||
«`#t`» | = | `81`
|
||||
|
||||
{:.postcard-grammar.binarysyntax}
|
||||
«`@`*W* *V*» | = | `85` «*W*» «*V*»
|
||||
«`#!`*V*» | = | `86` «*V*»
|
||||
|
||||
{:.postcard-grammar.binarysyntax}
|
||||
«*V*» | = | `87``04` **binary32**(*V*) | if *V* ∈ Float
|
||||
«*V*» | = | `87``08` **binary64**(*V*) | if *V* ∈ Double
|
||||
|
||||
{:.postcard-grammar.binarysyntax}
|
||||
«*V*» | = | `B0` **varint**(|**intbytes**(*V*)|) **intbytes**(*V*) | if *V* ∈ SignedInteger
|
||||
«*V*» | = | `B1` **varint**(|**utf8**(*V*)|) **utf8**(*V*) | if *V* ∈ String
|
||||
«*V*» | = | `B2` **varint**(|*V*|) *V* | if *V* ∈ ByteString
|
||||
«*V*» | = | `B3` **varint**(|**utf8**(*V*)|) **utf8**(*V*) | if *V* ∈ Symbol
|
||||
|
||||
{:.postcard-grammar.binarysyntax}
|
||||
«`<`*L* *F*<sub>1</sub> ... *F*<sub>m</sub>`>`» | = | `B4` «*L*» «*F*<sub>1</sub>» ... «*F*<sub>m</sub>» `84`
|
||||
«`[`*X*<sub>1</sub> ... *X*<sub>m</sub>`]`» | = | `B5` «*X*<sub>1</sub>» ... «*X*<sub>m</sub>» `84`
|
||||
«`#{`*E*<sub>1</sub> ... *E*<sub>m</sub>`}`» | = | `B6` «*E*<sub>1</sub>» ... «*E*<sub>m</sub>» `84`
|
||||
«`{`*K*<sub>1</sub>`:`*V*<sub>1</sub> ... *K*<sub>m</sub>`:`*V*<sub>m</sub>`}`» | = | `B7` «*K*<sub>1</sub>» «*V*<sub>1</sub>» ... «*K*<sub>m</sub>» «*V*<sub>m</sub>» `84`
|
||||
|
||||
{:.postcard-grammar.binarysyntax}
|
||||
**varint**(*n*) | = | <span class="outputish">*n*</span> | if *n* < 128
|
||||
| | <span class="outputish">(*n* & 127) | 128</span> **varint**(*n* >> 7) | if *n* ≥ 128
|
||||
|
||||
{:.postcard-grammar.binarysyntax}
|
||||
**intbytes**(*n*) | = | *the empty byte sequence* | if *n* = 0
|
||||
| | **signedBigEndian**(*n*) | otherwise
|
||||
**signedBigEndian**(*n*) | = | <span class="outputish">*n* & 255</span> | if -128 ≤ *n* ≤ 127
|
||||
| | **signedBigEndian**(*n* >> 8) <span class="outputish">*n* & 255</span> | otherwise
|
||||
|
||||
The functions <span class="postcard-grammar binarysyntax">**binary32**(*F*)</span> and <span
|
||||
class="postcard-grammar binarysyntax">**binary64**(*D*)</span> yield big-endian 4- and 8-byte
|
||||
IEEE 754 binary representations of <span class="postcard-grammar binarysyntax">*F*</span> and
|
||||
<span class="postcard-grammar binarysyntax">*D*</span>, respectively.
|
||||
|
||||
<!--
|
||||
Together, <span class="postcard-grammar binarysyntax">**div**</span> and <span
|
||||
class="postcard-grammar binarysyntax">**mod**</span> give [Euclidean
|
||||
division](https://en.wikipedia.org/wiki/Euclidean_division); that is, if
|
||||
<span class="postcard-grammar binarysyntax">*n* **div** *d* = *q*</span> and
|
||||
<span class="postcard-grammar binarysyntax">*n* **mod** *d* = *r*</span>, then
|
||||
<span class="postcard-grammar binarysyntax">*n* = *dq* + *r*</span> and
|
||||
<span class="postcard-grammar binarysyntax">0 ≤ *r* < |d|</span>.
|
||||
-->
|
||||
|
||||
<!--
|
||||
For a value `V`, we write `«V»` for the binary encoding of `V`.
|
||||
|
||||
«#f» = [0x80]
|
||||
|
@ -28,3 +82,4 @@ The functions `binary32(F)` and `binary64(D)` yield big-endian 4- and
|
|||
The function `intbytes(x)` is a big-endian two's-complement signed binary representation of
|
||||
`x`, taking exactly as many whole bytes as needed to unambiguously identify the value and its
|
||||
sign. In particular, `intbytes(0)` is the empty byte sequence.
|
||||
-->
|
||||
|
|
|
@ -0,0 +1,44 @@
|
|||
{:.postcard-grammar}
|
||||
| *Document* | := | *Value* **ws** |
|
||||
| *Value* | := | **ws** (*Record* | *Collection* | *Atom* | *Embedded* | *Annotated*) |
|
||||
| *Collection* | := | *Sequence* | *Dictionary* | *Set* |
|
||||
| *Atom* | := | *Boolean* | *String* | *ByteString* | *QuotedSymbol* | *Symbol* | *Number* |
|
||||
| **ws** | := | (**space** | **tab** | **cr** | **lf** |`,`)<sup>⋆</sup> |
|
||||
|
||||
{:.postcard-grammar}
|
||||
| *Record* | := | `<`*Value*<sup>+</sup> **ws**`>` |
|
||||
| *Sequence* | := | `[`*Value*<sup>⋆</sup> **ws**`]` |
|
||||
| *Dictionary* | := | `{` (*Value* **ws**`:`*Value*)<sup>⋆</sup> **ws**`}` |
|
||||
| *Set* | := | `#{`*Value*<sup>⋆</sup> **ws**`}` |
|
||||
|
||||
{:.postcard-grammar}
|
||||
| *Boolean* | := | `#t`|`#f` |
|
||||
| *String* | := | `"` (*unescaped* |`|`| (*escaped* |`\"`|`\u`*hex* *hex* *hex* *hex*))<sup>⋆</sup> `"` |
|
||||
| *ByteString* | := | `#"`*binchar*<sup>⋆</sup> `"`|`#x"` (**ws** | *hex* *hex*)<sup>⋆</sup> **ws**`"`|`#[` (**ws** | *base64char*)<sup>⋆</sup> **ws**`]` |
|
||||
| *QuotedSymbol* | := | `|` (*unescaped* |`"`| (*escaped* |`\|`|`\u`*hex* *hex* *hex* *hex*))<sup>⋆</sup> `|` |
|
||||
| *Symbol* | := | (`A`..`Z`|`a`..`z`|`0`..`9`| *sympunct* | *symuchar*)<sup>+</sup> |
|
||||
| *Number* | := | *Float* | *Double* | *SignedInteger* |
|
||||
| *Float* | := | *flt* (`f`|`F`) |`#xf"` (**ws** *hex* *hex*)<sup>4</sup> **ws**`"` |
|
||||
| *Double* | := | *flt* |`#xd"` (**ws** *hex* *hex*)<sup>8</sup> **ws**`"` |
|
||||
| *SignedInteger* | := | *int* |
|
||||
|
||||
{:.postcard-grammar}
|
||||
| *Embedded* | := | `#!`*Value* |
|
||||
| *Annotated* | := | *Annotation* *Value* |
|
||||
| *Annotation* | := | `@`*Value* |`;`« any unicode scalar value except **cr** or **lf** » (**cr** | **lf**) |
|
||||
|
||||
{:.postcard-grammar}
|
||||
| *escaped* | := | `\\`|`\/`|`\b`|`\f`|`\n`|`\r`|`\t` |
|
||||
| *unescaped* | := | « any unicode scalar value except `"`, `\`, or `|` » |
|
||||
| *binchar* | := | *binunescaped* | (*escaped* |`\"`|`\x`*hex* *hex*) |
|
||||
| *binunescaped* | := | « any unicode scalar value between 32 and 126, except `"` or `\` » |
|
||||
| *base64char* | := | `A`..`Z`|`a`..`z`|`0`..`9`|`+`|`/`|`-`|`_`|`=` |
|
||||
| *sympunct* | := | `~`|`!`|`$`|`%`|`^`|`&`|`*`|`?`|`_`|`=`|`+`|`-`|`/`|`.` |
|
||||
| *symuchar* | := | « any scalar value greater than 127 whose Unicode category is Lu, Ll, Lt, Lm, Lo, Mn, Mc, Me, Nd, Nl, No, Pc, Pd, Po, Sc, Sm, Sk, So, or Co » |
|
||||
|
||||
{:.postcard-grammar}
|
||||
| *flt* | := | *int* ( *frac* *exp* | *frac* | *exp* ) |
|
||||
| *int* | := | (`-`|`+`) (`0`..`9`)<sup>+</sup> |
|
||||
| *frac* | := | `.` (`0`..`9`)<sup>+</sup> |
|
||||
| *exp* | := | (`e`|`E`) (`-`|`+`) (`0`..`9`)<sup>+</sup> |
|
||||
| *hex* | := | `A`..`F`|`a`..`f`|`0`..`9` |
|
|
@ -0,0 +1,129 @@
|
|||
#lang racket
|
||||
|
||||
(define (base-bytes v)
|
||||
(define byte-count (cond [(zero? v) 0]
|
||||
[else (define raw-bit-count (+ (integer-length v) 1))
|
||||
(quotient (+ raw-bit-count 7) 8)]))
|
||||
(for/list [(shift (in-range (* byte-count 8) 0 -8))]
|
||||
(bitwise-bit-field v (- shift 8) shift)))
|
||||
|
||||
(define (mod n d) (modulo n d)) ;; sign equal to sign of d
|
||||
(define (div n d) (/ (- n (mod n d)) d)) ;; sign equal to sign of n, or zero
|
||||
|
||||
;;---------------------------------------------------------------------------
|
||||
;; One
|
||||
;;
|
||||
;; (define (intbytes* v)
|
||||
;; (cond [(zero? v) '()]
|
||||
;; [(= v -1) '()]
|
||||
;; [else (append (intbytes* (div v 256)) (list (mod v 256)))]))
|
||||
;;
|
||||
;; (define (intbytes v)
|
||||
;; (define bs (intbytes* v))
|
||||
;; (define looks-negative (and (pair? bs) (>= (car bs) 128)))
|
||||
;; (if (xor (negative? v) looks-negative)
|
||||
;; (cons (if (negative? v) 255 0) bs)
|
||||
;; bs))
|
||||
|
||||
;;---------------------------------------------------------------------------
|
||||
;; Two
|
||||
;;
|
||||
;; (define (intbytes** v)
|
||||
;; (cond [(zero? v) '()]
|
||||
;; [else (append (intbytes** (quotient v 256)) (list (remainder v 256)))]))
|
||||
;;
|
||||
;; (define (intbytes* v)
|
||||
;; (define bs (intbytes** v))
|
||||
;; (if (and (pair? bs) (>= (car bs) 128))
|
||||
;; (cons 0 bs)
|
||||
;; bs))
|
||||
;;
|
||||
;; (define (intbytes v)
|
||||
;; (if (negative? v)
|
||||
;; (map (lambda (n) (- 255 n)) (intbytes* (- (+ v 1))))
|
||||
;; (intbytes* v)))
|
||||
|
||||
;;---------------------------------------------------------------------------
|
||||
;; Three
|
||||
;;
|
||||
;; (define (intbytes+ v)
|
||||
;; (cond [(>= v 128) (append (intbytes+ (div v 256)) (list (mod v 256)))]
|
||||
;; [else (list (mod v 256))]))
|
||||
;;
|
||||
;; (define (intbytes- v)
|
||||
;; (cond [(< v -128) (append (intbytes- (div v 256)) (list (mod v 256)))]
|
||||
;; [else (list (mod v 256))]))
|
||||
;;
|
||||
;; (define (intbytes v)
|
||||
;; (cond [(negative? v) (intbytes- v)]
|
||||
;; [(zero? v) '()]
|
||||
;; [(positive? v) (intbytes+ v)]))
|
||||
|
||||
;;---------------------------------------------------------------------------
|
||||
;; Four
|
||||
;;
|
||||
;; (define (intbytes* v)
|
||||
;; (append (if (<= -128 v 127)
|
||||
;; '()
|
||||
;; (intbytes* (div v 256)))
|
||||
;; (list (mod v 256))))
|
||||
;;
|
||||
;; (define (intbytes v)
|
||||
;; (if (zero? v)
|
||||
;; '()
|
||||
;; (intbytes* v)))
|
||||
|
||||
;;---------------------------------------------------------------------------
|
||||
;; Five
|
||||
;;
|
||||
;; (define (intbytes* v)
|
||||
;; (if (<= -128 v 127)
|
||||
;; (list (mod v 256))
|
||||
;; (append (intbytes* (div v 256)) (list (mod v 256)))))
|
||||
;;
|
||||
;; (define (intbytes v)
|
||||
;; (if (zero? v)
|
||||
;; '()
|
||||
;; (intbytes* v)))
|
||||
|
||||
;;---------------------------------------------------------------------------
|
||||
;; Six
|
||||
|
||||
(define (intbytes* v)
|
||||
(if (<= -128 v 127)
|
||||
(list (bitwise-and v 255))
|
||||
(append (intbytes* (arithmetic-shift v -8)) (list (bitwise-and v 255)))))
|
||||
|
||||
(define (intbytes v)
|
||||
(if (zero? v)
|
||||
'()
|
||||
(intbytes* v)))
|
||||
|
||||
(define cases `(
|
||||
(-257 (#xfe #xff))
|
||||
(-256 (#xff #x00))
|
||||
(-255 (#xff #x01))
|
||||
(-129 (#xff #x7f))
|
||||
(-128 (#x80))
|
||||
(-127 (#x81))
|
||||
(-2 (#xfe))
|
||||
(-1 (#xff))
|
||||
(0 ())
|
||||
(1 (#x01))
|
||||
(127 (#x7f))
|
||||
(128 (#x00 #x80))
|
||||
(255 (#x00 #xff))
|
||||
(256 (#x01 #x00))
|
||||
(32767 (#x7f #xff))
|
||||
(32768 (#x00 #x80 #x00))
|
||||
(65535 (#x00 #xff #xff))
|
||||
(65536 (#x01 #x00 #x00))
|
||||
))
|
||||
|
||||
(module+ test
|
||||
(require rackunit)
|
||||
(for [(c (in-list cases))]
|
||||
(match-define (list input output) c)
|
||||
(writeln (list input output (base-bytes input) (intbytes input)))
|
||||
(check-equal? output (base-bytes input))
|
||||
(check-equal? output (intbytes input))))
|
|
@ -0,0 +1,17 @@
|
|||
Here are a few example values, written using the [text
|
||||
syntax](https://preserves.dev/preserves-text.html):
|
||||
|
||||
Boolean : #t #f
|
||||
Float : 1.0f 10.4e3f -100.6f
|
||||
Double : 1.0 10.4e3 -100.6
|
||||
Integer : 1 0 -100
|
||||
String : "Hello, world!\n"
|
||||
ByteString : #"bin\x00str\x00" #[YmluAHN0cgA] #x"62696e0073747200"
|
||||
Symbol : hello-world |hello world| = ! hello? || ...
|
||||
Record : <label field1 field2 ...>
|
||||
Sequence : [value1 value2 ...]
|
||||
Set : #{value1 value2 ...}
|
||||
Dictionary : {key1: value1 key2: value2 ...: ...}
|
||||
Embedded : #!value
|
||||
|
||||
Commas are optional in sequences, sets, and dictionaries.
|
|
@ -9,3 +9,7 @@ Tony Garnock-Jones <tonyg@leastfixedpoint.com>
|
|||
## Machine-Oriented Binary Syntax
|
||||
|
||||
{% include cheatsheet-binary.md %}
|
||||
|
||||
## Human-Oriented Text Syntax
|
||||
|
||||
{% include cheatsheet-text.md %}
|
||||
|
|
|
@ -98,10 +98,17 @@ serializing in some other implementation-defined order.
|
|||
|
||||
The function `intbytes(x)` gives the big-endian two's-complement
|
||||
binary representation of `x`, taking exactly as many whole bytes as
|
||||
needed to unambiguously identify the value and its sign. The value 0
|
||||
needs zero bytes to identify the value; non-zero values need at least
|
||||
one byte, and the most-significant bit in the first byte is the sign
|
||||
bit. See the [examples in the appendix below](#signedinteger-examples).
|
||||
needed to unambiguously identify the value and its sign. `intbytes(0)`
|
||||
is special-cased to be the empty byte
|
||||
sequence;[^empty-intbytes-sequence] otherwise, the most-significant
|
||||
bit of the first byte is the sign bit. See the [examples in the
|
||||
appendix below](#signedinteger-examples).
|
||||
|
||||
[^empty-intbytes-sequence]: Without the special case of
|
||||
`intbytes(0)` yielding the empty byte sequence, no input to
|
||||
`intbytes` would result in an empty sequence. In principle, either
|
||||
`0` or `-1` could be special-cased to the empty sequence; here, we
|
||||
arbitrarily choose `0`.
|
||||
|
||||
### Strings, ByteStrings and Symbols.
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
:root {
|
||||
--sans-font: -apple-system, BlinkMacSystemFont, avenir next, avenir, segoe ui, helvetica neue, helvetica, Cantarell, Ubuntu, roboto, noto, arial, sans-serif;
|
||||
--sans-font: "Open Sans", -apple-system, BlinkMacSystemFont, avenir next, avenir, segoe ui, helvetica neue, helvetica, Cantarell, Ubuntu, roboto, noto, arial, sans-serif;
|
||||
--serif-font: palatino, "Palatino Linotype", "Palatino LT STD", "URW Palladio L", "TeX Gyre Pagella", serif;
|
||||
}
|
||||
body {
|
||||
|
@ -96,7 +96,8 @@ body > nav ul > li {
|
|||
@media print {
|
||||
@page { size: letter; margin: 4rem 0rem 4.333rem 0rem; }
|
||||
body > nav { display: none; }
|
||||
main { margin-left: 4.5rem; margin-right: 4.5rem; font-size: 10.5pt; }
|
||||
main { margin-left: 4.5rem; margin-right: 4.5rem; }
|
||||
html { font-size: 10.5pt; }
|
||||
h1, h2 { page-break-before: always; margin-top: 0; }
|
||||
hr+* { page-break-before: always; margin-top: 0; }
|
||||
hr { display: none; }
|
||||
|
@ -179,6 +180,35 @@ td {
|
|||
padding-right: 0.5rem;
|
||||
}
|
||||
|
||||
.postcard-grammar {
|
||||
font-family: var(--sans-font);
|
||||
font-size: 1rem;
|
||||
color: #888;
|
||||
}
|
||||
table.postcard-grammar { background: #e9f0f9; }
|
||||
.postcard-grammar.binarysyntax { color: black; }
|
||||
.postcard-grammar tr > *:nth-child(1) { width: 7em; text-align: right; }
|
||||
.postcard-grammar.binarysyntax tr > *:nth-child(1) { width: 13em; text-align: right; }
|
||||
.postcard-grammar tr > *:nth-child(2) { width: 1em; }
|
||||
.postcard-grammar sup {
|
||||
color: #040;
|
||||
font-size: 0.9rem;
|
||||
}
|
||||
.postcard-grammar em { color: black; }
|
||||
.postcard-grammar strong { color: #400; font-weight: normal; }
|
||||
.postcard-grammar td {
|
||||
vertical-align: top;
|
||||
}
|
||||
.postcard-grammar pre, .postcard-grammar code {
|
||||
color: black;
|
||||
display: inline-block;
|
||||
font-size: 1rem;
|
||||
padding: 0 0.33rem;
|
||||
}
|
||||
.postcard-grammar .outputish { background: #eee; color: black; }
|
||||
.postcard-grammar .outputish::before { content: '⌜'; }
|
||||
.postcard-grammar .outputish::after { content: '⌝'; }
|
||||
|
||||
blockquote {
|
||||
padding: 0.5rem 1rem;
|
||||
border-left: solid #4f81bd 2px;
|
||||
|
|
Loading…
Reference in New Issue