Work on cheatsheets

This commit is contained in:
Tony Garnock-Jones 2023-10-17 00:31:10 +02:00
parent 01a766c974
commit 3360e013a4
7 changed files with 292 additions and 6 deletions

View File

@ -1,3 +1,57 @@
For a value <span class="postcard-grammar binarysyntax">*V*</span>, we write <span
class="postcard-grammar binarysyntax">«*V*»</span> for the binary encoding of <span
class="postcard-grammar binarysyntax">*V*</span>.
{:.postcard-grammar.binarysyntax}
«`#f`» | = | `80`
«`#t`» | = | `81`
{:.postcard-grammar.binarysyntax}
«`@`*W* *V*» | = | `85` «*W*» «*V*»
«`#!`*V*» | = | `86` «*V*»
{:.postcard-grammar.binarysyntax}
«*V*» | = | `87``04` **binary32**(*V*) | if *V* ∈ Float
«*V*» | = | `87``08` **binary64**(*V*) | if *V* ∈ Double
{:.postcard-grammar.binarysyntax}
«*V*» | = | `B0` **varint**(&#124;**intbytes**(*V*)&#124;) **intbytes**(*V*) | if *V* ∈ SignedInteger
«*V*» | = | `B1` **varint**(&#124;**utf8**(*V*)&#124;) **utf8**(*V*) | if *V* ∈ String
«*V*» | = | `B2` **varint**(&#124;*V*&#124;) *V* | if *V* ∈ ByteString
«*V*» | = | `B3` **varint**(&#124;**utf8**(*V*)&#124;) **utf8**(*V*) | if *V* ∈ Symbol
{:.postcard-grammar.binarysyntax}
«`<`*L* *F*<sub>1</sub> ... *F*<sub>m</sub>`>`» | = | `B4` «*L*» «*F*<sub>1</sub>» ... «*F*<sub>m</sub>» `84`
«`[`*X*<sub>1</sub> ... *X*<sub>m</sub>`]`» | = | `B5` «*X*<sub>1</sub>» ... «*X*<sub>m</sub>» `84`
«`#{`*E*<sub>1</sub> ... *E*<sub>m</sub>`}`» | = | `B6` «*E*<sub>1</sub>» ... «*E*<sub>m</sub>» `84`
«`{`*K*<sub>1</sub>`:`*V*<sub>1</sub> ... *K*<sub>m</sub>`:`*V*<sub>m</sub>`}`» | = | `B7` «*K*<sub>1</sub>» «*V*<sub>1</sub>» ... «*K*<sub>m</sub>» «*V*<sub>m</sub>» `84`
{:.postcard-grammar.binarysyntax}
**varint**(*n*) | = | <span class="outputish">*n*</span> | if *n* &lt; 128
| | <span class="outputish">(*n* &amp; 127) &#124; 128</span> **varint**(*n* &gt;&gt; 7) | if *n* ≥ 128
{:.postcard-grammar.binarysyntax}
**intbytes**(*n*) | = | *the empty byte sequence* | if *n* = 0
| | **signedBigEndian**(*n*) | otherwise
**signedBigEndian**(*n*) | = | <span class="outputish">*n* &amp; 255</span> | if -128 ≤ *n* ≤ 127
| | **signedBigEndian**(*n* &gt;&gt; 8) <span class="outputish">*n* &amp; 255</span> | otherwise
The functions <span class="postcard-grammar binarysyntax">**binary32**(*F*)</span> and <span
class="postcard-grammar binarysyntax">**binary64**(*D*)</span> yield big-endian 4- and 8-byte
IEEE 754 binary representations of <span class="postcard-grammar binarysyntax">*F*</span> and
<span class="postcard-grammar binarysyntax">*D*</span>, respectively.
<!--
Together, <span class="postcard-grammar binarysyntax">**div**</span> and <span
class="postcard-grammar binarysyntax">**mod**</span> give [Euclidean
division](https://en.wikipedia.org/wiki/Euclidean_division); that is, if
<span class="postcard-grammar binarysyntax">*n* **div** *d* = *q*</span> and
<span class="postcard-grammar binarysyntax">*n* **mod** *d* = *r*</span>, then
<span class="postcard-grammar binarysyntax">*n* = *dq* + *r*</span> and
<span class="postcard-grammar binarysyntax">0 ≤ *r* &lt; |d|</span>.
-->
<!--
For a value `V`, we write `«V»` for the binary encoding of `V`.
«#f» = [0x80]
@ -28,3 +82,4 @@ The functions `binary32(F)` and `binary64(D)` yield big-endian 4- and
The function `intbytes(x)` is a big-endian two's-complement signed binary representation of
`x`, taking exactly as many whole bytes as needed to unambiguously identify the value and its
sign. In particular, `intbytes(0)` is the empty byte sequence.
-->

View File

@ -0,0 +1,44 @@
{:.postcard-grammar}
| *Document* | := | *Value* **ws** |
| *Value* | := | **ws** (*Record* &#124; *Collection* &#124; *Atom* &#124; *Embedded* &#124; *Annotated*) |
| *Collection* | := | *Sequence* &#124; *Dictionary* &#124; *Set* |
| *Atom* | := | *Boolean* &#124; *String* &#124; *ByteString* &#124; *QuotedSymbol* &#124; *Symbol* &#124; *Number* |
| **ws** | := | (**space** &#124; **tab** &#124; **cr** &#124; **lf** &#124;`,`)<sup></sup> |
{:.postcard-grammar}
| *Record* | := | `<`*Value*<sup>+</sup> **ws**`>` |
| *Sequence* | := | `[`*Value*<sup></sup> **ws**`]` |
| *Dictionary* | := | `{` (*Value* **ws**`:`*Value*)<sup></sup> **ws**`}` |
| *Set* | := | `#{`*Value*<sup></sup> **ws**`}` |
{:.postcard-grammar}
| *Boolean* | := | `#t`&#124;`#f` |
| *String* | := | `"` (*unescaped* &#124;`|`&#124; (*escaped* &#124;`\"`&#124;`\u`*hex* *hex* *hex* *hex*))<sup>⋆</sup> `"` |
| *ByteString* | := | `#"`*binchar*<sup></sup> `"`&#124;`#x"` (**ws** &#124; *hex* *hex*)<sup></sup> **ws**`"`&#124;`#[` (**ws** &#124; *base64char*)<sup></sup> **ws**`]` |
| *QuotedSymbol* | := | `|` (*unescaped* &#124;`"`&#124; (*escaped* &#124;`\|`&#124;`\u`*hex* *hex* *hex* *hex*))<sup>⋆</sup> `|` |
| *Symbol* | := | (`A`..`Z`&#124;`a`..`z`&#124;`0`..`9`&#124; *sympunct* &#124; *symuchar*)<sup>+</sup> |
| *Number* | := | *Float* &#124; *Double* &#124; *SignedInteger* |
| *Float* | := | *flt* (`f`&#124;`F`) &#124;`#xf"` (**ws** *hex* *hex*)<sup>4</sup> **ws**`"` |
| *Double* | := | *flt* &#124;`#xd"` (**ws** *hex* *hex*)<sup>8</sup> **ws**`"` |
| *SignedInteger* | := | *int* |
{:.postcard-grammar}
| *Embedded* | := | `#!`*Value* |
| *Annotated* | := | *Annotation* *Value* |
| *Annotation* | := | `@`*Value* &#124;`;`« any unicode scalar value except **cr** or **lf** » (**cr** &#124; **lf**) |
{:.postcard-grammar}
| *escaped* | := | `\\`&#124;`\/`&#124;`\b`&#124;`\f`&#124;`\n`&#124;`\r`&#124;`\t` |
| *unescaped* | := | « any unicode scalar value except `"`, `\`, or `|` » |
| *binchar* | := | *binunescaped* &#124; (*escaped* &#124;`\"`&#124;`\x`*hex* *hex*) |
| *binunescaped* | := | « any unicode scalar value between 32 and 126, except `"` or `\` » |
| *base64char* | := | `A`..`Z`&#124;`a`..`z`&#124;`0`..`9`&#124;`+`&#124;`/`&#124;`-`&#124;`_`&#124;`=` |
| *sympunct* | := | `~`&#124;`!`&#124;`$`&#124;`%`&#124;`^`&#124;`&`&#124;`*`&#124;`?`&#124;`_`&#124;`=`&#124;`+`&#124;`-`&#124;`/`&#124;`.` |
| *symuchar* | := | « any scalar value greater than 127 whose Unicode category is Lu, Ll, Lt, Lm, Lo, Mn, Mc, Me, Nd, Nl, No, Pc, Pd, Po, Sc, Sm, Sk, So, or Co » |
{:.postcard-grammar}
| *flt* | := | *int* ( *frac* *exp* &#124; *frac* &#124; *exp* ) |
| *int* | := | (`-`&#124;`+`) (`0`..`9`)<sup>+</sup> |
| *frac* | := | `.` (`0`..`9`)<sup>+</sup> |
| *exp* | := | (`e`&#124;`E`) (`-`&#124;`+`) (`0`..`9`)<sup>+</sup> |
| *hex* | := | `A`..`F`&#124;`a`..`f`&#124;`0`..`9` |

129
_includes/intbytes.rkt Normal file
View File

@ -0,0 +1,129 @@
#lang racket
(define (base-bytes v)
(define byte-count (cond [(zero? v) 0]
[else (define raw-bit-count (+ (integer-length v) 1))
(quotient (+ raw-bit-count 7) 8)]))
(for/list [(shift (in-range (* byte-count 8) 0 -8))]
(bitwise-bit-field v (- shift 8) shift)))
(define (mod n d) (modulo n d)) ;; sign equal to sign of d
(define (div n d) (/ (- n (mod n d)) d)) ;; sign equal to sign of n, or zero
;;---------------------------------------------------------------------------
;; One
;;
;; (define (intbytes* v)
;; (cond [(zero? v) '()]
;; [(= v -1) '()]
;; [else (append (intbytes* (div v 256)) (list (mod v 256)))]))
;;
;; (define (intbytes v)
;; (define bs (intbytes* v))
;; (define looks-negative (and (pair? bs) (>= (car bs) 128)))
;; (if (xor (negative? v) looks-negative)
;; (cons (if (negative? v) 255 0) bs)
;; bs))
;;---------------------------------------------------------------------------
;; Two
;;
;; (define (intbytes** v)
;; (cond [(zero? v) '()]
;; [else (append (intbytes** (quotient v 256)) (list (remainder v 256)))]))
;;
;; (define (intbytes* v)
;; (define bs (intbytes** v))
;; (if (and (pair? bs) (>= (car bs) 128))
;; (cons 0 bs)
;; bs))
;;
;; (define (intbytes v)
;; (if (negative? v)
;; (map (lambda (n) (- 255 n)) (intbytes* (- (+ v 1))))
;; (intbytes* v)))
;;---------------------------------------------------------------------------
;; Three
;;
;; (define (intbytes+ v)
;; (cond [(>= v 128) (append (intbytes+ (div v 256)) (list (mod v 256)))]
;; [else (list (mod v 256))]))
;;
;; (define (intbytes- v)
;; (cond [(< v -128) (append (intbytes- (div v 256)) (list (mod v 256)))]
;; [else (list (mod v 256))]))
;;
;; (define (intbytes v)
;; (cond [(negative? v) (intbytes- v)]
;; [(zero? v) '()]
;; [(positive? v) (intbytes+ v)]))
;;---------------------------------------------------------------------------
;; Four
;;
;; (define (intbytes* v)
;; (append (if (<= -128 v 127)
;; '()
;; (intbytes* (div v 256)))
;; (list (mod v 256))))
;;
;; (define (intbytes v)
;; (if (zero? v)
;; '()
;; (intbytes* v)))
;;---------------------------------------------------------------------------
;; Five
;;
;; (define (intbytes* v)
;; (if (<= -128 v 127)
;; (list (mod v 256))
;; (append (intbytes* (div v 256)) (list (mod v 256)))))
;;
;; (define (intbytes v)
;; (if (zero? v)
;; '()
;; (intbytes* v)))
;;---------------------------------------------------------------------------
;; Six
(define (intbytes* v)
(if (<= -128 v 127)
(list (bitwise-and v 255))
(append (intbytes* (arithmetic-shift v -8)) (list (bitwise-and v 255)))))
(define (intbytes v)
(if (zero? v)
'()
(intbytes* v)))
(define cases `(
(-257 (#xfe #xff))
(-256 (#xff #x00))
(-255 (#xff #x01))
(-129 (#xff #x7f))
(-128 (#x80))
(-127 (#x81))
(-2 (#xfe))
(-1 (#xff))
(0 ())
(1 (#x01))
(127 (#x7f))
(128 (#x00 #x80))
(255 (#x00 #xff))
(256 (#x01 #x00))
(32767 (#x7f #xff))
(32768 (#x00 #x80 #x00))
(65535 (#x00 #xff #xff))
(65536 (#x01 #x00 #x00))
))
(module+ test
(require rackunit)
(for [(c (in-list cases))]
(match-define (list input output) c)
(writeln (list input output (base-bytes input) (intbytes input)))
(check-equal? output (base-bytes input))
(check-equal? output (intbytes input))))

View File

@ -0,0 +1,17 @@
Here are a few example values, written using the [text
syntax](https://preserves.dev/preserves-text.html):
Boolean : #t #f
Float : 1.0f 10.4e3f -100.6f
Double : 1.0 10.4e3 -100.6
Integer : 1 0 -100
String : "Hello, world!\n"
ByteString : #"bin\x00str\x00" #[YmluAHN0cgA] #x"62696e0073747200"
Symbol : hello-world |hello world| = ! hello? || ...
Record : <label field1 field2 ...>
Sequence : [value1 value2 ...]
Set : #{value1 value2 ...}
Dictionary : {key1: value1 key2: value2 ...: ...}
Embedded : #!value
Commas are optional in sequences, sets, and dictionaries.

View File

@ -9,3 +9,7 @@ Tony Garnock-Jones <tonyg@leastfixedpoint.com>
## Machine-Oriented Binary Syntax
{% include cheatsheet-binary.md %}
## Human-Oriented Text Syntax
{% include cheatsheet-text.md %}

View File

@ -98,10 +98,17 @@ serializing in some other implementation-defined order.
The function `intbytes(x)` gives the big-endian two's-complement
binary representation of `x`, taking exactly as many whole bytes as
needed to unambiguously identify the value and its sign. The value 0
needs zero bytes to identify the value; non-zero values need at least
one byte, and the most-significant bit in the first byte is the sign
bit. See the [examples in the appendix below](#signedinteger-examples).
needed to unambiguously identify the value and its sign. `intbytes(0)`
is special-cased to be the empty byte
sequence;[^empty-intbytes-sequence] otherwise, the most-significant
bit of the first byte is the sign bit. See the [examples in the
appendix below](#signedinteger-examples).
[^empty-intbytes-sequence]: Without the special case of
`intbytes(0)` yielding the empty byte sequence, no input to
`intbytes` would result in an empty sequence. In principle, either
`0` or `-1` could be special-cased to the empty sequence; here, we
arbitrarily choose `0`.
### Strings, ByteStrings and Symbols.

View File

@ -1,5 +1,5 @@
:root {
--sans-font: -apple-system, BlinkMacSystemFont, avenir next, avenir, segoe ui, helvetica neue, helvetica, Cantarell, Ubuntu, roboto, noto, arial, sans-serif;
--sans-font: "Open Sans", -apple-system, BlinkMacSystemFont, avenir next, avenir, segoe ui, helvetica neue, helvetica, Cantarell, Ubuntu, roboto, noto, arial, sans-serif;
--serif-font: palatino, "Palatino Linotype", "Palatino LT STD", "URW Palladio L", "TeX Gyre Pagella", serif;
}
body {
@ -96,7 +96,8 @@ body > nav ul > li {
@media print {
@page { size: letter; margin: 4rem 0rem 4.333rem 0rem; }
body > nav { display: none; }
main { margin-left: 4.5rem; margin-right: 4.5rem; font-size: 10.5pt; }
main { margin-left: 4.5rem; margin-right: 4.5rem; }
html { font-size: 10.5pt; }
h1, h2 { page-break-before: always; margin-top: 0; }
hr+* { page-break-before: always; margin-top: 0; }
hr { display: none; }
@ -179,6 +180,35 @@ td {
padding-right: 0.5rem;
}
.postcard-grammar {
font-family: var(--sans-font);
font-size: 1rem;
color: #888;
}
table.postcard-grammar { background: #e9f0f9; }
.postcard-grammar.binarysyntax { color: black; }
.postcard-grammar tr > *:nth-child(1) { width: 7em; text-align: right; }
.postcard-grammar.binarysyntax tr > *:nth-child(1) { width: 13em; text-align: right; }
.postcard-grammar tr > *:nth-child(2) { width: 1em; }
.postcard-grammar sup {
color: #040;
font-size: 0.9rem;
}
.postcard-grammar em { color: black; }
.postcard-grammar strong { color: #400; font-weight: normal; }
.postcard-grammar td {
vertical-align: top;
}
.postcard-grammar pre, .postcard-grammar code {
color: black;
display: inline-block;
font-size: 1rem;
padding: 0 0.33rem;
}
.postcard-grammar .outputish { background: #eee; color: black; }
.postcard-grammar .outputish::before { content: '⌜'; }
.postcard-grammar .outputish::after { content: '⌝'; }
blockquote {
padding: 0.5rem 1rem;
border-left: solid #4f81bd 2px;